From d4981b5d8094d3eeee7ac73e3f33fcd78c94771a Mon Sep 17 00:00:00 2001
From: Cole Medin <cole@dynamous.ai>
Date: Sat, 18 Apr 2026 15:02:35 -0500
Subject: [PATCH 01/14] fix(workflows): fail loudly on SDK isError results
 (#1208) (#1291)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, `dag-executor` only failed nodes/iterations when the SDK
returned an `error_max_budget_usd` result. For every other
`isError: true` subtype — including `error_during_execution` — it
silently hit `break` and left the stream with whatever partial output
had accumulated, letting failed runs masquerade as successful ones with
empty or partial output.

This is the most likely explanation for the "5-second crash" symptom in
#1208: iterations finish instantly with empty text, the loop keeps
going, and only the `claude.result_is_error` log tips the user off.

Changes:
- Capture the SDK's `errors: string[]` detail on result messages
  (previously discarded) and surface it through `MessageChunk.errors`.
- Log `errors` and `stopReason` alongside `errorSubtype` in
  `claude.result_is_error` so users can see what actually failed.
- Throw from both the general node path and the loop iteration path
  on any `isError: true` result, including the subtype and SDK errors
  detail in the thrown message.

Note: this does not implement auto-retry. See PR comments on #1121 and
the analysis on #1208 — a retry-with-fresh-session approach for loop
iterations is not obviously correct until we see what
`error_during_execution` actually carries in the reporter's env.
This change is the observability + fail-loud step that has to come
first so that signal is no longer silent.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
(cherry picked from commit 4c6ddd994f4dce2683f8cd08a68d95f86122cc12)
---
 packages/providers/src/claude/provider.ts   |  10 +-
 packages/providers/src/types.ts             |   2 +
 packages/workflows/src/dag-executor.test.ts | 116 ++++++++++++++++++++
 packages/workflows/src/dag-executor.ts      |  41 +++++++
 4 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts
index 26935bf373..0821319317 100644
--- a/packages/providers/src/claude/provider.ts
+++ b/packages/providers/src/claude/provider.ts
@@ -740,6 +740,7 @@ async function* streamClaudeMessages(
         total_cost_usd?: number;
         stop_reason?: string | null;
         num_turns?: number;
+        errors?: string[];
         model_usage?: Record<
           string,
           {
@@ -751,9 +752,15 @@ async function* streamClaudeMessages(
         >;
       };
       const tokens = normalizeClaudeUsage(resultMsg.usage);
+      const sdkErrors = Array.isArray(resultMsg.errors) ? resultMsg.errors : undefined;
       if (resultMsg.is_error) {
         getLog().error(
-          { sessionId: resultMsg.session_id, errorSubtype: resultMsg.subtype },
+          {
+            sessionId: resultMsg.session_id,
+            errorSubtype: resultMsg.subtype,
+            stopReason: resultMsg.stop_reason,
+            errors: sdkErrors,
+          },
           'claude.result_is_error'
         );
       }
@@ -765,6 +772,7 @@ async function* streamClaudeMessages(
           ? { structuredOutput: resultMsg.structured_output }
           : {}),
         ...(resultMsg.is_error ? { isError: true, errorSubtype: resultMsg.subtype } : {}),
+        ...(resultMsg.is_error && sdkErrors?.length ? { errors: sdkErrors } : {}),
         ...(resultMsg.total_cost_usd !== undefined ? { cost: resultMsg.total_cost_usd } : {}),
         ...(resultMsg.stop_reason != null ? { stopReason: resultMsg.stop_reason } : {}),
         ...(resultMsg.num_turns !== undefined ? { numTurns: resultMsg.num_turns } : {}),
diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts
index 330669e0c5..5fdf48de17 100644
--- a/packages/providers/src/types.ts
+++ b/packages/providers/src/types.ts
@@ -62,6 +62,8 @@ export type MessageChunk =
       structuredOutput?: unknown;
       isError?: boolean;
       errorSubtype?: string;
+      /** SDK-provided error detail strings. Populated when isError is true. */
+      errors?: string[];
       cost?: number;
       stopReason?: string;
       numTurns?: number;
diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts
index c5822197e5..0c745b39e5 100644
--- a/packages/workflows/src/dag-executor.test.ts
+++ b/packages/workflows/src/dag-executor.test.ts
@@ -3594,6 +3594,70 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => {
       expect(sessionArg).toBe('loop-session-1');
     });
 
+    it('loop iteration fails loudly when SDK returns error_during_execution', async () => {
+      // Regression test for #1208: previously the loop silently broke on isError
+      // results and kept iterating with empty output, producing "5-second crashes"
+      // that masqueraded as successful iterations.
+      mockSendQueryDag.mockImplementation(function* () {
+        yield {
+          type: 'result',
+          isError: true,
+          errorSubtype: 'error_during_execution',
+          errors: ['Subprocess crashed mid-turn'],
+          sessionId: 'bad-session',
+        };
+      });
+
+      const store = createMockStore();
+      const mockDeps = createMockDeps(store);
+      const platform = createMockPlatform();
+      const workflowRun = makeWorkflowRun();
+
+      await executeDagWorkflow(
+        mockDeps,
+        platform,
+        'conv-dag',
+        testDir,
+        {
+          name: 'loop-iteration-err',
+          nodes: [
+            {
+              id: 'work',
+              loop: {
+                prompt: 'Do the work. Say DONE.',
+                until: 'DONE',
+                max_iterations: 5,
+              },
+            },
+          ],
+        },
+        workflowRun,
+        'claude',
+        undefined,
+        join(testDir, 'artifacts'),
+        join(testDir, 'logs'),
+        'main',
+        'docs/',
+        minimalConfig
+      );
+
+      // Should fail after one iteration rather than burning through max_iterations
+      expect(mockSendQueryDag.mock.calls.length).toBe(1);
+      // The loop_iteration_failed event should carry the subtype and SDK errors detail
+      const eventCalls = (store.createWorkflowEvent as ReturnType<typeof mock>).mock.calls;
+      const iterFailedEvents = eventCalls.filter(
+        (call: unknown[]) =>
+          (call[0] as Record<string, unknown>).event_type === 'loop_iteration_failed'
+      );
+      expect(iterFailedEvents.length).toBeGreaterThan(0);
+      const failedData = (iterFailedEvents[0][0] as Record<string, unknown>).data as Record<
+        string,
+        unknown
+      >;
+      expect(failedData.error).toContain('error_during_execution');
+      expect(failedData.error).toContain('Subprocess crashed mid-turn');
+    });
+
     it('non-interactive loop is unaffected (no pause)', async () => {
       mockSendQueryDag.mockImplementation(function* () {
         yield { type: 'assistant', content: 'Still working...' };
@@ -4617,6 +4681,58 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => {
     expect(capMessage).toBeDefined();
   });
 
+  it('fails node when SDK returns error_during_execution result', async () => {
+    // Regression test for #1208: previously we only failed on error_max_budget_usd
+    // and silently broke on all other isError subtypes, letting failed nodes
+    // masquerade as successes with empty output.
+    mockSendQueryDag.mockImplementation(function* () {
+      yield {
+        type: 'result',
+        isError: true,
+        errorSubtype: 'error_during_execution',
+        errors: ['Tool call failed: permission denied'],
+        sessionId: 'sid-err',
+      };
+    });
+
+    const store = createMockStore();
+    const mockDeps = createMockDeps(store);
+    const platform = createMockPlatform();
+    const workflowRun = makeWorkflowRun();
+
+    await executeDagWorkflow(
+      mockDeps,
+      platform,
+      'conv-dag',
+      testDir,
+      {
+        name: 'err-exec-test',
+        nodes: [{ id: 'step1', command: 'my-cmd' }],
+      },
+      workflowRun,
+      'claude',
+      undefined,
+      join(testDir, 'artifacts'),
+      join(testDir, 'logs'),
+      'main',
+      'docs/',
+      minimalConfig
+    );
+
+    // The node_failed event should carry the subtype and SDK errors detail
+    const eventCalls = (store.createWorkflowEvent as ReturnType<typeof mock>).mock.calls;
+    const nodeFailedEvents = eventCalls.filter(
+      (call: unknown[]) => (call[0] as Record<string, unknown>).event_type === 'node_failed'
+    );
+    expect(nodeFailedEvents.length).toBeGreaterThan(0);
+    const failedData = (nodeFailedEvents[0][0] as Record<string, unknown>).data as Record<
+      string,
+      unknown
+    >;
+    expect(failedData.error).toContain('error_during_execution');
+    expect(failedData.error).toContain('permission denied');
+  });
+
   it('forwards workflow-level effort to node when no per-node override', async () => {
     const mockDeps = createMockDeps();
     const platform = createMockPlatform();
diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts
index 432a784385..c363f4ce3f 100644
--- a/packages/workflows/src/dag-executor.ts
+++ b/packages/workflows/src/dag-executor.ts
@@ -767,6 +767,25 @@ async function executeNodeInternal(
             `Node '${node.id}' exceeded cost cap${cap !== undefined ? ` of $${cap.toFixed(2)}` : ''}.`
           );
         }
+        // Fail loudly on any other SDK error result. Previously we broke out of
+        // the stream silently, producing empty/partial output without signaling
+        // failure — which let failed iterations masquerade as successes (#1208).
+        if (msg.isError) {
+          const subtype = msg.errorSubtype ?? 'unknown';
+          const errorsDetail = msg.errors?.length ? ` — ${msg.errors.join('; ')}` : '';
+          getLog().error(
+            {
+              nodeId: node.id,
+              errorSubtype: subtype,
+              errors: msg.errors,
+              sessionId: msg.sessionId,
+              stopReason: msg.stopReason,
+              durationMs: Date.now() - nodeStartTime,
+            },
+            'dag.node_sdk_error_result'
+          );
+          throw new Error(`Node '${node.id}' failed: SDK returned ${subtype}${errorsDetail}`);
+        }
         break; // Result is the "I'm done" signal — don't wait for subprocess to exit
       } else if (msg.type === 'system' && msg.content) {
         // Forward provider warnings (⚠️) and MCP connection failures to the user.
@@ -1640,6 +1659,28 @@ async function executeLoopNode(
           if (msg.numTurns !== undefined) {
             loopTotalNumTurns = (loopTotalNumTurns ?? 0) + msg.numTurns;
           }
+          // Fail the iteration loudly on SDK error results. Previously we broke
+          // silently, producing empty output and continuing to the next iteration —
+          // which made `error_during_execution` on resumed interactive loops look
+          // like a "5-second crash" that kept burning iterations (#1208).
+          if (msg.isError) {
+            const subtype = msg.errorSubtype ?? 'unknown';
+            const errorsDetail = msg.errors?.length ? ` — ${msg.errors.join('; ')}` : '';
+            getLog().error(
+              {
+                nodeId: node.id,
+                iteration: i,
+                errorSubtype: subtype,
+                errors: msg.errors,
+                sessionId: msg.sessionId,
+                stopReason: msg.stopReason,
+              },
+              'loop_node.iteration_sdk_error'
+            );
+            throw new Error(
+              `Loop '${node.id}' iteration ${String(i)} failed: SDK returned ${subtype}${errorsDetail}`
+            );
+          }
           break; // Result is the "I'm done" signal — don't wait for subprocess to exit
         } else if (msg.type === 'tool' && msg.toolName) {
           const now = Date.now();

From 9251787ca71222980040281fee183c8c8a733b45 Mon Sep 17 00:00:00 2001
From: Kagura <kagura.chen28@gmail.com>
Date: Mon, 20 Apr 2026 21:19:50 +0800
Subject: [PATCH 02/14] fix(db): throw on corrupt commands JSON instead of
 silent empty fallback (#1033)

* fix(db): throw on corrupt commands JSON instead of silent empty fallback (#967)

getCodebaseCommands() silently returned {} when the commands column
contained corrupt JSON. Callers had no way to distinguish 'no commands'
from 'unreadable data', violating fail-fast principles.

Now throws a descriptive error with the codebase ID and a recovery hint.
The error is still logged for observability before throwing.

Adds two test cases: corrupt JSON throws, valid JSON string parses.

* fix: include parse error in log for better diagnostics

(cherry picked from commit 39a05b762f3f1b759119c72633d5caa1b7a4d0b2)
---
 packages/core/src/db/codebases.test.ts | 16 ++++++++++++++++
 packages/core/src/db/codebases.ts      |  9 ++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/packages/core/src/db/codebases.test.ts b/packages/core/src/db/codebases.test.ts
index 26c269a085..b9bdbb6f1f 100644
--- a/packages/core/src/db/codebases.test.ts
+++ b/packages/core/src/db/codebases.test.ts
@@ -189,6 +189,22 @@ describe('codebases', () => {
       // Original frozen object should be unchanged
       expect(frozenCommands).not.toHaveProperty('new-command');
     });
+
+    test('throws on corrupt JSON string (SQLite TEXT column)', async () => {
+      mockQuery.mockResolvedValueOnce(createQueryResult([{ commands: '{not valid json' }]));
+
+      await expect(getCodebaseCommands('codebase-123')).rejects.toThrow(
+        /Corrupt commands JSON for codebase codebase-123/
+      );
+    });
+
+    test('parses valid JSON string from SQLite TEXT column', async () => {
+      const commands = { plan: { path: 'plan.md', description: 'Plan' } };
+      mockQuery.mockResolvedValueOnce(createQueryResult([{ commands: JSON.stringify(commands) }]));
+
+      const result = await getCodebaseCommands('codebase-123');
+      expect(result).toEqual(commands);
+    });
   });
 
   describe('registerCommand', () => {
diff --git a/packages/core/src/db/codebases.ts b/packages/core/src/db/codebases.ts
index f3947fb6c1..27adc91557 100644
--- a/packages/core/src/db/codebases.ts
+++ b/packages/core/src/db/codebases.ts
@@ -59,9 +59,12 @@ export async function getCodebaseCommands(
   if (typeof raw === 'string') {
     try {
       parsed = JSON.parse(raw);
-    } catch {
-      getLog().error({ codebaseId: id, raw }, 'db.codebase_commands_json_parse_failed');
-      return {};
+    } catch (err) {
+      getLog().error({ codebaseId: id, raw, err }, 'db.codebase_commands_json_parse_failed');
+      throw new Error(
+        `Corrupt commands JSON for codebase ${id}: unable to parse stored data. ` +
+          `Run UPDATE remote_agent_codebases SET commands = '{}' WHERE id = '${id}' to reset.`
+      );
     }
   } else {
     parsed = raw ?? {};

From c5d5663b5407cd0a3e6a187f5dab9bb04ca12d8e Mon Sep 17 00:00:00 2001
From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com>
Date: Mon, 20 Apr 2026 21:45:24 +0300
Subject: [PATCH 03/14] fix(isolation): raise worktree git-operation timeout to
 5m (#1306)

All 15 worktree git-subprocess timeouts in WorktreeProvider were hardcoded
at 30000ms. Repos with heavy post-checkout hooks (lint, dependency install,
submodule init) routinely exceed that budget and fail worktree creation.

Consolidate them onto a single GIT_OPERATION_TIMEOUT_MS constant set to
5 min. That is generous enough to cover the reported cases while still
catching genuine hangs (credential prompts in non-TTY, stalled fetches).

Chosen over the config-key approach in #1029 to avoid adding permanent
.archon/config.yaml surface for a problem a raised default solves cleanly.
If 5 min turns out to also be too tight for real-world use, we'll revisit.

Closes #1119
Supersedes #1029

Co-authored-by: Shay Elmualem <12733941+norbinsh@users.noreply.github.com>
(cherry picked from commit cc78071ff62b6df20a50925d1117c4ddf6b44138)
---
 packages/isolation/src/providers/worktree.ts | 39 ++++++++++++--------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/packages/isolation/src/providers/worktree.ts b/packages/isolation/src/providers/worktree.ts
index aad76ad6c4..9d15196f7f 100644
--- a/packages/isolation/src/providers/worktree.ts
+++ b/packages/isolation/src/providers/worktree.ts
@@ -49,6 +49,13 @@ function getLog(): ReturnType<typeof createLogger> {
   return cachedLog;
 }
 
+/**
+ * Ceiling for a single git subprocess in worktree operations (create/fetch/checkout/remove/branch-delete).
+ * Generous enough for repos with heavy post-checkout hooks (lint/install) while still catching genuine
+ * hangs (e.g. credential prompts in non-TTY, stalled network fetches). See #1119, #1029.
+ */
+const GIT_OPERATION_TIMEOUT_MS = 5 * 60 * 1000;
+
 export class WorktreeProvider implements IIsolationProvider {
   readonly providerType = 'worktree';
 
@@ -150,7 +157,7 @@ export class WorktreeProvider implements IIsolationProvider {
       gitArgs.push(worktreePath);
 
       try {
-        await execFileAsync('git', gitArgs, { timeout: 30000 });
+        await execFileAsync('git', gitArgs, { timeout: GIT_OPERATION_TIMEOUT_MS });
         result.worktreeRemoved = true;
       } catch (error) {
         if (!this.isWorktreeMissingError(error)) {
@@ -266,7 +273,9 @@ export class WorktreeProvider implements IIsolationProvider {
     result: DestroyResult
   ): Promise<boolean> {
     try {
-      await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], { timeout: 30000 });
+      await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], {
+        timeout: GIT_OPERATION_TIMEOUT_MS,
+      });
       getLog().debug({ repoPath, branchName }, 'branch_deleted');
       return true;
     } catch (error) {
@@ -301,7 +310,7 @@ export class WorktreeProvider implements IIsolationProvider {
   ): Promise<boolean> {
     try {
       await execFileAsync('git', ['-C', repoPath, 'push', 'origin', '--delete', branchName], {
-        timeout: 30000,
+        timeout: GIT_OPERATION_TIMEOUT_MS,
       });
       getLog().debug({ repoPath, branchName }, 'remote_branch_deleted');
       return true;
@@ -850,7 +859,7 @@ export class WorktreeProvider implements IIsolationProvider {
   ): Promise<void> {
     // Fetch the PR's actual branch
     await execFileAsync('git', ['-C', repoPath, 'fetch', 'origin', prBranch], {
-      timeout: 30000,
+      timeout: GIT_OPERATION_TIMEOUT_MS,
     });
 
     // Try to create worktree with the branch
@@ -859,14 +868,14 @@ export class WorktreeProvider implements IIsolationProvider {
       await execFileAsync(
         'git',
         ['-C', repoPath, 'worktree', 'add', worktreePath, '-b', prBranch, `origin/${prBranch}`],
-        { timeout: 30000 }
+        { timeout: GIT_OPERATION_TIMEOUT_MS }
       );
     } catch (error) {
       const err = error as Error & { stderr?: string };
       // Branch already exists locally - use it directly
       if (err.stderr?.includes('already exists')) {
         await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, prBranch], {
-          timeout: 30000,
+          timeout: GIT_OPERATION_TIMEOUT_MS,
         });
       } else {
         throw error;
@@ -878,7 +887,7 @@ export class WorktreeProvider implements IIsolationProvider {
       await execFileAsync(
         'git',
         ['-C', worktreePath, 'branch', '--set-upstream-to', `origin/${prBranch}`],
-        { timeout: 30000 }
+        { timeout: GIT_OPERATION_TIMEOUT_MS }
       );
     } catch (trackingError) {
       getLog().warn({ err: trackingError, worktreePath, prBranch }, 'upstream_tracking_failed');
@@ -903,11 +912,11 @@ export class WorktreeProvider implements IIsolationProvider {
     if (prSha) {
       // SHA provided: create at specific commit for reproducible reviews
       await execFileAsync('git', ['-C', repoPath, 'fetch', 'origin', `pull/${prNumber}/head`], {
-        timeout: 30000,
+        timeout: GIT_OPERATION_TIMEOUT_MS,
       });
 
       await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, prSha], {
-        timeout: 30000,
+        timeout: GIT_OPERATION_TIMEOUT_MS,
       });
 
       // Create a local tracking branch so it's not detached HEAD
@@ -915,7 +924,7 @@ export class WorktreeProvider implements IIsolationProvider {
         repoPath,
         () =>
           execFileAsync('git', ['-C', worktreePath, 'checkout', '-b', reviewBranch, prSha], {
-            timeout: 30000,
+            timeout: GIT_OPERATION_TIMEOUT_MS,
           }),
         reviewBranch
       );
@@ -927,13 +936,13 @@ export class WorktreeProvider implements IIsolationProvider {
           execFileAsync(
             'git',
             ['-C', repoPath, 'fetch', 'origin', `pull/${prNumber}/head:${reviewBranch}`],
-            { timeout: 30000 }
+            { timeout: GIT_OPERATION_TIMEOUT_MS }
           ),
         reviewBranch
       );
 
       await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, reviewBranch], {
-        timeout: 30000,
+        timeout: GIT_OPERATION_TIMEOUT_MS,
       });
     }
   }
@@ -954,7 +963,7 @@ export class WorktreeProvider implements IIsolationProvider {
       if (err.stderr?.includes('already exists')) {
         getLog().debug({ repoPath, branchName }, 'stale_branch_retry');
         await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], {
-          timeout: 30000,
+          timeout: GIT_OPERATION_TIMEOUT_MS,
         });
         await createCommand();
       } else {
@@ -988,7 +997,7 @@ export class WorktreeProvider implements IIsolationProvider {
         'git',
         ['-C', repoPath, 'worktree', 'add', worktreePath, '-b', branchName, startPoint],
         {
-          timeout: 30000,
+          timeout: GIT_OPERATION_TIMEOUT_MS,
         }
       );
     } catch (error) {
@@ -1016,7 +1025,7 @@ export class WorktreeProvider implements IIsolationProvider {
           timeout: 10000,
         });
         await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, branchName], {
-          timeout: 30000,
+          timeout: GIT_OPERATION_TIMEOUT_MS,
         });
       } else {
         throw error;

From 063061c9a2f00d6407744b16e3c66a6f83c499e4 Mon Sep 17 00:00:00 2001
From: Lior Franko <lior.franko@ironsrc.com>
Date: Tue, 21 Apr 2026 11:47:32 +0300
Subject: [PATCH 04/14] fix(web,server): show real platform connection status
 in Settings (#1061)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Settings page's Platform Connections section hardcoded every platform
except Web to 'Not configured', so users couldn't tell whether their Slack/
Telegram/Discord/GitHub/Gitea/GitLab adapters had actually started.

- Server: /api/health now returns an activePlatforms array populated live
  as each adapter's start() resolves. The array itself (not a copy) is
  passed into registerApiRoutes and read on every request — Telegram
  starts after the HTTP listener is already accepting requests, so a
  snapshot taken at registration time would miss it.
- Web: SettingsPage.PlatformConnectionsSection now reads activePlatforms
  from /api/health and looks each platform up in a Set. Also adds Gitea
  and GitLab to the list (they already ship as adapters).

Closes #1031

Co-authored-by: Lior Franko <liorfr@dreamgroup.com>
(cherry picked from commit 08de8ee5c6fb5828401e082b0e92a1ba111bcb09)
---
 packages/server/src/index.ts             | 22 ++++++++++++----------
 packages/server/src/routes/api.ts        |  5 ++++-
 packages/web/src/lib/api.ts              |  1 +
 packages/web/src/routes/SettingsPage.tsx | 19 +++++++++++--------
 4 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts
index 18c173cc66..d1738ce678 100644
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -262,6 +262,11 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
   await webAdapter.start();
   persistence.startPeriodicFlush();
 
+  // Mutable — pushed to as each adapter starts, read by the /api/health endpoint.
+  // Must be a live reference because Telegram starts after the HTTP listener begins
+  // accepting requests, so a snapshot taken at registration time would miss it.
+  const activePlatforms: string[] = ['Web'];
+
   // Platform adapters (skipped in CLI serve mode or when not configured)
   let github: GitHubAdapter | null = null;
   let gitea: GiteaAdapter | null = null;
@@ -294,6 +299,7 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
         botMention
       );
       await github.start();
+      activePlatforms.push('GitHub');
     } else {
       getLog().info('github_adapter_skipped');
     }
@@ -310,6 +316,7 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
         giteaBotMention
       );
       await gitea.start();
+      activePlatforms.push('Gitea');
     } else {
       getLog().info('gitea_adapter_skipped');
     }
@@ -326,6 +333,7 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
         gitlabBotMention
       );
       await gitlab.start();
+      activePlatforms.push('GitLab');
     } else {
       getLog().info('gitlab_adapter_skipped');
     }
@@ -388,6 +396,7 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
       });
 
       await discord.start();
+      activePlatforms.push('Discord');
     } else {
       getLog().info('discord_adapter_skipped');
     }
@@ -443,6 +452,7 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
       });
 
       await slack.start();
+      activePlatforms.push('Slack');
     } else {
       getLog().info('slack_adapter_skipped');
     }
@@ -461,7 +471,7 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
   });
 
   // Register Web UI API routes
-  registerApiRoutes(app, webAdapter, lockManager);
+  registerApiRoutes(app, webAdapter, lockManager, activePlatforms);
 
   // GitHub webhook endpoint
   if (github) {
@@ -617,6 +627,7 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
 
     try {
       await telegramAdapter.start();
+      activePlatforms.push('Telegram');
     } catch (err) {
       const error = err instanceof Error ? err : new Error(String(err));
       getLog().error({ err: error, errorType: error.constructor.name }, 'telegram.start_failed');
@@ -679,15 +690,6 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
   // the try/catch in claude.ts). These are SDK cleanup races, not fatal app errors.
   process.on('unhandledRejection', handleUnhandledRejection);
 
-  // Show active platforms
-  const activePlatforms = ['Web'];
-  if (telegram) activePlatforms.push('Telegram');
-  if (discord) activePlatforms.push('Discord');
-  if (slack) activePlatforms.push('Slack');
-  if (github) activePlatforms.push('GitHub');
-  if (gitea) activePlatforms.push('Gitea');
-  if (gitlab) activePlatforms.push('GitLab');
-
   getLog().info({ activePlatforms, port }, 'server_ready');
 
   // Non-blocking: warn at startup if gh CLI auth is unavailable
diff --git a/packages/server/src/routes/api.ts b/packages/server/src/routes/api.ts
index 7ac7c60474..6448c77318 100644
--- a/packages/server/src/routes/api.ts
+++ b/packages/server/src/routes/api.ts
@@ -821,6 +821,7 @@ const getHealthRoute = createRoute({
               runningWorkflows: z.number(),
               version: z.string().optional(),
               is_docker: z.boolean(),
+              activePlatforms: z.array(z.string()).optional(),
             })
             .openapi('HealthResponse'),
         },
@@ -868,7 +869,8 @@ const getCostAnalyticsRoute = createRoute({
 export function registerApiRoutes(
   app: OpenAPIHono,
   webAdapter: WebAdapter,
-  lockManager: ConversationLockManager
+  lockManager: ConversationLockManager,
+  activePlatforms?: readonly string[]
 ): void {
   function apiError(
     c: Context,
@@ -2675,6 +2677,7 @@ export function registerApiRoutes(
       runningWorkflows: runningWorkflowRows.length,
       version: appVersion,
       is_docker: isDocker(),
+      activePlatforms: activePlatforms ? [...activePlatforms] : ['Web'],
     });
   });
 
diff --git a/packages/web/src/lib/api.ts b/packages/web/src/lib/api.ts
index cc52724301..8748878512 100644
--- a/packages/web/src/lib/api.ts
+++ b/packages/web/src/lib/api.ts
@@ -56,6 +56,7 @@ export interface HealthResponse {
   runningWorkflows: number;
   version?: string;
   is_docker: boolean;
+  activePlatforms?: string[];
 }
 
 async function fetchJSON<T>(url: string, options?: RequestInit): Promise<T> {
diff --git a/packages/web/src/routes/SettingsPage.tsx b/packages/web/src/routes/SettingsPage.tsx
index 9ff8c33058..9add58d245 100644
--- a/packages/web/src/routes/SettingsPage.tsx
+++ b/packages/web/src/routes/SettingsPage.tsx
@@ -607,16 +607,19 @@ function AssistantConfigSection({ config }: { config: SafeConfigResponse }): Rea
 }
 
 function PlatformConnectionsSection({
-  adapter,
+  activePlatforms,
 }: {
-  adapter: string | undefined;
+  activePlatforms: string[] | undefined;
 }): React.ReactElement {
+  const active = new Set(activePlatforms ?? []);
   const platforms = [
-    { name: 'Web', connected: adapter === 'web' },
-    { name: 'Slack', connected: false },
-    { name: 'Telegram', connected: false },
-    { name: 'Discord', connected: false },
-    { name: 'GitHub', connected: false },
+    { name: 'Web', connected: active.has('Web') },
+    { name: 'Slack', connected: active.has('Slack') },
+    { name: 'Telegram', connected: active.has('Telegram') },
+    { name: 'Discord', connected: active.has('Discord') },
+    { name: 'GitHub', connected: active.has('GitHub') },
+    { name: 'Gitea', connected: active.has('Gitea') },
+    { name: 'GitLab', connected: active.has('GitLab') },
   ];
 
   return (
@@ -717,7 +720,7 @@ export function SettingsPage(): React.ReactElement {
 
           <div className="grid grid-cols-1 gap-6 lg:grid-cols-2">
             {configData && <AssistantConfigSection config={configData.config} />}
-            <PlatformConnectionsSection adapter={health?.adapter} />
+            <PlatformConnectionsSection activePlatforms={health?.activePlatforms} />
           </div>
 
           <ProjectsSection />

From 821f51b4ad92f3e1d4e1220ade406fc6ace60b28 Mon Sep 17 00:00:00 2001
From: Alex Siri <alexsiri7@gmail.com>
Date: Tue, 21 Apr 2026 12:52:56 +0100
Subject: [PATCH 05/14] fix: initialize options.hooks before merging YAML node
 hooks (#1177)

When a workflow node defines hooks (PreToolUse/PostToolUse) in YAML but
no hooks exist yet on the options object, applyNodeConfig crashes with
"undefined is not an object" because it tries to assign properties on
the undefined options.hooks.

Initialize options.hooks to {} before the merge loop.

Reproduces with: archon workflow run archon-architect (which uses
per-node hooks extensively).

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
(cherry picked from commit 7ea321419f0cd48e71e9ebf12968f539bc4166bc)
---
 packages/providers/src/claude/provider.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts
index 0821319317..7202f4e19e 100644
--- a/packages/providers/src/claude/provider.ts
+++ b/packages/providers/src/claude/provider.ts
@@ -381,6 +381,9 @@ async function applyNodeConfig(
     if (Object.keys(builtHooks).length > 0) {
       // Merge with existing hooks (PostToolUse capture hook)
       const existingHooks = options.hooks as SDKHooksMap | undefined;
+      if (!options.hooks) {
+        (options as Record<string, unknown>).hooks = {};
+      }
       for (const [event, matchers] of Object.entries(builtHooks)) {
         if (!matchers) continue;
         const existing = existingHooks?.[event] as HookCallbackMatcher[] | undefined;

From c77ae63e10e04e291ad901b6fe04dcd28202fdef Mon Sep 17 00:00:00 2001
From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com>
Date: Wed, 22 Apr 2026 08:47:46 +0300
Subject: [PATCH 06/14] fix: detect completion signal in any XML tag, not just
 <promise> (#1126) (#1184)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: detect completion signal in any XML tag, not just <promise> (#1126)

Loop nodes with `until:` reported max_iterations_reached when the AI wrapped
the completion signal in XML tags other than `<promise>` (e.g.,
`<COMPLETE>ALL_CLEAN</COMPLETE>`). The three existing regex patterns all missed
this format, causing the loop to exhaust iterations and fail.

Changes:
- Add generic XML-wrapped signal pattern to `detectCompletionSignal`
- Extend `stripCompletionTags` to strip matched XML-wrapped signals from output
- Pass `loop.until` to `stripCompletionTags` call site in dag-executor
- Add unit tests for detection and stripping of XML-wrapped signals
- Add integration test for loop completing on final iteration with XML tags

Fixes #1126

* fix: address review findings for completion signal detection

- Update detectCompletionSignal JSDoc to document all three detection formats
- Update stripCompletionTags JSDoc to mention the `until` parameter
- Remove superfluous `m` flag from xmlWrappedPattern (no anchors, no effect)
- Document that XML tag names are matched independently (intentional permissiveness)
- Add test: detects signal in mismatched XML tags (permissive behavior)
- Add test: strips both <promise> and XML-tagged signal in same chunk
- Add assertion in DAG integration test that raw XML tags don't appear in sent messages

* simplify: reduce complexity in changed files

* fix: require matching XML tag names in completion-signal detection

Follow-up to the initial broadening in this PR. The first version of the
regex accepted mismatched open/close tags (e.g. `<COMPLETE>X</done>`)
which was a small false-positive surface when the AI interleaves tags
in prose. Tightens both detectCompletionSignal and stripCompletionTags
to capture the tag name and enforce it on the close via \1
backreference. Case-insensitivity on the tag name is preserved.

Test updates:
- Flip the "permissive mismatch" case to assert strict rejection with a
  comment explaining the guard.
- Add a case-insensitive matching case to lock that behavior in.

No behavior change for workflows that use matching tags (the
overwhelmingly common case) or for <promise>...</promise>. Behavior change
is limited to the narrow "open tag and close tag disagree" case, which
only happens when the AI is confused — in which case we'd rather report
max_iterations_reached and let the author inspect than silently call
the loop complete.

(cherry picked from commit bc25deefbaf38a115815e631940d2989bad9381f)
---
 packages/workflows/src/dag-executor.test.ts   | 69 +++++++++++++++++++
 packages/workflows/src/dag-executor.ts        |  2 +-
 .../workflows/src/executor-shared.test.ts     | 64 +++++++++++++++++
 packages/workflows/src/executor-shared.ts     | 45 +++++++++---
 4 files changed, 168 insertions(+), 12 deletions(-)

diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts
index 0c745b39e5..03b4e77f91 100644
--- a/packages/workflows/src/dag-executor.test.ts
+++ b/packages/workflows/src/dag-executor.test.ts
@@ -2935,6 +2935,75 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => {
       ).toBe(1);
     });
 
+    it('completes on final iteration with XML-wrapped signal (<COMPLETE>SIGNAL</COMPLETE>)', async () => {
+      let callCount = 0;
+      mockSendQueryDag.mockImplementation(function* () {
+        callCount++;
+        if (callCount < 3) {
+          yield { type: 'assistant', content: `Iteration ${String(callCount)} progress` };
+          yield { type: 'result', sessionId: `loop-session-${String(callCount)}` };
+        } else {
+          // Final iteration uses <COMPLETE> tag instead of <promise>
+          yield { type: 'assistant', content: 'All clean! <COMPLETE>ALL_CLEAN</COMPLETE>' };
+          yield { type: 'result', sessionId: `loop-session-${String(callCount)}` };
+        }
+      });
+
+      const mockDeps = createMockDeps();
+      const platform = createMockPlatform();
+      const workflowRun = makeWorkflowRun();
+
+      await executeDagWorkflow(
+        mockDeps,
+        platform,
+        'conv-dag',
+        testDir,
+        {
+          name: 'dag-loop-xml-tag',
+          nodes: [
+            {
+              id: 'fix-and-review',
+              loop: {
+                prompt: 'Fix and review. When done, output <COMPLETE>ALL_CLEAN</COMPLETE>.',
+                until: 'ALL_CLEAN',
+                max_iterations: 3,
+              },
+            },
+          ],
+        },
+        workflowRun,
+        'claude',
+        undefined,
+        join(testDir, 'artifacts'),
+        join(testDir, 'logs'),
+        'main',
+        'docs/',
+        minimalConfig
+      );
+
+      // 3 iterations run, signal found on iteration 3 → completed, NOT failed
+      expect(mockSendQueryDag.mock.calls.length).toBe(3);
+      expect(
+        (
+          mockDeps.store.completeWorkflowRun as Mock<
+            (id: string, metadata?: Record<string, unknown>) => Promise<void>
+          >
+        ).mock.calls.length
+      ).toBe(1);
+      expect(
+        (mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise<void>>).mock
+          .calls.length
+      ).toBe(0);
+      // Verify stripping: raw XML completion tags must not appear in user-visible output
+      const allSentMessages = (
+        platform.sendMessage as Mock<(...args: unknown[]) => Promise<void>>
+      ).mock.calls
+        .map((call: unknown[]) => call[1] as string)
+        .join('');
+      expect(allSentMessages).not.toContain('<COMPLETE>');
+      expect(allSentMessages).not.toContain('</COMPLETE>');
+    });
+
     it('loop node output available to downstream nodes via $nodeId.output', async () => {
       let loopCallCount = 0;
       mockSendQueryDag.mockImplementation(function* (prompt: string) {
diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts
index c363f4ce3f..a60f4b7b72 100644
--- a/packages/workflows/src/dag-executor.ts
+++ b/packages/workflows/src/dag-executor.ts
@@ -1619,7 +1619,7 @@ async function executeLoopNode(
       })) {
         if (msg.type === 'assistant') {
           fullOutput += msg.content;
-          const cleaned = stripCompletionTags(msg.content);
+          const cleaned = stripCompletionTags(msg.content, loop.until);
           cleanOutput += cleaned;
           if (platform.getStreamingMode() === 'stream' && cleaned) {
             await safeSendMessage(platform, conversationId, cleaned, msgContext);
diff --git a/packages/workflows/src/executor-shared.test.ts b/packages/workflows/src/executor-shared.test.ts
index bb6456383f..413e8bbc47 100644
--- a/packages/workflows/src/executor-shared.test.ts
+++ b/packages/workflows/src/executor-shared.test.ts
@@ -22,6 +22,8 @@ import {
   substituteWorkflowVariables,
   buildPromptWithContext,
   detectCreditExhaustion,
+  detectCompletionSignal,
+  stripCompletionTags,
   isInlineScript,
 } from './executor-shared';
 
@@ -414,3 +416,65 @@ describe('isInlineScript', () => {
     expect(isInlineScript('')).toBe(false);
   });
 });
+
+describe('detectCompletionSignal', () => {
+  it('detects <promise>SIGNAL</promise> format', () => {
+    expect(detectCompletionSignal('<promise>COMPLETE</promise>', 'COMPLETE')).toBe(true);
+  });
+
+  it('detects signal in custom XML tags: <COMPLETE>SIGNAL</COMPLETE>', () => {
+    expect(detectCompletionSignal('<COMPLETE>ALL_CLEAN</COMPLETE>', 'ALL_CLEAN')).toBe(true);
+  });
+
+  it('detects signal in other XML tag names', () => {
+    expect(detectCompletionSignal('<done>COMPLETE</done>', 'COMPLETE')).toBe(true);
+    expect(detectCompletionSignal('<status>DONE</status>', 'DONE')).toBe(true);
+  });
+
+  it('detects plain signal at end of output', () => {
+    expect(detectCompletionSignal('Work done. COMPLETE', 'COMPLETE')).toBe(true);
+  });
+
+  it('detects plain signal on its own line', () => {
+    expect(detectCompletionSignal('Work done.\nCOMPLETE\nExtra text', 'COMPLETE')).toBe(true);
+  });
+
+  it('does not detect signal embedded in prose', () => {
+    expect(detectCompletionSignal('The status is not COMPLETE yet.', 'COMPLETE')).toBe(false);
+  });
+
+  it('does not detect signal when wrong value is in tags', () => {
+    expect(detectCompletionSignal('<COMPLETE>WRONG</COMPLETE>', 'ALL_CLEAN')).toBe(false);
+  });
+
+  it('does NOT detect signal when XML tag names do not match (strict)', () => {
+    // Open/close tag names must agree — guards against AI prose that
+    // interleaves tags (e.g. "<COMPLETE>ALL_CLEAN</other-tag>") being
+    // treated as a completion.
+    expect(detectCompletionSignal('<COMPLETE>ALL_CLEAN</done>', 'ALL_CLEAN')).toBe(false);
+  });
+
+  it('detects signal when tag names match case-insensitively', () => {
+    expect(detectCompletionSignal('<Complete>ALL_CLEAN</complete>', 'ALL_CLEAN')).toBe(true);
+  });
+});
+
+describe('stripCompletionTags', () => {
+  it('strips <promise> tags', () => {
+    expect(stripCompletionTags('Done. <promise>COMPLETE</promise>')).toBe('Done.');
+  });
+
+  it('strips XML-wrapped signal when until is provided', () => {
+    expect(stripCompletionTags('Done. <COMPLETE>ALL_CLEAN</COMPLETE>', 'ALL_CLEAN')).toBe('Done.');
+  });
+
+  it('does not strip XML tags when until is not provided', () => {
+    const input = 'Done. <COMPLETE>ALL_CLEAN</COMPLETE>';
+    expect(stripCompletionTags(input)).toBe(input.trim());
+  });
+
+  it('strips both <promise> and XML-tagged signal when until is provided', () => {
+    const input = 'Done. <promise>ALL_CLEAN</promise> <COMPLETE>ALL_CLEAN</COMPLETE>';
+    expect(stripCompletionTags(input, 'ALL_CLEAN')).toBe('Done.');
+  });
+});
diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts
index f00f5c6ad6..b60ceacc35 100644
--- a/packages/workflows/src/executor-shared.ts
+++ b/packages/workflows/src/executor-shared.ts
@@ -384,18 +384,26 @@ function escapeRegExp(str: string): string {
 /**
  * Detect whether the AI output contains a completion signal.
  *
- * Supports two formats:
+ * Supports three formats, checked in order:
  * 1. <promise>SIGNAL</promise> - Recommended; prevents false positives in prose
- * 2. Plain SIGNAL - Backwards compatibility; only at end of output or on own line
+ * 2. <anytag>SIGNAL</anytag> - Any XML-wrapped tag; case-insensitive on tag names
+ * 3. Plain SIGNAL - Backwards compatibility; only at end of output or on own line
  *
- * The <promise> tag format uses case-insensitive matching for the tags.
- * Plain signal detection is restrictive to prevent false positives.
+ * Tag matching uses a backreference (\1) so opening and closing tag names must
+ * agree — `<COMPLETE>X</done>` is not treated as a completion, which avoids
+ * false positives when the AI interleaves tags in prose.
+ *
+ * Plain signal detection is restrictive to prevent false positives like "not SIGNAL yet".
  */
 export function detectCompletionSignal(output: string, signal: string): boolean {
-  // Check for <promise>SIGNAL</promise> format (recommended - prevents false positives)
-  // Case-insensitive for tags
-  const promisePattern = new RegExp(`<promise>\\s*${escapeRegExp(signal)}\\s*</promise>`, 'i');
-  if (promisePattern.test(output)) {
+  // Check for XML-like tag wrapping with matching open/close names: <tag>SIGNAL</tag>.
+  // Catches <promise>COMPLETE</promise>, <COMPLETE>ALL_CLEAN</COMPLETE>, <done>X</done>.
+  // The name must end at whitespace or `>` so `</\1>` cannot match a mere prefix of the tag.
+  const xmlWrappedPattern = new RegExp(
+    `<([a-zA-Z][\\w-]*)(?:\\s[^>]*)?>\\s*${escapeRegExp(signal)}\\s*</\\1>`,
+    'i'
+  );
+  if (xmlWrappedPattern.test(output)) {
     return true;
   }
   // Plain signal detection - restrictive to prevent false positives like "not COMPLETE yet"
@@ -407,9 +415,24 @@ export function detectCompletionSignal(output: string, signal: string): boolean
   return endPattern.test(output) || ownLinePattern.test(output);
 }
 
-/** Strip internal completion signal tags before sending to user-facing output. */
-export function stripCompletionTags(content: string): string {
-  return content.replace(/<promise>[\s\S]*?<\/promise>/gi, '').trim();
+/**
+ * Strip internal completion signal tags before sending to user-facing output.
+ * Always strips `<promise>…</promise>` (any content). When `until` is provided,
+ * also strips any XML-wrapped form of that signal with matching tag names
+ * (e.g. `<COMPLETE>ALL_CLEAN</COMPLETE>`). Mismatched tag names are left alone
+ * so regular prose (`<note>ALL_CLEAN</warning>`) isn't accidentally rewritten.
+ */
+export function stripCompletionTags(content: string, until?: string): string {
+  let result = content.replace(/<promise>[\s\S]*?<\/promise>/gi, '');
+  if (until) {
+    // Strip <tag>SIGNAL</tag>; the name must end at whitespace or `>` so \1 can't match a prefix.
+    const escapedSignal = escapeRegExp(until);
+    result = result.replace(
+      new RegExp(`<([a-zA-Z][\\w-]*)(?:\\s[^>]*)?>\\s*${escapedSignal}\\s*</\\1>`, 'gi'),
+      ''
+    );
+  }
+  return result.trim();
 }
 
 /**

From 4c863d4c448bd2b1b08489591484c44a32f5599a Mon Sep 17 00:00:00 2001
From: Ahmed <44034059+medevs@users.noreply.github.com>
Date: Wed, 22 Apr 2026 08:13:18 +0200
Subject: [PATCH 07/14] fix(web): allow deleting nodes from Workflow Builder
 (#971) (#1113)

* fix(web): allow deleting nodes from Workflow Builder (#971)

Three independent gaps prevented users from deleting nodes added to the
Workflow Builder canvas: dropped nodes were never auto-selected so
keyboard shortcuts silently no-oped, no right-click context menu
existed, and the Delete Node button was buried in the Advanced tab
(hidden below the viewport for Prompt/Command, completely absent for
Bash since bash nodes have no Advanced tab).

Fixes #971.

* fix(web): push undo snapshot before adding nodes on canvas

Call onPushSnapshot() before setNodes() in both onDrop and quick-add
handlers so that node additions are captured by undo/redo history.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(web): address PR #1113 review feedback

- Hold nodes/edges in refs so handleNodeDeleteById and onPushSnapshot
  can't capture stale pre-drop state (fixes undo-stack correctness).
- Clamp context-menu x/y to viewport so right-click near edges stays
  fully on-screen.
- Drop non-conformant role=menu/menuitem from the single-item context
  menu; rely on the native button for accessibility.
- Extend isInputTarget() to cover ARIA combobox/textbox/searchbox so
  Backspace in Radix/shadcn widgets never nukes a node.
- Extract handleBuilderKeydown as a pure function and add tests
  covering the Delete/Backspace + isInputTarget invariant.
- Remove issue-number references from code comments per CLAUDE.md.
- Document the new delete affordances in the Workflow Builder docs.
- Inline context-menu dismissal, rename pointer handler, drop unused
  deps in keyboardActions useMemo.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
(cherry picked from commit d7f36b22ddcaa337cc1ab4ec152497b19c176056)
---
 .../docs-web/src/content/docs/adapters/web.md |   1 +
 packages/web/package.json                     |   2 +-
 .../components/workflows/NodeInspector.tsx    |  25 +--
 .../components/workflows/WorkflowBuilder.tsx  |  46 +++--
 .../components/workflows/WorkflowCanvas.tsx   |  80 +++++++-
 .../web/src/hooks/useBuilderKeyboard.test.ts  | 136 +++++++++++++
 packages/web/src/hooks/useBuilderKeyboard.ts  | 186 ++++++++++--------
 7 files changed, 362 insertions(+), 114 deletions(-)
 create mode 100644 packages/web/src/hooks/useBuilderKeyboard.test.ts

diff --git a/packages/docs-web/src/content/docs/adapters/web.md b/packages/docs-web/src/content/docs/adapters/web.md
index 7a3aeebb86..0025ca0219 100644
--- a/packages/docs-web/src/content/docs/adapters/web.md
+++ b/packages/docs-web/src/content/docs/adapters/web.md
@@ -172,6 +172,7 @@ The Workflow Builder at `/workflows/builder` provides a visual editor for creati
 - **Command picker** -- Browse available commands when configuring command nodes
 - **Validation panel** -- Real-time validation feedback as you build
 - **Undo/redo** -- Full undo/redo stack with keyboard shortcuts
+- **Delete node** -- Remove a selected node with `Delete` or `Backspace`, the Delete button in the inspector header, or the right-click context menu on any node
 - **Save** -- Saves the workflow YAML to your project's `.archon/workflows/` directory
 
 You can also browse existing workflows on the `/workflows` page and open any of them in the builder to edit.
diff --git a/packages/web/package.json b/packages/web/package.json
index 8deb2ed573..ad976cff54 100644
--- a/packages/web/package.json
+++ b/packages/web/package.json
@@ -8,7 +8,7 @@
     "build": "tsc --noEmit && vite build",
     "preview": "vite preview",
     "type-check": "tsc --noEmit",
-    "test": "bun test src/lib/ && bun test src/stores/",
+    "test": "bun test src/lib/ && bun test src/stores/ && bun test src/hooks/",
     "generate:types": "openapi-typescript http://localhost:3090/api/openapi.json -o src/lib/api.generated.d.ts"
   },
   "dependencies": {
diff --git a/packages/web/src/components/workflows/NodeInspector.tsx b/packages/web/src/components/workflows/NodeInspector.tsx
index 1dfd797570..1d4748fecc 100644
--- a/packages/web/src/components/workflows/NodeInspector.tsx
+++ b/packages/web/src/components/workflows/NodeInspector.tsx
@@ -642,11 +642,9 @@ function JsonTextareaField({
 function AdvancedTab({
   node,
   onUpdate,
-  onDelete,
 }: {
   node: DagNodeData;
   onUpdate: (updates: Partial<DagNodeData>) => void;
-  onDelete: () => void;
 }): React.ReactElement {
   return (
     <div className="flex flex-col gap-3 p-3">
@@ -696,12 +694,6 @@ function AdvancedTab({
           onUpdate({ hooks: v });
         }}
       />
-
-      <div className="border-t border-border pt-3 mt-2">
-        <Button variant="destructive" size="sm" onClick={onDelete} className="w-full">
-          Delete Node
-        </Button>
-      </div>
     </div>
   );
 }
@@ -718,14 +710,23 @@ function DagInspector({
   return (
     <div key={node.id} className="flex flex-col h-full border-l border-border bg-surface">
       {/* Header */}
-      <div className="flex items-center justify-between px-3 py-2 border-b border-border">
-        <span className="text-xs font-semibold text-text-primary truncate">
+      <div className="flex items-center gap-2 px-3 py-2 border-b border-border">
+        <span className="flex-1 truncate text-xs font-semibold text-text-primary">
           {node.label || node.id}
         </span>
+        <Button
+          variant="destructive"
+          size="sm"
+          onClick={onDelete}
+          className="h-6 shrink-0 px-2 text-[10px]"
+          aria-label="Delete node"
+        >
+          Delete
+        </Button>
         <button
           type="button"
           onClick={onClose}
-          className="text-text-tertiary hover:text-text-primary text-sm leading-none px-1"
+          className="shrink-0 px-1 text-sm leading-none text-text-tertiary hover:text-text-primary"
           title="Close inspector"
         >
           x
@@ -770,7 +771,7 @@ function DagInspector({
 
           {!isBash && (
             <TabsContent value="advanced">
-              <AdvancedTab key={node.id} node={node} onUpdate={onUpdate} onDelete={onDelete} />
+              <AdvancedTab key={node.id} node={node} onUpdate={onUpdate} />
             </TabsContent>
           )}
         </ScrollArea>
diff --git a/packages/web/src/components/workflows/WorkflowBuilder.tsx b/packages/web/src/components/workflows/WorkflowBuilder.tsx
index 9acfb37f7a..674b081d8e 100644
--- a/packages/web/src/components/workflows/WorkflowBuilder.tsx
+++ b/packages/web/src/components/workflows/WorkflowBuilder.tsx
@@ -172,6 +172,19 @@ function WorkflowBuilderInner(): React.ReactElement {
     setHasUnsavedChanges(true);
   }, []);
 
+  // Refs mirror the latest nodes/edges so snapshot-taking callbacks don't
+  // close over stale values when events fire in the same tick as a render.
+  const nodesRef = useRef(nodes);
+  const edgesRef = useRef(edges);
+  useEffect(() => {
+    nodesRef.current = nodes;
+    edgesRef.current = edges;
+  }, [nodes, edges]);
+
+  const pushSnapshotLatest = useCallback((): void => {
+    pushSnapshot({ nodes: nodesRef.current, edges: edgesRef.current });
+  }, [pushSnapshot]);
+
   const buildDefinition = useCallback((): WorkflowDefinition => {
     const name = workflowName.trim() || 'untitled';
     const description = workflowDescription;
@@ -236,14 +249,21 @@ function WorkflowBuilderInner(): React.ReactElement {
     [selectedNodeId, setNodes, markDirty]
   );
 
+  const handleNodeDeleteById = useCallback(
+    (nodeId: string): void => {
+      pushSnapshotLatest();
+      setNodes(nds => nds.filter(n => n.id !== nodeId));
+      setEdges(eds => eds.filter(e => e.source !== nodeId && e.target !== nodeId));
+      setSelectedNodeId(prev => (prev === nodeId ? null : prev));
+      markDirty();
+    },
+    [setNodes, setEdges, markDirty, pushSnapshotLatest]
+  );
+
   const handleNodeDelete = useCallback((): void => {
     if (!selectedNodeId) return;
-    pushSnapshot({ nodes, edges });
-    setNodes(nds => nds.filter(n => n.id !== selectedNodeId));
-    setEdges(eds => eds.filter(e => e.source !== selectedNodeId && e.target !== selectedNodeId));
-    setSelectedNodeId(null);
-    markDirty();
-  }, [selectedNodeId, setNodes, setEdges, markDirty, pushSnapshot, nodes, edges]);
+    handleNodeDeleteById(selectedNodeId);
+  }, [selectedNodeId, handleNodeDeleteById]);
 
   // Toolbar action handlers
   const handleValidate = useCallback(async (): Promise<void> => {
@@ -361,7 +381,7 @@ function WorkflowBuilderInner(): React.ReactElement {
           position: { x: 200, y: 200 },
           data: { id, label: 'Prompt', nodeType: 'prompt' },
         };
-        pushSnapshot({ nodes, edges });
+        pushSnapshotLatest();
         setNodes(nds => [...nds, newNode]);
         markDirty();
       },
@@ -373,7 +393,7 @@ function WorkflowBuilderInner(): React.ReactElement {
           position: { x: 200, y: 200 },
           data: { id, label: 'Shell', nodeType: 'bash' },
         };
-        pushSnapshot({ nodes, edges });
+        pushSnapshotLatest();
         setNodes(nds => [...nds, newNode]);
         markDirty();
       },
@@ -393,7 +413,7 @@ function WorkflowBuilderInner(): React.ReactElement {
           position: { x: sourceNode.position.x + 30, y: sourceNode.position.y + 30 },
           data: { ...sourceNode.data, id },
         };
-        pushSnapshot({ nodes, edges });
+        pushSnapshotLatest();
         setNodes(nds => [...nds, newNode]);
         markDirty();
       },
@@ -405,9 +425,8 @@ function WorkflowBuilderInner(): React.ReactElement {
       handleToggleValidationPanel,
       handleNodeDelete,
       nodes,
-      edges,
       selectedNodeId,
-      pushSnapshot,
+      pushSnapshotLatest,
       setNodes,
       markDirty,
     ]
@@ -482,10 +501,9 @@ function WorkflowBuilderInner(): React.ReactElement {
                   setNodes={setNodes}
                   setEdges={setEdges}
                   onNodeSelect={setSelectedNodeId}
+                  onNodeDelete={handleNodeDeleteById}
                   onDirty={markDirty}
-                  onPushSnapshot={(): void => {
-                    pushSnapshot({ nodes, edges });
-                  }}
+                  onPushSnapshot={pushSnapshotLatest}
                   commands={commandList}
                 />
               </div>
diff --git a/packages/web/src/components/workflows/WorkflowCanvas.tsx b/packages/web/src/components/workflows/WorkflowCanvas.tsx
index f784c67c4f..e1c6170b16 100644
--- a/packages/web/src/components/workflows/WorkflowCanvas.tsx
+++ b/packages/web/src/components/workflows/WorkflowCanvas.tsx
@@ -82,6 +82,7 @@ interface WorkflowCanvasProps {
   setNodes: React.Dispatch<React.SetStateAction<DagFlowNode[]>>;
   setEdges: React.Dispatch<React.SetStateAction<Edge[]>>;
   onNodeSelect: (nodeId: string | null) => void;
+  onNodeDelete: (nodeId: string) => void;
   onDirty: () => void;
   onPushSnapshot?: () => void;
   commands: CommandEntry[];
@@ -100,12 +101,19 @@ export function WorkflowCanvas({
   setNodes,
   setEdges,
   onNodeSelect,
+  onNodeDelete,
   onDirty,
   onPushSnapshot,
   commands,
 }: WorkflowCanvasProps): React.ReactElement {
   const { screenToFlowPosition } = useReactFlow();
   const [quickAddPosition, setQuickAddPosition] = useState<QuickAddPosition | null>(null);
+  const [contextMenu, setContextMenu] = useState<{
+    x: number;
+    y: number;
+    nodeId: string;
+  } | null>(null);
+  const contextMenuRef = useRef<HTMLDivElement | null>(null);
 
   const nodeTypes: NodeTypes = useMemo(() => ({ dagNode: dagNodeComponent }), []);
 
@@ -164,10 +172,12 @@ export function WorkflowCanvas({
         },
       };
 
+      onPushSnapshot?.();
       setNodes(nds => [...nds, newNode]);
+      onNodeSelect(id);
       onDirty();
     },
-    [screenToFlowPosition, setNodes, onDirty]
+    [screenToFlowPosition, setNodes, onNodeSelect, onDirty, onPushSnapshot]
   );
 
   // Track whether we've already pushed a snapshot for the current drag gesture
@@ -278,17 +288,63 @@ export function WorkflowCanvas({
         },
       };
 
+      onPushSnapshot?.();
       setNodes(nds => [...nds, newNode]);
+      onNodeSelect(id);
       onDirty();
       setQuickAddPosition(null);
     },
-    [quickAddPosition, setNodes, onDirty]
+    [quickAddPosition, setNodes, onNodeSelect, onDirty, onPushSnapshot]
   );
 
   const handleQuickAddClose = useCallback(() => {
     setQuickAddPosition(null);
   }, []);
 
+  // Approximate menu size used for viewport-edge clamping.
+  const CONTEXT_MENU_WIDTH = 160;
+  const CONTEXT_MENU_HEIGHT = 40;
+
+  const handleNodeContextMenu = useCallback(
+    (e: React.MouseEvent, node: DagFlowNode) => {
+      e.preventDefault();
+      onNodeSelect(node.id);
+      const x = Math.min(e.clientX, window.innerWidth - CONTEXT_MENU_WIDTH);
+      const y = Math.min(e.clientY, window.innerHeight - CONTEXT_MENU_HEIGHT);
+      setContextMenu({ x, y, nodeId: node.id });
+    },
+    [onNodeSelect]
+  );
+
+  // Dismiss the context menu on Escape or any click/contextmenu outside it.
+  useEffect(() => {
+    if (!contextMenu) return;
+
+    const onKey = (e: KeyboardEvent): void => {
+      if (e.key === 'Escape') setContextMenu(null);
+    };
+    const onClickOutside = (e: MouseEvent): void => {
+      if (
+        contextMenuRef.current &&
+        e.target instanceof Node &&
+        contextMenuRef.current.contains(e.target)
+      ) {
+        return;
+      }
+      setContextMenu(null);
+    };
+
+    window.addEventListener('keydown', onKey);
+    // Use capture so we beat ReactFlow's own handlers and any stopPropagation.
+    window.addEventListener('mousedown', onClickOutside, true);
+    window.addEventListener('contextmenu', onClickOutside, true);
+    return (): void => {
+      window.removeEventListener('keydown', onKey);
+      window.removeEventListener('mousedown', onClickOutside, true);
+      window.removeEventListener('contextmenu', onClickOutside, true);
+    };
+  }, [contextMenu]);
+
   return (
     <div className="relative w-full h-full">
       <ReactFlow
@@ -302,6 +358,7 @@ export function WorkflowCanvas({
         onNodeClick={(_e, node): void => {
           onNodeSelect(node.id);
         }}
+        onNodeContextMenu={handleNodeContextMenu}
         onPaneClick={handlePaneClick}
         nodeTypes={nodeTypes}
         panOnDrag
@@ -324,6 +381,25 @@ export function WorkflowCanvas({
           commands={commands}
         />
       )}
+
+      {contextMenu && (
+        <div
+          ref={contextMenuRef}
+          className="fixed z-50 min-w-[140px] rounded-md border border-border bg-surface-elevated py-1 shadow-md"
+          style={{ left: contextMenu.x, top: contextMenu.y }}
+        >
+          <button
+            type="button"
+            onClick={(): void => {
+              onNodeDelete(contextMenu.nodeId);
+              setContextMenu(null);
+            }}
+            className="w-full px-3 py-1.5 text-left text-xs text-error hover:bg-surface"
+          >
+            Delete node
+          </button>
+        </div>
+      )}
     </div>
   );
 }
diff --git a/packages/web/src/hooks/useBuilderKeyboard.test.ts b/packages/web/src/hooks/useBuilderKeyboard.test.ts
new file mode 100644
index 0000000000..8239741657
--- /dev/null
+++ b/packages/web/src/hooks/useBuilderKeyboard.test.ts
@@ -0,0 +1,136 @@
+import { describe, test, expect, mock, beforeEach } from 'bun:test';
+import {
+  handleBuilderKeydown,
+  isInputTarget,
+  type BuilderKeyboardActions,
+} from './useBuilderKeyboard';
+
+function makeActions(): BuilderKeyboardActions & {
+  calls: Record<string, number>;
+} {
+  const calls: Record<string, number> = {};
+  const bump = (name: string): (() => void) => {
+    return (): void => {
+      calls[name] = (calls[name] ?? 0) + 1;
+    };
+  };
+  return {
+    calls,
+    onSave: bump('onSave'),
+    onUndo: bump('onUndo'),
+    onRedo: bump('onRedo'),
+    onToggleLibrary: bump('onToggleLibrary'),
+    onToggleYaml: bump('onToggleYaml'),
+    onToggleValidation: bump('onToggleValidation'),
+    onAddPrompt: bump('onAddPrompt'),
+    onAddBash: bump('onAddBash'),
+    onDeleteSelected: bump('onDeleteSelected'),
+    onDuplicateSelected: bump('onDuplicateSelected'),
+    onQuickAdd: bump('onQuickAdd'),
+    onFitView: bump('onFitView'),
+    onSelectAll: bump('onSelectAll'),
+  };
+}
+
+function makeEvent(
+  key: string,
+  target: { tagName?: string; isContentEditable?: boolean; role?: string } | null
+): KeyboardEvent {
+  const el =
+    target === null
+      ? null
+      : ({
+          tagName: target.tagName ?? 'DIV',
+          isContentEditable: target.isContentEditable ?? false,
+          getAttribute: (name: string): string | null =>
+            name === 'role' ? (target.role ?? null) : null,
+        } as unknown as HTMLElement);
+  return {
+    key,
+    target: el,
+    metaKey: false,
+    ctrlKey: false,
+    shiftKey: false,
+    preventDefault: mock(() => {}),
+  } as unknown as KeyboardEvent;
+}
+
+describe('isInputTarget', () => {
+  test('returns true for INPUT, TEXTAREA, SELECT', () => {
+    expect(isInputTarget(makeEvent('a', { tagName: 'INPUT' }))).toBe(true);
+    expect(isInputTarget(makeEvent('a', { tagName: 'TEXTAREA' }))).toBe(true);
+    expect(isInputTarget(makeEvent('a', { tagName: 'SELECT' }))).toBe(true);
+  });
+
+  test('returns true for contentEditable elements', () => {
+    expect(isInputTarget(makeEvent('a', { tagName: 'DIV', isContentEditable: true }))).toBe(true);
+  });
+
+  test('returns true for ARIA editable roles (combobox, textbox, searchbox)', () => {
+    expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'combobox' }))).toBe(true);
+    expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'textbox' }))).toBe(true);
+    expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'searchbox' }))).toBe(true);
+  });
+
+  test('returns false for regular elements without editable role', () => {
+    expect(isInputTarget(makeEvent('a', { tagName: 'DIV' }))).toBe(false);
+    expect(isInputTarget(makeEvent('a', { tagName: 'BUTTON' }))).toBe(false);
+    expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'menu' }))).toBe(false);
+  });
+
+  test('returns false when target is null', () => {
+    expect(isInputTarget(makeEvent('a', null))).toBe(false);
+  });
+});
+
+describe('handleBuilderKeydown — delete invariant', () => {
+  let actions: ReturnType<typeof makeActions>;
+
+  beforeEach(() => {
+    actions = makeActions();
+  });
+
+  test('Delete key on canvas triggers onDeleteSelected', () => {
+    handleBuilderKeydown(makeEvent('Delete', { tagName: 'DIV' }), actions);
+    expect(actions.calls.onDeleteSelected).toBe(1);
+  });
+
+  test('Backspace key on canvas triggers onDeleteSelected', () => {
+    handleBuilderKeydown(makeEvent('Backspace', { tagName: 'DIV' }), actions);
+    expect(actions.calls.onDeleteSelected).toBe(1);
+  });
+
+  test('Backspace in INPUT does NOT trigger onDeleteSelected', () => {
+    handleBuilderKeydown(makeEvent('Backspace', { tagName: 'INPUT' }), actions);
+    expect(actions.calls.onDeleteSelected).toBeUndefined();
+  });
+
+  test('Backspace in TEXTAREA does NOT trigger onDeleteSelected', () => {
+    handleBuilderKeydown(makeEvent('Backspace', { tagName: 'TEXTAREA' }), actions);
+    expect(actions.calls.onDeleteSelected).toBeUndefined();
+  });
+
+  test('Backspace in contentEditable does NOT trigger onDeleteSelected', () => {
+    handleBuilderKeydown(
+      makeEvent('Backspace', { tagName: 'DIV', isContentEditable: true }),
+      actions
+    );
+    expect(actions.calls.onDeleteSelected).toBeUndefined();
+  });
+
+  test('Backspace in ARIA combobox does NOT trigger onDeleteSelected', () => {
+    handleBuilderKeydown(makeEvent('Backspace', { tagName: 'DIV', role: 'combobox' }), actions);
+    expect(actions.calls.onDeleteSelected).toBeUndefined();
+  });
+
+  test('Delete in ARIA textbox does NOT trigger onDeleteSelected', () => {
+    handleBuilderKeydown(makeEvent('Delete', { tagName: 'DIV', role: 'textbox' }), actions);
+    expect(actions.calls.onDeleteSelected).toBeUndefined();
+  });
+
+  test('enabled=false suppresses all shortcuts', () => {
+    handleBuilderKeydown(makeEvent('Delete', { tagName: 'DIV' }), actions, false);
+    handleBuilderKeydown(makeEvent('Backspace', { tagName: 'DIV' }), actions, false);
+    expect(actions.calls.onDeleteSelected).toBeUndefined();
+  });
+});
diff --git a/packages/web/src/hooks/useBuilderKeyboard.ts b/packages/web/src/hooks/useBuilderKeyboard.ts
index 192f29bd2b..89343331bd 100644
--- a/packages/web/src/hooks/useBuilderKeyboard.ts
+++ b/packages/web/src/hooks/useBuilderKeyboard.ts
@@ -1,6 +1,6 @@
 import { useEffect, useCallback } from 'react';
 
-interface BuilderKeyboardActions {
+export interface BuilderKeyboardActions {
   onSave: () => void;
   onUndo: () => void;
   onRedo: () => void;
@@ -16,97 +16,113 @@ interface BuilderKeyboardActions {
   onSelectAll?: () => void;
 }
 
-function isInputTarget(e: KeyboardEvent): boolean {
-  const tag = (e.target as HTMLElement).tagName;
-  return (
-    tag === 'INPUT' ||
-    tag === 'TEXTAREA' ||
-    tag === 'SELECT' ||
-    (e.target as HTMLElement).isContentEditable
-  );
+const EDITABLE_ARIA_ROLES = new Set(['combobox', 'textbox', 'searchbox']);
+
+export function isInputTarget(e: KeyboardEvent): boolean {
+  const target = e.target as HTMLElement | null;
+  if (!target) return false;
+  const tag = target.tagName;
+  if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT') return true;
+  if (target.isContentEditable) return true;
+  const role = target.getAttribute?.('role');
+  if (role && EDITABLE_ARIA_ROLES.has(role)) return true;
+  return false;
 }
 
-export function useBuilderKeyboard(actions: BuilderKeyboardActions, enabled = true): void {
-  const handleKeyDown = useCallback(
-    (e: KeyboardEvent) => {
-      if (!enabled) return;
+export function handleBuilderKeydown(
+  e: KeyboardEvent,
+  actions: BuilderKeyboardActions,
+  enabled = true
+): void {
+  if (!enabled) return;
 
-      const mod = e.metaKey || e.ctrlKey;
-      const inInput = isInputTarget(e);
+  const mod = e.metaKey || e.ctrlKey;
+  const inInput = isInputTarget(e);
 
-      // --- Always-active shortcuts (even in inputs) ---
-      if (mod) {
-        if (e.key === 's') {
-          e.preventDefault();
-          actions.onSave();
-          return;
-        }
-        if (e.key === 'z' && e.shiftKey) {
-          e.preventDefault();
-          actions.onRedo();
-          return;
-        }
-        if (e.key === 'z') {
-          e.preventDefault();
-          actions.onUndo();
-          return;
-        }
-        if (e.key === '\\') {
-          e.preventDefault();
-          actions.onToggleLibrary();
-          return;
-        }
-        if (e.key === 'j') {
-          e.preventDefault();
-          actions.onToggleYaml();
-          return;
-        }
-        if (e.key === '.') {
-          e.preventDefault();
-          actions.onToggleValidation();
-          return;
-        }
-      }
+  // --- Always-active shortcuts (even in inputs) ---
+  if (mod) {
+    if (e.key === 's') {
+      e.preventDefault();
+      actions.onSave();
+      return;
+    }
+    if (e.key === 'z' && e.shiftKey) {
+      e.preventDefault();
+      actions.onRedo();
+      return;
+    }
+    if (e.key === 'z') {
+      e.preventDefault();
+      actions.onUndo();
+      return;
+    }
+    if (e.key === '\\') {
+      e.preventDefault();
+      actions.onToggleLibrary();
+      return;
+    }
+    if (e.key === 'j') {
+      e.preventDefault();
+      actions.onToggleYaml();
+      return;
+    }
+    if (e.key === '.') {
+      e.preventDefault();
+      actions.onToggleValidation();
+      return;
+    }
+  }
 
-      // --- Only when NOT in input/textarea ---
-      if (inInput) return;
+  // --- Only when NOT in an editable target (see isInputTarget) ---
+  if (inInput) return;
 
-      if (mod) {
-        if (e.key === 'd') {
-          e.preventDefault();
-          actions.onDuplicateSelected();
-          return;
-        }
-        if (e.key === '0') {
-          e.preventDefault();
-          actions.onFitView?.();
-          return;
-        }
-        if (e.key === 'a') {
-          e.preventDefault();
-          actions.onSelectAll?.();
-          return;
-        }
-      }
+  if (mod) {
+    if (e.key === 'd') {
+      e.preventDefault();
+      actions.onDuplicateSelected();
+      return;
+    }
+    if (e.key === '0') {
+      e.preventDefault();
+      actions.onFitView?.();
+      return;
+    }
+    if (e.key === 'a') {
+      e.preventDefault();
+      actions.onSelectAll?.();
+      return;
+    }
+  }
 
-      // Single-key shortcuts
-      switch (e.key) {
-        case 'n':
-          actions.onQuickAdd?.();
-          break;
-        case 'p':
-          actions.onAddPrompt();
-          break;
-        case 'b':
-          actions.onAddBash();
-          break;
-        case 'Delete':
-          actions.onDeleteSelected();
-          break;
-        case 'f':
-          actions.onFitView?.();
-          break;
-      }
+  // Single-key shortcuts
+  switch (e.key) {
+    case 'n':
+      actions.onQuickAdd?.();
+      break;
+    case 'p':
+      actions.onAddPrompt();
+      break;
+    case 'b':
+      actions.onAddBash();
+      break;
+    case 'Delete':
+    case 'Backspace':
+      // Backspace is the natural delete key on macOS keyboards, which lack
+      // a dedicated Delete key. The isInputTarget() guard above prevents
+      // this from interfering with text fields.
+      e.preventDefault();
+      actions.onDeleteSelected();
+      break;
+    case 'f':
+      actions.onFitView?.();
+      break;
+  }
+}
+
+export function useBuilderKeyboard(actions: BuilderKeyboardActions, enabled = true): void {
+  const handleKeyDown = useCallback(
+    (e: KeyboardEvent) => {
+      handleBuilderKeydown(e, actions, enabled);
     },
     [actions, enabled]
   );

From 883258df772f75c550888b42086fce9cf086c069 Mon Sep 17 00:00:00 2001
From: CauchYoung <2024302072042@whu.edu.cn>
Date: Wed, 22 Apr 2026 15:18:27 +0800
Subject: [PATCH 08/14] fix(workflows): make archon-adversarial-dev sed
 replacement macOS-safe (#1155)

* fix(workflows): make adversarial init sed portable on macOS

* chore: regenerate bundled-defaults after adversarial-dev sed fix

Sync generated bundle with the new temp-file sed pattern in
archon-adversarial-dev.yaml so check:bundled passes and binary
distributions ship the macOS-safe version.

---------

Co-authored-by: laplace young <yangqk12@whu.edu.cn>
Co-authored-by: Rasmus Widing <rasmus.widing@gmail.com>
(cherry picked from commit 817186d446ed5e01cd13d393abfa734ef5ac730f)
---
 .archon/workflows/defaults/archon-adversarial-dev.yaml   | 4 +++-
 .../workflows/src/defaults/bundled-defaults.generated.ts | 2 +-
 packages/workflows/src/defaults/bundled-defaults.test.ts | 9 +++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/.archon/workflows/defaults/archon-adversarial-dev.yaml b/.archon/workflows/defaults/archon-adversarial-dev.yaml
index 2ab207dc03..68722c8b1a 100644
--- a/.archon/workflows/defaults/archon-adversarial-dev.yaml
+++ b/.archon/workflows/defaults/archon-adversarial-dev.yaml
@@ -101,7 +101,9 @@ nodes:
         "status": "running"
       }
       STATEEOF
-      sed -i "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json"
+      STATE_TMP="$ARTIFACTS/state.json.tmp"
+      sed "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json" > "$STATE_TMP"
+      mv "$STATE_TMP" "$ARTIFACTS/state.json"
 
       echo "{\"totalSprints\": $SPRINT_COUNT, \"appDir\": \"$ARTIFACTS/app\", \"artifactsDir\": \"$ARTIFACTS\"}"
     timeout: 30000
diff --git a/packages/workflows/src/defaults/bundled-defaults.generated.ts b/packages/workflows/src/defaults/bundled-defaults.generated.ts
index 79f7059f0a..c214874c3f 100644
--- a/packages/workflows/src/defaults/bundled-defaults.generated.ts
+++ b/packages/workflows/src/defaults/bundled-defaults.generated.ts
@@ -55,7 +55,7 @@ export const BUNDLED_COMMANDS: Record<string, string> = {
 
 // Bundled default workflows (22 total)
 export const BUNDLED_WORKFLOWS: Record<string, string> = {
-  "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n  Use when: User wants to build a complete application from scratch using adversarial development.\n  Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n            \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n  Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n        loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n        thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n        below 7/10, the sprint goes back to the generator with adversarial feedback. Stops on sprint\n        failure after max retries.\n  NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n  Based on Anthropic's harness design article for long-running application development.\n  Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n  # ─── Phase 1: Planning ───────────────────────────────────────────────\n  - id: plan\n    prompt: |\n      You are a product planning expert. Your job is to take a short user prompt and expand it\n      into a comprehensive product specification.\n\n      ## User Request\n\n      $ARGUMENTS\n\n      ## Your Task\n\n      Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n      The spec MUST include ALL of the following sections:\n\n      ### 1. Product Overview\n      What the product does, who it's for, core value proposition.\n\n      ### 2. Tech Stack\n      Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n      not \"a modern framework.\" Include exact package names and versions where relevant.\n\n      ### 3. 
Design Language\n      Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n      ### 4. Feature List\n      Every feature organized by priority. Be exhaustive.\n\n      ### 5. Sprint Plan\n      Features broken into 3-6 sprints, ordered by dependency and importance:\n      - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n      - Each subsequent sprint builds on the previous\n      - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n      - List the specific features/deliverables for each sprint\n\n      Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n      named libraries), the better the generator can build and the evaluator can test.\n\n      IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. Do NOT just output\n      it as conversation text.\n    allowed_tools: [Read, Write, Glob, Grep]\n\n  # ─── Phase 2: Workspace Initialization ───────────────────────────────\n  - id: init-workspace\n    depends_on: [plan]\n    bash: |\n      ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n      # Create directory structure for harness communication\n      mkdir -p \"$ARTIFACTS/contracts\"\n      mkdir -p \"$ARTIFACTS/feedback\"\n      mkdir -p \"$ARTIFACTS/app\"\n\n      # Initialize isolated git repo in app directory\n      cd \"$ARTIFACTS/app\"\n      git init -q\n      git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n      # Extract sprint count from spec (find highest \"Sprint N\" reference)\n      SPEC=\"$ARTIFACTS/spec.md\"\n      SPRINT_COUNT=3\n      if [ -f \"$SPEC\" ]; then\n        FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n        if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n          SPRINT_COUNT=$FOUND\n        fi\n        if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n          
SPRINT_COUNT=10\n        fi\n      fi\n\n      # Write initial state machine file\n      cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n      {\n        \"phase\": \"negotiating\",\n        \"sprint\": 1,\n        \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n        \"retry\": 0,\n        \"maxRetries\": 3,\n        \"passThreshold\": 7,\n        \"completedSprints\": [],\n        \"status\": \"running\"\n      }\n      STATEEOF\n      sed -i \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\"\n\n      echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n    timeout: 30000\n\n  # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n  #\n  # State machine driven by $ARTIFACTS_DIR/state.json\n  # Each iteration plays ONE role: negotiator, generator, or evaluator\n  # fresh_context ensures genuine separation between roles\n  #\n  - id: adversarial-sprint\n    depends_on: [init-workspace]\n    idle_timeout: 600000\n    model: claude-opus-4-6[1m]\n    loop:\n      prompt: |\n        # Adversarial Development — Sprint Loop\n\n        You are part of a GAN-inspired adversarial development system with three distinct roles.\n        Each iteration you play ONE role, determined by the current phase in the state file.\n\n        ## FIRST: Read State\n\n        Read `$ARTIFACTS_DIR/state.json` to determine:\n        - `phase` — which role you play this iteration\n        - `sprint` — current sprint number\n        - `totalSprints` — how many sprints total\n        - `retry` — current retry attempt (0 = first try)\n        - `maxRetries` — max retries before hard failure (default 3)\n        - `passThreshold` — minimum score to pass (default 7)\n\n        Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n        ## Directory Layout\n\n        - App source code: `$ARTIFACTS_DIR/app/`\n        - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n  
      - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n        - State machine: `$ARTIFACTS_DIR/state.json`\n\n        ---\n\n        ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n        You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n        **Step 1 — Generator's Proposal:**\n        Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n        Propose a sprint contract with 5-15 specific, testable criteria.\n\n        Each criterion MUST be concrete and verifiable. Examples:\n        - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n        - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n        - BAD: \"The API works well\"\n        - BAD: \"Tasks can be managed\"\n\n        **Step 2 — Evaluator's Tightening:**\n        Now review your proposal as an adversary. For EACH criterion ask:\n        - Is it specific enough to test programmatically?\n        - What edge cases are missing? (empty inputs, special characters, concurrent requests)\n        - Is the bar high enough, or would sloppy code pass?\n\n        Tighten vague criteria. Add edge cases. Raise the bar.\n\n        **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n        ```json\n        {\n          \"sprintNumber\": <N>,\n          \"features\": [\"feature1\", \"feature2\", ...],\n          \"criteria\": [\n            {\n              \"name\": \"short-kebab-name\",\n              \"description\": \"Specific, testable description of what must be true\",\n              \"threshold\": 7\n            }\n          ]\n        }\n        ```\n\n        **Update state.json**: Set `\"phase\": \"building\"`. 
Keep all other fields unchanged.\n\n        ---\n\n        ## ROLE: GENERATOR (phase = \"building\")\n\n        You are a software engineer. Build features that MUST survive an adversarial evaluator\n        who will actively try to break your code.\n\n        **Read these files:**\n        1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n        2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n        3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n           evaluator's previous feedback\n\n        **If this is a RETRY (retry > 0):**\n        Read the feedback CAREFULLY. Every failed criterion must be addressed.\n        - If scores were close (5-6) and trending up: REFINE your approach\n        - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n        - Address EVERY feedback item — the evaluator WILL check\n        - Re-verify each fix by running the code before committing\n\n        **Build rules:**\n        - All code goes in `$ARTIFACTS_DIR/app/`\n        - Build ONE feature at a time, verify it works, then commit:\n          ```bash\n          cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n          ```\n        - Install dependencies as needed (npm/bun/pip/etc)\n        - Test your code — start the server, hit the endpoints, verify the UI renders\n        - Think about what the evaluator will attack: edge cases, error handling, input validation\n        - Build defensively — the evaluator's job is to break you\n\n        **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n        ---\n\n        ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n        You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n        You are not helpful. You are not generous. 
You are an attacker.\n\n        **CRITICAL CONSTRAINTS:**\n        - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n        - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n        - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n          Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n        - You MUST score EVERY criterion in the contract. No skipping.\n\n        **Scoring guidelines:**\n        - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n        - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n        - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n        - **3-4**: Weak. Barely functional. Major gaps.\n        - **1-2**: Broken. Does not work or is not implemented.\n\n        Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n        If something is broken, say it's broken.\n\n        **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n        **For each criterion:**\n        1. Read the relevant source code\n        2. Run the application (start server, test endpoints, check rendered UI)\n        3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n        4. Score it honestly\n\n        **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n        ```json\n        {\n          \"passed\": <true if ALL scores >= passThreshold, false otherwise>,\n          \"scores\": {\n            \"criterion-name\": <score>,\n            ...\n          },\n          \"feedback\": [\n            {\n              \"criterion\": \"criterion-name\",\n              \"score\": <1-10>,\n              \"details\": \"Specific findings. 
Include file paths, line numbers, exact error messages, curl commands that failed.\"\n            }\n          ],\n          \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n        }\n        ```\n\n        **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n        **Update state.json based on result:**\n\n        **If PASSED (all criteria >= threshold):**\n        - Add current sprint number to `completedSprints` array\n        - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n        - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n        **If FAILED:**\n        - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n        - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n        **IMPORTANT**: Kill all background processes before finishing:\n        ```bash\n        pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n        ```\n\n        ---\n\n        ## COMPLETION\n\n        After updating state.json, check the `status` field:\n        - If `\"status\": \"complete\"` → all sprints passed! Output: `<promise>ALL_SPRINTS_COMPLETE</promise>`\n        - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `<promise>ALL_SPRINTS_COMPLETE</promise>`\n        - If `\"status\": \"running\"` → more work to do. 
Do NOT output any completion signal.\n\n      until: ALL_SPRINTS_COMPLETE\n      max_iterations: 60\n      fresh_context: true\n      until_bash: |\n        grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n  # ─── Phase 4: Report ─────────────────────────────────────────────────\n  - id: report\n    depends_on: [adversarial-sprint]\n    trigger_rule: all_done\n    context: fresh\n    model: haiku\n    prompt: |\n      You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n      ## Read ALL of these files:\n      1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n      2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n      3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n      4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n      ## Generate a report covering:\n\n      ### Build Summary\n      - What application was built (from the spec)\n      - Final status: did all sprints pass or did it fail? 
On which sprint?\n      - Total sprints completed vs planned\n\n      ### Per-Sprint Breakdown\n      For each sprint that was attempted:\n      - What the contract required (features + key criteria)\n      - How many attempts were needed (retry count)\n      - Final scores for each criterion\n      - Key feedback that drove retries and improvements\n\n      ### Quality Metrics\n      - Average score across all final-round criteria\n      - Which criteria required the most retries\n      - Where the adversarial evaluator pushed quality the highest\n\n      ### How to Run\n      - The application code lives in: `$ARTIFACTS_DIR/app/`\n      - Include the tech stack and how to start the app (from the spec)\n      - Include any setup steps (install deps, env vars, etc.)\n\n      Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n      sees it directly.\n    allowed_tools: [Read, Write, Glob, Grep]\n",
+  "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n  Use when: User wants to build a complete application from scratch using adversarial development.\n  Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n            \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n  Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n        loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n        thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n        below 7/10, the sprint goes back to the generator with adversarial feedback. Stops on sprint\n        failure after max retries.\n  NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n  Based on Anthropic's harness design article for long-running application development.\n  Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n  # ─── Phase 1: Planning ───────────────────────────────────────────────\n  - id: plan\n    prompt: |\n      You are a product planning expert. Your job is to take a short user prompt and expand it\n      into a comprehensive product specification.\n\n      ## User Request\n\n      $ARGUMENTS\n\n      ## Your Task\n\n      Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n      The spec MUST include ALL of the following sections:\n\n      ### 1. Product Overview\n      What the product does, who it's for, core value proposition.\n\n      ### 2. Tech Stack\n      Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n      not \"a modern framework.\" Include exact package names and versions where relevant.\n\n      ### 3. 
Design Language\n      Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n      ### 4. Feature List\n      Every feature organized by priority. Be exhaustive.\n\n      ### 5. Sprint Plan\n      Features broken into 3-6 sprints, ordered by dependency and importance:\n      - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n      - Each subsequent sprint builds on the previous\n      - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n      - List the specific features/deliverables for each sprint\n\n      Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n      named libraries), the better the generator can build and the evaluator can test.\n\n      IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. Do NOT just output\n      it as conversation text.\n    allowed_tools: [Read, Write, Glob, Grep]\n\n  # ─── Phase 2: Workspace Initialization ───────────────────────────────\n  - id: init-workspace\n    depends_on: [plan]\n    bash: |\n      ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n      # Create directory structure for harness communication\n      mkdir -p \"$ARTIFACTS/contracts\"\n      mkdir -p \"$ARTIFACTS/feedback\"\n      mkdir -p \"$ARTIFACTS/app\"\n\n      # Initialize isolated git repo in app directory\n      cd \"$ARTIFACTS/app\"\n      git init -q\n      git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n      # Extract sprint count from spec (find highest \"Sprint N\" reference)\n      SPEC=\"$ARTIFACTS/spec.md\"\n      SPRINT_COUNT=3\n      if [ -f \"$SPEC\" ]; then\n        FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n        if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n          SPRINT_COUNT=$FOUND\n        fi\n        if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n          
SPRINT_COUNT=10\n        fi\n      fi\n\n      # Write initial state machine file\n      cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n      {\n        \"phase\": \"negotiating\",\n        \"sprint\": 1,\n        \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n        \"retry\": 0,\n        \"maxRetries\": 3,\n        \"passThreshold\": 7,\n        \"completedSprints\": [],\n        \"status\": \"running\"\n      }\n      STATEEOF\n      STATE_TMP=\"$ARTIFACTS/state.json.tmp\"\n      sed \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\" > \"$STATE_TMP\"\n      mv \"$STATE_TMP\" \"$ARTIFACTS/state.json\"\n\n      echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n    timeout: 30000\n\n  # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n  #\n  # State machine driven by $ARTIFACTS_DIR/state.json\n  # Each iteration plays ONE role: negotiator, generator, or evaluator\n  # fresh_context ensures genuine separation between roles\n  #\n  - id: adversarial-sprint\n    depends_on: [init-workspace]\n    idle_timeout: 600000\n    model: claude-opus-4-6[1m]\n    loop:\n      prompt: |\n        # Adversarial Development — Sprint Loop\n\n        You are part of a GAN-inspired adversarial development system with three distinct roles.\n        Each iteration you play ONE role, determined by the current phase in the state file.\n\n        ## FIRST: Read State\n\n        Read `$ARTIFACTS_DIR/state.json` to determine:\n        - `phase` — which role you play this iteration\n        - `sprint` — current sprint number\n        - `totalSprints` — how many sprints total\n        - `retry` — current retry attempt (0 = first try)\n        - `maxRetries` — max retries before hard failure (default 3)\n        - `passThreshold` — minimum score to pass (default 7)\n\n        Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n        ## Directory Layout\n\n        - 
App source code: `$ARTIFACTS_DIR/app/`\n        - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n        - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n        - State machine: `$ARTIFACTS_DIR/state.json`\n\n        ---\n\n        ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n        You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n        **Step 1 — Generator's Proposal:**\n        Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n        Propose a sprint contract with 5-15 specific, testable criteria.\n\n        Each criterion MUST be concrete and verifiable. Examples:\n        - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n        - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n        - BAD: \"The API works well\"\n        - BAD: \"Tasks can be managed\"\n\n        **Step 2 — Evaluator's Tightening:**\n        Now review your proposal as an adversary. For EACH criterion ask:\n        - Is it specific enough to test programmatically?\n        - What edge cases are missing? (empty inputs, special characters, concurrent requests)\n        - Is the bar high enough, or would sloppy code pass?\n\n        Tighten vague criteria. Add edge cases. 
Raise the bar.\n\n        **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n        ```json\n        {\n          \"sprintNumber\": <N>,\n          \"features\": [\"feature1\", \"feature2\", ...],\n          \"criteria\": [\n            {\n              \"name\": \"short-kebab-name\",\n              \"description\": \"Specific, testable description of what must be true\",\n              \"threshold\": 7\n            }\n          ]\n        }\n        ```\n\n        **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n        ---\n\n        ## ROLE: GENERATOR (phase = \"building\")\n\n        You are a software engineer. Build features that MUST survive an adversarial evaluator\n        who will actively try to break your code.\n\n        **Read these files:**\n        1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n        2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n        3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n           evaluator's previous feedback\n\n        **If this is a RETRY (retry > 0):**\n        Read the feedback CAREFULLY. 
Every failed criterion must be addressed.\n        - If scores were close (5-6) and trending up: REFINE your approach\n        - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n        - Address EVERY feedback item — the evaluator WILL check\n        - Re-verify each fix by running the code before committing\n\n        **Build rules:**\n        - All code goes in `$ARTIFACTS_DIR/app/`\n        - Build ONE feature at a time, verify it works, then commit:\n          ```bash\n          cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n          ```\n        - Install dependencies as needed (npm/bun/pip/etc)\n        - Test your code — start the server, hit the endpoints, verify the UI renders\n        - Think about what the evaluator will attack: edge cases, error handling, input validation\n        - Build defensively — the evaluator's job is to break you\n\n        **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n        ---\n\n        ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n        You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n        You are not helpful. You are not generous. You are an attacker.\n\n        **CRITICAL CONSTRAINTS:**\n        - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n        - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n        - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n          Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n        - You MUST score EVERY criterion in the contract. No skipping.\n\n        **Scoring guidelines:**\n        - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n        - **7-8**: Solid. Meets the criterion as stated. 
Minor polish issues at most.\n        - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n        - **3-4**: Weak. Barely functional. Major gaps.\n        - **1-2**: Broken. Does not work or is not implemented.\n\n        Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n        If something is broken, say it's broken.\n\n        **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n        **For each criterion:**\n        1. Read the relevant source code\n        2. Run the application (start server, test endpoints, check rendered UI)\n        3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n        4. Score it honestly\n\n        **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n        ```json\n        {\n          \"passed\": <true if ALL scores >= passThreshold, false otherwise>,\n          \"scores\": {\n            \"criterion-name\": <score>,\n            ...\n          },\n          \"feedback\": [\n            {\n              \"criterion\": \"criterion-name\",\n              \"score\": <1-10>,\n              \"details\": \"Specific findings. 
Include file paths, line numbers, exact error messages, curl commands that failed.\"\n            }\n          ],\n          \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n        }\n        ```\n\n        **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n        **Update state.json based on result:**\n\n        **If PASSED (all criteria >= threshold):**\n        - Add current sprint number to `completedSprints` array\n        - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n        - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n        **If FAILED:**\n        - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n        - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n        **IMPORTANT**: Kill all background processes before finishing:\n        ```bash\n        pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n        ```\n\n        ---\n\n        ## COMPLETION\n\n        After updating state.json, check the `status` field:\n        - If `\"status\": \"complete\"` → all sprints passed! Output: `<promise>ALL_SPRINTS_COMPLETE</promise>`\n        - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `<promise>ALL_SPRINTS_COMPLETE</promise>`\n        - If `\"status\": \"running\"` → more work to do. 
Do NOT output any completion signal.\n\n      until: ALL_SPRINTS_COMPLETE\n      max_iterations: 60\n      fresh_context: true\n      until_bash: |\n        grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n  # ─── Phase 4: Report ─────────────────────────────────────────────────\n  - id: report\n    depends_on: [adversarial-sprint]\n    trigger_rule: all_done\n    context: fresh\n    model: haiku\n    prompt: |\n      You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n      ## Read ALL of these files:\n      1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n      2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n      3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n      4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n      ## Generate a report covering:\n\n      ### Build Summary\n      - What application was built (from the spec)\n      - Final status: did all sprints pass or did it fail? 
On which sprint?\n      - Total sprints completed vs planned\n\n      ### Per-Sprint Breakdown\n      For each sprint that was attempted:\n      - What the contract required (features + key criteria)\n      - How many attempts were needed (retry count)\n      - Final scores for each criterion\n      - Key feedback that drove retries and improvements\n\n      ### Quality Metrics\n      - Average score across all final-round criteria\n      - Which criteria required the most retries\n      - Where the adversarial evaluator pushed quality the highest\n\n      ### How to Run\n      - The application code lives in: `$ARTIFACTS_DIR/app/`\n      - Include the tech stack and how to start the app (from the spec)\n      - Include any setup steps (install deps, env vars, etc.)\n\n      Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n      sees it directly.\n    allowed_tools: [Read, Write, Glob, Grep]\n",
   "archon-architect": "name: archon-architect\ndescription: |\n  Use when: User wants an architectural sweep, complexity reduction, or codebase health improvement.\n  Triggers: \"architect\", \"simplify codebase\", \"reduce complexity\", \"architectural sweep\",\n            \"clean up architecture\", \"codebase health\", \"fix architecture\".\n  Does: Scans codebase metrics -> analyzes architecture with principled lens -> plans targeted\n        simplifications -> executes fixes with self-review loops (hooks) -> validates -> creates PR.\n  NOT for: Single-file fixes, feature development, bug fixes, PR reviews.\n\n  DAG workflow showcasing per-node hooks:\n  - PostToolUse hooks create organic quality loops (lint after write, self-review)\n  - PreToolUse hooks inject architectural principles before changes\n  - Different nodes have different trust levels and steering\n\nprovider: claude\n\nnodes:\n  # ═══════════════════════════════════════════════════════════════\n  # PHASE 1: MEASURE\n  # Gather raw metrics — file sizes, complexity hotspots, dependency fan-out\n  # ═══════════════════════════════════════════════════════════════\n\n  - id: scan-metrics\n    bash: |\n      echo \"=== FILE SIZE HOTSPOTS (top 30 largest source files) ===\"\n      find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' \\\n        -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n\n      echo \"\"\n      echo \"=== IMPORT FAN-OUT (files with most imports) ===\"\n      for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n        count=$(grep -c \"^import \" \"$f\" 2>/dev/null) || count=0\n        if [ \"$count\" -gt 8 ]; then\n          echo \"$count imports: $f\"\n        fi\n      done | sort -rn | head -20\n\n      echo \"\"\n      echo \"=== EXPORT FAN-OUT (files with most exports) ===\"\n      for f in $(find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n        count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n        if [ \"$count\" -gt 5 ]; then\n          echo \"$count exports: $f\"\n        fi\n      done | sort -rn | head -20\n\n      echo \"\"\n      echo \"=== FUNCTION LENGTH HOTSPOTS (functions over 50 lines) ===\"\n      grep -rn \"^\\(export \\)\\?\\(async \\)\\?function \\|=> {$\" \\\n        --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null \\\n        | head -30\n\n      echo \"\"\n      echo \"=== TYPE SAFETY GAPS ===\"\n      echo \"any usage:\"\n      grep -rn \": any\\b\\|as any\\b\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n      echo \"eslint-disable comments:\"\n      grep -rn \"eslint-disable\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n    timeout: 60000\n\n  # ═══════════════════════════════════════════════════════════════\n  # PHASE 2: ANALYZE\n  # Read through hotspots with an architectural lens\n  # Hooks inject assessment criteria after every file read\n  # ═══════════════════════════════════════════════════════════════\n\n  - id: analyze\n    prompt: |\n      You are a senior software architect performing a codebase health assessment.\n\n      ## Codebase Metrics\n\n      $scan-metrics.output\n\n      ## User Focus\n\n      $ARGUMENTS\n\n      ## Instructions\n\n      1. Read the top 10-15 files flagged by the metrics above (largest, most imports, most exports)\n      2. For each file, assess the criteria injected after you read it (you'll see them)\n      3. Build a running list of architectural concerns\n      4. 
Focus on:\n         - Modules doing too many things (SRP violations)\n         - Abstractions that don't earn their complexity\n         - Duplicated patterns that should be consolidated (Rule of Three)\n         - God files or god functions\n         - Leaky abstractions or tight coupling between layers\n         - Dead code or unused exports\n      5. Do NOT suggest changes yet — only diagnose\n\n      ## Output\n\n      Write a structured assessment to $ARTIFACTS_DIR/architecture-assessment.md with:\n      - Executive summary (3-5 sentences)\n      - Top findings ranked by impact\n      - For each finding: file, what's wrong, why it matters, estimated effort\n    depends_on: [scan-metrics]\n    context: fresh\n    denied_tools: [Write, Edit, Bash]\n    hooks:\n      PostToolUse:\n        - matcher: \"Read\"\n          response:\n            hookSpecificOutput:\n              hookEventName: PostToolUse\n              additionalContext: >\n                For the file you just read, assess:\n                (1) Single responsibility — does this module do exactly one thing?\n                (2) Cognitive load — could a new team member understand this in 5 minutes?\n                (3) Abstraction value — does every abstraction earn its complexity, or is it premature?\n                (4) Dependency direction — does this file depend on things at its own level or below, not above?\n                Add any concerns to your running list. 
Be specific — cite line ranges and function names.\n\n  # ═══════════════════════════════════════════════════════════════\n  # PHASE 3: PLAN\n  # Prioritize and scope the changes — pure reasoning, no tools\n  # ═══════════════════════════════════════════════════════════════\n\n  - id: plan\n    prompt: |\n      You are planning targeted architectural improvements.\n\n      ## Assessment\n\n      $analyze.output\n\n      ## Principles\n\n      - KISS: prefer straightforward over clever\n      - YAGNI: remove speculative abstractions\n      - Rule of Three: only extract when a pattern appears 3+ times\n      - Each change must be independently revertable\n      - Do NOT mix refactoring with behavior changes\n      - Scope to what can be done safely in one pass (max 5-7 files)\n\n      ## Instructions\n\n      1. From the assessment, select the top 3-5 highest-impact, lowest-risk improvements\n      2. For each, write a precise plan: which file, what to change, why\n      3. Order them so each change is independent (no cascading dependencies between changes)\n      4. Estimate blast radius — how many other files are affected\n\n      ## Output\n\n      Write the plan as a numbered list. 
Be specific about exactly what code to change.\n      Keep it concise — the implement node will follow this literally.\n    depends_on: [analyze]\n    allowed_tools: [Read]\n    context: fresh\n\n  # ═══════════════════════════════════════════════════════════════\n  # PHASE 4: EXECUTE\n  # Make the changes with hooks creating quality feedback loops\n  # ═══════════════════════════════════════════════════════════════\n\n  - id: simplify\n    prompt: |\n      You are implementing targeted architectural simplifications.\n\n      ## Plan\n\n      $plan.output\n\n      ## Rules\n\n      - Follow the plan exactly — do not add extra improvements you notice along the way\n      - Each change must preserve existing behavior (refactor only, no feature changes)\n      - After each file edit, you'll be prompted to validate — follow those instructions\n      - If a change turns out to be harder than expected, skip it and move on\n      - Commit each logical change separately with a clear commit message\n\n      ## Instructions\n\n      1. Work through the plan items in order\n      2. For each item: read the file, make the change, follow the post-edit checklist\n      3. After all changes, do a final `git diff --stat` to verify scope\n    depends_on: [plan]\n    context: fresh\n    hooks:\n      PreToolUse:\n        - matcher: \"Write|Edit\"\n          response:\n            hookSpecificOutput:\n              hookEventName: PreToolUse\n              additionalContext: >\n                Before writing: Is this file in your plan? If not, explain why you're\n                touching it. Check how many files import from this module — changes to\n                widely-imported modules need extra scrutiny.\n      PostToolUse:\n        - matcher: \"Write|Edit\"\n          response:\n            systemMessage: >\n              You just modified a file. Do these things NOW before moving on:\n              1. Run the type checker to verify your change compiles\n              2. 
Re-read the file you changed — is it ACTUALLY simpler, or did you just move complexity around?\n              3. State in ONE sentence why this change reduces complexity. If you cannot justify it, revert it.\n        - matcher: \"Read\"\n          response:\n            hookSpecificOutput:\n              hookEventName: PostToolUse\n              additionalContext: >\n                Before modifying this file, consider: will your change reduce or increase\n                the number of concepts a reader needs to hold in their head?\n        - matcher: \"Bash\"\n          response:\n            hookSpecificOutput:\n              hookEventName: PostToolUse\n              additionalContext: >\n                Check the exit code. If the command failed, diagnose the root cause\n                before attempting a fix. Do not blindly retry.\n\n  # ═══════════════════════════════════════════════════════════════\n  # PHASE 5: VALIDATE\n  # Run full validation suite — bash only, cannot edit to \"fix\" failures\n  # ═══════════════════════════════════════════════════════════════\n\n  - id: validate\n    bash: |\n      echo \"=== TYPE CHECK ===\"\n      bun run type-check 2>&1\n      TC_EXIT=$?\n\n      echo \"\"\n      echo \"=== LINT ===\"\n      bun run lint 2>&1\n      LINT_EXIT=$?\n\n      echo \"\"\n      echo \"=== TESTS ===\"\n      bun run test 2>&1\n      TEST_EXIT=$?\n\n      echo \"\"\n      echo \"=== RESULTS ===\"\n      echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n      echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n      echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n      # Always exit 0 so downstream nodes can read output and decide\n      if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n        echo \"VALIDATION_STATUS: PASS\"\n      else\n        echo \"VALIDATION_STATUS: FAIL\"\n      fi\n    depends_on: [simplify]\n    timeout: 300000\n\n  # 
═══════════════════════════════════════════════════════════════\n  # PHASE 6: FIX VALIDATION FAILURES (if any)\n  # Only runs if validate failed — focused fix with same quality hooks\n  # ═══════════════════════════════════════════════════════════════\n\n  - id: fix-failures\n    prompt: |\n      Review the validation output below.\n\n      ## Validation Output\n\n      $validate.output\n\n      ## Instructions\n\n      If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n      \"All checks passed — no fixes needed.\" and stop.\n\n      If there are failures:\n\n      1. Read the validation failures carefully\n      2. Fix ONLY what's broken — do not make additional improvements\n      3. If a fix requires changing behavior (not just fixing a type/lint error),\n         revert the original change instead\n      4. Run the specific failing check after each fix to confirm it passes\n      5. After all fixes, run the full validation suite: `bun run validate`\n    depends_on: [validate]\n    context: fresh\n    hooks:\n      PostToolUse:\n        - matcher: \"Write|Edit\"\n          response:\n            systemMessage: >\n              You just made a fix. Run the specific failing validation check NOW\n              to verify your fix works. Do not batch fixes — verify each one.\n      PreToolUse:\n        - matcher: \"Write|Edit\"\n          response:\n            hookSpecificOutput:\n              hookEventName: PreToolUse\n              additionalContext: >\n                You are fixing validation failures only. Do not make any changes\n                beyond what's needed to pass the failing checks. 
If in doubt, revert\n                the original change that caused the failure.\n\n  # ═══════════════════════════════════════════════════════════════\n  # PHASE 7: CREATE PR\n  # Hooks ensure this node only does git operations\n  # ═══════════════════════════════════════════════════════════════\n\n  - id: create-pr\n    prompt: |\n      Create a pull request for the architectural improvements.\n\n      ## Context\n\n      - Architecture assessment: $analyze.output\n      - Plan: $plan.output\n      - Validation: $validate.output\n\n      ## Instructions\n\n      1. Stage all changes and create a single commit (or verify existing commits)\n      2. Push the branch: `git push -u origin HEAD`\n      3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n      4. Create the PR with:\n         - Title: concise description of what was simplified (under 70 chars)\n         - Body: use the format below\n      5. Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n      ## PR Body Format\n\n      ```markdown\n      ## Architectural Sweep\n\n      **Focus**: $ARGUMENTS\n\n      ### Assessment\n\n      [3-5 sentence summary from the architecture assessment]\n\n      ### Changes\n\n      [For each change: what file, what was simplified, why]\n\n      ### Validation\n\n      - [x] Type check passes\n      - [x] Lint passes\n      - [x] Tests pass\n      - [x] Each change preserves existing behavior\n      ```\n    depends_on: [fix-failures]\n    context: fresh\n    hooks:\n      PreToolUse:\n        - matcher: \"Write|Edit\"\n          response:\n            hookSpecificOutput:\n              hookEventName: PreToolUse\n              permissionDecision: deny\n              permissionDecisionReason: \"PR creation node — do not modify source files. 
Use only git and gh commands.\"\n      PostToolUse:\n        - matcher: \"Bash\"\n          response:\n            hookSpecificOutput:\n              hookEventName: PostToolUse\n              additionalContext: >\n                Verify this command succeeded. If git push or gh pr create failed,\n                read the error message carefully before retrying.\n",
   "archon-assist": "name: archon-assist\ndescription: |\n  Use when: No other workflow matches the request.\n  Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help.\n  Capability: Full Claude Code agent with all tools available.\n  Note: Will inform user when assist mode is used for tracking.\n\nnodes:\n  - id: assist\n    command: archon-assist\n",
   "archon-comprehensive-pr-review": "name: archon-comprehensive-pr-review\ndescription: |\n  Use when: User wants a comprehensive code review of a pull request with automatic fixes.\n  Triggers: \"review this PR\", \"review PR #123\", \"comprehensive review\", \"full PR review\",\n            \"review and fix\", \"check this PR\", \"code review\".\n  Does: Syncs PR with main (rebase if needed) -> runs 5 specialized review agents in parallel ->\n        synthesizes findings -> auto-fixes CRITICAL/HIGH issues -> reports remaining issues.\n  NOT for: Quick questions about a PR, checking CI status, simple \"what changed\" queries.\n\n  This workflow produces artifacts in $ARTIFACTS_DIR/../reviews/pr-{number}/ and posts\n  a comprehensive review comment to the GitHub PR.\n\nnodes:\n  - id: scope\n    command: archon-pr-review-scope\n\n  - id: sync\n    command: archon-sync-pr-with-main\n    depends_on: [scope]\n\n  - id: code-review\n    command: archon-code-review-agent\n    depends_on: [sync]\n\n  - id: error-handling\n    command: archon-error-handling-agent\n    depends_on: [sync]\n\n  - id: test-coverage\n    command: archon-test-coverage-agent\n    depends_on: [sync]\n\n  - id: comment-quality\n    command: archon-comment-quality-agent\n    depends_on: [sync]\n\n  - id: docs-impact\n    command: archon-docs-impact-agent\n    depends_on: [sync]\n\n  - id: synthesize\n    command: archon-synthesize-review\n    depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n    trigger_rule: one_success\n\n  - id: implement-fixes\n    command: archon-implement-review-fixes\n    depends_on: [synthesize]\n",
diff --git a/packages/workflows/src/defaults/bundled-defaults.test.ts b/packages/workflows/src/defaults/bundled-defaults.test.ts
index 1455b2ca0c..ef8887072d 100644
--- a/packages/workflows/src/defaults/bundled-defaults.test.ts
+++ b/packages/workflows/src/defaults/bundled-defaults.test.ts
@@ -101,6 +101,15 @@ describe('bundled-defaults', () => {
       expect(content).toContain('workflow_name');
     });
 
+    it('archon-adversarial-dev init-workspace should avoid non-portable sed -i', () => {
+      const content = BUNDLED_WORKFLOWS['archon-adversarial-dev'];
+      expect(content).toContain('STATE_TMP="$ARTIFACTS/state.json.tmp"');
+      expect(content).toContain(
+        'sed "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json" > "$STATE_TMP"'
+      );
+      expect(content).not.toContain('sed -i "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/"');
+    });
+
     it('should have valid YAML structure', () => {
       for (const content of Object.values(BUNDLED_WORKFLOWS)) {
         expect(content).toContain('name:');

From b4f67f9f9531d1949ae95584af4247eb82ef5bcd Mon Sep 17 00:00:00 2001
From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com>
Date: Wed, 22 Apr 2026 11:54:25 +0300
Subject: [PATCH 09/14] fix(deps): override transitive axios to ^1.15.0 for
 CVE-2025-62718 (#1330)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

axios <1.15.0 can be coerced to bypass NO_PROXY rules via hostname
normalization, enabling SSRF in deployments that rely on NO_PROXY. Archon pulls
axios transitively through @slack/bolt (^1.12.0) and @slack/web-api
(^1.13.5); before this change bun.lock resolved axios@1.13.6 — within
the vulnerable range.

Adding "axios": "^1.15.0" to the root package.json overrides bumps the
transitive resolution to axios@1.15.1 (latest compatible 1.x). Both
Slack range specs accept it without API surface changes — no downstream
code touches axios directly.

Supersedes #1153. Credits @stefans71 for identifying and reporting the
vulnerability; their PR was stale on the lockfile (0.3.5 → 0.3.6 drift
on dev), so this is a fresh one-line re-do on current dev.

Closes #1053.

Co-authored-by: Stefans71 <stefans71@users.noreply.github.com>
(cherry picked from commit ae2d9361bc3a063fd483aa89124d15a934a13a00)
---
 CHANGELOG.md |  1 +
 bun.lock     | 26 +++++++++++++-------------
 package.json |  3 ++-
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a5efeb66b..6ff712f205 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- **Bumped transitive `axios` to `^1.15.0` via root `overrides` to clear CVE-2025-62718** (NO_PROXY bypass via hostname normalization → potential SSRF). Archon pulls `axios` transitively through `@slack/bolt` and `@slack/web-api`; both semver ranges (`^1.12.0` and `^1.13.5`) accept the override cleanly, so no API surface changes. Credits @stefans71 for identifying and reporting the vulnerability in #1153. Closes #1053.
 - **Server startup no longer marks actively-running workflows as failed.** The `failOrphanedRuns()` call has been removed from `packages/server/src/index.ts` to match the CLI precedent (`packages/cli/src/cli.ts:256-258`). Per the new CLAUDE.md principle "No Autonomous Lifecycle Mutation Across Process Boundaries", a stuck `running` row is now transitioned explicitly by the user: via the per-row Cancel/Abandon buttons on the dashboard workflow card, or `archon workflow abandon <run-id>` from the CLI. (`archon workflow cleanup` is a separate command that deletes OLD terminal runs for disk hygiene — it does not handle stuck `running` rows.) Closes #1216.
 
 ### Changed
diff --git a/bun.lock b/bun.lock
index 8599602c73..8f1fcec74e 100644
--- a/bun.lock
+++ b/bun.lock
@@ -1,6 +1,5 @@
 {
   "lockfileVersion": 1,
-  "configVersion": 1,
   "workspaces": {
     "": {
       "name": "archon",
@@ -23,7 +22,7 @@
     },
     "packages/adapters": {
       "name": "@archon/adapters",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@archon/core": "workspace:*",
         "@archon/git": "workspace:*",
@@ -41,7 +40,7 @@
     },
     "packages/cli": {
       "name": "@archon/cli",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "bin": {
         "archon": "./src/cli.ts",
       },
@@ -63,7 +62,7 @@
     },
     "packages/core": {
       "name": "@archon/core",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@archon/git": "workspace:*",
         "@archon/isolation": "workspace:*",
@@ -83,7 +82,7 @@
     },
     "packages/docs-web": {
       "name": "@archon/docs-web",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@astrojs/starlight": "^0.38.0",
         "astro": "^6.1.0",
@@ -92,7 +91,7 @@
     },
     "packages/git": {
       "name": "@archon/git",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@archon/paths": "workspace:*",
       },
@@ -102,7 +101,7 @@
     },
     "packages/isolation": {
       "name": "@archon/isolation",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@archon/git": "workspace:*",
         "@archon/paths": "workspace:*",
@@ -113,7 +112,7 @@
     },
     "packages/paths": {
       "name": "@archon/paths",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "dotenv": "^17",
         "pino": "^9",
@@ -141,7 +140,7 @@
     },
     "packages/server": {
       "name": "@archon/server",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@archon/adapters": "workspace:*",
         "@archon/core": "workspace:*",
@@ -160,7 +159,7 @@
     },
     "packages/web": {
       "name": "@archon/web",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@dagrejs/dagre": "^2.0.4",
         "@radix-ui/react-alert-dialog": "^1.1.15",
@@ -212,7 +211,7 @@
     },
     "packages/workflows": {
       "name": "@archon/workflows",
-      "version": "0.3.6",
+      "version": "0.4.0",
       "dependencies": {
         "@archon/git": "workspace:*",
         "@archon/paths": "workspace:*",
@@ -226,6 +225,7 @@
     },
   },
   "overrides": {
+    "axios": "^1.15.0",
     "test-exclude": "^7.0.1",
   },
   "packages": {
@@ -1043,7 +1043,7 @@
 
     "atomic-sleep": ["atomic-sleep@1.0.0", "", {}, "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ=="],
 
-    "axios": ["axios@1.13.6", "", { "dependencies": { "follow-redirects": "^1.15.11", "form-data": "^4.0.5", "proxy-from-env": "^1.1.0" } }, "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ=="],
+    "axios": ["axios@1.15.1", "", { "dependencies": { "follow-redirects": "^1.15.11", "form-data": "^4.0.5", "proxy-from-env": "^2.1.0" } }, "sha512-WOG+Jj8ZOvR0a3rAn+Tuf1UQJRxw5venr6DgdbJzngJE3qG7X0kL83CZGpdHMxEm+ZK3seAbvFsw4FfOfP9vxg=="],
 
     "axobject-query": ["axobject-query@4.1.0", "", {}, "sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ=="],
 
@@ -2033,7 +2033,7 @@
 
     "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="],
 
-    "proxy-from-env": ["proxy-from-env@1.1.0", "", {}, "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="],
+    "proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="],
 
     "pump": ["pump@3.0.4", "", { "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA=="],
 
diff --git a/package.json b/package.json
index d20a7583dd..2fceb51a72 100644
--- a/package.json
+++ b/package.json
@@ -48,7 +48,8 @@
     "bun": "^1.3.0"
   },
   "overrides": {
-    "test-exclude": "^7.0.1"
+    "test-exclude": "^7.0.1",
+    "axios": "^1.15.0"
   },
   "dependencies": {
     "@anthropic-ai/claude-agent-sdk": "^0.2.74"

From 9ce26d991535ae5187f76583b6167d0e704db28b Mon Sep 17 00:00:00 2001
From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com>
Date: Wed, 22 Apr 2026 13:15:24 +0300
Subject: [PATCH 10/14] fix(cli): surface stale-workspace registration error
 instead of fake "not a git repo" (#1332)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(cli): surface stale-workspace registration error instead of fake "not a git repo"

When workflowRunCommand auto-registers an unregistered repo, a stale
~/.archon/workspaces/<owner>/<repo>/source symlink (pointing to an old
checkout) causes createProjectSourceSymlink() in @archon/paths to throw:

  Source symlink at <linkPath> already points to <existing>, expected <target>

The CLI caught that in a try/catch, logged it at warn level, continued
with `codebase = null`, and then the isolation / resume branches hit
their "codebase missing" fallback and threw the generic:

  Cannot create worktree: not in a git repository.

That message is false — the repo is valid; the Archon workspace entry
is stale. It sends users down the wrong diagnostic path (checking git
config, permissions, etc.) instead of pointing at the workspace dir.

Fix: preserve the registration error on a new `codebaseRegistrationError`
local, and at both fallback sites (resume + worktree-creation) check it
before the generic "not a git repo" branch. When set, throw a truthful:

  Cannot {create worktree,resume}: repository registration failed.
  Error: <original message>
  Hint: Remove the stale workspace entry at <dir> and retry, or
        use --no-worktree to skip isolation.

The hint's exact path comes from a small parser that extracts the
workspace directory from the known "Source symlink at …" format; when
the message shape doesn't match (future error text changes), the parser
returns null and we fall back to a generic "check registration under
<archon-home>/workspaces" hint — safe degradation.

Regression test in workflow.test.ts asserts the new error message and
negatively asserts the old "not in a git repository" string is gone.

Supersedes #1157 — that PR was draft + CONFLICTING against current dev,
and also mentioned Windows test-compat changes that weren't in the diff
(pruned scope). This is a fresh re-do focused strictly on #1146.

Closes #1146.

Co-authored-by: Bortlesboat <Bortlesboat@users.noreply.github.com>

* review: add resume-path test, null-fallback test, update troubleshooting docs

Addresses multi-agent review feedback on this PR:

- Add regression test for the --resume fallback site (the worktree-create
  site was already covered; the resume site had identical wiring but zero
  test coverage).
- Add test for the unrecognized-error-shape branch of
  buildRegistrationFailureError so the generic workspace hint is pinned
  (prevents accidental inversion of the stale-entry vs generic-hint
  ternary).
- Update the troubleshooting page to key on the new
  "Cannot create worktree: repository registration failed." message.
  Users hitting the new error won't find the page under the old heading,
  and the "In the future..." note is obsolete now that the error itself
  contains the cleanup path.
- Trim both new docblocks: keep the load-bearing cross-package error
  string contract in extractStaleWorkspaceEntry, drop narration of what
  the code already shows. Drop the "Before this helper existed..."
  paragraph from buildRegistrationFailureError — that's CHANGELOG
  material. Drop PR-reference suffix from the test section divider.

* review: guard getArchonHome in hint + export parser for direct tests

Two follow-up fixes to the multi-agent review commit (f32f002f):

CodeRabbit finding — unguarded getArchonHome() in the fallback hint.
If getArchonHome() ever throws (misconfigured env vars, permission issues
on the resolution path), the registration-failure Error would never get
constructed: we'd throw a secondary home-resolution error that masks the
root cause. Wrap the fallback branch in try/catch — prefer losing the
exact path in the hint over replacing the actionable registration error.
A safe generic hint ("Check your Archon workspace registration and retry")
takes over when getArchonHome() throws. The original error.message is
always embedded verbatim in the re-thrown Error.

S2 — export extractStaleWorkspaceEntry for direct table tests. The parser
is where the cross-package string contract with @archon/paths actually
lives; direct tests against it are cheaper than end-to-end CLI tests and
pin the edge cases:

- POSIX path with forward slashes (typical unix user)
- Windows path with backslashes (verifies the Math.max(lastIndexOf('/'), lastIndexOf('\\')) separator lookup)
- Unrelated error message (no prefix) → null
- Prefix matches but delimiter missing → null
- Source path without any separator → null (guards against returning
  empty string, which would produce a nonsense "Remove the stale
  workspace entry at " hint)
- Empty string → null

Six new cases in the test file. The claim of Windows support in the
PR description is now actually verified.

* fix(test): make generic-hint assertion path-separator agnostic

Windows test runner (CI) hit:
  Expected to contain: "Check your Archon workspace registration under /home/test/.archon/workspaces"
  Received: "... under \home\test\.archon\workspaces and retry, ..."

path.join normalizes to `\` on Windows and `/` on POSIX. The test hardcoded
forward slashes in the expected substring. Split into two separator-agnostic
asserts: the prefix up to "under", then `/workspaces\b/` regex for the final
path segment. Behavior doesn't change — the hint still gets the full
path.join'd workspaces dir on either platform.

---------

Co-authored-by: Bortlesboat <Bortlesboat@users.noreply.github.com>
(cherry picked from commit 056707d033e5276acc65ac773d614abc73ac582b)
---
 CHANGELOG.md                                  |   1 +
 packages/cli/src/commands/workflow.test.ts    | 156 ++++++++++++++++++
 packages/cli/src/commands/workflow.ts         |  60 +++++++
 .../content/docs/getting-started/overview.md  |  12 +-
 4 files changed, 224 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ff712f205..9663bd5431 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 - **Bumped transitive `axios` to `^1.15.0` via root `overrides` to clear CVE-2025-62718** (NO_PROXY bypass via hostname normalization → potential SSRF). Archon pulls `axios` transitively through `@slack/bolt` and `@slack/web-api`; both semver ranges (`^1.12.0` and `^1.13.5`) accept the override cleanly, so no API surface changes. Credits @stefans71 for identifying and reporting the vulnerability in #1153. Closes #1053.
+- **Stale workspace symlink no longer reported as "not in a git repository" by the CLI.** When `archon workflow run` (or `--resume`) is invoked from a valid git repo whose `~/.archon/workspaces/<owner>/<repo>/source` symlink points somewhere else (common after moving/renaming the checkout), auto-registration fails but the repo is fine. Previously both the worktree-creation and resume paths fell through to the generic `Cannot create worktree: not in a git repository` / `Cannot resume: Not in a git repository` errors — a lie that sent users down the wrong diagnostic path. Both sites now preserve the registration error and throw `Cannot {create worktree,resume}: repository registration failed.` with the original cause and a concrete cleanup hint (`Remove the stale workspace entry at <path> and retry`) when the failure matches the `createProjectSourceSymlink()` shape. Credits @Bortlesboat for identifying the root cause and the parser approach in #1157. Closes #1146.
 - **Server startup no longer marks actively-running workflows as failed.** The `failOrphanedRuns()` call has been removed from `packages/server/src/index.ts` to match the CLI precedent (`packages/cli/src/cli.ts:256-258`). Per the new CLAUDE.md principle "No Autonomous Lifecycle Mutation Across Process Boundaries", a stuck `running` row is now transitioned explicitly by the user: via the per-row Cancel/Abandon buttons on the dashboard workflow card, or `archon workflow abandon <run-id>` from the CLI. (`archon workflow cleanup` is a separate command that deletes OLD terminal runs for disk hygiene — it does not handle stuck `running` rows.) Closes #1216.
 
 ### Changed
diff --git a/packages/cli/src/commands/workflow.test.ts b/packages/cli/src/commands/workflow.test.ts
index d7a4030684..c6e08e8cd2 100644
--- a/packages/cli/src/commands/workflow.test.ts
+++ b/packages/cli/src/commands/workflow.test.ts
@@ -867,6 +867,114 @@ describe('workflowRunCommand', () => {
     expect(createCallsAfter).toBe(createCallsBefore);
   });
 
+  // -------------------------------------------------------------------------
+  // Stale workspace source-symlink → truthful CLI error
+  // -------------------------------------------------------------------------
+
+  it('surfaces auto-registration failures instead of claiming the repo is invalid', async () => {
+    const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery');
+    const { registerRepository } = await import('@archon/core');
+    const conversationDb = await import('@archon/core/db/conversations');
+    const codebaseDb = await import('@archon/core/db/codebases');
+    const gitModule = await import('@archon/git');
+
+    (discoverWorkflowsWithConfig as ReturnType<typeof mock>).mockResolvedValueOnce({
+      workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })],
+      errors: [],
+    });
+    (conversationDb.getOrCreateConversation as ReturnType<typeof mock>).mockResolvedValueOnce({
+      id: 'conv-123',
+    });
+    (codebaseDb.findCodebaseByDefaultCwd as ReturnType<typeof mock>).mockResolvedValueOnce(null);
+    (gitModule.findRepoRoot as ReturnType<typeof mock>).mockResolvedValueOnce('/test/path');
+    (registerRepository as ReturnType<typeof mock>).mockRejectedValueOnce(
+      new Error(
+        'Source symlink at /home/test/.archon/workspaces/acme/widget/source already points to ' +
+          '/home/test/.archon/workspaces/widget, expected /test/path'
+      )
+    );
+
+    const error = await workflowRunCommand('/test/path', 'assist', 'hello', {}).catch(
+      err => err as Error
+    );
+
+    expect(error).toBeInstanceOf(Error);
+    expect(error.message).toContain('Cannot create worktree: repository registration failed.');
+    expect(error.message).toContain(
+      'Remove the stale workspace entry at /home/test/.archon/workspaces/acme/widget and retry'
+    );
+    expect(error.message).not.toContain('not in a git repository');
+  });
+
+  it('surfaces auto-registration failures on --resume instead of claiming the repo is invalid', async () => {
+    const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery');
+    const { registerRepository } = await import('@archon/core');
+    const conversationDb = await import('@archon/core/db/conversations');
+    const codebaseDb = await import('@archon/core/db/codebases');
+    const gitModule = await import('@archon/git');
+
+    (discoverWorkflowsWithConfig as ReturnType<typeof mock>).mockResolvedValueOnce({
+      workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })],
+      errors: [],
+    });
+    (conversationDb.getOrCreateConversation as ReturnType<typeof mock>).mockResolvedValueOnce({
+      id: 'conv-123',
+    });
+    (codebaseDb.findCodebaseByDefaultCwd as ReturnType<typeof mock>).mockResolvedValueOnce(null);
+    (gitModule.findRepoRoot as ReturnType<typeof mock>).mockResolvedValueOnce('/test/path');
+    (registerRepository as ReturnType<typeof mock>).mockRejectedValueOnce(
+      new Error(
+        'Source symlink at /home/test/.archon/workspaces/acme/widget/source already points to ' +
+          '/home/test/.archon/workspaces/widget, expected /test/path'
+      )
+    );
+
+    const error = await workflowRunCommand('/test/path', 'assist', 'hello', {
+      resume: true,
+    }).catch(err => err as Error);
+
+    expect(error).toBeInstanceOf(Error);
+    expect(error.message).toContain('Cannot resume: repository registration failed.');
+    expect(error.message).toContain(
+      'Remove the stale workspace entry at /home/test/.archon/workspaces/acme/widget and retry'
+    );
+    expect(error.message).not.toContain('Not in a git repository');
+  });
+
+  it('falls back to generic workspace hint when registration error has an unrecognized shape', async () => {
+    const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery');
+    const { registerRepository } = await import('@archon/core');
+    const conversationDb = await import('@archon/core/db/conversations');
+    const codebaseDb = await import('@archon/core/db/codebases');
+    const gitModule = await import('@archon/git');
+
+    (discoverWorkflowsWithConfig as ReturnType<typeof mock>).mockResolvedValueOnce({
+      workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })],
+      errors: [],
+    });
+    (conversationDb.getOrCreateConversation as ReturnType<typeof mock>).mockResolvedValueOnce({
+      id: 'conv-123',
+    });
+    (codebaseDb.findCodebaseByDefaultCwd as ReturnType<typeof mock>).mockResolvedValueOnce(null);
+    (gitModule.findRepoRoot as ReturnType<typeof mock>).mockResolvedValueOnce('/test/path');
+    (registerRepository as ReturnType<typeof mock>).mockRejectedValueOnce(
+      new Error("EACCES: permission denied, mkdir '/home/test/.archon/workspaces/acme'")
+    );
+
+    const error = await workflowRunCommand('/test/path', 'assist', 'hello', {}).catch(
+      err => err as Error
+    );
+
+    expect(error).toBeInstanceOf(Error);
+    expect(error.message).toContain('Cannot create worktree: repository registration failed.');
+    expect(error.message).toContain('EACCES: permission denied');
+    // Path-separator-agnostic check: on Windows path.join normalizes to `\`,
+    // on POSIX to `/`. Assert the hint prefix + the final segment separately.
+    expect(error.message).toContain('Check your Archon workspace registration under');
+    expect(error.message).toMatch(/workspaces\b/);
+    expect(error.message).not.toContain('Remove the stale workspace entry');
+  });
+
   it('throws when isolation cannot be created due to missing codebase', async () => {
     const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery');
     const conversationDb = await import('@archon/core/db/conversations');
@@ -2272,3 +2380,51 @@ describe('workflowRunCommand — progress rendering', () => {
     expect(stderrSpy).toHaveBeenCalledWith('[slow] Completed (1m30s)\n');
   });
 });
+
+// ---------------------------------------------------------------------------
+// extractStaleWorkspaceEntry — parser edge cases
+// ---------------------------------------------------------------------------
+
+describe('extractStaleWorkspaceEntry', () => {
+  it('extracts the workspace dir from a POSIX source-symlink error', async () => {
+    const { extractStaleWorkspaceEntry } = await import('./workflow');
+    expect(
+      extractStaleWorkspaceEntry(
+        'Source symlink at /home/user/.archon/workspaces/acme/widget/source already points to /other, expected /here'
+      )
+    ).toBe('/home/user/.archon/workspaces/acme/widget');
+  });
+
+  it('extracts the workspace dir from a Windows source-symlink error (backslash sep)', async () => {
+    const { extractStaleWorkspaceEntry } = await import('./workflow');
+    expect(
+      extractStaleWorkspaceEntry(
+        'Source symlink at C:\\Users\\me\\.archon\\workspaces\\acme\\widget\\source already points to D:\\x, expected D:\\y'
+      )
+    ).toBe('C:\\Users\\me\\.archon\\workspaces\\acme\\widget');
+  });
+
+  it('returns null when the prefix does not match (unrelated error)', async () => {
+    const { extractStaleWorkspaceEntry } = await import('./workflow');
+    expect(extractStaleWorkspaceEntry('ENOENT: no such file or directory')).toBeNull();
+  });
+
+  it('returns null when the prefix matches but the delimiter is missing', async () => {
+    const { extractStaleWorkspaceEntry } = await import('./workflow');
+    expect(
+      extractStaleWorkspaceEntry('Source symlink at /some/path (truncated message)')
+    ).toBeNull();
+  });
+
+  it('returns null when the source path has no path separator at all', async () => {
+    const { extractStaleWorkspaceEntry } = await import('./workflow');
+    expect(
+      extractStaleWorkspaceEntry('Source symlink at bareword already points to /x, expected /y')
+    ).toBeNull();
+  });
+
+  it('returns null on an empty input', async () => {
+    const { extractStaleWorkspaceEntry } = await import('./workflow');
+    expect(extractStaleWorkspaceEntry('')).toBeNull();
+  });
+});
diff --git a/packages/cli/src/commands/workflow.ts b/packages/cli/src/commands/workflow.ts
index 4c28edcb65..4eb90e8731 100644
--- a/packages/cli/src/commands/workflow.ts
+++ b/packages/cli/src/commands/workflow.ts
@@ -11,6 +11,7 @@ import {
 import { WORKFLOW_EVENT_TYPES, type WorkflowEventType } from '@archon/workflows/store';
 import { configureIsolation, getIsolationProvider } from '@archon/isolation';
 import { createLogger, getArchonHome } from '@archon/paths';
+import { join } from 'node:path';
 import { createWorkflowDeps } from '@archon/core/workflows/store-adapter';
 import { discoverWorkflowsWithConfig } from '@archon/workflows/workflow-discovery';
 import { resolveWorkflowName } from '@archon/workflows/router';
@@ -77,6 +78,57 @@ function generateConversationId(): string {
   return `cli-${String(timestamp)}-${random}`;
 }
 
+/**
+ * Parses the "Source symlink at X already points to Y, expected Z" error
+ * thrown by `createProjectSourceSymlink` in @archon/paths. Cross-package
+ * string contract — if that throw site changes wording, this parser silently
+ * stops matching. Returns the workspace dir (parent of the `source` link) so
+ * the caller can emit an exact cleanup path, or null if unrecognized.
+ */
+export function extractStaleWorkspaceEntry(message: string): string | null {
+  const prefix = 'Source symlink at ';
+  const delimiter = ' already points to ';
+  if (!message.startsWith(prefix)) return null;
+
+  const remainder = message.slice(prefix.length);
+  const delimiterIndex = remainder.indexOf(delimiter);
+  if (delimiterIndex === -1) return null;
+
+  const sourcePath = remainder.slice(0, delimiterIndex).trim();
+  const lastSeparator = Math.max(sourcePath.lastIndexOf('/'), sourcePath.lastIndexOf('\\'));
+  return lastSeparator === -1 ? null : sourcePath.slice(0, lastSeparator);
+}
+
+/**
+ * Wraps a codebase auto-registration failure for either the worktree-create or
+ * resume path. Preserves the original error message and delegates hint detail
+ * to `extractStaleWorkspaceEntry`; falls back to a workspace-root pointer when
+ * the error shape is unrecognized.
+ */
+function buildRegistrationFailureError(action: string, error: Error): Error {
+  const staleWorkspaceEntry = extractStaleWorkspaceEntry(error.message);
+  let hint: string;
+  if (staleWorkspaceEntry) {
+    hint = `Hint: Remove the stale workspace entry at ${staleWorkspaceEntry} and retry, or use --no-worktree to skip isolation.`;
+  } else {
+    // Guard against a throwing getArchonHome() (misconfigured env vars, etc.):
+    // the registration error we're wrapping is the load-bearing one — we'd
+    // rather lose the exact path in the hint than replace it with a secondary
+    // home-resolution error that masks the root cause.
+    try {
+      const workspacesPath = join(getArchonHome(), 'workspaces');
+      hint = `Hint: Check your Archon workspace registration under ${workspacesPath} and retry, or use --no-worktree to skip isolation.`;
+    } catch {
+      hint =
+        'Hint: Check your Archon workspace registration and retry, or use --no-worktree to skip isolation.';
+    }
+  }
+
+  return new Error(
+    `Cannot ${action}: repository registration failed.\nError: ${error.message}\n${hint}`
+  );
+}
+
 /** Render a workflow event to stderr as a progress line. Called only when --quiet is not set. */
 function renderWorkflowEvent(event: WorkflowEmitterEvent, verbose: boolean): void {
   switch (event.type) {
@@ -285,6 +337,7 @@ export async function workflowRunCommand(
   // Try to find a codebase for this directory
   let codebase = null;
   let codebaseLookupError: Error | null = null;
+  let codebaseRegistrationError: Error | null = null;
   try {
     codebase = await codebaseDb.findCodebaseByDefaultCwd(cwd);
   } catch (error) {
@@ -330,6 +383,7 @@ export async function workflowRunCommand(
         }
       } catch (error) {
         const err = error as Error;
+        codebaseRegistrationError = err;
         getLog().warn(
           { err, errorType: err.constructor.name, repoRoot },
           'cli.codebase_auto_registration_failed'
@@ -354,6 +408,9 @@ export async function workflowRunCommand(
             'Hint: Check your database connection before using --resume.'
         );
       }
+      if (codebaseRegistrationError) {
+        throw buildRegistrationFailureError('resume', codebaseRegistrationError);
+      }
       throw new Error(
         'Cannot resume: Not in a git repository.\n' +
           'Either run from a git repo or use /clone first.'
@@ -507,6 +564,9 @@ export async function workflowRunCommand(
           'Hint: Check your database connection, or use --no-worktree to skip isolation.'
       );
     }
+    if (codebaseRegistrationError) {
+      throw buildRegistrationFailureError('create worktree', codebaseRegistrationError);
+    }
     throw new Error(
       'Cannot create worktree: not in a git repository.\n' +
         'Run from within a git repo, or use --no-worktree to skip isolation.'
diff --git a/packages/docs-web/src/content/docs/getting-started/overview.md b/packages/docs-web/src/content/docs/getting-started/overview.md
index cee57df09d..ca3690937d 100644
--- a/packages/docs-web/src/content/docs/getting-started/overview.md
+++ b/packages/docs-web/src/content/docs/getting-started/overview.md
@@ -482,17 +482,19 @@ The CLI is standalone, but if you also want to interact via Telegram, Slack, Dis
 
 ## Troubleshooting
 
-### "Cannot create worktree: not in a git repository" (but the repo exists)
+### "Cannot create worktree: repository registration failed" (stale workspace symlink)
 
-The real cause is usually a stale symlink from a previous Archon run with a different path. Look for this in the error output:
+This happens when `~/.archon/workspaces/<owner>/<repo>/source` is a symlink pointing at a previous checkout (common after moving or renaming the repo). The error message includes the exact cleanup path to follow:
 
 ```
-Source symlink at ~/.archon/workspaces/.../source already points to <old-path>, expected <new-path>
+Cannot create worktree: repository registration failed.
+Error: Source symlink at ~/.archon/workspaces/<owner>/<repo>/source already points to <old-path>, expected <new-path>
+Hint: Remove the stale workspace entry at ~/.archon/workspaces/<owner>/<repo> and retry, or use --no-worktree to skip isolation.
 ```
 
-Fix it by manually deleting the stale workspace folder at `~/.archon/workspaces/<github-user>/<repo-name>` and retrying the command.
+Follow the hint — delete the stale workspace folder and re-run, or pass `--no-worktree` to skip isolation for one run.
 
-> In the future, `archon isolation cleanup` will handle this automatically.
+> On Archon versions before this fix, the same root cause surfaced as the misleading "Cannot create worktree: not in a git repository" (even though the repo was valid). If you see that string, upgrade and you'll get the actionable message above.
 
 ---
 

From f7a043db9ba0813bd8b04bea238599fa64d72a53 Mon Sep 17 00:00:00 2001
From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com>
Date: Wed, 22 Apr 2026 13:15:41 +0300
Subject: [PATCH 11/14] fix(server,web,workflows): web approval gates
 auto-resume + reject-with-reason dialog (#1329)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(server,web,workflows): web approval gates auto-resume + reject-with-reason dialog

Fixes three tightly-coupled bugs that made web approval gates unusable:

1. orchestrator-agent did not pass parentConversationId to executeWorkflow
   for any web-dispatched foreground / interactive / resumable run. Without
   that field, findResumableRunByParentConversation (the machinery the CLI
   relies on for resume) couldn't find the paused run from the same
   conversation on a follow-up message, and the approve/reject API handlers
   had no conversation to dispatch back to.

2. POST /api/workflows/runs/:runId/{approve,reject} recorded the decision
   and returned "Send a message to continue the workflow." — the workflow
   never actually resumed. Added tryAutoResumeAfterGate() that mirrors what
   workflowApproveCommand / workflowRejectCommand already do on the CLI:
   look up the parent conversation, dispatch `/workflow run <name>
   <userMessage>` back through dispatchToOrchestrator. Failures are
   non-fatal — the user can still send a manual message as a fallback.

3. The during-streaming cancel-check in dag-executor aborted any streaming
   node whenever the run status left 'running', including the legitimate
   transition to 'paused' that an approval node performs. A concurrent AI
   node in the same DAG layer now tolerates 'paused' and finishes its own
   stream; only truly terminal / unknown states (null, cancelled, failed,
   completed) abort the in-flight stream.

Web UI: ConfirmRunActionDialog gains an optional reasonInput prop (label +
placeholder) that renders a textarea and passes the trimmed value to
onConfirm. WorkflowRunCard (dashboard) and WorkflowProgressCard (chat)
both use it for Reject now — the chat card was still on window.confirm,
which was both inconsistent with the dashboard and couldn't collect a
reason. The trimmed reason threads through to $REJECTION_REASON in the
workflow's on_reject prompt.

Supersedes #1147. @jonasvanderhaegen surfaced the root cause and shape of
the fix; that PR was 87 commits stale and pre-dated the reject-UX upgrade
(#1261 area), so this is a fresh re-do on current dev.

Tests:
- packages/server/src/routes/api.workflow-runs.test.ts — 5 new cases:
  approve with parent dispatches; approve without parent returns "Send a
  message"; approve with deleted parent conversation skips safely; reject
  dispatches on-reject flows; reject that cancels (no on_reject) does NOT
  dispatch.
- packages/core/src/orchestrator/orchestrator.test.ts — updated the two
  synthesizedPrompt-dispatch tests for the new executeWorkflow arity.

Closes #1131.

Co-authored-by: Jonas Vanderhaegen <7755555+jonasvanderhaegen@users.noreply.github.com>

* fix: address multi-agent review findings for web approval auto-resume

C1 (critical) — cross-adapter misrouting guard
  tryAutoResumeAfterGate now checks parentConv.platform_type === 'web'
  before dispatching. Non-web parents (Slack/Telegram/GitHub/Discord)
  being approved from the dashboard skip auto-resume rather than
  dispatching a Slack thread_ts or Telegram chat_id through the web
  adapter's lock manager.

C2 (critical) — fire-and-forget dispatch replaced with await
  The previous `void dispatchToOrchestrator()` call meant the "Resuming
  workflow." response fired before the async work completed, and the
  outer try/catch couldn't observe dispatch failures. Changed to await;
  the response now accurately reflects the dispatch outcome.

I1 — replaced logPrefix string-template (which produced 3-segment
  api.workflow_*.dispatched event names violating {domain}.{action}_{state})
  with literal event names per action, branched inside the helper.
  The helper now accepts action: 'approve' | 'reject' instead.

I2 — corrected misleading "foreground/interactive" qualifier in the
  approve-endpoint comment; background web dispatches also set
  parent_conversation_id via the pre-created run, so they auto-resume too.

I3 — extracted shouldContinueStreamingForStatus() as a small exported
  policy and added 7 unit tests covering running/paused/null/cancelled/
  failed/completed/unknown. Full-integration coverage of the paused-
  tolerance invariant would require manipulating the 10s
  CANCEL_CHECK_INTERVAL_MS, which is flaky-prone; unit test of the
  policy function captures the same invariant deterministically.

I4 — updated approval-nodes.md and authoring-workflows.md to reflect
  that Web UI approve/reject now auto-resumes (no "send a follow-up
  message" copy), documented the reject-with-reason dialog and
  $REJECTION_REASON flow, and called out the cross-platform caveat.

S1 — rewrote streaming status check as positive shouldContinue safe-list
  via the extracted policy function, matching the inline comment.

S2 — inlined handleReject on the dashboard rather than squeezing
  rejectWorkflowRun through runAction with a closure; keeps runAction
  narrow for the single-arg lifecycle actions.

S5 — new regression test covering the non-web-parent skip path
  (slack-platform parent → dispatch skipped → response falls back to
  "Send a message to continue").

S6 — removed stale reference to runAction in ConfirmRunActionDialog's
  onConfirm JSDoc (no longer accurate now that WorkflowProgressCard
  calls the dialog without runAction).

S7 — fixed misleading "user can resume manually by sending any message"
  docstring (resume is triggered by re-running the workflow command,
  not by an arbitrary message).

Skipped as out-of-scope:
  S3 — cancelWorkflowRun rowCount check (pre-existing defect; separate PR)
  S4 — tightening expect.anything() to UUID regex (deferred)
  S8 — 12-positional-arg executeWorkflow → options-bag refactor
    (tracked follow-up)

bun run validate green locally; 68 tests in api.workflow-runs.test.ts
(up from 67), 173 in dag-executor.test.ts (up from 166).

* review: close I1/I2/I3/I4/I6 — paused tolerance in loop + emitter, resume test, useId

I1 (loop inter-iteration check) — dag-executor.ts:1715
  The loop node's between-iteration status check used `!== 'running'`, so
  a sibling approval node pausing the run in the same topological layer
  would abort the loop partway through with "Loop node '<id>' stopped at
  iteration N (paused)". Switched to the shared shouldContinueStreamingForStatus
  helper so paused is tolerated — same semantics the streaming check got.
  Extended inline comment explains the sibling-layer concurrency reason.

I2 (skipIfStatusChanged emitter unregister) — dag-executor.ts:2886
  At DAG-finalization writes, the helper correctly skipped the write on any
  non-running state (paused included — don't mark a paused run complete),
  but it *also* called getWorkflowEventEmitter().unregisterRun() which
  broke SSE observability for a run that's still live (waiting for user
  approval). Split the two responsibilities: skip the write for all
  non-running states, but only unregister the emitter for terminal states
  (cancelled / deleted / completed / failed). `paused` keeps the emitter
  registered so resume stays visible on the dashboard.

I3 (foreground_resume_detected branch untested) — orchestrator-agent.test.ts
  That branch was modified as part of the original fix (added
  parentConversationId as 11th positional arg) but no existing test
  configured mockFindResumableRunByParentConversation to return non-null.
  A positional mistake (e.g. accidentally swapping issueContext and
  parentConversationId) would silently break auto-resume with no failing
  test. New regression test configures the mock and asserts both that the
  cwd comes from the resumable run's working_path AND that
  parentConversationId is passed at argument index 10 (0-based, i.e. the
  11th positional arg).

I4 (null-parent log level) — api.ts tryAutoResumeAfterGate
  `getConversationById` returning null is a data-integrity signal (the
  parent conversation was deleted while the run was paused) — worth
  surfacing at info level so operators notice, not hiding at debug.
  Missing platform_conversation_id on an existing row would be an unusual
  DB state and stays at debug. Added `parentDeleted: boolean` to the log
  context so the two cases are distinguishable in observability.

I6 (hardcoded DOM id) — ConfirmRunActionDialog.tsx
  `id="confirm-run-action-reason"` collided when multiple dialog instances
  share the same page (Radix portals mitigate in practice but the code
  was fragile). Switched to React.useId() so each instance gets a unique
  id — htmlFor/id wiring preserved.

S11 (arity-only assertion) — orchestrator-agent.test.ts:1092 area
  The interactive-workflow-on-web test asserted mockExecuteWorkflow was
  called, but nothing about the args. Added a specific assertion that
  argument index 10 (0-based; parentConversationId) equals 'conv-1' (the
  caller conversation id) — pins the wiring that I1/I2 depend on being
  correct.

Deferred (from review S1-S10, I5, I7):
  - S1 (ExecuteWorkflowOptions bag) — tracked as standalone follow-up;
    12 positional args with 2 adjacent optionals is a real maintenance
    hazard but the refactor deserves its own PR.
  - S7 (WHY comment on non-web else branch) — review text says the branch
    "correctly omits" parentConversationId but the code passes it; the
    combination with the web-parent guard in tryAutoResumeAfterGate is
    intentional. Not adding a justify-what-we-don't-do comment.
  - S2/S3/S4/S5/S8/S9/S10 — pure polish (event-map ternary, platformConvId
    inlining, shared constant for REJECTION_REASON_INPUT, onChange arrow
    shorthand, discriminated union, docblock trim, suffix comment drop)
  - I5 (soften "Resuming workflow." to "— check the dashboard for progress")
    — users clicking from the dashboard are already on the dashboard; the
    current text is accurate (enqueue completed) and concise.
  - I7 (test dispatch-throws path) — covered implicitly by the try/catch
    branch of tryAutoResumeAfterGate returning false; a direct test would
    require mocking handleMessage to throw and would couple to
    dispatchToOrchestrator internals.

bun run validate green; 189 dag-executor tests, 98 orchestrator-agent
tests, 68 api.workflow-runs tests — all the new cases pass.

---------

Co-authored-by: Jonas Vanderhaegen <7755555+jonasvanderhaegen@users.noreply.github.com>
(cherry picked from commit d5c1cd960546ea934a4bc9dcf5988e7ed75c3310)
---
 CHANGELOG.md                                  |   1 +
 .../orchestrator/orchestrator-agent.test.ts   |  36 ++++
 .../src/orchestrator/orchestrator-agent.ts    |  15 +-
 .../src/orchestrator/orchestrator.test.ts     |  10 +-
 .../src/content/docs/guides/approval-nodes.md |  20 +-
 .../docs/guides/authoring-workflows.md        |   8 +-
 packages/server/src/routes/api.ts             | 115 ++++++++++-
 .../src/routes/api.workflow-runs.test.ts      | 186 +++++++++++++++++-
 .../components/chat/WorkflowProgressCard.tsx  |  40 ++--
 .../dashboard/ConfirmRunActionDialog.tsx      |  66 ++++++-
 .../components/dashboard/WorkflowRunCard.tsx  |  15 +-
 .../components/dashboard/WorkflowRunGroup.tsx |   2 +-
 packages/web/src/routes/DashboardPage.tsx     |  15 +-
 packages/workflows/src/dag-executor.test.ts   |  46 +++++
 packages/workflows/src/dag-executor.ts        |  58 +++++-
 15 files changed, 579 insertions(+), 54 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9663bd5431..6d541d13da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **Bumped transitive `axios` to `^1.15.0` via root `overrides` to clear CVE-2025-62718** (NO_PROXY bypass via hostname normalization → potential SSRF). Archon pulls `axios` transitively through `@slack/bolt` and `@slack/web-api`; both semver ranges (`^1.12.0` and `^1.13.5`) accept the override cleanly, so no API surface changes. Credits @stefans71 for identifying and reporting the vulnerability in #1153. Closes #1053.
 - **Stale workspace symlink no longer reported as "not in a git repository" by the CLI.** When `archon workflow run` (or `--resume`) is invoked from a valid git repo whose `~/.archon/workspaces/<owner>/<repo>/source` symlink points somewhere else (common after moving/renaming the checkout), auto-registration fails but the repo is fine. Previously both the worktree-creation and resume paths fell through to the generic `Cannot create worktree: not in a git repository` / `Cannot resume: Not in a git repository` errors — a lie that sent users down the wrong diagnostic path. Both sites now preserve the registration error and throw `Cannot {create worktree,resume}: repository registration failed.` with the original cause and a concrete cleanup hint (`Remove the stale workspace entry at <path> and retry`) when the failure matches the `createProjectSourceSymlink()` shape. Credits @Bortlesboat for identifying the root cause and the parser approach in #1157. Closes #1146.
 - **Server startup no longer marks actively-running workflows as failed.** The `failOrphanedRuns()` call has been removed from `packages/server/src/index.ts` to match the CLI precedent (`packages/cli/src/cli.ts:256-258`). Per the new CLAUDE.md principle "No Autonomous Lifecycle Mutation Across Process Boundaries", a stuck `running` row is now transitioned explicitly by the user: via the per-row Cancel/Abandon buttons on the dashboard workflow card, or `archon workflow abandon <run-id>` from the CLI. (`archon workflow cleanup` is a separate command that deletes OLD terminal runs for disk hygiene — it does not handle stuck `running` rows.) Closes #1216.
+- **Web UI approval gates now auto-resume.** Previously, clicking Approve or Reject on a paused workflow from the Web UI only recorded the decision — the workflow never continued, and the user had to send a follow-up chat message (or use the CLI) to resume. Three fixes: (1) orchestrator-agent now threads `parentConversationId` through `executeWorkflow` for every web dispatch, (2) the `POST /approve` and `POST /reject` API handlers dispatch `/workflow run <name> <userMessage>` back through the orchestrator when `parent_conversation_id` is set and points at a web-platform parent (mirrors `workflowApproveCommand`/`workflowRejectCommand` on the CLI; non-web parents skip the auto-resume to prevent cross-adapter misrouting), and (3) the during-streaming status check in the DAG executor tolerates the `paused` state so a concurrent AI node in the same topological layer finishes its own stream rather than being aborted when a sibling approval node pauses the run. The Web UI reject button uses the proper `ConfirmRunActionDialog` with an optional reason textarea (was `window.confirm` in the chat card, and lacked a reason input on the dashboard) — the trimmed reason propagates to `$REJECTION_REASON` in the workflow's `on_reject` prompt. Credits @jonasvanderhaegen for surfacing and diagnosing the bug in #1147 (that PR was 87 commits stale on a dev that had since refactored the reject UX; this is a fresh re-do on current `dev`). Closes #1131.
 
 ### Changed
 
diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts
index ab8165ca7e..3a4a1299c9 100644
--- a/packages/core/src/orchestrator/orchestrator-agent.test.ts
+++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts
@@ -1099,6 +1099,42 @@ describe('workflow dispatch routing — interactive flag', () => {
 
     expect(mockExecuteWorkflow).toHaveBeenCalled();
     expect(mockDispatchBackgroundWorkflow).not.toHaveBeenCalled();
+    // Regression for the auto-resume plumbing: the interactive web dispatch
+    // must pass the caller conversation's DB id as parentConversationId
+    // (11th positional arg) so the approve/reject API handlers can dispatch
+    // resume back through the orchestrator.
+    const callArgs = mockExecuteWorkflow.mock.calls[0] as unknown[];
+    expect(callArgs[10]).toBe('conv-1'); // parentConversationId = conversation.id
+  });
+
+  test('foreground_resume_detected: passes parentConversationId to executeWorkflow when a resumable run exists', async () => {
+    // Regression for the foreground-resume branch added as part of the
+    // auto-resume fix: when `findResumableRunByParentConversation` returns a
+    // resumable run, the orchestrator picks the working_path from that run and
+    // must still carry parentConversationId forward so the API helpers can
+    // keep dispatching resume on subsequent approvals.
+    mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(makeDispatchConversation()));
+    mockGetCodebase.mockReturnValueOnce(Promise.resolve(makeDispatchCodebase()));
+    mockHandleCommand.mockReturnValueOnce(Promise.resolve(makeWorkflowResult(true)));
+    mockFindResumableRunByParentConversation.mockReturnValueOnce(
+      Promise.resolve({
+        id: 'resumable-run-1',
+        workflow_name: 'test-workflow',
+        working_path: '/repos/test-repo/worktrees/feature',
+        parent_conversation_id: 'conv-1',
+        status: 'failed',
+      })
+    );
+
+    const platform = makePlatform(); // getPlatformType returns 'web'
+    await handleMessage(platform, 'conv-1', '/workflow run test-workflow');
+
+    expect(mockExecuteWorkflow).toHaveBeenCalled();
+    const callArgs = mockExecuteWorkflow.mock.calls[0] as unknown[];
+    // cwd (position 3) should come from the resumable run's working_path
+    expect(callArgs[3]).toBe('/repos/test-repo/worktrees/feature');
+    // parentConversationId (position 10) should still be the caller conversation id
+    expect(callArgs[10]).toBe('conv-1');
   });
 
   test('calls dispatchBackgroundWorkflow for non-interactive workflow on web', async () => {
diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts
index d5eb9397b3..292f0e0ad8 100644
--- a/packages/core/src/orchestrator/orchestrator-agent.ts
+++ b/packages/core/src/orchestrator/orchestrator-agent.ts
@@ -281,7 +281,10 @@ async function dispatchOrchestratorWorkflow(
         workflow,
         userMessage,
         conversation.id,
-        codebase.id
+        codebase.id,
+        undefined, // issueContext
+        undefined, // isolationContext
+        conversation.id // parentConversationId — enables approve/reject auto-resume
       );
     } else if (workflow.interactive) {
       // Interactive workflows run in foreground so output stays in the user's conversation
@@ -293,7 +296,10 @@ async function dispatchOrchestratorWorkflow(
         workflow,
         userMessage,
         conversation.id,
-        codebase.id
+        codebase.id,
+        undefined, // issueContext
+        undefined, // isolationContext
+        conversation.id // parentConversationId — enables approve/reject auto-resume
       );
     } else {
       await dispatchBackgroundWorkflow(
@@ -319,7 +325,10 @@ async function dispatchOrchestratorWorkflow(
       workflow,
       userMessage,
       conversation.id,
-      codebase.id
+      codebase.id,
+      undefined, // issueContext
+      undefined, // isolationContext
+      conversation.id // parentConversationId — enables approve/reject auto-resume
     );
   }
 }
diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts
index f8f199a5de..bd0caf3bf8 100644
--- a/packages/core/src/orchestrator/orchestrator.test.ts
+++ b/packages/core/src/orchestrator/orchestrator.test.ts
@@ -1078,7 +1078,10 @@ describe('orchestrator-agent handleMessage', () => {
         expect.anything(), // workflow
         synthesized, // synthesizedPrompt, not original message
         expect.anything(), // conversation.id
-        expect.anything() // codebase.id
+        expect.anything(), // codebase.id
+        undefined, // issueContext
+        undefined, // isolationContext
+        expect.anything() // parentConversationId — web approval auto-resume
       );
     });
 
@@ -1103,7 +1106,10 @@ describe('orchestrator-agent handleMessage', () => {
         expect.anything(),
         'fix the login bug', // original message used as fallback
         expect.anything(),
-        expect.anything()
+        expect.anything(),
+        undefined, // issueContext
+        undefined, // isolationContext
+        expect.anything() // parentConversationId — web approval auto-resume
       );
     });
 
diff --git a/packages/docs-web/src/content/docs/guides/approval-nodes.md b/packages/docs-web/src/content/docs/guides/approval-nodes.md
index 42ebc48fec..c48f8c4856 100644
--- a/packages/docs-web/src/content/docs/guides/approval-nodes.md
+++ b/packages/docs-web/src/content/docs/guides/approval-nodes.md
@@ -55,9 +55,9 @@ to the user on whatever platform they're using (CLI, Slack, GitHub, etc.). On th
    block the worktree path guard (no other workflow can start on the same path).
 4. **Approve**: The user approves, which writes a `node_completed` event for
    the approval node and transitions the run to resumable. Natural-language
-   messages (recommended) and the CLI auto-resume immediately. The explicit
-   `/workflow approve` command records the approval; send a follow-up message
-   to resume.
+   messages, the CLI, and the Web UI approve button all auto-resume the
+   workflow from the paused gate. (The explicit `/workflow approve <run-id>`
+   slash command also auto-resumes when issued in the originating conversation.)
 5. **Reject**: The user rejects.
    - **Without `on_reject`**: The workflow is cancelled immediately.
    - **With `on_reject`**: The executor runs the `on_reject.prompt` via AI (with
@@ -140,7 +140,19 @@ bun run cli workflow reject <run-id> --reason "Plan needs more test coverage"
 ### Web UI
 
 Paused workflows show an amber pulsing badge on the dashboard. Click **Approve**
-or **Reject** directly on the workflow card.
+or **Reject** directly on the workflow card. Both actions auto-resume the
+workflow from the paused gate — no follow-up message required.
+
+**Reject with reason**: the Reject dialog includes an optional free-text
+reason field. The trimmed value (empty after trim → omitted) is passed to
+the workflow as `$REJECTION_REASON`, available in the `on_reject.prompt`.
+Rejects from the dashboard card and the chat card use the same confirmation dialog.
+
+**Cross-platform caveat**: auto-resume via the Web UI only applies when the
+run was originally dispatched from the Web UI (parent conversation is a web
+conversation). If you approve a Slack / Telegram / GitHub-dispatched run
+from the dashboard, the decision is recorded, but the resume flow has to
+happen in the originating platform (re-run the workflow there).
 
 ### REST API
 
diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md
index c4fdfc7830..4fcb6d5238 100644
--- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md
+++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md
@@ -977,12 +977,12 @@ nodes:
 When the workflow reaches `review-gate`, it pauses and notifies you. Approve or reject via:
 
 - **Natural language** (recommended): Just type your response in the conversation — the system detects the paused workflow and auto-resumes
-- **CLI**: `bun run cli workflow approve <run-id>` or `bun run cli workflow reject <run-id>`
-- **Explicit command**: `/workflow approve <run-id>` or `/workflow reject <run-id>` (records approval; send a follow-up message to resume)
-- **Web UI**: Click the Approve/Reject buttons on the dashboard card
+- **CLI**: `bun run cli workflow approve <run-id>` or `bun run cli workflow reject <run-id>` — auto-resumes
+- **Explicit command**: `/workflow approve <run-id>` or `/workflow reject <run-id>` — auto-resumes when issued in the originating conversation
+- **Web UI**: Click the Approve/Reject buttons on the dashboard card — auto-resumes for Web-UI-dispatched runs; the Reject dialog includes an optional reason field that flows to `$REJECTION_REASON`
 - **API**: `POST /api/workflows/runs/<run-id>/approve` or `/reject`
 
-After approval via natural language or CLI, the workflow auto-resumes from the next node. The user's approval comment is available as `$review-gate.output` in downstream nodes only when `capture_response: true` is set on the approval node.
+Each of these paths auto-resumes the workflow from the next node, subject to the per-path conditions noted above. The user's approval comment is available as `$review-gate.output` in downstream nodes only when `capture_response: true` is set on the approval node. Cross-platform caveat: Web-UI approvals on Slack / Telegram / GitHub-dispatched runs record the decision but do not auto-resume — re-run from the originating platform to continue.
 
 Without `on_reject`: rejecting cancels the workflow.
 With `on_reject`: rejecting triggers an AI rework prompt and re-pauses for re-review.
diff --git a/packages/server/src/routes/api.ts b/packages/server/src/routes/api.ts
index 6448c77318..4832e06b61 100644
--- a/packages/server/src/routes/api.ts
+++ b/packages/server/src/routes/api.ts
@@ -51,7 +51,7 @@ import {
   RESUMABLE_WORKFLOW_STATUSES,
   TERMINAL_WORKFLOW_STATUSES,
 } from '@archon/workflows/schemas/workflow-run';
-import type { ApprovalContext } from '@archon/workflows/schemas/workflow-run';
+import type { ApprovalContext, WorkflowRun } from '@archon/workflows/schemas/workflow-run';
 import { findMarkdownFilesRecursive } from '@archon/core/utils/commands';
 
 /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
@@ -1051,6 +1051,95 @@ export function registerApiRoutes(
     return { accepted: true, status: result.status };
   }
 
+  /**
+   * Re-enter the orchestrator after a paused approval gate is resolved, so a
+   * web-dispatched workflow continues (approve) or runs its on_reject prompt
+   * (reject) without the user having to re-run the workflow command. The CLI's
+   * `workflowApproveCommand` / `workflowRejectCommand` already auto-resume via
+   * `workflowRunCommand({ resume: true })`; this is the web-side equivalent.
+   *
+   * Returns `true` when a resume dispatch was initiated, `false` otherwise (no
+   * parent conversation on the run, parent conversation deleted, parent was on
+   * a non-web platform, or dispatch threw). Failures are non-fatal: the gate
+   * decision is recorded regardless; when this returns `false` the response
+   * text instructs the user to re-run the workflow command.
+   *
+   * **Cross-adapter guard**: only web-sourced parents qualify.
+   * `dispatchToOrchestrator` is wired to the web adapter + its lock manager,
+   * so a Slack / Telegram / GitHub / Discord run being approved from the
+   * dashboard must not route through it — the Slack thread would never see
+   * the resumed output. Non-web parents skip auto-resume and the originating
+   * platform's own re-run flow applies.
+   */
+  async function tryAutoResumeAfterGate(
+    run: WorkflowRun,
+    action: 'approve' | 'reject'
+  ): Promise<boolean> {
+    if (!run.parent_conversation_id) return false;
+    // Literal event names per action — greppable for ops tooling. Keeping the
+    // branch explicit rather than templating avoids the earlier 3-segment
+    // `api.workflow_*.dispatched` shape that broke `{domain}.{action}_{state}`.
+    const events =
+      action === 'approve'
+        ? {
+            dispatched: 'api.workflow_approve_auto_resume_dispatched' as const,
+            skippedNoPlatformConv:
+              'api.workflow_approve_auto_resume_skipped_no_platform_conv' as const,
+            skippedNonWebParent: 'api.workflow_approve_auto_resume_skipped_non_web_parent' as const,
+            failed: 'api.workflow_approve_auto_resume_failed' as const,
+          }
+        : {
+            dispatched: 'api.workflow_reject_auto_resume_dispatched' as const,
+            skippedNoPlatformConv:
+              'api.workflow_reject_auto_resume_skipped_no_platform_conv' as const,
+            skippedNonWebParent: 'api.workflow_reject_auto_resume_skipped_non_web_parent' as const,
+            failed: 'api.workflow_reject_auto_resume_failed' as const,
+          };
+    try {
+      const parentConv = await conversationDb.getConversationById(run.parent_conversation_id);
+      const platformConvId = parentConv?.platform_conversation_id;
+      if (!platformConvId) {
+        // parentConv === null is a data-integrity signal (the parent
+        // conversation was deleted while the run was paused) — worth
+        // surfacing at info level so operators notice. Missing
+        // platform_conversation_id on an existing row shouldn't happen and
+        // stays at debug.
+        const logFn =
+          parentConv === null ? getLog().info.bind(getLog()) : getLog().debug.bind(getLog());
+        logFn(
+          {
+            runId: run.id,
+            parentConversationId: run.parent_conversation_id,
+            parentDeleted: parentConv === null,
+          },
+          events.skippedNoPlatformConv
+        );
+        return false;
+      }
+      if (parentConv.platform_type !== 'web') {
+        getLog().debug(
+          {
+            runId: run.id,
+            parentConversationId: run.parent_conversation_id,
+            platformType: parentConv.platform_type,
+          },
+          events.skippedNonWebParent
+        );
+        return false;
+      }
+      const resumeMessage = `/workflow run ${run.workflow_name} ${run.user_message ?? ''}`.trim();
+      await dispatchToOrchestrator(platformConvId, resumeMessage);
+      getLog().info(
+        { runId: run.id, workflowName: run.workflow_name, platformConvId },
+        events.dispatched
+      );
+      return true;
+    } catch (err) {
+      getLog().warn({ err: err as Error, runId: run.id }, events.failed);
+      return false;
+    }
+  }
+
   // GET /api/conversations - List conversations
   registerOpenApiRoute(getConversationsRoute, async c => {
     try {
@@ -1910,9 +1999,20 @@ export function registerApiRoutes(
         status: 'failed',
         metadata: metadataUpdate,
       });
+
+      // Auto-resume: dispatch to the orchestrator so the workflow continues
+      // without requiring the user to re-run the workflow command. Mirrors
+      // what `workflowApproveCommand` does in the CLI. Requires
+      // `parent_conversation_id` on the run (set by orchestrator-agent for any
+      // web-dispatched workflow — foreground, interactive, and background via
+      // the pre-created run) and a web-platform parent (guarded in the helper).
+      const autoResumed = await tryAutoResumeAfterGate(run, 'approve');
+
       return c.json({
         success: true,
-        message: `Workflow approved: ${run.workflow_name}. Send a message to continue the workflow.`,
+        message: autoResumed
+          ? `Workflow approved: ${run.workflow_name}. Resuming workflow.`
+          : `Workflow approved: ${run.workflow_name}. Send a message to continue.`,
       });
     } catch (error) {
       getLog().error({ err: error, runId }, 'api.workflow_run_approve_failed');
@@ -1956,9 +2056,18 @@ export function registerApiRoutes(
           status: 'failed',
           metadata: { rejection_reason: reason, rejection_count: currentCount + 1 },
         });
+
+        // Auto-resume: dispatch to the orchestrator so the on_reject prompt runs
+        // without requiring the user to re-run the workflow command. Mirrors
+        // what `workflowRejectCommand` does in the CLI. Same cross-adapter
+        // guard as approve — only web parents auto-resume.
+        const autoResumed = await tryAutoResumeAfterGate(run, 'reject');
+
         return c.json({
           success: true,
-          message: `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`,
+          message: autoResumed
+            ? `Workflow rejected: ${run.workflow_name}. Running on-reject prompt.`
+            : `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`,
         });
       }
 
diff --git a/packages/server/src/routes/api.workflow-runs.test.ts b/packages/server/src/routes/api.workflow-runs.test.ts
index 41bee85003..8d837d3623 100644
--- a/packages/server/src/routes/api.workflow-runs.test.ts
+++ b/packages/server/src/routes/api.workflow-runs.test.ts
@@ -22,7 +22,8 @@ const mockGetWorkflowRunByWorkerPlatformId = mock(
 );
 const mockListWorkflowEvents = mock(async (_runId: string) => [] as MockWorkflowEvent[]);
 const mockGetConversationById = mock(
-  async (_id: string) => null as null | { id: string; platform_conversation_id: string }
+  async (_id: string) =>
+    null as null | { id: string; platform_conversation_id: string; platform_type: string }
 );
 const mockFindConversationByPlatformId = mock(
   async (_id: string) =>
@@ -1362,3 +1363,186 @@ describe('POST /api/workflows/runs/:runId/reject', () => {
     expect(mockUpdateWorkflowRun).not.toHaveBeenCalled();
   });
 });
+
+// ---------------------------------------------------------------------------
+// Auto-resume: approve/reject endpoints dispatch to orchestrator when the run
+// has parent_conversation_id set (web-dispatched foreground/interactive
+// workflows). Mirrors what the CLI does in workflowApproveCommand/RejectCommand.
+// ---------------------------------------------------------------------------
+
+describe('approve/reject auto-resume', () => {
+  beforeEach(() => {
+    mockGetWorkflowRun.mockReset();
+    mockUpdateWorkflowRun.mockReset();
+    mockCreateWorkflowEvent.mockReset();
+    mockGetConversationById.mockReset();
+    mockHandleMessage.mockReset();
+    mockCancelWorkflowRun.mockReset();
+  });
+
+  test('approve: dispatches resume when parent_conversation_id is set', async () => {
+    mockGetWorkflowRun.mockResolvedValueOnce({
+      ...MOCK_PAUSED_RUN,
+      id: 'run-auto-resume-approve',
+      parent_conversation_id: 'parent-conv-uuid',
+      user_message: 'Deploy feature X',
+    });
+    mockGetConversationById.mockResolvedValueOnce({
+      id: 'parent-conv-uuid',
+      platform_conversation_id: 'web-plat-abc',
+      platform_type: 'web',
+    });
+
+    const { app } = makeApp();
+    const response = await app.request('/api/workflows/runs/run-auto-resume-approve/approve', {
+      method: 'POST',
+      body: JSON.stringify({ comment: 'LGTM' }),
+      headers: { 'Content-Type': 'application/json' },
+    });
+
+    expect(response.status).toBe(200);
+    const body = (await response.json()) as { message: string };
+    expect(body.message).toContain('Resuming workflow');
+
+    // dispatchToOrchestrator → lockManager → handleMessage
+    expect(mockHandleMessage).toHaveBeenCalled();
+    const [, platformConvId, dispatchedMessage] = mockHandleMessage.mock.calls[0] as [
+      unknown,
+      string,
+      string,
+    ];
+    expect(platformConvId).toBe('web-plat-abc');
+    expect(dispatchedMessage).toBe('/workflow run deploy Deploy feature X');
+  });
+
+  test('approve: skips dispatch when parent_conversation_id is null (CLI-dispatched run)', async () => {
+    mockGetWorkflowRun.mockResolvedValueOnce({
+      ...MOCK_PAUSED_RUN,
+      parent_conversation_id: null,
+    });
+
+    const { app } = makeApp();
+    const response = await app.request('/api/workflows/runs/run-paused-1/approve', {
+      method: 'POST',
+      body: JSON.stringify({ comment: 'LGTM' }),
+      headers: { 'Content-Type': 'application/json' },
+    });
+
+    expect(response.status).toBe(200);
+    const body = (await response.json()) as { message: string };
+    expect(body.message).toContain('Send a message to continue');
+    expect(mockHandleMessage).not.toHaveBeenCalled();
+    expect(mockGetConversationById).not.toHaveBeenCalled();
+  });
+
+  test('approve: skips dispatch when parent conversation no longer exists', async () => {
+    mockGetWorkflowRun.mockResolvedValueOnce({
+      ...MOCK_PAUSED_RUN,
+      parent_conversation_id: 'deleted-conv-uuid',
+    });
+    mockGetConversationById.mockResolvedValueOnce(null); // conversation deleted
+
+    const { app } = makeApp();
+    const response = await app.request('/api/workflows/runs/run-paused-1/approve', {
+      method: 'POST',
+      body: JSON.stringify({}),
+      headers: { 'Content-Type': 'application/json' },
+    });
+
+    expect(response.status).toBe(200);
+    const body = (await response.json()) as { message: string };
+    expect(body.message).toContain('Send a message to continue');
+    expect(mockHandleMessage).not.toHaveBeenCalled();
+  });
+
+  test('approve: skips dispatch when parent conversation is on a non-web platform', async () => {
+    // A Slack/Telegram/GitHub-sourced run being approved via the dashboard
+    // must not route through dispatchToOrchestrator — that helper is wired
+    // to the web adapter + lock manager, so dispatching a Slack thread_ts
+    // or Telegram chat_id would misroute through the wrong adapter.
+    mockGetWorkflowRun.mockResolvedValueOnce({
+      ...MOCK_PAUSED_RUN,
+      parent_conversation_id: 'slack-parent-conv-uuid',
+    });
+    mockGetConversationById.mockResolvedValueOnce({
+      id: 'slack-parent-conv-uuid',
+      platform_conversation_id: '1234567890.123456', // a Slack thread_ts
+      platform_type: 'slack',
+    });
+
+    const { app } = makeApp();
+    const response = await app.request('/api/workflows/runs/run-paused-1/approve', {
+      method: 'POST',
+      body: JSON.stringify({ comment: 'LGTM' }),
+      headers: { 'Content-Type': 'application/json' },
+    });
+
+    expect(response.status).toBe(200);
+    const body = (await response.json()) as { message: string };
+    // Same fallback text as no-parent case — user re-runs from the originating platform.
+    expect(body.message).toContain('Send a message to continue');
+    expect(mockHandleMessage).not.toHaveBeenCalled();
+  });
+
+  test('reject: dispatches resume for on_reject flows when parent is set', async () => {
+    mockGetWorkflowRun.mockResolvedValueOnce({
+      ...MOCK_PAUSED_RUN,
+      id: 'run-auto-resume-reject',
+      parent_conversation_id: 'parent-conv-uuid',
+      user_message: 'Review PR',
+      metadata: {
+        approval: {
+          type: 'approval',
+          nodeId: 'review-gate',
+          message: 'Approve?',
+          onRejectPrompt: 'Fix: $REJECTION_REASON',
+          onRejectMaxAttempts: 3,
+        },
+        rejection_count: 0,
+      },
+    });
+    mockGetConversationById.mockResolvedValueOnce({
+      id: 'parent-conv-uuid',
+      platform_conversation_id: 'web-plat-xyz',
+      platform_type: 'web',
+    });
+
+    const { app } = makeApp();
+    const response = await app.request('/api/workflows/runs/run-auto-resume-reject/reject', {
+      method: 'POST',
+      body: JSON.stringify({ reason: 'tests missing' }),
+      headers: { 'Content-Type': 'application/json' },
+    });
+
+    expect(response.status).toBe(200);
+    const body = (await response.json()) as { message: string };
+    expect(body.message).toContain('Running on-reject prompt');
+    expect(mockHandleMessage).toHaveBeenCalled();
+    const [, platformConvId, dispatchedMessage] = mockHandleMessage.mock.calls[0] as [
+      unknown,
+      string,
+      string,
+    ];
+    expect(platformConvId).toBe('web-plat-xyz');
+    expect(dispatchedMessage).toBe('/workflow run deploy Review PR');
+  });
+
+  test('reject: does NOT dispatch when the run is being cancelled (no on_reject configured)', async () => {
+    mockGetWorkflowRun.mockResolvedValueOnce({
+      ...MOCK_PAUSED_RUN,
+      parent_conversation_id: 'parent-conv-uuid', // set, but doesn't matter — reject cancels
+    });
+
+    const { app } = makeApp();
+    const response = await app.request('/api/workflows/runs/run-paused-1/reject', {
+      method: 'POST',
+      body: JSON.stringify({ reason: 'no' }),
+      headers: { 'Content-Type': 'application/json' },
+    });
+
+    expect(response.status).toBe(200);
+    // Cancellation path doesn't auto-resume — nothing to resume to.
+    expect(mockHandleMessage).not.toHaveBeenCalled();
+    expect(mockCancelWorkflowRun).toHaveBeenCalledWith('run-paused-1');
+  });
+});
diff --git a/packages/web/src/components/chat/WorkflowProgressCard.tsx b/packages/web/src/components/chat/WorkflowProgressCard.tsx
index bb65471f3b..44eb70af74 100644
--- a/packages/web/src/components/chat/WorkflowProgressCard.tsx
+++ b/packages/web/src/components/chat/WorkflowProgressCard.tsx
@@ -5,6 +5,7 @@ import { CheckCircle, ChevronRight, Loader2, Pause, XCircle } from 'lucide-react
 import { cn } from '@/lib/utils';
 import { approveWorkflowRun, getWorkflowRunByWorker, rejectWorkflowRun } from '@/lib/api';
 import { useWorkflowStore } from '@/stores/workflow-store';
+import { ConfirmRunActionDialog } from '@/components/dashboard/ConfirmRunActionDialog';
 import { StatusIcon } from '@/components/workflows/StatusIcon';
 import { formatDurationMs } from '@/lib/format';
 import { isTerminalStatus } from '@/lib/workflow-utils';
@@ -87,7 +88,7 @@ export function WorkflowProgressCard({
     mutationFn: () => approveWorkflowRun(runId ?? ''),
   });
   const rejectMutation = useMutation({
-    mutationFn: () => rejectWorkflowRun(runId ?? ''),
+    mutationFn: (reason?: string) => rejectWorkflowRun(runId ?? '', reason),
   });
   const mutationError = approveMutation.error ?? rejectMutation.error;
 
@@ -220,18 +221,33 @@ export function WorkflowProgressCard({
                   <CheckCircle className="h-3.5 w-3.5" />
                   Approve
                 </button>
-                <button
-                  onClick={() => {
-                    if (window.confirm(`Reject workflow "${workflowName}"?`)) {
-                      rejectMutation.mutate();
-                    }
+                <ConfirmRunActionDialog
+                  trigger={
+                    <button
+                      disabled={!runId || approveMutation.isPending || rejectMutation.isPending}
+                      className="flex items-center gap-1 rounded-md px-2 py-1 text-xs text-error/80 hover:bg-error/10 hover:text-error transition-colors disabled:opacity-50"
+                    >
+                      <XCircle className="h-3.5 w-3.5" />
+                      Reject
+                    </button>
+                  }
+                  title="Reject workflow?"
+                  description={
+                    <>
+                      Reject the paused workflow <strong>{workflowName}</strong>. If the approval
+                      node defines an <code>on_reject</code> prompt, it runs with your reason as{' '}
+                      <code>$REJECTION_REASON</code>; otherwise the run is cancelled.
+                    </>
+                  }
+                  confirmLabel="Reject"
+                  reasonInput={{
+                    label: 'Reason (optional)',
+                    placeholder: 'Why are you rejecting? Visible to the on_reject prompt.',
                   }}
-                  disabled={!runId || approveMutation.isPending || rejectMutation.isPending}
-                  className="flex items-center gap-1 rounded-md px-2 py-1 text-xs text-error/80 hover:bg-error/10 hover:text-error transition-colors disabled:opacity-50"
-                >
-                  <XCircle className="h-3.5 w-3.5" />
-                  Reject
-                </button>
+                  onConfirm={(reason): void => {
+                    rejectMutation.mutate(reason);
+                  }}
+                />
               </div>
               {(approveMutation.isError || rejectMutation.isError) && (
                 <p className="text-xs text-error">
diff --git a/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx b/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx
index 2292aef3ce..4de85ce2bf 100644
--- a/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx
+++ b/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx
@@ -1,4 +1,4 @@
-import type { ReactNode } from 'react';
+import { useId, useState, type ReactNode } from 'react';
 import {
   AlertDialog,
   AlertDialogAction,
@@ -11,6 +11,16 @@ import {
   AlertDialogTrigger,
 } from '@/components/ui/alert-dialog';
 
+/**
+ * Optional free-text input rendered below the description. Used for the
+ * reject flow so reviewers can attach a reason that propagates to the
+ * workflow's `on_reject` prompt as `$REJECTION_REASON`.
+ */
+interface ReasonInputConfig {
+  label: string;
+  placeholder?: string;
+}
+
 interface Props {
   /** The element that opens the dialog when clicked (typically a button). */
   trigger: ReactNode;
@@ -20,11 +30,17 @@ interface Props {
   description: ReactNode;
   /** Confirm-button label (e.g. "Abandon", "Delete"). */
   confirmLabel: string;
-  /** Invoked when the user confirms. The current callsites are all
-   *  fire-and-forget wrappers around React Query mutations whose error
-   *  handling lives at the page level (`runAction` in `DashboardPage.tsx`).
-   *  Widen to `Promise<void>` only if a caller needs to await the action. */
-  onConfirm: () => void;
+  /**
+   * When provided, renders a textarea below the description. The trimmed
+   * value is passed to `onConfirm` — empty after trim becomes `undefined`
+   * so callers never receive an empty or whitespace-only reason string.
+   */
+  reasonInput?: ReasonInputConfig;
+  /** Invoked when the user confirms. Fire-and-forget; callers own error
+   *  surfacing. Widen to `Promise<void>` only if a future caller needs to
+   *  await the action. `reason` is only non-`undefined` when `reasonInput`
+   *  is supplied and the user's input is non-empty after trimming. */
+  onConfirm: (reason?: string) => void;
 }
 
 /**
@@ -36,6 +52,10 @@ interface Props {
  * `@/components/ui/alert-dialog`), which is appropriate for every workflow
  * lifecycle action this is used for (Abandon, Cancel, Delete, Reject).
  *
+ * For reject flows, pass `reasonInput` to collect a trimmed free-text reason
+ * that propagates to `$REJECTION_REASON` inside the workflow's `on_reject`
+ * prompt.
+ *
  * Replaces previous use of `window.confirm()` for these actions to match the
  * codebase-delete UX in `sidebar/ProjectSelector.tsx`.
  */
@@ -44,10 +64,22 @@ export function ConfirmRunActionDialog({
   title,
   description,
   confirmLabel,
+  reasonInput,
   onConfirm,
 }: Props): React.ReactElement {
+  const [reason, setReason] = useState('');
+  // useId() so multiple dialog instances on the same page (e.g. side-by-side
+  // run cards) don't collide on a shared DOM id.
+  const reasonInputId = useId();
+
   return (
-    <AlertDialog>
+    <AlertDialog
+      onOpenChange={(open): void => {
+        // Reset the textarea every time the dialog closes so a previous
+        // reason doesn't bleed into the next reject action on the same card.
+        if (!open) setReason('');
+      }}
+    >
       <AlertDialogTrigger asChild>{trigger}</AlertDialogTrigger>
       <AlertDialogContent>
         <AlertDialogHeader>
@@ -56,6 +88,23 @@ export function ConfirmRunActionDialog({
             <div>{description}</div>
           </AlertDialogDescription>
         </AlertDialogHeader>
+        {reasonInput && (
+          <div className="space-y-2">
+            <label htmlFor={reasonInputId} className="text-sm font-medium text-foreground">
+              {reasonInput.label}
+            </label>
+            <textarea
+              id={reasonInputId}
+              value={reason}
+              onChange={(e): void => {
+                setReason(e.target.value);
+              }}
+              placeholder={reasonInput.placeholder}
+              rows={3}
+              className="w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2"
+            />
+          </div>
+        )}
         <AlertDialogFooter>
           <AlertDialogCancel>Cancel</AlertDialogCancel>
           <AlertDialogAction
@@ -64,7 +113,8 @@ export function ConfirmRunActionDialog({
               // runAction helper that surfaces errors via component state.
               // We do NOT catch here; swallowing would hide failures the
               // parent is positioned to display.
-              onConfirm();
+              const trimmed = reason.trim();
+              onConfirm(trimmed === '' ? undefined : trimmed);
             }}
           >
             {confirmLabel}
diff --git a/packages/web/src/components/dashboard/WorkflowRunCard.tsx b/packages/web/src/components/dashboard/WorkflowRunCard.tsx
index 6a5042de55..417a2456a0 100644
--- a/packages/web/src/components/dashboard/WorkflowRunCard.tsx
+++ b/packages/web/src/components/dashboard/WorkflowRunCard.tsx
@@ -32,7 +32,7 @@ interface WorkflowRunCardProps {
   onAbandon?: (runId: string) => void;
   onDelete?: (runId: string) => void;
   onApprove?: (runId: string) => void;
-  onReject?: (runId: string) => void;
+  onReject?: (runId: string, reason?: string) => void;
 }
 
 const PLATFORM_ICONS: Record<string, React.ReactElement> = {
@@ -329,13 +329,18 @@ export function WorkflowRunCard({
               title="Reject workflow?"
               description={
                 <>
-                  Reject the paused workflow <strong>{run.workflow_name}</strong>. The run will be
-                  marked as failed and any pending iterations will not continue.
+                  Reject the paused workflow <strong>{run.workflow_name}</strong>. If the approval
+                  node defines an <code>on_reject</code> prompt, it runs with your reason as{' '}
+                  <code>$REJECTION_REASON</code>; otherwise the run is cancelled.
                 </>
               }
               confirmLabel="Reject"
-              onConfirm={(): void => {
-                onReject(run.id);
+              reasonInput={{
+                label: 'Reason (optional)',
+                placeholder: 'Why are you rejecting? Visible to the on_reject prompt.',
+              }}
+              onConfirm={(reason): void => {
+                onReject(run.id, reason);
               }}
             />
           )}
diff --git a/packages/web/src/components/dashboard/WorkflowRunGroup.tsx b/packages/web/src/components/dashboard/WorkflowRunGroup.tsx
index c30fc39609..cbd0890b91 100644
--- a/packages/web/src/components/dashboard/WorkflowRunGroup.tsx
+++ b/packages/web/src/components/dashboard/WorkflowRunGroup.tsx
@@ -12,7 +12,7 @@ interface WorkflowRunGroupProps {
   onAbandon?: (runId: string) => void;
   onDelete?: (runId: string) => void;
   onApprove?: (runId: string) => void;
-  onReject?: (runId: string) => void;
+  onReject?: (runId: string, reason?: string) => void;
 }
 
 export function WorkflowRunGroup({
diff --git a/packages/web/src/routes/DashboardPage.tsx b/packages/web/src/routes/DashboardPage.tsx
index 1a6a70b53c..d2e77604b1 100644
--- a/packages/web/src/routes/DashboardPage.tsx
+++ b/packages/web/src/routes/DashboardPage.tsx
@@ -295,8 +295,19 @@ export function DashboardPage(): React.ReactElement {
     runAction(deleteWorkflowRun, runId, 'Failed to delete workflow run');
   const handleApprove = (runId: string): Promise<void> =>
     runAction(approveWorkflowRun, runId, 'Failed to approve workflow');
-  const handleReject = (runId: string): Promise<void> =>
-    runAction(rejectWorkflowRun, runId, 'Failed to reject workflow');
+  // Reject differs from the rest of the lifecycle actions because it takes a
+  // second argument (the optional reason). Inline it rather than squeezing
+  // through `runAction`'s `(id) => Promise` signature with a closure — keeps
+  // `runAction` usefully narrow for the single-arg actions above.
+  async function handleReject(runId: string, reason?: string): Promise<void> {
+    try {
+      setActionError(null);
+      await rejectWorkflowRun(runId, reason);
+      void queryClient.invalidateQueries({ queryKey: ['dashboardRuns'] });
+    } catch (err) {
+      setActionError(err instanceof Error ? err.message : 'Failed to reject workflow');
+    }
+  }
 
   const totalPages = Math.ceil(total / pageSize);
   const hasMore = page + 1 < totalPages;
diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts
index 03b4e77f91..52c22b41dc 100644
--- a/packages/workflows/src/dag-executor.test.ts
+++ b/packages/workflows/src/dag-executor.test.ts
@@ -5570,3 +5570,49 @@ describe('executeDagWorkflow -- script nodes', () => {
     execSpy.mockRestore();
   });
 });
+
+// ---------------------------------------------------------------------------
+// Streaming cancel-check policy (during-streaming paused tolerance)
+// ---------------------------------------------------------------------------
+
+describe('shouldContinueStreamingForStatus', () => {
+  it('continues when status is running', async () => {
+    const { shouldContinueStreamingForStatus } = await import('./dag-executor');
+    expect(shouldContinueStreamingForStatus('running')).toBe(true);
+  });
+
+  it('continues when status is paused (sibling approval node in same layer)', async () => {
+    // The key invariant: a concurrent approval node can pause the run while a
+    // streaming AI node is mid-response. The streaming node must finish its
+    // own output — workflow progression is gated by the approval node, not
+    // by tearing down unrelated in-flight streams.
+    const { shouldContinueStreamingForStatus } = await import('./dag-executor');
+    expect(shouldContinueStreamingForStatus('paused')).toBe(true);
+  });
+
+  it('aborts when status is null (run deleted)', async () => {
+    const { shouldContinueStreamingForStatus } = await import('./dag-executor');
+    expect(shouldContinueStreamingForStatus(null)).toBe(false);
+  });
+
+  it('aborts when status is cancelled', async () => {
+    const { shouldContinueStreamingForStatus } = await import('./dag-executor');
+    expect(shouldContinueStreamingForStatus('cancelled')).toBe(false);
+  });
+
+  it('aborts when status is failed', async () => {
+    const { shouldContinueStreamingForStatus } = await import('./dag-executor');
+    expect(shouldContinueStreamingForStatus('failed')).toBe(false);
+  });
+
+  it('aborts when status is completed', async () => {
+    const { shouldContinueStreamingForStatus } = await import('./dag-executor');
+    expect(shouldContinueStreamingForStatus('completed')).toBe(false);
+  });
+
+  it('aborts on any unrecognized state', async () => {
+    const { shouldContinueStreamingForStatus } = await import('./dag-executor');
+    expect(shouldContinueStreamingForStatus('pending')).toBe(false);
+    expect(shouldContinueStreamingForStatus('invalid-status')).toBe(false);
+  });
+});
diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts
index a60f4b7b72..ce53bc196c 100644
--- a/packages/workflows/src/dag-executor.ts
+++ b/packages/workflows/src/dag-executor.ts
@@ -95,6 +95,26 @@ type NodeExecutionResult = NodeOutput & { costUsd?: number };
 const lastNodeCancelCheck = new Map<string, number>();
 const CANCEL_CHECK_INTERVAL_MS = 10_000;
 
+/**
+ * Policy for the during-streaming cancel check: should the currently-streaming
+ * node be allowed to continue for a given observed run status?
+ *
+ * - `running`: the normal case → continue.
+ * - `paused`: a concurrent approval node in the same topological layer has
+ *   transitioned the run to paused. The streaming node should finish its own
+ *   output; workflow progression is gated by the approval node, not by tearing
+ *   down unrelated in-flight streams.
+ * - `null` (run deleted), `cancelled`, `failed`, `completed`, or any other
+ *   state → abort the stream.
+ *
+ * Exported for unit testing; the full streaming-cancel branch in
+ * `executeNodeInternal` only fires once per 10s (CANCEL_CHECK_INTERVAL_MS), so
+ * integration-level coverage of the policy is timing-sensitive and flaky.
+ */
+export function shouldContinueStreamingForStatus(status: string | null): boolean {
+  return status === 'running' || status === 'paused';
+}
+
 /** Throttle state for activity heartbeat writes (only used for stale/zombie detection) */
 const lastNodeActivityUpdate = new Map<string, number>();
 const ACTIVITY_HEARTBEAT_INTERVAL_MS = 60_000;
@@ -604,12 +624,19 @@ async function executeNodeInternal(
       const tickNow = Date.now();
       const nodeKey = `${workflowRun.id}:${node.id}`;
 
-      // Cancel/pause check — read-only, no write contention in WAL mode (every 10s)
+      // Cancel/pause check — read-only, no write contention in WAL mode (every 10s).
+      //
+      // `paused` is tolerated here: an approval node can transition the run to
+      // paused while this concurrent node is mid-stream (same topological layer).
+      // The streaming node should be allowed to finish its own output — the
+      // paused gate owns workflow progression, not individual node lifecycles.
+      // Only truly terminal / unknown states (null, cancelled, failed, completed)
+      // abort the in-flight stream.
       if (tickNow - (lastNodeCancelCheck.get(nodeKey) ?? 0) > CANCEL_CHECK_INTERVAL_MS) {
         lastNodeCancelCheck.set(nodeKey, tickNow);
         try {
           const streamStatus = await deps.store.getWorkflowRunStatus(workflowRun.id);
-          if (streamStatus === null || streamStatus !== 'running') {
+          if (!shouldContinueStreamingForStatus(streamStatus)) {
             getLog().info(
               { workflowRunId: workflowRun.id, nodeId: node.id, status: streamStatus ?? 'deleted' },
               'dag.stop_detected_during_streaming'
@@ -1535,9 +1562,13 @@ async function executeLoopNode(
   for (let i = startIteration; i <= loop.max_iterations; i++) {
     const iterationStart = Date.now();
 
-    // Check for non-running status between iterations (cancellation, deletion, or future: pause)
+    // Check for non-running status between iterations. `paused` is tolerated
+    // here for the same reason as the streaming check: a sibling approval
+    // node in the same topological layer may pause the run while this loop
+    // is between iterations — the loop should continue its own iterations
+    // regardless of unrelated pauses elsewhere in the DAG.
     const runStatus = await deps.store.getWorkflowRunStatus(workflowRun.id);
-    if (runStatus === null || runStatus !== 'running') {
+    if (!shouldContinueStreamingForStatus(runStatus)) {
       const effectiveStatus = runStatus ?? 'deleted';
       getLog().info(
         { workflowRunId: workflowRun.id, nodeId: node.id, iteration: i, status: effectiveStatus },
@@ -2727,15 +2758,24 @@ export async function executeDagWorkflow(
     }
   }
 
-  // Helper: bail out if the run was transitioned externally (cancelled, deleted, etc.)
+  /**
+   * Bail out of the final completion/failure write if the run was transitioned
+   * externally. Strict `!== 'running'` check is correct here because we don't
+   * want to mark a paused run as complete — the approval gate is still live.
+   *
+   * Emitter unregister is conditional: terminal states (cancelled / deleted /
+   * completed / failed) unregister to release subscription resources, but
+   * `paused` keeps the emitter registered so SSE stays connected while the
+   * approval gate awaits the user — crucial for resume observability.
+   */
   async function skipIfStatusChanged(logEvent: string): Promise<boolean> {
     const status = await deps.store.getWorkflowRunStatus(workflowRun.id);
-    if (status === null || status !== 'running') {
-      getLog().info({ workflowRunId: workflowRun.id, status: status ?? 'deleted' }, logEvent);
+    if (status === 'running') return false;
+    getLog().info({ workflowRunId: workflowRun.id, status: status ?? 'deleted' }, logEvent);
+    if (status !== 'paused') {
       getWorkflowEventEmitter().unregisterRun(workflowRun.id);
-      return true;
     }
-    return false;
+    return true;
   }
 
   // Single-pass: compute node outcome counts and derive success/failure booleans

From 2393edb369ce585718c26ce92bd6626ae1b317e3 Mon Sep 17 00:00:00 2001
From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com>
Date: Wed, 22 Apr 2026 16:15:24 +0300
Subject: [PATCH 12/14] feat(providers): autodetect canonical binary install
 paths for Claude and Codex (#1361)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both binary resolvers previously stopped at env-var + explicit config and
threw a "not found" error when neither was set. Users who followed the
upstream-recommended install flow (Anthropic's `curl install.sh` for
Claude, `npm install -g @openai/codex`) still had to manually set either
`CLAUDE_BIN_PATH` / `CODEX_BIN_PATH` or the corresponding config field
before any workflow could run.

Add an autodetect tier immediately before the install-instructions throw
(after the explicit config tier for Claude; after the vendor-directory
tier for Codex). Purely additive: env and config still win when set
(precedence covered by new tests). On autodetect miss, the same
install-instructions error fires as before.

Claude probe list (verified against docs.claude.com "Uninstall Claude
Code → Native installation" section):
  - $HOME/.local/bin/claude            (mac/linux native installer)
  - $USERPROFILE\.local\bin\claude.exe (Windows native installer)

Codex probe list (verified against openai/codex README; npm global-
install puts the binary at `{npm_prefix}/bin/<name>` on POSIX,
`{npm_prefix}\<name>.cmd` on Windows):
  - $HOME/.npm-global/bin/codex   (user-set `npm config set prefix`)
  - /opt/homebrew/bin/codex       (mac arm64 with homebrew-node)
  - /usr/local/bin/codex          (mac intel / linux system node)
  - %APPDATA%\npm\codex.cmd       (Windows npm global default)
  - $HOME\.npm-global\codex.cmd   (Windows user-set prefix)

Not probed (explicit override still required):
  - Custom npm prefixes — `npm root -g` would need a subprocess per
    resolve, too much surface for a probe helper
  - `brew install --cask codex` — cask layout isn't a PATH binary
  - Manual GitHub Releases extracts — placement is user-determined
  - `~/.bun/bin/codex` — not documented in openai/codex README

Pi provider intentionally has no equivalent change: the Pi SDK is
bundled into the archon binary (no subprocess), so there's no "binary"
to resolve. Pi auth lives at `~/.pi/agent/auth.json` which the SDK
already finds by default, and the PR A shim (`PI_PACKAGE_DIR`) handles
the package-dir case via Pi's own documented escape hatch.

E2E verified: removed both config entries from ~/.archon/config.yaml,
rebuilt compiled binary, ran `archon workflow run archon-assist` and a
Codex workflow. Logs showed `source: 'autodetect'` for both, responses
returned cleanly.

(cherry picked from commit b99cee4c2d73754733dc452d5fc410519ce2c6b9)
---
 .../src/claude/binary-resolver.test.ts        | 47 +++++++++++++-
 .../providers/src/claude/binary-resolver.ts   | 26 +++++++-
 .../src/codex/binary-resolver.test.ts         | 63 +++++++++++++++++++
 .../providers/src/codex/binary-resolver.ts    | 62 +++++++++++++++++-
 4 files changed, 193 insertions(+), 5 deletions(-)

diff --git a/packages/providers/src/claude/binary-resolver.test.ts b/packages/providers/src/claude/binary-resolver.test.ts
index f87e78f36d..4c56ba1214 100644
--- a/packages/providers/src/claude/binary-resolver.test.ts
+++ b/packages/providers/src/claude/binary-resolver.test.ts
@@ -76,7 +76,52 @@ describe('resolveClaudeBinaryPath (binary mode)', () => {
     expect(result).toBe('/env/cli.js');
   });
 
-  test('throws with install instructions when nothing configured', async () => {
+  test('autodetects native installer path when env and config are unset', async () => {
+    const home = process.env.HOME ?? '/Users/test';
+    const expected =
+      process.platform === 'win32'
+        ? `${home}\\.local\\bin\\claude.exe`
+        : `${home}/.local/bin/claude`;
+    // File exists only at the native-installer path.
+    fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation(
+      (path: string) => path === expected
+    );
+
+    const result = await resolver.resolveClaudeBinaryPath();
+    expect(result).toBe(expected);
+    // Log must mark this as autodetect, not 'env' or 'config' — the source
+    // string is load-bearing for debug triage.
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      { binaryPath: expected, source: 'autodetect' },
+      'claude.binary_resolved'
+    );
+  });
+
+  test('env var takes precedence over autodetect when both would match', async () => {
+    process.env.CLAUDE_BIN_PATH = '/custom/env/claude';
+    fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true);
+
+    const result = await resolver.resolveClaudeBinaryPath();
+    expect(result).toBe('/custom/env/claude');
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      { binaryPath: '/custom/env/claude', source: 'env' },
+      'claude.binary_resolved'
+    );
+  });
+
+  test('config takes precedence over autodetect when both would match', async () => {
+    fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true);
+
+    const result = await resolver.resolveClaudeBinaryPath('/custom/config/claude');
+    expect(result).toBe('/custom/config/claude');
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      { binaryPath: '/custom/config/claude', source: 'config' },
+      'claude.binary_resolved'
+    );
+  });
+
+  test('throws with install instructions when nothing is configured and autodetect misses', async () => {
+    // Every probe returns false — env unset, config unset, native path absent.
     fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false);
 
     const promise = resolver.resolveClaudeBinaryPath();
diff --git a/packages/providers/src/claude/binary-resolver.ts b/packages/providers/src/claude/binary-resolver.ts
index f236acb277..c2273d85d2 100644
--- a/packages/providers/src/claude/binary-resolver.ts
+++ b/packages/providers/src/claude/binary-resolver.ts
@@ -9,13 +9,16 @@
  * Resolution order (binary mode only):
  * 1. `CLAUDE_BIN_PATH` environment variable
  * 2. `assistants.claude.claudeBinaryPath` in config
- * 3. Throw with install instructions
+ * 3. Autodetect canonical install path (native installer default)
+ * 4. Throw with install instructions
  *
  * In dev mode (BUNDLED_IS_BINARY=false), returns undefined so the caller
  * omits `pathToClaudeCodeExecutable` entirely and the SDK resolves via its
  * normal node_modules lookup.
  */
 import { existsSync as _existsSync } from 'node:fs';
+import { homedir } from 'node:os';
+import { join } from 'node:path';
 import { BUNDLED_IS_BINARY, createLogger } from '@archon/paths';
 
 /** Wrapper for existsSync — enables spyOn in tests (direct imports can't be spied on). */
@@ -89,6 +92,25 @@ export async function resolveClaudeBinaryPath(
     return configClaudeBinaryPath;
   }
 
-  // 3. Not found — throw with install instructions
+  // 3. Autodetect — the Anthropic native installer
+  // (`curl -fsSL https://claude.ai/install.sh | bash` on macOS/Linux,
+  // `irm https://claude.ai/install.ps1 | iex` on Windows) writes the
+  // executable to a fixed location relative to $HOME. Users who follow
+  // the recommended install path don't need any env var or config entry;
+  // users who deviate (npm global, custom path, etc.) still set one of
+  // the higher-priority sources above.
+  const nativeInstallerPath =
+    process.platform === 'win32'
+      ? join(homedir(), '.local', 'bin', 'claude.exe')
+      : join(homedir(), '.local', 'bin', 'claude');
+  if (fileExists(nativeInstallerPath)) {
+    getLog().info(
+      { binaryPath: nativeInstallerPath, source: 'autodetect' },
+      'claude.binary_resolved'
+    );
+    return nativeInstallerPath;
+  }
+
+  // 4. Not found — throw with install instructions
   throw new Error(INSTALL_INSTRUCTIONS);
 }
diff --git a/packages/providers/src/codex/binary-resolver.test.ts b/packages/providers/src/codex/binary-resolver.test.ts
index 1df4e7c6f6..a121e4c204 100644
--- a/packages/providers/src/codex/binary-resolver.test.ts
+++ b/packages/providers/src/codex/binary-resolver.test.ts
@@ -87,7 +87,70 @@ describe('resolveCodexBinaryPath (binary mode)', () => {
     expect(normalized).toContain('/tmp/test-archon-home/vendor/codex/');
   });
 
+  test('autodetects npm global install at ~/.npm-global/bin/codex (POSIX)', async () => {
+    if (process.platform === 'win32') return; // POSIX-only probe
+    const home = process.env.HOME ?? '/Users/test';
+    const expected = `${home}/.npm-global/bin/codex`;
+    fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation(
+      (path: string) => path === expected
+    );
+
+    const result = await resolver.resolveCodexBinaryPath();
+    expect(result).toBe(expected);
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      { binaryPath: expected, source: 'autodetect' },
+      'codex.binary_resolved'
+    );
+  });
+
+  test('autodetects homebrew install on Apple Silicon', async () => {
+    if (process.platform !== 'darwin' || process.arch !== 'arm64') {
+      // `/opt/homebrew/bin/codex` is only probed on darwin-arm64; on other
+      // hosts this test has nothing to assert (the probe list excludes it).
+      return;
+    }
+    fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation(
+      (path: string) => path === '/opt/homebrew/bin/codex'
+    );
+
+    const result = await resolver.resolveCodexBinaryPath();
+    expect(result).toBe('/opt/homebrew/bin/codex');
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      { binaryPath: '/opt/homebrew/bin/codex', source: 'autodetect' },
+      'codex.binary_resolved'
+    );
+  });
+
+  test('autodetects system install at /usr/local/bin/codex', async () => {
+    if (process.platform === 'win32') {
+      // /usr/local/bin is not probed on Windows.
+      return;
+    }
+    fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation(
+      (path: string) => path === '/usr/local/bin/codex'
+    );
+
+    const result = await resolver.resolveCodexBinaryPath();
+    expect(result).toBe('/usr/local/bin/codex');
+  });
+
+  test('vendor directory takes precedence over autodetect', async () => {
+    // Both vendor and npm-global would match; vendor must win (lower tier #).
+    fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation((path: string) => {
+      const normalized = path.replace(/\\/g, '/');
+      return normalized.includes('vendor/codex') || normalized.includes('.npm-global');
+    });
+
+    const result = await resolver.resolveCodexBinaryPath();
+    expect(result!.replace(/\\/g, '/')).toContain('/vendor/codex/');
+    expect(mockLogger.info).toHaveBeenCalledWith(
+      expect.objectContaining({ source: 'vendor' }),
+      'codex.binary_resolved'
+    );
+  });
+
   test('throws with install instructions when binary not found anywhere', async () => {
+    // Env unset, config unset, vendor dir empty, every autodetect path missing.
     fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false);
 
     await expect(resolver.resolveCodexBinaryPath()).rejects.toThrow('Codex CLI binary not found');
diff --git a/packages/providers/src/codex/binary-resolver.ts b/packages/providers/src/codex/binary-resolver.ts
index a1e0f01a5b..1ac8e57cfb 100644
--- a/packages/providers/src/codex/binary-resolver.ts
+++ b/packages/providers/src/codex/binary-resolver.ts
@@ -9,12 +9,14 @@
  * 1. `CODEX_BIN_PATH` environment variable
  * 2. `assistants.codex.codexBinaryPath` in config
  * 3. `~/.archon/vendor/codex/<platform-binary>` (user-placed)
- * 4. Throw with install instructions
+ * 4. Autodetect canonical install paths (npm prefix defaults per platform)
+ * 5. Throw with install instructions
  *
  * In dev mode (BUNDLED_IS_BINARY=false), returns undefined so the SDK
  * uses its normal node_modules-based resolution.
  */
 import { existsSync as _existsSync } from 'node:fs';
+import { homedir } from 'node:os';
 import { join } from 'node:path';
 import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths';
 
@@ -89,7 +91,19 @@ export async function resolveCodexBinaryPath(
     }
   }
 
-  // 4. Not found — throw with install instructions
+  // 4. Autodetect — probe the handful of paths Codex typically lands at
+  // when installed via the documented package managers. Users who install
+  // somewhere else (custom npm prefix, etc.) still set one of the higher-
+  // priority sources above. Order: most specific → least specific.
+  const autodetectPaths = getAutodetectPaths();
+  for (const probePath of autodetectPaths) {
+    if (fileExists(probePath)) {
+      getLog().info({ binaryPath: probePath, source: 'autodetect' }, 'codex.binary_resolved');
+      return probePath;
+    }
+  }
+
+  // 5. Not found — throw with install instructions
   const vendorPath = `~/.archon/${CODEX_VENDOR_DIR}/`;
   throw new Error(
     'Codex CLI binary not found. The Codex provider requires a native binary\n' +
@@ -105,3 +119,47 @@ export async function resolveCodexBinaryPath(
       '         codexBinaryPath: /path/to/codex\n'
   );
 }
+
+/**
+ * Canonical install locations probed by tier 4 autodetect. Grounded in
+ * the official @openai/codex README and the npm global-install contract
+ * (npm writes the binary to `{npm_prefix}/bin/<name>` on POSIX and
+ * `{npm_prefix}\<name>.cmd` on Windows). The probes cover the npm prefix
+ * a default install lands at on each platform:
+ *
+ *  - `$HOME/.npm-global/bin/codex` — common when the user ran
+ *    `npm config set prefix ~/.npm-global` to avoid root writes
+ *  - `/opt/homebrew/bin/codex` — mac Apple Silicon with homebrew-node
+ *    (homebrew sets npm prefix to /opt/homebrew)
+ *  - `/usr/local/bin/codex` — mac Intel homebrew-node, or linux system node (npm prefix /usr/local)
+ *  - `%AppData%\npm\codex.cmd` — Windows npm global default
+ *  - `$HOME\.npm-global\codex.cmd` — Windows with a user-set npm prefix
+ *
+ * Not covered (explicit override required via CODEX_BIN_PATH or config):
+ *   - users with other custom npm prefixes — `npm root -g` would spawn
+ *     a subprocess per resolve, too heavy for a probe helper
+ *   - Homebrew cask install (`brew install --cask codex`) — cask layout
+ *     isn't a PATH binary; users should symlink or set the path
+ *   - manual GitHub Releases extract — placement is user-determined
+ */
+function getAutodetectPaths(): string[] {
+  const paths: string[] = [];
+
+  if (process.platform === 'win32') {
+    const appData = process.env.APPDATA;
+    if (appData) paths.push(join(appData, 'npm', 'codex.cmd'));
+    paths.push(join(homedir(), '.npm-global', 'codex.cmd'));
+    return paths;
+  }
+
+  // POSIX (macOS + Linux)
+  paths.push(join(homedir(), '.npm-global', 'bin', 'codex'));
+
+  if (process.platform === 'darwin' && process.arch === 'arm64') {
+    paths.push('/opt/homebrew/bin/codex');
+  }
+
+  paths.push('/usr/local/bin/codex');
+
+  return paths;
+}

From 04c0f2d0681e0d65261b7ba1c240f489b50fc8c6 Mon Sep 17 00:00:00 2001
From: Cole Medin <cole@dynamous.ai>
Date: Thu, 23 Apr 2026 07:19:55 -0500
Subject: [PATCH 13/14] fix(providers/test): use os.homedir() instead of $HOME
 in claude binary autodetect test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The native-installer autodetect test computed its expected path from
process.env.HOME, but the implementation uses node:os homedir(). On
Windows, HOME is typically unset (Windows uses USERPROFILE), so the
test fell back to '/Users/test' while the resolver returned the real
home dir — making the spy's path-equality check fail and breaking CI
on windows-latest.

Mirror the implementation by importing homedir() from node:os and
joining with node:path so the expected path matches the actual
platform-resolved home and separator.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
(cherry picked from commit f9f8775afa47cde8ca55b87c6abc6ea5d3b614f7)
---
 .../providers/src/claude/binary-resolver.test.ts  | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/packages/providers/src/claude/binary-resolver.test.ts b/packages/providers/src/claude/binary-resolver.test.ts
index 4c56ba1214..c5c407a531 100644
--- a/packages/providers/src/claude/binary-resolver.test.ts
+++ b/packages/providers/src/claude/binary-resolver.test.ts
@@ -5,6 +5,8 @@
  * with BUNDLED_IS_BINARY=true, which conflicts with other test files.
  */
 import { describe, test, expect, mock, beforeEach, afterAll, spyOn } from 'bun:test';
+import { homedir } from 'node:os';
+import { join } from 'node:path';
 import { createMockLogger } from '../test/mocks/logger';
 
 const mockLogger = createMockLogger();
@@ -77,11 +79,14 @@ describe('resolveClaudeBinaryPath (binary mode)', () => {
   });
 
   test('autodetects native installer path when env and config are unset', async () => {
-    const home = process.env.HOME ?? '/Users/test';
-    const expected =
-      process.platform === 'win32'
-        ? `${home}\\.local\\bin\\claude.exe`
-        : `${home}/.local/bin/claude`;
+    // Mirror the implementation: use os.homedir() + node:path.join so the
+    // expected path matches the platform's actual home dir and separator.
+    const expected = join(
+      homedir(),
+      '.local',
+      'bin',
+      process.platform === 'win32' ? 'claude.exe' : 'claude'
+    );
     // File exists only at the native-installer path.
     fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation(
       (path: string) => path === expected

From 7548085218afecb707ba34e89fb41b9145b3ed55 Mon Sep 17 00:00:00 2001
From: Cole Medin <cole@dynamous.ai>
Date: Thu, 23 Apr 2026 07:33:21 -0500
Subject: [PATCH 14/14] fix(server): contain Discord login failure so it
 doesn't kill the server (#1365)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported in #1365: a user running `archon serve` with DISCORD_BOT_TOKEN
set but the "Message Content Intent" toggle disabled in the Discord
Developer Portal saw the entire server crash with `Used disallowed
intents`. Discord rejects the gateway connection (close code 4014) when
a privileged intent is requested without being enabled, and the
unguarded `await discord.start()` propagated the error all the way up,
taking the web UI down with it.

Wrap discord.start() in try/catch — log the failure with an actionable
hint (special-cased for the disallowed-intent error) and continue
running. Other adapters and the web UI come up regardless. The shutdown
handler already uses optional chaining (`discord?.stop()`) so nulling
discord after a failed start is safe.

Other adapters (Telegram, Slack, GitHub, Gitea, GitLab) have the same
unguarded-start pattern but are out of scope for this fix — addressing
them is tracked separately.

Also expanded the Discord setup docs with a caution callout that names
the exact error string and the new log event so users can grep for
both.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
(cherry picked from commit 5957c6e292e0fb35e1218a43db329e062c084702)
---
 .../docs/adapters/community/discord.md        |  8 ++++++++
 packages/server/src/index.ts                  | 20 +++++++++++++++++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/packages/docs-web/src/content/docs/adapters/community/discord.md b/packages/docs-web/src/content/docs/adapters/community/discord.md
index 0f3e59082c..b719d719ce 100644
--- a/packages/docs-web/src/content/docs/adapters/community/discord.md
+++ b/packages/docs-web/src/content/docs/adapters/community/discord.md
@@ -40,6 +40,14 @@ Connect Archon to Discord so you can interact with your AI coding assistant from
 2. Enable **"Message Content Intent"** (required for the bot to read messages)
 3. Save changes
 
+:::caution
+Skipping this step causes Discord to reject the bot's connection with
+`Used disallowed intents`. Archon will log
+`discord.start_failed_continuing_without_adapter` and keep the rest of
+the server running, but the Discord adapter will be unavailable until
+the intent is enabled and the server is restarted.
+:::
+
 ## Invite Bot to Your Server
 
 1. Go to "OAuth2" > "URL Generator" in the left sidebar
diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts
index d1738ce678..76f8d67690 100644
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -395,8 +395,24 @@ export async function startServer(opts: ServerOptions = {}): Promise<void> {
           .catch(createMessageErrorHandler('Discord', discordAdapter, conversationId));
       });
 
-      await discord.start();
-      activePlatforms.push('Discord');
+      // Don't let a Discord login failure (bad token, missing privileged
+      // intents, etc.) bring down the whole server — users running
+      // `archon serve` for the web UI shouldn't lose it because of an
+      // unrelated bot misconfiguration. See #1365.
+      try {
+        await discord.start();
+        activePlatforms.push('Discord');
+      } catch (error) {
+        const err = error as Error;
+        const isPrivilegedIntentError = err.message?.includes('disallowed intents');
+        const hint = isPrivilegedIntentError
+          ? 'Enable "Message Content Intent" in the Discord Developer Portal ' +
+            '(your application > Bot > Privileged Gateway Intents) and restart, ' +
+            'or unset DISCORD_BOT_TOKEN if you do not want the Discord adapter.'
+          : 'Verify DISCORD_BOT_TOKEN is valid, or unset it to disable the Discord adapter.';
+        getLog().error({ err, hint }, 'discord.start_failed_continuing_without_adapter');
+        discord = null;
+      }
     } else {
       getLog().info('discord_adapter_skipped');
     }