From d4981b5d8094d3eeee7ac73e3f33fcd78c94771a Mon Sep 17 00:00:00 2001 From: Cole Medin Date: Sat, 18 Apr 2026 15:02:35 -0500 Subject: [PATCH 01/14] fix(workflows): fail loudly on SDK isError results (#1208) (#1291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, `dag-executor` only failed nodes/iterations when the SDK returned an `error_max_budget_usd` result. Every other `isError: true` subtype — including `error_during_execution` — was silently `break`ed out of the stream with whatever partial output had accumulated, letting failed runs masquerade as successful ones with empty output. This is the most likely explanation for the "5-second crash" symptom in #1208: iterations finish instantly with empty text, the loop keeps going, and only the `claude.result_is_error` log tips the user off. Changes: - Capture the SDK's `errors: string[]` detail on result messages (previously discarded) and surface it through `MessageChunk.errors`. - Log `errors`, `stopReason` alongside `errorSubtype` in `claude.result_is_error` so users can see what actually failed. - Throw from both the general node path and the loop iteration path on any `isError: true` result, including the subtype and SDK errors detail in the thrown message. Note: this does not implement auto-retry. See PR comments on #1121 and the analysis on #1208 — a retry-with-fresh-session approach for loop iterations is not obviously correct until we see what `error_during_execution` actually carries in the reporter's env. This change is the observability + fail-loud step that has to come first so that signal is no longer silent. 
Co-authored-by: Claude Opus 4.7 (cherry picked from commit 4c6ddd994f4dce2683f8cd08a68d95f86122cc12) --- packages/providers/src/claude/provider.ts | 10 +- packages/providers/src/types.ts | 2 + packages/workflows/src/dag-executor.test.ts | 116 ++++++++++++++++++++ packages/workflows/src/dag-executor.ts | 41 +++++++ 4 files changed, 168 insertions(+), 1 deletion(-) diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts index 26935bf373..0821319317 100644 --- a/packages/providers/src/claude/provider.ts +++ b/packages/providers/src/claude/provider.ts @@ -740,6 +740,7 @@ async function* streamClaudeMessages( total_cost_usd?: number; stop_reason?: string | null; num_turns?: number; + errors?: string[]; model_usage?: Record< string, { @@ -751,9 +752,15 @@ async function* streamClaudeMessages( >; }; const tokens = normalizeClaudeUsage(resultMsg.usage); + const sdkErrors = Array.isArray(resultMsg.errors) ? resultMsg.errors : undefined; if (resultMsg.is_error) { getLog().error( - { sessionId: resultMsg.session_id, errorSubtype: resultMsg.subtype }, + { + sessionId: resultMsg.session_id, + errorSubtype: resultMsg.subtype, + stopReason: resultMsg.stop_reason, + errors: sdkErrors, + }, 'claude.result_is_error' ); } @@ -765,6 +772,7 @@ async function* streamClaudeMessages( ? { structuredOutput: resultMsg.structured_output } : {}), ...(resultMsg.is_error ? { isError: true, errorSubtype: resultMsg.subtype } : {}), + ...(resultMsg.is_error && sdkErrors?.length ? { errors: sdkErrors } : {}), ...(resultMsg.total_cost_usd !== undefined ? { cost: resultMsg.total_cost_usd } : {}), ...(resultMsg.stop_reason != null ? { stopReason: resultMsg.stop_reason } : {}), ...(resultMsg.num_turns !== undefined ? 
{ numTurns: resultMsg.num_turns } : {}), diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts index 330669e0c5..5fdf48de17 100644 --- a/packages/providers/src/types.ts +++ b/packages/providers/src/types.ts @@ -62,6 +62,8 @@ export type MessageChunk = structuredOutput?: unknown; isError?: boolean; errorSubtype?: string; + /** SDK-provided error detail strings. Populated when isError is true. */ + errors?: string[]; cost?: number; stopReason?: string; numTurns?: number; diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index c5822197e5..0c745b39e5 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -3594,6 +3594,70 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { expect(sessionArg).toBe('loop-session-1'); }); + it('loop iteration fails loudly when SDK returns error_during_execution', async () => { + // Regression test for #1208: previously the loop silently broke on isError + // results and kept iterating with empty output, producing "5-second crashes" + // that masqueraded as successful iterations. + mockSendQueryDag.mockImplementation(function* () { + yield { + type: 'result', + isError: true, + errorSubtype: 'error_during_execution', + errors: ['Subprocess crashed mid-turn'], + sessionId: 'bad-session', + }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'loop-iteration-err', + nodes: [ + { + id: 'work', + loop: { + prompt: 'Do the work. 
Say DONE.', + until: 'DONE', + max_iterations: 5, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // Should fail after one iteration rather than burning through max_iterations + expect(mockSendQueryDag.mock.calls.length).toBe(1); + // The loop_iteration_failed event should carry the subtype and SDK errors detail + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const iterFailedEvents = eventCalls.filter( + (call: unknown[]) => + (call[0] as Record).event_type === 'loop_iteration_failed' + ); + expect(iterFailedEvents.length).toBeGreaterThan(0); + const failedData = (iterFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(failedData.error).toContain('error_during_execution'); + expect(failedData.error).toContain('Subprocess crashed mid-turn'); + }); + it('non-interactive loop is unaffected (no pause)', async () => { mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'Still working...' }; @@ -4617,6 +4681,58 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { expect(capMessage).toBeDefined(); }); + it('fails node when SDK returns error_during_execution result', async () => { + // Regression test for #1208: previously we only failed on error_max_budget_usd + // and silently broke on all other isError subtypes, letting failed nodes + // masquerade as successes with empty output. 
+ mockSendQueryDag.mockImplementation(function* () { + yield { + type: 'result', + isError: true, + errorSubtype: 'error_during_execution', + errors: ['Tool call failed: permission denied'], + sessionId: 'sid-err', + }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'err-exec-test', + nodes: [{ id: 'step1', command: 'my-cmd' }], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // The node_failed event should carry the subtype and SDK errors detail + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBeGreaterThan(0); + const failedData = (nodeFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(failedData.error).toContain('error_during_execution'); + expect(failedData.error).toContain('permission denied'); + }); + it('forwards workflow-level effort to node when no per-node override', async () => { const mockDeps = createMockDeps(); const platform = createMockPlatform(); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index 432a784385..c363f4ce3f 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -767,6 +767,25 @@ async function executeNodeInternal( `Node '${node.id}' exceeded cost cap${cap !== undefined ? ` of $${cap.toFixed(2)}` : ''}.` ); } + // Fail loudly on any other SDK error result. Previously we broke out of + // the stream silently, producing empty/partial output without signaling + // failure — which let failed iterations masquerade as successes (#1208). 
+ if (msg.isError) { + const subtype = msg.errorSubtype ?? 'unknown'; + const errorsDetail = msg.errors?.length ? ` — ${msg.errors.join('; ')}` : ''; + getLog().error( + { + nodeId: node.id, + errorSubtype: subtype, + errors: msg.errors, + sessionId: msg.sessionId, + stopReason: msg.stopReason, + durationMs: Date.now() - nodeStartTime, + }, + 'dag.node_sdk_error_result' + ); + throw new Error(`Node '${node.id}' failed: SDK returned ${subtype}${errorsDetail}`); + } break; // Result is the "I'm done" signal — don't wait for subprocess to exit } else if (msg.type === 'system' && msg.content) { // Forward provider warnings (⚠️) and MCP connection failures to the user. @@ -1640,6 +1659,28 @@ async function executeLoopNode( if (msg.numTurns !== undefined) { loopTotalNumTurns = (loopTotalNumTurns ?? 0) + msg.numTurns; } + // Fail the iteration loudly on SDK error results. Previously we broke + // silently, producing empty output and continuing to the next iteration — + // which made `error_during_execution` on resumed interactive loops look + // like a "5-second crash" that kept burning iterations (#1208). + if (msg.isError) { + const subtype = msg.errorSubtype ?? 'unknown'; + const errorsDetail = msg.errors?.length ? 
` — ${msg.errors.join('; ')}` : ''; + getLog().error( + { + nodeId: node.id, + iteration: i, + errorSubtype: subtype, + errors: msg.errors, + sessionId: msg.sessionId, + stopReason: msg.stopReason, + }, + 'loop_node.iteration_sdk_error' + ); + throw new Error( + `Loop '${node.id}' iteration ${String(i)} failed: SDK returned ${subtype}${errorsDetail}` + ); + } break; // Result is the "I'm done" signal — don't wait for subprocess to exit } else if (msg.type === 'tool' && msg.toolName) { const now = Date.now(); From 9251787ca71222980040281fee183c8c8a733b45 Mon Sep 17 00:00:00 2001 From: Kagura Date: Mon, 20 Apr 2026 21:19:50 +0800 Subject: [PATCH 02/14] fix(db): throw on corrupt commands JSON instead of silent empty fallback (#1033) * fix(db): throw on corrupt commands JSON instead of silent empty fallback (#967) getCodebaseCommands() silently returned {} when the commands column contained corrupt JSON. Callers had no way to distinguish 'no commands' from 'unreadable data', violating fail-fast principles. Now throws a descriptive error with the codebase ID and a recovery hint. The error is still logged for observability before throwing. Adds two test cases: corrupt JSON throws, valid JSON string parses. 
* fix: include parse error in log for better diagnostics (cherry picked from commit 39a05b762f3f1b759119c72633d5caa1b7a4d0b2) --- packages/core/src/db/codebases.test.ts | 16 ++++++++++++++++ packages/core/src/db/codebases.ts | 9 ++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/packages/core/src/db/codebases.test.ts b/packages/core/src/db/codebases.test.ts index 26c269a085..b9bdbb6f1f 100644 --- a/packages/core/src/db/codebases.test.ts +++ b/packages/core/src/db/codebases.test.ts @@ -189,6 +189,22 @@ describe('codebases', () => { // Original frozen object should be unchanged expect(frozenCommands).not.toHaveProperty('new-command'); }); + + test('throws on corrupt JSON string (SQLite TEXT column)', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([{ commands: '{not valid json' }])); + + await expect(getCodebaseCommands('codebase-123')).rejects.toThrow( + /Corrupt commands JSON for codebase codebase-123/ + ); + }); + + test('parses valid JSON string from SQLite TEXT column', async () => { + const commands = { plan: { path: 'plan.md', description: 'Plan' } }; + mockQuery.mockResolvedValueOnce(createQueryResult([{ commands: JSON.stringify(commands) }])); + + const result = await getCodebaseCommands('codebase-123'); + expect(result).toEqual(commands); + }); }); describe('registerCommand', () => { diff --git a/packages/core/src/db/codebases.ts b/packages/core/src/db/codebases.ts index f3947fb6c1..27adc91557 100644 --- a/packages/core/src/db/codebases.ts +++ b/packages/core/src/db/codebases.ts @@ -59,9 +59,12 @@ export async function getCodebaseCommands( if (typeof raw === 'string') { try { parsed = JSON.parse(raw); - } catch { - getLog().error({ codebaseId: id, raw }, 'db.codebase_commands_json_parse_failed'); - return {}; + } catch (err) { + getLog().error({ codebaseId: id, raw, err }, 'db.codebase_commands_json_parse_failed'); + throw new Error( + `Corrupt commands JSON for codebase ${id}: unable to parse stored data. 
` + + `Run UPDATE remote_agent_codebases SET commands = '{}' WHERE id = '${id}' to reset.` + ); } } else { parsed = raw ?? {}; From c5d5663b5407cd0a3e6a187f5dab9bb04ca12d8e Mon Sep 17 00:00:00 2001 From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com> Date: Mon, 20 Apr 2026 21:45:24 +0300 Subject: [PATCH 03/14] fix(isolation): raise worktree git-operation timeout to 5m (#1306) All 15 worktree git-subprocess timeouts in WorktreeProvider were hardcoded at 30000ms. Repos with heavy post-checkout hooks (lint, dependency install, submodule init) routinely exceed that budget and fail worktree creation. Consolidate them onto a single GIT_OPERATION_TIMEOUT_MS constant at 5 min. Generous enough to cover reported cases while still catching genuine hangs (credential prompts in non-TTY, stalled fetches). Chosen over the config-key approach in #1029 to avoid adding permanent .archon/config.yaml surface for a problem a raised default solves cleanly. If 5 min turns out to also be too tight for real-world use, we'll revisit. Closes #1119 Supersedes #1029 Co-authored-by: Shay Elmualem <12733941+norbinsh@users.noreply.github.com> (cherry picked from commit cc78071ff62b6df20a50925d1117c4ddf6b44138) --- packages/isolation/src/providers/worktree.ts | 39 ++++++++++++-------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/packages/isolation/src/providers/worktree.ts b/packages/isolation/src/providers/worktree.ts index aad76ad6c4..9d15196f7f 100644 --- a/packages/isolation/src/providers/worktree.ts +++ b/packages/isolation/src/providers/worktree.ts @@ -49,6 +49,13 @@ function getLog(): ReturnType { return cachedLog; } +/** + * Ceiling for a single git subprocess in worktree operations (create/fetch/checkout/remove/branch-delete). + * Generous enough for repos with heavy post-checkout hooks (lint/install) while still catching genuine + * hangs (e.g. credential prompts in non-TTY, stalled network fetches). See #1119, #1029. 
+ */ +const GIT_OPERATION_TIMEOUT_MS = 5 * 60 * 1000; + export class WorktreeProvider implements IIsolationProvider { readonly providerType = 'worktree'; @@ -150,7 +157,7 @@ export class WorktreeProvider implements IIsolationProvider { gitArgs.push(worktreePath); try { - await execFileAsync('git', gitArgs, { timeout: 30000 }); + await execFileAsync('git', gitArgs, { timeout: GIT_OPERATION_TIMEOUT_MS }); result.worktreeRemoved = true; } catch (error) { if (!this.isWorktreeMissingError(error)) { @@ -266,7 +273,9 @@ export class WorktreeProvider implements IIsolationProvider { result: DestroyResult ): Promise { try { - await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], { timeout: 30000 }); + await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], { + timeout: GIT_OPERATION_TIMEOUT_MS, + }); getLog().debug({ repoPath, branchName }, 'branch_deleted'); return true; } catch (error) { @@ -301,7 +310,7 @@ export class WorktreeProvider implements IIsolationProvider { ): Promise { try { await execFileAsync('git', ['-C', repoPath, 'push', 'origin', '--delete', branchName], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); getLog().debug({ repoPath, branchName }, 'remote_branch_deleted'); return true; @@ -850,7 +859,7 @@ export class WorktreeProvider implements IIsolationProvider { ): Promise { // Fetch the PR's actual branch await execFileAsync('git', ['-C', repoPath, 'fetch', 'origin', prBranch], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); // Try to create worktree with the branch @@ -859,14 +868,14 @@ export class WorktreeProvider implements IIsolationProvider { await execFileAsync( 'git', ['-C', repoPath, 'worktree', 'add', worktreePath, '-b', prBranch, `origin/${prBranch}`], - { timeout: 30000 } + { timeout: GIT_OPERATION_TIMEOUT_MS } ); } catch (error) { const err = error as Error & { stderr?: string }; // Branch already exists locally - use it directly if (err.stderr?.includes('already exists')) { await 
execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, prBranch], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); } else { throw error; @@ -878,7 +887,7 @@ export class WorktreeProvider implements IIsolationProvider { await execFileAsync( 'git', ['-C', worktreePath, 'branch', '--set-upstream-to', `origin/${prBranch}`], - { timeout: 30000 } + { timeout: GIT_OPERATION_TIMEOUT_MS } ); } catch (trackingError) { getLog().warn({ err: trackingError, worktreePath, prBranch }, 'upstream_tracking_failed'); @@ -903,11 +912,11 @@ export class WorktreeProvider implements IIsolationProvider { if (prSha) { // SHA provided: create at specific commit for reproducible reviews await execFileAsync('git', ['-C', repoPath, 'fetch', 'origin', `pull/${prNumber}/head`], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, prSha], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); // Create a local tracking branch so it's not detached HEAD @@ -915,7 +924,7 @@ export class WorktreeProvider implements IIsolationProvider { repoPath, () => execFileAsync('git', ['-C', worktreePath, 'checkout', '-b', reviewBranch, prSha], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }), reviewBranch ); @@ -927,13 +936,13 @@ export class WorktreeProvider implements IIsolationProvider { execFileAsync( 'git', ['-C', repoPath, 'fetch', 'origin', `pull/${prNumber}/head:${reviewBranch}`], - { timeout: 30000 } + { timeout: GIT_OPERATION_TIMEOUT_MS } ), reviewBranch ); await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, reviewBranch], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); } } @@ -954,7 +963,7 @@ export class WorktreeProvider implements IIsolationProvider { if (err.stderr?.includes('already exists')) { getLog().debug({ repoPath, branchName }, 'stale_branch_retry'); await execFileAsync('git', ['-C', repoPath, 'branch', '-D', branchName], { - 
timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); await createCommand(); } else { @@ -988,7 +997,7 @@ export class WorktreeProvider implements IIsolationProvider { 'git', ['-C', repoPath, 'worktree', 'add', worktreePath, '-b', branchName, startPoint], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, } ); } catch (error) { @@ -1016,7 +1025,7 @@ export class WorktreeProvider implements IIsolationProvider { timeout: 10000, }); await execFileAsync('git', ['-C', repoPath, 'worktree', 'add', worktreePath, branchName], { - timeout: 30000, + timeout: GIT_OPERATION_TIMEOUT_MS, }); } else { throw error; From 063061c9a2f00d6407744b16e3c66a6f83c499e4 Mon Sep 17 00:00:00 2001 From: Lior Franko Date: Tue, 21 Apr 2026 11:47:32 +0300 Subject: [PATCH 04/14] fix(web,server): show real platform connection status in Settings (#1061) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Settings page's Platform Connections section hardcoded every platform except Web to 'Not configured', so users couldn't tell whether their Slack/ Telegram/Discord/GitHub/Gitea/GitLab adapters had actually started. - Server: /api/health now returns an activePlatforms array populated live as each adapter's start() resolves. Passed into registerApiRoutes so the reference stays mutable — Telegram starts after the HTTP listener is already accepting requests, so a snapshot would miss it. - Web: SettingsPage.PlatformConnectionsSection now reads activePlatforms from /api/health and looks each platform up in a Set. Also adds Gitea and GitLab to the list (they already ship as adapters). 
Closes #1031 Co-authored-by: Lior Franko (cherry picked from commit 08de8ee5c6fb5828401e082b0e92a1ba111bcb09) --- packages/server/src/index.ts | 22 ++++++++++++---------- packages/server/src/routes/api.ts | 5 ++++- packages/web/src/lib/api.ts | 1 + packages/web/src/routes/SettingsPage.tsx | 19 +++++++++++-------- 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 18c173cc66..d1738ce678 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -262,6 +262,11 @@ export async function startServer(opts: ServerOptions = {}): Promise { await webAdapter.start(); persistence.startPeriodicFlush(); + // Mutable — pushed to as each adapter starts, read by the /api/health endpoint. + // Must be a live reference because Telegram starts after the HTTP listener begins + // accepting requests, so a snapshot taken at registration time would miss it. + const activePlatforms: string[] = ['Web']; + // Platform adapters (skipped in CLI serve mode or when not configured) let github: GitHubAdapter | null = null; let gitea: GiteaAdapter | null = null; @@ -294,6 +299,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { botMention ); await github.start(); + activePlatforms.push('GitHub'); } else { getLog().info('github_adapter_skipped'); } @@ -310,6 +316,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { giteaBotMention ); await gitea.start(); + activePlatforms.push('Gitea'); } else { getLog().info('gitea_adapter_skipped'); } @@ -326,6 +333,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { gitlabBotMention ); await gitlab.start(); + activePlatforms.push('GitLab'); } else { getLog().info('gitlab_adapter_skipped'); } @@ -388,6 +396,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { }); await discord.start(); + activePlatforms.push('Discord'); } else { getLog().info('discord_adapter_skipped'); } 
@@ -443,6 +452,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { }); await slack.start(); + activePlatforms.push('Slack'); } else { getLog().info('slack_adapter_skipped'); } @@ -461,7 +471,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { }); // Register Web UI API routes - registerApiRoutes(app, webAdapter, lockManager); + registerApiRoutes(app, webAdapter, lockManager, activePlatforms); // GitHub webhook endpoint if (github) { @@ -617,6 +627,7 @@ export async function startServer(opts: ServerOptions = {}): Promise { try { await telegramAdapter.start(); + activePlatforms.push('Telegram'); } catch (err) { const error = err instanceof Error ? err : new Error(String(err)); getLog().error({ err: error, errorType: error.constructor.name }, 'telegram.start_failed'); @@ -679,15 +690,6 @@ export async function startServer(opts: ServerOptions = {}): Promise { // the try/catch in claude.ts). These are SDK cleanup races, not fatal app errors. process.on('unhandledRejection', handleUnhandledRejection); - // Show active platforms - const activePlatforms = ['Web']; - if (telegram) activePlatforms.push('Telegram'); - if (discord) activePlatforms.push('Discord'); - if (slack) activePlatforms.push('Slack'); - if (github) activePlatforms.push('GitHub'); - if (gitea) activePlatforms.push('Gitea'); - if (gitlab) activePlatforms.push('GitLab'); - getLog().info({ activePlatforms, port }, 'server_ready'); // Non-blocking: warn at startup if gh CLI auth is unavailable diff --git a/packages/server/src/routes/api.ts b/packages/server/src/routes/api.ts index 7ac7c60474..6448c77318 100644 --- a/packages/server/src/routes/api.ts +++ b/packages/server/src/routes/api.ts @@ -821,6 +821,7 @@ const getHealthRoute = createRoute({ runningWorkflows: z.number(), version: z.string().optional(), is_docker: z.boolean(), + activePlatforms: z.array(z.string()).optional(), }) .openapi('HealthResponse'), }, @@ -868,7 +869,8 @@ const getCostAnalyticsRoute = 
createRoute({ export function registerApiRoutes( app: OpenAPIHono, webAdapter: WebAdapter, - lockManager: ConversationLockManager + lockManager: ConversationLockManager, + activePlatforms?: readonly string[] ): void { function apiError( c: Context, @@ -2675,6 +2677,7 @@ export function registerApiRoutes( runningWorkflows: runningWorkflowRows.length, version: appVersion, is_docker: isDocker(), + activePlatforms: activePlatforms ? [...activePlatforms] : ['Web'], }); }); diff --git a/packages/web/src/lib/api.ts b/packages/web/src/lib/api.ts index cc52724301..8748878512 100644 --- a/packages/web/src/lib/api.ts +++ b/packages/web/src/lib/api.ts @@ -56,6 +56,7 @@ export interface HealthResponse { runningWorkflows: number; version?: string; is_docker: boolean; + activePlatforms?: string[]; } async function fetchJSON(url: string, options?: RequestInit): Promise { diff --git a/packages/web/src/routes/SettingsPage.tsx b/packages/web/src/routes/SettingsPage.tsx index 9ff8c33058..9add58d245 100644 --- a/packages/web/src/routes/SettingsPage.tsx +++ b/packages/web/src/routes/SettingsPage.tsx @@ -607,16 +607,19 @@ function AssistantConfigSection({ config }: { config: SafeConfigResponse }): Rea } function PlatformConnectionsSection({ - adapter, + activePlatforms, }: { - adapter: string | undefined; + activePlatforms: string[] | undefined; }): React.ReactElement { + const active = new Set(activePlatforms ?? 
[]); const platforms = [ - { name: 'Web', connected: adapter === 'web' }, - { name: 'Slack', connected: false }, - { name: 'Telegram', connected: false }, - { name: 'Discord', connected: false }, - { name: 'GitHub', connected: false }, + { name: 'Web', connected: active.has('Web') }, + { name: 'Slack', connected: active.has('Slack') }, + { name: 'Telegram', connected: active.has('Telegram') }, + { name: 'Discord', connected: active.has('Discord') }, + { name: 'GitHub', connected: active.has('GitHub') }, + { name: 'Gitea', connected: active.has('Gitea') }, + { name: 'GitLab', connected: active.has('GitLab') }, ]; return ( @@ -717,7 +720,7 @@ export function SettingsPage(): React.ReactElement {
{configData && } - +
From 821f51b4ad92f3e1d4e1220ade406fc6ace60b28 Mon Sep 17 00:00:00 2001 From: Alex Siri Date: Tue, 21 Apr 2026 12:52:56 +0100 Subject: [PATCH 05/14] fix: initialize options.hooks before merging YAML node hooks (#1177) When a workflow node defines hooks (PreToolUse/PostToolUse) in YAML but no hooks exist yet on the options object, applyNodeConfig crashes with "undefined is not an object" because it tries to assign properties on the undefined options.hooks. Initialize options.hooks to {} before the merge loop. Reproduces with: archon workflow run archon-architect (which uses per-node hooks extensively). Co-authored-by: Claude Opus 4.6 (1M context) (cherry picked from commit 7ea321419f0cd48e71e9ebf12968f539bc4166bc) --- packages/providers/src/claude/provider.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts index 0821319317..7202f4e19e 100644 --- a/packages/providers/src/claude/provider.ts +++ b/packages/providers/src/claude/provider.ts @@ -381,6 +381,9 @@ async function applyNodeConfig( if (Object.keys(builtHooks).length > 0) { // Merge with existing hooks (PostToolUse capture hook) const existingHooks = options.hooks as SDKHooksMap | undefined; + if (!options.hooks) { + (options as Record).hooks = {}; + } for (const [event, matchers] of Object.entries(builtHooks)) { if (!matchers) continue; const existing = existingHooks?.[event] as HookCallbackMatcher[] | undefined; From c77ae63e10e04e291ad901b6fe04dcd28202fdef Mon Sep 17 00:00:00 2001 From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com> Date: Wed, 22 Apr 2026 08:47:46 +0300 Subject: [PATCH 06/14] fix: detect completion signal in any XML tag, not just <promise> (#1126) (#1184) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: detect completion signal in any XML tag, not just <promise> (#1126) Loop nodes with `until:` reported max_iterations_reached when the AI wrapped the completion 
signal in XML tags other than `<promise>` (e.g., `<COMPLETE>ALL_CLEAN</COMPLETE>`). The three existing regex patterns all missed this format, causing the loop to exhaust iterations and fail. Changes: - Add generic XML-wrapped signal pattern to `detectCompletionSignal` - Extend `stripCompletionTags` to strip matched XML-wrapped signals from output - Pass `loop.until` to `stripCompletionTags` call site in dag-executor - Add unit tests for detection and stripping of XML-wrapped signals - Add integration test for loop completing on final iteration with XML tags Fixes #1126 * fix: address review findings for completion signal detection - Update detectCompletionSignal JSDoc to document all three detection formats - Update stripCompletionTags JSDoc to mention the `until` parameter - Remove superfluous `m` flag from xmlWrappedPattern (no anchors, no effect) - Document that XML tag names are matched independently (intentional permissiveness) - Add test: detects signal in mismatched XML tags (permissive behavior) - Add test: strips both <promise> and XML-tagged signal in same chunk - Add assertion in DAG integration test that raw XML tags don't appear in sent messages * simplify: reduce complexity in changed files * fix: require matching XML tag names in completion-signal detection Follow-up to the initial broadening in this PR. The first version of the regex accepted mismatched open/close tags (e.g. `<COMPLETE>X</OTHER>`) which was a small false-positive surface when the AI interleaves tags in prose. Tightens both detectCompletionSignal and stripCompletionTags to capture the tag name and enforce it on the close via \1 backreference. Case-insensitivity on the tag name is preserved. Test updates: - Flip the "permissive mismatch" case to assert strict rejection with a comment explaining the guard. - Add a case-insensitive matching case to lock that behavior in. No behavior change for workflows that use matching tags (the overwhelming common case) or for <promise>...</promise>. 
Behavior change is limited to the narrow "open tag and close tag disagree" case, which only happens when the AI is confused — in which case we'd rather report max_iterations_reached and let the author inspect than silently call the loop complete. (cherry picked from commit bc25deefbaf38a115815e631940d2989bad9381f) --- packages/workflows/src/dag-executor.test.ts | 69 +++++++++++++++++++ packages/workflows/src/dag-executor.ts | 2 +- .../workflows/src/executor-shared.test.ts | 64 +++++++++++++++++ packages/workflows/src/executor-shared.ts | 45 +++++++++--- 4 files changed, 168 insertions(+), 12 deletions(-) diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 0c745b39e5..03b4e77f91 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -2935,6 +2935,75 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { ).toBe(1); }); + it('completes on final iteration with XML-wrapped signal (SIGNAL)', async () => { + let callCount = 0; + mockSendQueryDag.mockImplementation(function* () { + callCount++; + if (callCount < 3) { + yield { type: 'assistant', content: `Iteration ${String(callCount)} progress` }; + yield { type: 'result', sessionId: `loop-session-${String(callCount)}` }; + } else { + // Final iteration uses tag instead of + yield { type: 'assistant', content: 'All clean! ALL_CLEAN' }; + yield { type: 'result', sessionId: `loop-session-${String(callCount)}` }; + } + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-xml-tag', + nodes: [ + { + id: 'fix-and-review', + loop: { + prompt: 'Fix and review. 
When done, output ALL_CLEAN.', + until: 'ALL_CLEAN', + max_iterations: 3, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // 3 iterations run, signal found on iteration 3 → completed, NOT failed + expect(mockSendQueryDag.mock.calls.length).toBe(3); + expect( + ( + mockDeps.store.completeWorkflowRun as Mock< + (id: string, metadata?: Record) => Promise + > + ).mock.calls.length + ).toBe(1); + expect( + (mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise>).mock + .calls.length + ).toBe(0); + // Verify stripping: raw XML completion tags must not appear in user-visible output + const allSentMessages = ( + platform.sendMessage as Mock<(...args: unknown[]) => Promise> + ).mock.calls + .map((call: unknown[]) => call[1] as string) + .join(''); + expect(allSentMessages).not.toContain(''); + expect(allSentMessages).not.toContain(''); + }); + it('loop node output available to downstream nodes via $nodeId.output', async () => { let loopCallCount = 0; mockSendQueryDag.mockImplementation(function* (prompt: string) { diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index c363f4ce3f..a60f4b7b72 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -1619,7 +1619,7 @@ async function executeLoopNode( })) { if (msg.type === 'assistant') { fullOutput += msg.content; - const cleaned = stripCompletionTags(msg.content); + const cleaned = stripCompletionTags(msg.content, loop.until); cleanOutput += cleaned; if (platform.getStreamingMode() === 'stream' && cleaned) { await safeSendMessage(platform, conversationId, cleaned, msgContext); diff --git a/packages/workflows/src/executor-shared.test.ts b/packages/workflows/src/executor-shared.test.ts index bb6456383f..413e8bbc47 100644 --- a/packages/workflows/src/executor-shared.test.ts +++ 
b/packages/workflows/src/executor-shared.test.ts @@ -22,6 +22,8 @@ import { substituteWorkflowVariables, buildPromptWithContext, detectCreditExhaustion, + detectCompletionSignal, + stripCompletionTags, isInlineScript, } from './executor-shared'; @@ -414,3 +416,65 @@ describe('isInlineScript', () => { expect(isInlineScript('')).toBe(false); }); }); + +describe('detectCompletionSignal', () => { + it('detects SIGNAL format', () => { + expect(detectCompletionSignal('COMPLETE', 'COMPLETE')).toBe(true); + }); + + it('detects signal in custom XML tags: SIGNAL', () => { + expect(detectCompletionSignal('ALL_CLEAN', 'ALL_CLEAN')).toBe(true); + }); + + it('detects signal in other XML tag names', () => { + expect(detectCompletionSignal('COMPLETE', 'COMPLETE')).toBe(true); + expect(detectCompletionSignal('DONE', 'DONE')).toBe(true); + }); + + it('detects plain signal at end of output', () => { + expect(detectCompletionSignal('Work done. COMPLETE', 'COMPLETE')).toBe(true); + }); + + it('detects plain signal on its own line', () => { + expect(detectCompletionSignal('Work done.\nCOMPLETE\nExtra text', 'COMPLETE')).toBe(true); + }); + + it('does not detect signal embedded in prose', () => { + expect(detectCompletionSignal('The status is not COMPLETE yet.', 'COMPLETE')).toBe(false); + }); + + it('does not detect signal when wrong value is in tags', () => { + expect(detectCompletionSignal('WRONG', 'ALL_CLEAN')).toBe(false); + }); + + it('does NOT detect signal when XML tag names do not match (strict)', () => { + // Open/close tag names must agree — guards against AI prose that + // interleaves tags (e.g. "ALL_CLEAN") being + // treated as a completion. 
+ expect(detectCompletionSignal('ALL_CLEAN', 'ALL_CLEAN')).toBe(false); + }); + + it('detects signal when tag names match case-insensitively', () => { + expect(detectCompletionSignal('ALL_CLEAN', 'ALL_CLEAN')).toBe(true); + }); +}); + +describe('stripCompletionTags', () => { + it('strips tags', () => { + expect(stripCompletionTags('Done. COMPLETE')).toBe('Done.'); + }); + + it('strips XML-wrapped signal when until is provided', () => { + expect(stripCompletionTags('Done. ALL_CLEAN', 'ALL_CLEAN')).toBe('Done.'); + }); + + it('does not strip XML tags when until is not provided', () => { + const input = 'Done. ALL_CLEAN'; + expect(stripCompletionTags(input)).toBe(input.trim()); + }); + + it('strips both and XML-tagged signal when until is provided', () => { + const input = 'Done. ALL_CLEAN ALL_CLEAN'; + expect(stripCompletionTags(input, 'ALL_CLEAN')).toBe('Done.'); + }); +}); diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts index f00f5c6ad6..b60ceacc35 100644 --- a/packages/workflows/src/executor-shared.ts +++ b/packages/workflows/src/executor-shared.ts @@ -384,18 +384,26 @@ function escapeRegExp(str: string): string { /** * Detect whether the AI output contains a completion signal. * - * Supports two formats: + * Supports three formats, checked in order: * 1. SIGNAL - Recommended; prevents false positives in prose - * 2. Plain SIGNAL - Backwards compatibility; only at end of output or on own line + * 2. SIGNAL - Any XML-wrapped tag; case-insensitive on tag names + * 3. Plain SIGNAL - Backwards compatibility; only at end of output or on own line * - * The tag format uses case-insensitive matching for the tags. - * Plain signal detection is restrictive to prevent false positives. + * Tag matching uses a backreference (\1) so opening and closing tag names must + * agree — `X` is not treated as a completion, which avoids + * false positives when the AI interleaves tags in prose. 
+ * + * Plain signal detection is restrictive to prevent false positives like "not SIGNAL yet". */ export function detectCompletionSignal(output: string, signal: string): boolean { - // Check for SIGNAL format (recommended - prevents false positives) - // Case-insensitive for tags - const promisePattern = new RegExp(`\\s*${escapeRegExp(signal)}\\s*`, 'i'); - if (promisePattern.test(output)) { + // Check for XML-like tag wrapping with matching open/close names: SIGNAL. + // Catches COMPLETE, ALL_CLEAN, X. + // The `([a-zA-Z][\w-]*)` capture plus `` backreference requires tag names to match. + const xmlWrappedPattern = new RegExp( + `<([a-zA-Z][\\w-]*)[^>]*>\\s*${escapeRegExp(signal)}\\s*`, + 'i' + ); + if (xmlWrappedPattern.test(output)) { return true; } // Plain signal detection - restrictive to prevent false positives like "not COMPLETE yet" @@ -407,9 +415,24 @@ export function detectCompletionSignal(output: string, signal: string): boolean return endPattern.test(output) || ownLinePattern.test(output); } -/** Strip internal completion signal tags before sending to user-facing output. */ -export function stripCompletionTags(content: string): string { - return content.replace(/[\s\S]*?<\/promise>/gi, '').trim(); +/** + * Strip internal completion signal tags before sending to user-facing output. + * Always strips `` (any content). When `until` is provided, + * also strips any XML-wrapped form of that signal with matching tag names + * (e.g. `ALL_CLEAN`). Mismatched tag names are left alone + * so regular prose (`ALL_CLEAN`) isn't accidentally rewritten. + */ +export function stripCompletionTags(content: string, until?: string): string { + let result = content.replace(/[\s\S]*?<\/promise>/gi, ''); + if (until) { + // Strip XML-tagged completion signals with matching open/close tag names. 
+ const escapedSignal = escapeRegExp(until); + result = result.replace( + new RegExp(`<([a-zA-Z][\\w-]*)[^>]*>\\s*${escapedSignal}\\s*`, 'gi'), + '' + ); + } + return result.trim(); } /** From 4c863d4c448bd2b1b08489591484c44a32f5599a Mon Sep 17 00:00:00 2001 From: Ahmed <44034059+medevs@users.noreply.github.com> Date: Wed, 22 Apr 2026 08:13:18 +0200 Subject: [PATCH 07/14] fix(web): allow deleting nodes from Workflow Builder (#971) (#1113) * fix(web): allow deleting nodes from Workflow Builder (#971) Three independent gaps prevented users from deleting nodes added to the Workflow Builder canvas: dropped nodes were never auto-selected so keyboard shortcuts silently no-oped, no right-click context menu existed, and the Delete Node button was buried in the Advanced tab (hidden below the viewport for Prompt/Command, completely absent for Bash since bash nodes have no Advanced tab). Fixes #971. * fix(web): push undo snapshot before adding nodes on canvas Call onPushSnapshot() before setNodes() in both onDrop and quick-add handlers so that node additions are captured by undo/redo history. Co-Authored-By: Claude Opus 4.6 (1M context) * fix(web): address PR #1113 review feedback - Hold nodes/edges in refs so handleNodeDeleteById and onPushSnapshot can't capture stale pre-drop state (fixes undo-stack correctness). - Clamp context-menu x/y to viewport so right-click near edges stays fully on-screen. - Drop non-conformant role=menu/menuitem from the single-item context menu; rely on the native button for accessibility. - Extend isInputTarget() to cover ARIA combobox/textbox/searchbox so Backspace in Radix/shadcn widgets never nukes a node. - Extract handleBuilderKeydown as a pure function and add tests covering the Delete/Backspace + isInputTarget invariant. - Remove issue-number references from code comments per CLAUDE.md. - Document the new delete affordances in the Workflow Builder docs. 
- Inline context-menu dismissal, rename pointer handler, drop unused deps in keyboardActions useMemo. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.6 (1M context) (cherry picked from commit d7f36b22ddcaa337cc1ab4ec152497b19c176056) --- .../docs-web/src/content/docs/adapters/web.md | 1 + packages/web/package.json | 2 +- .../components/workflows/NodeInspector.tsx | 25 +-- .../components/workflows/WorkflowBuilder.tsx | 46 +++-- .../components/workflows/WorkflowCanvas.tsx | 80 +++++++- .../web/src/hooks/useBuilderKeyboard.test.ts | 136 +++++++++++++ packages/web/src/hooks/useBuilderKeyboard.ts | 186 ++++++++++-------- 7 files changed, 362 insertions(+), 114 deletions(-) create mode 100644 packages/web/src/hooks/useBuilderKeyboard.test.ts diff --git a/packages/docs-web/src/content/docs/adapters/web.md b/packages/docs-web/src/content/docs/adapters/web.md index 7a3aeebb86..0025ca0219 100644 --- a/packages/docs-web/src/content/docs/adapters/web.md +++ b/packages/docs-web/src/content/docs/adapters/web.md @@ -172,6 +172,7 @@ The Workflow Builder at `/workflows/builder` provides a visual editor for creati - **Command picker** -- Browse available commands when configuring command nodes - **Validation panel** -- Real-time validation feedback as you build - **Undo/redo** -- Full undo/redo stack with keyboard shortcuts +- **Delete node** -- Remove a selected node with `Delete` or `Backspace`, the Delete button in the inspector header, or the right-click context menu on any node - **Save** -- Saves the workflow YAML to your project's `.archon/workflows/` directory You can also browse existing workflows on the `/workflows` page and open any of them in the builder to edit. 
diff --git a/packages/web/package.json b/packages/web/package.json index 8deb2ed573..ad976cff54 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -8,7 +8,7 @@ "build": "tsc --noEmit && vite build", "preview": "vite preview", "type-check": "tsc --noEmit", - "test": "bun test src/lib/ && bun test src/stores/", + "test": "bun test src/lib/ && bun test src/stores/ && bun test src/hooks/", "generate:types": "openapi-typescript http://localhost:3090/api/openapi.json -o src/lib/api.generated.d.ts" }, "dependencies": { diff --git a/packages/web/src/components/workflows/NodeInspector.tsx b/packages/web/src/components/workflows/NodeInspector.tsx index 1dfd797570..1d4748fecc 100644 --- a/packages/web/src/components/workflows/NodeInspector.tsx +++ b/packages/web/src/components/workflows/NodeInspector.tsx @@ -642,11 +642,9 @@ function JsonTextareaField({ function AdvancedTab({ node, onUpdate, - onDelete, }: { node: DagNodeData; onUpdate: (updates: Partial) => void; - onDelete: () => void; }): React.ReactElement { return (
@@ -696,12 +694,6 @@ function AdvancedTab({ onUpdate({ hooks: v }); }} /> - -
- -
); } @@ -718,14 +710,23 @@ function DagInspector({ return (
{/* Header */} -
- +
+ {node.label || node.id} +
diff --git a/packages/web/src/components/workflows/WorkflowCanvas.tsx b/packages/web/src/components/workflows/WorkflowCanvas.tsx index f784c67c4f..e1c6170b16 100644 --- a/packages/web/src/components/workflows/WorkflowCanvas.tsx +++ b/packages/web/src/components/workflows/WorkflowCanvas.tsx @@ -82,6 +82,7 @@ interface WorkflowCanvasProps { setNodes: React.Dispatch>; setEdges: React.Dispatch>; onNodeSelect: (nodeId: string | null) => void; + onNodeDelete: (nodeId: string) => void; onDirty: () => void; onPushSnapshot?: () => void; commands: CommandEntry[]; @@ -100,12 +101,19 @@ export function WorkflowCanvas({ setNodes, setEdges, onNodeSelect, + onNodeDelete, onDirty, onPushSnapshot, commands, }: WorkflowCanvasProps): React.ReactElement { const { screenToFlowPosition } = useReactFlow(); const [quickAddPosition, setQuickAddPosition] = useState(null); + const [contextMenu, setContextMenu] = useState<{ + x: number; + y: number; + nodeId: string; + } | null>(null); + const contextMenuRef = useRef(null); const nodeTypes: NodeTypes = useMemo(() => ({ dagNode: dagNodeComponent }), []); @@ -164,10 +172,12 @@ export function WorkflowCanvas({ }, }; + onPushSnapshot?.(); setNodes(nds => [...nds, newNode]); + onNodeSelect(id); onDirty(); }, - [screenToFlowPosition, setNodes, onDirty] + [screenToFlowPosition, setNodes, onNodeSelect, onDirty, onPushSnapshot] ); // Track whether we've already pushed a snapshot for the current drag gesture @@ -278,17 +288,63 @@ export function WorkflowCanvas({ }, }; + onPushSnapshot?.(); setNodes(nds => [...nds, newNode]); + onNodeSelect(id); onDirty(); setQuickAddPosition(null); }, - [quickAddPosition, setNodes, onDirty] + [quickAddPosition, setNodes, onNodeSelect, onDirty, onPushSnapshot] ); const handleQuickAddClose = useCallback(() => { setQuickAddPosition(null); }, []); + // Approximate menu size used for viewport-edge clamping. 
+ const CONTEXT_MENU_WIDTH = 160; + const CONTEXT_MENU_HEIGHT = 40; + + const handleNodeContextMenu = useCallback( + (e: React.MouseEvent, node: DagFlowNode) => { + e.preventDefault(); + onNodeSelect(node.id); + const x = Math.min(e.clientX, window.innerWidth - CONTEXT_MENU_WIDTH); + const y = Math.min(e.clientY, window.innerHeight - CONTEXT_MENU_HEIGHT); + setContextMenu({ x, y, nodeId: node.id }); + }, + [onNodeSelect] + ); + + // Dismiss the context menu on Escape or any click/contextmenu outside it. + useEffect(() => { + if (!contextMenu) return; + + const onKey = (e: KeyboardEvent): void => { + if (e.key === 'Escape') setContextMenu(null); + }; + const onClickOutside = (e: MouseEvent): void => { + if ( + contextMenuRef.current && + e.target instanceof Node && + contextMenuRef.current.contains(e.target) + ) { + return; + } + setContextMenu(null); + }; + + window.addEventListener('keydown', onKey); + // Use capture so we beat ReactFlow's own handlers and any stopPropagation. + window.addEventListener('mousedown', onClickOutside, true); + window.addEventListener('contextmenu', onClickOutside, true); + return (): void => { + window.removeEventListener('keydown', onKey); + window.removeEventListener('mousedown', onClickOutside, true); + window.removeEventListener('contextmenu', onClickOutside, true); + }; + }, [contextMenu]); + return (
{ onNodeSelect(node.id); }} + onNodeContextMenu={handleNodeContextMenu} onPaneClick={handlePaneClick} nodeTypes={nodeTypes} panOnDrag @@ -324,6 +381,25 @@ export function WorkflowCanvas({ commands={commands} /> )} + + {contextMenu && ( +
+ +
+ )}
); } diff --git a/packages/web/src/hooks/useBuilderKeyboard.test.ts b/packages/web/src/hooks/useBuilderKeyboard.test.ts new file mode 100644 index 0000000000..8239741657 --- /dev/null +++ b/packages/web/src/hooks/useBuilderKeyboard.test.ts @@ -0,0 +1,136 @@ +import { describe, test, expect, mock, beforeEach } from 'bun:test'; +import { + handleBuilderKeydown, + isInputTarget, + type BuilderKeyboardActions, +} from './useBuilderKeyboard'; + +function makeActions(): BuilderKeyboardActions & { + calls: Record; +} { + const calls: Record = {}; + const bump = (name: string): (() => void) => { + return (): void => { + calls[name] = (calls[name] ?? 0) + 1; + }; + }; + return { + calls, + onSave: bump('onSave'), + onUndo: bump('onUndo'), + onRedo: bump('onRedo'), + onToggleLibrary: bump('onToggleLibrary'), + onToggleYaml: bump('onToggleYaml'), + onToggleValidation: bump('onToggleValidation'), + onAddPrompt: bump('onAddPrompt'), + onAddBash: bump('onAddBash'), + onDeleteSelected: bump('onDeleteSelected'), + onDuplicateSelected: bump('onDuplicateSelected'), + onQuickAdd: bump('onQuickAdd'), + onFitView: bump('onFitView'), + onSelectAll: bump('onSelectAll'), + }; +} + +function makeEvent( + key: string, + target: { tagName?: string; isContentEditable?: boolean; role?: string } | null +): KeyboardEvent { + const el = + target === null + ? null + : ({ + tagName: target.tagName ?? 'DIV', + isContentEditable: target.isContentEditable ?? false, + getAttribute: (name: string): string | null => + name === 'role' ? (target.role ?? 
null) : null, + } as unknown as HTMLElement); + return { + key, + target: el, + metaKey: false, + ctrlKey: false, + shiftKey: false, + preventDefault: mock(() => {}), + } as unknown as KeyboardEvent; +} + +describe('isInputTarget', () => { + test('returns true for INPUT, TEXTAREA, SELECT', () => { + expect(isInputTarget(makeEvent('a', { tagName: 'INPUT' }))).toBe(true); + expect(isInputTarget(makeEvent('a', { tagName: 'TEXTAREA' }))).toBe(true); + expect(isInputTarget(makeEvent('a', { tagName: 'SELECT' }))).toBe(true); + }); + + test('returns true for contentEditable elements', () => { + expect(isInputTarget(makeEvent('a', { tagName: 'DIV', isContentEditable: true }))).toBe(true); + }); + + test('returns true for ARIA editable roles (combobox, textbox, searchbox)', () => { + expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'combobox' }))).toBe(true); + expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'textbox' }))).toBe(true); + expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'searchbox' }))).toBe(true); + }); + + test('returns false for regular elements without editable role', () => { + expect(isInputTarget(makeEvent('a', { tagName: 'DIV' }))).toBe(false); + expect(isInputTarget(makeEvent('a', { tagName: 'BUTTON' }))).toBe(false); + expect(isInputTarget(makeEvent('a', { tagName: 'DIV', role: 'menu' }))).toBe(false); + }); + + test('returns false when target is null', () => { + expect(isInputTarget(makeEvent('a', null))).toBe(false); + }); +}); + +describe('handleBuilderKeydown — delete invariant', () => { + let actions: ReturnType; + + beforeEach(() => { + actions = makeActions(); + }); + + test('Delete key on canvas triggers onDeleteSelected', () => { + handleBuilderKeydown(makeEvent('Delete', { tagName: 'DIV' }), actions); + expect(actions.calls.onDeleteSelected).toBe(1); + }); + + test('Backspace key on canvas triggers onDeleteSelected', () => { + handleBuilderKeydown(makeEvent('Backspace', { tagName: 'DIV' }), actions); + 
expect(actions.calls.onDeleteSelected).toBe(1); + }); + + test('Backspace in INPUT does NOT trigger onDeleteSelected', () => { + handleBuilderKeydown(makeEvent('Backspace', { tagName: 'INPUT' }), actions); + expect(actions.calls.onDeleteSelected).toBeUndefined(); + }); + + test('Backspace in TEXTAREA does NOT trigger onDeleteSelected', () => { + handleBuilderKeydown(makeEvent('Backspace', { tagName: 'TEXTAREA' }), actions); + expect(actions.calls.onDeleteSelected).toBeUndefined(); + }); + + test('Backspace in contentEditable does NOT trigger onDeleteSelected', () => { + handleBuilderKeydown( + makeEvent('Backspace', { tagName: 'DIV', isContentEditable: true }), + actions + ); + expect(actions.calls.onDeleteSelected).toBeUndefined(); + }); + + test('Backspace in ARIA combobox does NOT trigger onDeleteSelected', () => { + handleBuilderKeydown(makeEvent('Backspace', { tagName: 'DIV', role: 'combobox' }), actions); + expect(actions.calls.onDeleteSelected).toBeUndefined(); + }); + + test('Delete in ARIA textbox does NOT trigger onDeleteSelected', () => { + handleBuilderKeydown(makeEvent('Delete', { tagName: 'DIV', role: 'textbox' }), actions); + expect(actions.calls.onDeleteSelected).toBeUndefined(); + }); + + test('enabled=false suppresses all shortcuts', () => { + handleBuilderKeydown(makeEvent('Delete', { tagName: 'DIV' }), actions, false); + handleBuilderKeydown(makeEvent('Backspace', { tagName: 'DIV' }), actions, false); + expect(actions.calls.onDeleteSelected).toBeUndefined(); + }); +}); diff --git a/packages/web/src/hooks/useBuilderKeyboard.ts b/packages/web/src/hooks/useBuilderKeyboard.ts index 192f29bd2b..89343331bd 100644 --- a/packages/web/src/hooks/useBuilderKeyboard.ts +++ b/packages/web/src/hooks/useBuilderKeyboard.ts @@ -1,6 +1,6 @@ import { useEffect, useCallback } from 'react'; -interface BuilderKeyboardActions { +export interface BuilderKeyboardActions { onSave: () => void; onUndo: () => void; onRedo: () => void; @@ -16,97 +16,113 @@ interface 
BuilderKeyboardActions { onSelectAll?: () => void; } -function isInputTarget(e: KeyboardEvent): boolean { - const tag = (e.target as HTMLElement).tagName; - return ( - tag === 'INPUT' || - tag === 'TEXTAREA' || - tag === 'SELECT' || - (e.target as HTMLElement).isContentEditable - ); +const EDITABLE_ARIA_ROLES = new Set(['combobox', 'textbox', 'searchbox']); + +export function isInputTarget(e: KeyboardEvent): boolean { + const target = e.target as HTMLElement | null; + if (!target) return false; + const tag = target.tagName; + if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT') return true; + if (target.isContentEditable) return true; + const role = target.getAttribute?.('role'); + if (role && EDITABLE_ARIA_ROLES.has(role)) return true; + return false; } -export function useBuilderKeyboard(actions: BuilderKeyboardActions, enabled = true): void { - const handleKeyDown = useCallback( - (e: KeyboardEvent) => { - if (!enabled) return; +export function handleBuilderKeydown( + e: KeyboardEvent, + actions: BuilderKeyboardActions, + enabled = true +): void { + if (!enabled) return; - const mod = e.metaKey || e.ctrlKey; - const inInput = isInputTarget(e); + const mod = e.metaKey || e.ctrlKey; + const inInput = isInputTarget(e); - // --- Always-active shortcuts (even in inputs) --- - if (mod) { - if (e.key === 's') { - e.preventDefault(); - actions.onSave(); - return; - } - if (e.key === 'z' && e.shiftKey) { - e.preventDefault(); - actions.onRedo(); - return; - } - if (e.key === 'z') { - e.preventDefault(); - actions.onUndo(); - return; - } - if (e.key === '\\') { - e.preventDefault(); - actions.onToggleLibrary(); - return; - } - if (e.key === 'j') { - e.preventDefault(); - actions.onToggleYaml(); - return; - } - if (e.key === '.') { - e.preventDefault(); - actions.onToggleValidation(); - return; - } - } + // --- Always-active shortcuts (even in inputs) --- + if (mod) { + if (e.key === 's') { + e.preventDefault(); + actions.onSave(); + return; + } + if (e.key === 
'z' && e.shiftKey) { + e.preventDefault(); + actions.onRedo(); + return; + } + if (e.key === 'z') { + e.preventDefault(); + actions.onUndo(); + return; + } + if (e.key === '\\') { + e.preventDefault(); + actions.onToggleLibrary(); + return; + } + if (e.key === 'j') { + e.preventDefault(); + actions.onToggleYaml(); + return; + } + if (e.key === '.') { + e.preventDefault(); + actions.onToggleValidation(); + return; + } + } - // --- Only when NOT in input/textarea --- - if (inInput) return; + // --- Only when NOT in input/textarea --- + if (inInput) return; - if (mod) { - if (e.key === 'd') { - e.preventDefault(); - actions.onDuplicateSelected(); - return; - } - if (e.key === '0') { - e.preventDefault(); - actions.onFitView?.(); - return; - } - if (e.key === 'a') { - e.preventDefault(); - actions.onSelectAll?.(); - return; - } - } + if (mod) { + if (e.key === 'd') { + e.preventDefault(); + actions.onDuplicateSelected(); + return; + } + if (e.key === '0') { + e.preventDefault(); + actions.onFitView?.(); + return; + } + if (e.key === 'a') { + e.preventDefault(); + actions.onSelectAll?.(); + return; + } + } - // Single-key shortcuts - switch (e.key) { - case 'n': - actions.onQuickAdd?.(); - break; - case 'p': - actions.onAddPrompt(); - break; - case 'b': - actions.onAddBash(); - break; - case 'Delete': - actions.onDeleteSelected(); - break; - case 'f': - actions.onFitView?.(); - break; - } + // Single-key shortcuts + switch (e.key) { + case 'n': + actions.onQuickAdd?.(); + break; + case 'p': + actions.onAddPrompt(); + break; + case 'b': + actions.onAddBash(); + break; + case 'Delete': + case 'Backspace': + // Backspace is the natural delete key on macOS keyboards, which lack + // a dedicated Delete key. The isInputTarget() guard above prevents + // this from interfering with text fields. 
+ e.preventDefault(); + actions.onDeleteSelected(); + break; + case 'f': + actions.onFitView?.(); + break; + } +} + +export function useBuilderKeyboard(actions: BuilderKeyboardActions, enabled = true): void { + const handleKeyDown = useCallback( + (e: KeyboardEvent) => { + handleBuilderKeydown(e, actions, enabled); }, [actions, enabled] ); From 883258df772f75c550888b42086fce9cf086c069 Mon Sep 17 00:00:00 2001 From: CauchYoung <2024302072042@whu.edu.cn> Date: Wed, 22 Apr 2026 15:18:27 +0800 Subject: [PATCH 08/14] fix(workflows): make archon-adversarial-dev sed replacement macOS-safe (#1155) * fix(workflows): make adversarial init sed portable on macOS * chore: regenerate bundled-defaults after adversarial-dev sed fix Sync generated bundle with the new temp-file sed pattern in archon-adversarial-dev.yaml so check:bundled passes and binary distributions ship the macOS-safe version. --------- Co-authored-by: laplace young Co-authored-by: Rasmus Widing (cherry picked from commit 817186d446ed5e01cd13d393abfa734ef5ac730f) --- .archon/workflows/defaults/archon-adversarial-dev.yaml | 4 +++- .../workflows/src/defaults/bundled-defaults.generated.ts | 2 +- packages/workflows/src/defaults/bundled-defaults.test.ts | 9 +++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.archon/workflows/defaults/archon-adversarial-dev.yaml b/.archon/workflows/defaults/archon-adversarial-dev.yaml index 2ab207dc03..68722c8b1a 100644 --- a/.archon/workflows/defaults/archon-adversarial-dev.yaml +++ b/.archon/workflows/defaults/archon-adversarial-dev.yaml @@ -101,7 +101,9 @@ nodes: "status": "running" } STATEEOF - sed -i "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json" + STATE_TMP="$ARTIFACTS/state.json.tmp" + sed "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json" > "$STATE_TMP" + mv "$STATE_TMP" "$ARTIFACTS/state.json" echo "{\"totalSprints\": $SPRINT_COUNT, \"appDir\": \"$ARTIFACTS/app\", \"artifactsDir\": \"$ARTIFACTS\"}" timeout: 30000 diff 
--git a/packages/workflows/src/defaults/bundled-defaults.generated.ts b/packages/workflows/src/defaults/bundled-defaults.generated.ts index 79f7059f0a..c214874c3f 100644 --- a/packages/workflows/src/defaults/bundled-defaults.generated.ts +++ b/packages/workflows/src/defaults/bundled-defaults.generated.ts @@ -55,7 +55,7 @@ export const BUNDLED_COMMANDS: Record = { // Bundled default workflows (22 total) export const BUNDLED_WORKFLOWS: Record = { - "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n Use when: User wants to build a complete application from scratch using adversarial development.\n Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n below 7/10, the sprint goes back to the generator with adversarial feedback. Stops on sprint\n failure after max retries.\n NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n Based on Anthropic's harness design article for long-running application development.\n Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ─── Phase 1: Planning ───────────────────────────────────────────────\n - id: plan\n prompt: |\n You are a product planning expert. Your job is to take a short user prompt and expand it\n into a comprehensive product specification.\n\n ## User Request\n\n $ARGUMENTS\n\n ## Your Task\n\n Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n The spec MUST include ALL of the following sections:\n\n ### 1. 
Product Overview\n What the product does, who it's for, core value proposition.\n\n ### 2. Tech Stack\n Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n not \"a modern framework.\" Include exact package names and versions where relevant.\n\n ### 3. Design Language\n Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n ### 4. Feature List\n Every feature organized by priority. Be exhaustive.\n\n ### 5. Sprint Plan\n Features broken into 3-6 sprints, ordered by dependency and importance:\n - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n - Each subsequent sprint builds on the previous\n - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n - List the specific features/deliverables for each sprint\n\n Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n named libraries), the better the generator can build and the evaluator can test.\n\n IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. 
Do NOT just output\n it as conversation text.\n allowed_tools: [Read, Write, Glob, Grep]\n\n # ─── Phase 2: Workspace Initialization ───────────────────────────────\n - id: init-workspace\n depends_on: [plan]\n bash: |\n ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n # Create directory structure for harness communication\n mkdir -p \"$ARTIFACTS/contracts\"\n mkdir -p \"$ARTIFACTS/feedback\"\n mkdir -p \"$ARTIFACTS/app\"\n\n # Initialize isolated git repo in app directory\n cd \"$ARTIFACTS/app\"\n git init -q\n git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n # Extract sprint count from spec (find highest \"Sprint N\" reference)\n SPEC=\"$ARTIFACTS/spec.md\"\n SPRINT_COUNT=3\n if [ -f \"$SPEC\" ]; then\n FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n SPRINT_COUNT=$FOUND\n fi\n if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n SPRINT_COUNT=10\n fi\n fi\n\n # Write initial state machine file\n cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n {\n \"phase\": \"negotiating\",\n \"sprint\": 1,\n \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n \"retry\": 0,\n \"maxRetries\": 3,\n \"passThreshold\": 7,\n \"completedSprints\": [],\n \"status\": \"running\"\n }\n STATEEOF\n sed -i \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\"\n\n echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n timeout: 30000\n\n # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n #\n # State machine driven by $ARTIFACTS_DIR/state.json\n # Each iteration plays ONE role: negotiator, generator, or evaluator\n # fresh_context ensures genuine separation between roles\n #\n - id: adversarial-sprint\n depends_on: [init-workspace]\n idle_timeout: 600000\n model: claude-opus-4-6[1m]\n loop:\n prompt: |\n # Adversarial Development — Sprint Loop\n\n You are part of a GAN-inspired 
adversarial development system with three distinct roles.\n Each iteration you play ONE role, determined by the current phase in the state file.\n\n ## FIRST: Read State\n\n Read `$ARTIFACTS_DIR/state.json` to determine:\n - `phase` — which role you play this iteration\n - `sprint` — current sprint number\n - `totalSprints` — how many sprints total\n - `retry` — current retry attempt (0 = first try)\n - `maxRetries` — max retries before hard failure (default 3)\n - `passThreshold` — minimum score to pass (default 7)\n\n Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n ## Directory Layout\n\n - App source code: `$ARTIFACTS_DIR/app/`\n - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n - State machine: `$ARTIFACTS_DIR/state.json`\n\n ---\n\n ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n **Step 1 — Generator's Proposal:**\n Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n Propose a sprint contract with 5-15 specific, testable criteria.\n\n Each criterion MUST be concrete and verifiable. Examples:\n - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n - BAD: \"The API works well\"\n - BAD: \"Tasks can be managed\"\n\n **Step 2 — Evaluator's Tightening:**\n Now review your proposal as an adversary. For EACH criterion ask:\n - Is it specific enough to test programmatically?\n - What edge cases are missing? (empty inputs, special characters, concurrent requests)\n - Is the bar high enough, or would sloppy code pass?\n\n Tighten vague criteria. Add edge cases. 
Raise the bar.\n\n **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n ```json\n {\n \"sprintNumber\": <N>,\n \"features\": [\"feature1\", \"feature2\", ...],\n \"criteria\": [\n {\n \"name\": \"short-kebab-name\",\n \"description\": \"Specific, testable description of what must be true\",\n \"threshold\": 7\n }\n ]\n }\n ```\n\n **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: GENERATOR (phase = \"building\")\n\n You are a software engineer. Build features that MUST survive an adversarial evaluator\n who will actively try to break your code.\n\n **Read these files:**\n 1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n 2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n 3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n evaluator's previous feedback\n\n **If this is a RETRY (retry > 0):**\n Read the feedback CAREFULLY. Every failed criterion must be addressed.\n - If scores were close (5-6) and trending up: REFINE your approach\n - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n - Address EVERY feedback item — the evaluator WILL check\n - Re-verify each fix by running the code before committing\n\n **Build rules:**\n - All code goes in `$ARTIFACTS_DIR/app/`\n - Build ONE feature at a time, verify it works, then commit:\n ```bash\n cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n ```\n - Install dependencies as needed (npm/bun/pip/etc)\n - Test your code — start the server, hit the endpoints, verify the UI renders\n - Think about what the evaluator will attack: edge cases, error handling, input validation\n - Build defensively — the evaluator's job is to break you\n\n **Update state.json**: Set `\"phase\": \"evaluating\"`. 
Keep all other fields unchanged.\n\n ---\n\n ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n You are not helpful. You are not generous. You are an attacker.\n\n **CRITICAL CONSTRAINTS:**\n - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n - You MUST score EVERY criterion in the contract. No skipping.\n\n **Scoring guidelines:**\n - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n - **3-4**: Weak. Barely functional. Major gaps.\n - **1-2**: Broken. Does not work or is not implemented.\n\n Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n If something is broken, say it's broken.\n\n **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n **For each criterion:**\n 1. Read the relevant source code\n 2. Run the application (start server, test endpoints, check rendered UI)\n 3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n 4. Score it honestly\n\n **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n ```json\n {\n \"passed\": <true if ALL scores >= passThreshold, false otherwise>,\n \"scores\": {\n \"criterion-name\": <1-10>,\n ...\n },\n \"feedback\": [\n {\n \"criterion\": \"criterion-name\",\n \"score\": <1-10>,\n \"details\": \"Specific findings. 
Include file paths, line numbers, exact error messages, curl commands that failed.\"\n }\n ],\n \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n }\n ```\n\n **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n **Update state.json based on result:**\n\n **If PASSED (all criteria >= threshold):**\n - Add current sprint number to `completedSprints` array\n - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n **If FAILED:**\n - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n **IMPORTANT**: Kill all background processes before finishing:\n ```bash\n pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n ```\n\n ---\n\n ## COMPLETION\n\n After updating state.json, check the `status` field:\n - If `\"status\": \"complete\"` → all sprints passed! Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"running\"` → more work to do. Do NOT output any completion signal.\n\n until: ALL_SPRINTS_COMPLETE\n max_iterations: 60\n fresh_context: true\n until_bash: |\n grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n # ─── Phase 4: Report ─────────────────────────────────────────────────\n - id: report\n depends_on: [adversarial-sprint]\n trigger_rule: all_done\n context: fresh\n model: haiku\n prompt: |\n You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n ## Read ALL of these files:\n 1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n 2. 
`$ARTIFACTS_DIR/spec.md` — the original product spec\n 3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n 4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n ## Generate a report covering:\n\n ### Build Summary\n - What application was built (from the spec)\n - Final status: did all sprints pass or did it fail? On which sprint?\n - Total sprints completed vs planned\n\n ### Per-Sprint Breakdown\n For each sprint that was attempted:\n - What the contract required (features + key criteria)\n - How many attempts were needed (retry count)\n - Final scores for each criterion\n - Key feedback that drove retries and improvements\n\n ### Quality Metrics\n - Average score across all final-round criteria\n - Which criteria required the most retries\n - Where the adversarial evaluator pushed quality the highest\n\n ### How to Run\n - The application code lives in: `$ARTIFACTS_DIR/app/`\n - Include the tech stack and how to start the app (from the spec)\n - Include any setup steps (install deps, env vars, etc.)\n\n Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n sees it directly.\n allowed_tools: [Read, Write, Glob, Grep]\n", + "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n Use when: User wants to build a complete application from scratch using adversarial development.\n Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n below 7/10, the sprint goes back to the generator with adversarial feedback. 
Stops on sprint\n failure after max retries.\n NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n Based on Anthropic's harness design article for long-running application development.\n Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ─── Phase 1: Planning ───────────────────────────────────────────────\n - id: plan\n prompt: |\n You are a product planning expert. Your job is to take a short user prompt and expand it\n into a comprehensive product specification.\n\n ## User Request\n\n $ARGUMENTS\n\n ## Your Task\n\n Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n The spec MUST include ALL of the following sections:\n\n ### 1. Product Overview\n What the product does, who it's for, core value proposition.\n\n ### 2. Tech Stack\n Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n not \"a modern framework.\" Include exact package names and versions where relevant.\n\n ### 3. Design Language\n Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n ### 4. Feature List\n Every feature organized by priority. Be exhaustive.\n\n ### 5. Sprint Plan\n Features broken into 3-6 sprints, ordered by dependency and importance:\n - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n - Each subsequent sprint builds on the previous\n - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n - List the specific features/deliverables for each sprint\n\n Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n named libraries), the better the generator can build and the evaluator can test.\n\n IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. 
Do NOT just output\n it as conversation text.\n allowed_tools: [Read, Write, Glob, Grep]\n\n # ─── Phase 2: Workspace Initialization ───────────────────────────────\n - id: init-workspace\n depends_on: [plan]\n bash: |\n ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n # Create directory structure for harness communication\n mkdir -p \"$ARTIFACTS/contracts\"\n mkdir -p \"$ARTIFACTS/feedback\"\n mkdir -p \"$ARTIFACTS/app\"\n\n # Initialize isolated git repo in app directory\n cd \"$ARTIFACTS/app\"\n git init -q\n git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n # Extract sprint count from spec (find highest \"Sprint N\" reference)\n SPEC=\"$ARTIFACTS/spec.md\"\n SPRINT_COUNT=3\n if [ -f \"$SPEC\" ]; then\n FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n SPRINT_COUNT=$FOUND\n fi\n if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n SPRINT_COUNT=10\n fi\n fi\n\n # Write initial state machine file\n cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n {\n \"phase\": \"negotiating\",\n \"sprint\": 1,\n \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n \"retry\": 0,\n \"maxRetries\": 3,\n \"passThreshold\": 7,\n \"completedSprints\": [],\n \"status\": \"running\"\n }\n STATEEOF\n STATE_TMP=\"$ARTIFACTS/state.json.tmp\"\n sed \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\" > \"$STATE_TMP\"\n mv \"$STATE_TMP\" \"$ARTIFACTS/state.json\"\n\n echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n timeout: 30000\n\n # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n #\n # State machine driven by $ARTIFACTS_DIR/state.json\n # Each iteration plays ONE role: negotiator, generator, or evaluator\n # fresh_context ensures genuine separation between roles\n #\n - id: adversarial-sprint\n depends_on: [init-workspace]\n idle_timeout: 600000\n model: 
claude-opus-4-6[1m]\n loop:\n prompt: |\n # Adversarial Development — Sprint Loop\n\n You are part of a GAN-inspired adversarial development system with three distinct roles.\n Each iteration you play ONE role, determined by the current phase in the state file.\n\n ## FIRST: Read State\n\n Read `$ARTIFACTS_DIR/state.json` to determine:\n - `phase` — which role you play this iteration\n - `sprint` — current sprint number\n - `totalSprints` — how many sprints total\n - `retry` — current retry attempt (0 = first try)\n - `maxRetries` — max retries before hard failure (default 3)\n - `passThreshold` — minimum score to pass (default 7)\n\n Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n ## Directory Layout\n\n - App source code: `$ARTIFACTS_DIR/app/`\n - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n - State machine: `$ARTIFACTS_DIR/state.json`\n\n ---\n\n ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n **Step 1 — Generator's Proposal:**\n Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n Propose a sprint contract with 5-15 specific, testable criteria.\n\n Each criterion MUST be concrete and verifiable. Examples:\n - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n - BAD: \"The API works well\"\n - BAD: \"Tasks can be managed\"\n\n **Step 2 — Evaluator's Tightening:**\n Now review your proposal as an adversary. For EACH criterion ask:\n - Is it specific enough to test programmatically?\n - What edge cases are missing? 
(empty inputs, special characters, concurrent requests)\n - Is the bar high enough, or would sloppy code pass?\n\n Tighten vague criteria. Add edge cases. Raise the bar.\n\n **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n ```json\n {\n \"sprintNumber\": <N>,\n \"features\": [\"feature1\", \"feature2\", ...],\n \"criteria\": [\n {\n \"name\": \"short-kebab-name\",\n \"description\": \"Specific, testable description of what must be true\",\n \"threshold\": 7\n }\n ]\n }\n ```\n\n **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: GENERATOR (phase = \"building\")\n\n You are a software engineer. Build features that MUST survive an adversarial evaluator\n who will actively try to break your code.\n\n **Read these files:**\n 1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n 2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n 3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n evaluator's previous feedback\n\n **If this is a RETRY (retry > 0):**\n Read the feedback CAREFULLY. 
Every failed criterion must be addressed.\n - If scores were close (5-6) and trending up: REFINE your approach\n - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n - Address EVERY feedback item — the evaluator WILL check\n - Re-verify each fix by running the code before committing\n\n **Build rules:**\n - All code goes in `$ARTIFACTS_DIR/app/`\n - Build ONE feature at a time, verify it works, then commit:\n ```bash\n cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n ```\n - Install dependencies as needed (npm/bun/pip/etc)\n - Test your code — start the server, hit the endpoints, verify the UI renders\n - Think about what the evaluator will attack: edge cases, error handling, input validation\n - Build defensively — the evaluator's job is to break you\n\n **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n You are not helpful. You are not generous. You are an attacker.\n\n **CRITICAL CONSTRAINTS:**\n - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n - You MUST score EVERY criterion in the contract. No skipping.\n\n **Scoring guidelines:**\n - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n - **3-4**: Weak. Barely functional. Major gaps.\n - **1-2**: Broken. 
Does not work or is not implemented.\n\n Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n If something is broken, say it's broken.\n\n **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n **For each criterion:**\n 1. Read the relevant source code\n 2. Run the application (start server, test endpoints, check rendered UI)\n 3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n 4. Score it honestly\n\n **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n ```json\n {\n \"passed\": <true if ALL scores >= passThreshold, false otherwise>,\n \"scores\": {\n \"criterion-name\": <1-10>,\n ...\n },\n \"feedback\": [\n {\n \"criterion\": \"criterion-name\",\n \"score\": <1-10>,\n \"details\": \"Specific findings. Include file paths, line numbers, exact error messages, curl commands that failed.\"\n }\n ],\n \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n }\n ```\n\n **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n **Update state.json based on result:**\n\n **If PASSED (all criteria >= threshold):**\n - Add current sprint number to `completedSprints` array\n - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n **If FAILED:**\n - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n **IMPORTANT**: Kill all background processes before finishing:\n ```bash\n pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n ```\n\n ---\n\n ## COMPLETION\n\n After updating state.json, check the `status` field:\n - If `\"status\": \"complete\"` → all sprints passed! 
Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"running\"` → more work to do. Do NOT output any completion signal.\n\n until: ALL_SPRINTS_COMPLETE\n max_iterations: 60\n fresh_context: true\n until_bash: |\n grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n # ─── Phase 4: Report ─────────────────────────────────────────────────\n - id: report\n depends_on: [adversarial-sprint]\n trigger_rule: all_done\n context: fresh\n model: haiku\n prompt: |\n You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n ## Read ALL of these files:\n 1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n 2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n 3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n 4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n ## Generate a report covering:\n\n ### Build Summary\n - What application was built (from the spec)\n - Final status: did all sprints pass or did it fail? 
On which sprint?\n - Total sprints completed vs planned\n\n ### Per-Sprint Breakdown\n For each sprint that was attempted:\n - What the contract required (features + key criteria)\n - How many attempts were needed (retry count)\n - Final scores for each criterion\n - Key feedback that drove retries and improvements\n\n ### Quality Metrics\n - Average score across all final-round criteria\n - Which criteria required the most retries\n - Where the adversarial evaluator pushed quality the highest\n\n ### How to Run\n - The application code lives in: `$ARTIFACTS_DIR/app/`\n - Include the tech stack and how to start the app (from the spec)\n - Include any setup steps (install deps, env vars, etc.)\n\n Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n sees it directly.\n allowed_tools: [Read, Write, Glob, Grep]\n", "archon-architect": "name: archon-architect\ndescription: |\n Use when: User wants an architectural sweep, complexity reduction, or codebase health improvement.\n Triggers: \"architect\", \"simplify codebase\", \"reduce complexity\", \"architectural sweep\",\n \"clean up architecture\", \"codebase health\", \"fix architecture\".\n Does: Scans codebase metrics -> analyzes architecture with principled lens -> plans targeted\n simplifications -> executes fixes with self-review loops (hooks) -> validates -> creates PR.\n NOT for: Single-file fixes, feature development, bug fixes, PR reviews.\n\n DAG workflow showcasing per-node hooks:\n - PostToolUse hooks create organic quality loops (lint after write, self-review)\n - PreToolUse hooks inject architectural principles before changes\n - Different nodes have different trust levels and steering\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: MEASURE\n # Gather raw metrics — file sizes, complexity hotspots, dependency fan-out\n # ═══════════════════════════════════════════════════════════════\n\n - id: 
scan-metrics\n bash: |\n echo \"=== FILE SIZE HOTSPOTS (top 30 largest source files) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n\n echo \"\"\n echo \"=== IMPORT FAN-OUT (files with most imports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^import \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 8 ]; then\n echo \"$count imports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== EXPORT FAN-OUT (files with most exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== FUNCTION LENGTH HOTSPOTS (functions over 50 lines) ===\"\n grep -rn \"^\\(export \\)\\?\\(async \\)\\?function \\|=> {$\" \\\n --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null \\\n | head -30\n\n echo \"\"\n echo \"=== TYPE SAFETY GAPS ===\"\n echo \"any usage:\"\n grep -rn \": any\\b\\|as any\\b\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n echo \"eslint-disable comments:\"\n grep -rn \"eslint-disable\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 
2>/dev/null | wc -l\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE\n # Read through hotspots with an architectural lens\n # Hooks inject assessment criteria after every file read\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze\n prompt: |\n You are a senior software architect performing a codebase health assessment.\n\n ## Codebase Metrics\n\n $scan-metrics.output\n\n ## User Focus\n\n $ARGUMENTS\n\n ## Instructions\n\n 1. Read the top 10-15 files flagged by the metrics above (largest, most imports, most exports)\n 2. For each file, assess the criteria injected after you read it (you'll see them)\n 3. Build a running list of architectural concerns\n 4. Focus on:\n - Modules doing too many things (SRP violations)\n - Abstractions that don't earn their complexity\n - Duplicated patterns that should be consolidated (Rule of Three)\n - God files or god functions\n - Leaky abstractions or tight coupling between layers\n - Dead code or unused exports\n 5. Do NOT suggest changes yet — only diagnose\n\n ## Output\n\n Write a structured assessment to $ARTIFACTS_DIR/architecture-assessment.md with:\n - Executive summary (3-5 sentences)\n - Top findings ranked by impact\n - For each finding: file, what's wrong, why it matters, estimated effort\n depends_on: [scan-metrics]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n hooks:\n PostToolUse:\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n For the file you just read, assess:\n (1) Single responsibility — does this module do exactly one thing?\n (2) Cognitive load — could a new team member understand this in 5 minutes?\n (3) Abstraction value — does every abstraction earn its complexity, or is it premature?\n (4) Dependency direction — does this file depend on things at its own level or below, not above?\n Add any concerns to your running list. 
Be specific — cite line ranges and function names.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN\n # Prioritize and scope the changes — pure reasoning, no tools\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan\n prompt: |\n You are planning targeted architectural improvements.\n\n ## Assessment\n\n $analyze.output\n\n ## Principles\n\n - KISS: prefer straightforward over clever\n - YAGNI: remove speculative abstractions\n - Rule of Three: only extract when a pattern appears 3+ times\n - Each change must be independently revertable\n - Do NOT mix refactoring with behavior changes\n - Scope to what can be done safely in one pass (max 5-7 files)\n\n ## Instructions\n\n 1. From the assessment, select the top 3-5 highest-impact, lowest-risk improvements\n 2. For each, write a precise plan: which file, what to change, why\n 3. Order them so each change is independent (no cascading dependencies between changes)\n 4. Estimate blast radius — how many other files are affected\n\n ## Output\n\n Write the plan as a numbered list. 
Be specific about exactly what code to change.\n Keep it concise — the implement node will follow this literally.\n depends_on: [analyze]\n allowed_tools: [Read]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE\n # Make the changes with hooks creating quality feedback loops\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n prompt: |\n You are implementing targeted architectural simplifications.\n\n ## Plan\n\n $plan.output\n\n ## Rules\n\n - Follow the plan exactly — do not add extra improvements you notice along the way\n - Each change must preserve existing behavior (refactor only, no feature changes)\n - After each file edit, you'll be prompted to validate — follow those instructions\n - If a change turns out to be harder than expected, skip it and move on\n - Commit each logical change separately with a clear commit message\n\n ## Instructions\n\n 1. Work through the plan items in order\n 2. For each item: read the file, make the change, follow the post-edit checklist\n 3. After all changes, do a final `git diff --stat` to verify scope\n depends_on: [plan]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before writing: Is this file in your plan? If not, explain why you're\n touching it. Check how many files import from this module — changes to\n widely-imported modules need extra scrutiny.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. Do these things NOW before moving on:\n 1. Run the type checker to verify your change compiles\n 2. Re-read the file you changed — is it ACTUALLY simpler, or did you just move complexity around?\n 3. State in ONE sentence why this change reduces complexity. 
If you cannot justify it, revert it.\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Before modifying this file, consider: will your change reduce or increase\n the number of concepts a reader needs to hold in their head?\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If the command failed, diagnose the root cause\n before attempting a fix. Do not blindly retry.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # Run full validation suite — bash only, cannot edit to \"fix\" failures\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n # Always exit 0 so downstream nodes can read output and decide\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [simplify]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only runs if validate failed — focused fix with same quality hooks\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks 
passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. After all fixes, run the full validation suite: `bun run validate`\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE PR\n # Hooks ensure this node only does git operations\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the architectural improvements.\n\n ## Context\n\n - Architecture assessment: $analyze.output\n - Plan: $plan.output\n - Validation: $validate.output\n\n ## Instructions\n\n 1. Stage all changes and create a single commit (or verify existing commits)\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with:\n - Title: concise description of what was simplified (under 70 chars)\n - Body: use the format below\n 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Body Format\n\n ```markdown\n ## Architectural Sweep\n\n **Focus**: $ARGUMENTS\n\n ### Assessment\n\n [3-5 sentence summary from the architecture assessment]\n\n ### Changes\n\n [For each change: what file, what was simplified, why]\n\n ### Validation\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass\n - [x] Each change preserves existing behavior\n ```\n depends_on: [fix-failures]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. If git push or gh pr create failed,\n read the error message carefully before retrying.\n", "archon-assist": "name: archon-assist\ndescription: |\n Use when: No other workflow matches the request.\n Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help.\n Capability: Full Claude Code agent with all tools available.\n Note: Will inform user when assist mode is used for tracking.\n\nnodes:\n - id: assist\n command: archon-assist\n", "archon-comprehensive-pr-review": "name: archon-comprehensive-pr-review\ndescription: |\n Use when: User wants a comprehensive code review of a pull request with automatic fixes.\n Triggers: \"review this PR\", \"review PR #123\", \"comprehensive review\", \"full PR review\",\n \"review and fix\", \"check this PR\", \"code review\".\n Does: Syncs PR with main (rebase if needed) -> runs 5 specialized review agents in parallel ->\n synthesizes findings -> auto-fixes CRITICAL/HIGH issues -> reports remaining issues.\n NOT for: Quick questions about a PR, checking CI status, simple \"what changed\" queries.\n\n This workflow produces 
artifacts in $ARTIFACTS_DIR/../reviews/pr-{number}/ and posts\n a comprehensive review comment to the GitHub PR.\n\nnodes:\n - id: scope\n command: archon-pr-review-scope\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [scope]\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n", diff --git a/packages/workflows/src/defaults/bundled-defaults.test.ts b/packages/workflows/src/defaults/bundled-defaults.test.ts index 1455b2ca0c..ef8887072d 100644 --- a/packages/workflows/src/defaults/bundled-defaults.test.ts +++ b/packages/workflows/src/defaults/bundled-defaults.test.ts @@ -101,6 +101,15 @@ describe('bundled-defaults', () => { expect(content).toContain('workflow_name'); }); + it('archon-adversarial-dev init-workspace should avoid non-portable sed -i', () => { + const content = BUNDLED_WORKFLOWS['archon-adversarial-dev']; + expect(content).toContain('STATE_TMP="$ARTIFACTS/state.json.tmp"'); + expect(content).toContain( + 'sed "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/" "$ARTIFACTS/state.json" > "$STATE_TMP"' + ); + expect(content).not.toContain('sed -i "s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/"'); + }); + it('should have valid YAML structure', () => { for (const content of Object.values(BUNDLED_WORKFLOWS)) { expect(content).toContain('name:'); From b4f67f9f9531d1949ae95584af4247eb82ef5bcd Mon Sep 17 00:00:00 2001 From: Rasmus Widing 
<152263317+Wirasm@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:54:25 +0300 Subject: [PATCH 09/14] fix(deps): override transitive axios to ^1.15.0 for CVE-2025-62718 (#1330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit axios <1.15.0 can be coerced to bypass NO_PROXY rules via hostname normalization, enabling SSRF in the right network shape. Archon pulls axios transitively through @slack/bolt (^1.12.0) and @slack/web-api (^1.13.5); before this change bun.lock resolved axios@1.13.6 — within the vulnerable range. Adding "axios": "^1.15.0" to the root package.json overrides bumps the transitive resolution to axios@1.15.1 (latest compatible 1.x). Both Slack range specs accept it without API surface changes — no downstream code touches axios directly. Supersedes #1153. Credits @stefans71 for identifying and reporting the vulnerability; their PR was stale on the lockfile (0.3.5 → 0.3.6 drift on dev), so this is a fresh one-line re-do on current dev. Closes #1053. Co-authored-by: Stefans71 (cherry picked from commit ae2d9361bc3a063fd483aa89124d15a934a13a00) --- CHANGELOG.md | 1 + bun.lock | 26 +++++++++++++------------- package.json | 3 ++- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a5efeb66b..6ff712f205 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- **Bumped transitive `axios` to `^1.15.0` via root `overrides` to clear CVE-2025-62718** (NO_PROXY bypass via hostname normalization → potential SSRF). Archon pulls `axios` transitively through `@slack/bolt` and `@slack/web-api`; both semver ranges (`^1.12.0` and `^1.13.5`) accept the override cleanly, so no API surface changes. Credits @stefans71 for identifying and reporting the vulnerability in #1153. Closes #1053. 
- **Server startup no longer marks actively-running workflows as failed.** The `failOrphanedRuns()` call has been removed from `packages/server/src/index.ts` to match the CLI precedent (`packages/cli/src/cli.ts:256-258`). Per the new CLAUDE.md principle "No Autonomous Lifecycle Mutation Across Process Boundaries", a stuck `running` row is now transitioned explicitly by the user: via the per-row Cancel/Abandon buttons on the dashboard workflow card, or `archon workflow abandon ` from the CLI. (`archon workflow cleanup` is a separate command that deletes OLD terminal runs for disk hygiene — it does not handle stuck `running` rows.) Closes #1216. ### Changed diff --git a/bun.lock b/bun.lock index 8599602c73..8f1fcec74e 100644 --- a/bun.lock +++ b/bun.lock @@ -1,6 +1,5 @@ { "lockfileVersion": 1, - "configVersion": 1, "workspaces": { "": { "name": "archon", @@ -23,7 +22,7 @@ }, "packages/adapters": { "name": "@archon/adapters", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@archon/core": "workspace:*", "@archon/git": "workspace:*", @@ -41,7 +40,7 @@ }, "packages/cli": { "name": "@archon/cli", - "version": "0.3.6", + "version": "0.4.0", "bin": { "archon": "./src/cli.ts", }, @@ -63,7 +62,7 @@ }, "packages/core": { "name": "@archon/core", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", @@ -83,7 +82,7 @@ }, "packages/docs-web": { "name": "@archon/docs-web", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@astrojs/starlight": "^0.38.0", "astro": "^6.1.0", @@ -92,7 +91,7 @@ }, "packages/git": { "name": "@archon/git", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@archon/paths": "workspace:*", }, @@ -102,7 +101,7 @@ }, "packages/isolation": { "name": "@archon/isolation", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -113,7 +112,7 @@ }, "packages/paths": { "name": 
"@archon/paths", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "dotenv": "^17", "pino": "^9", @@ -141,7 +140,7 @@ }, "packages/server": { "name": "@archon/server", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@archon/adapters": "workspace:*", "@archon/core": "workspace:*", @@ -160,7 +159,7 @@ }, "packages/web": { "name": "@archon/web", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@dagrejs/dagre": "^2.0.4", "@radix-ui/react-alert-dialog": "^1.1.15", @@ -212,7 +211,7 @@ }, "packages/workflows": { "name": "@archon/workflows", - "version": "0.3.6", + "version": "0.4.0", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -226,6 +225,7 @@ }, }, "overrides": { + "axios": "^1.15.0", "test-exclude": "^7.0.1", }, "packages": { @@ -1043,7 +1043,7 @@ "atomic-sleep": ["atomic-sleep@1.0.0", "", {}, "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ=="], - "axios": ["axios@1.13.6", "", { "dependencies": { "follow-redirects": "^1.15.11", "form-data": "^4.0.5", "proxy-from-env": "^1.1.0" } }, "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ=="], + "axios": ["axios@1.15.1", "", { "dependencies": { "follow-redirects": "^1.15.11", "form-data": "^4.0.5", "proxy-from-env": "^2.1.0" } }, "sha512-WOG+Jj8ZOvR0a3rAn+Tuf1UQJRxw5venr6DgdbJzngJE3qG7X0kL83CZGpdHMxEm+ZK3seAbvFsw4FfOfP9vxg=="], "axobject-query": ["axobject-query@4.1.0", "", {}, "sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ=="], @@ -2033,7 +2033,7 @@ "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], - "proxy-from-env": ["proxy-from-env@1.1.0", "", {}, "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="], + 
"proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="], "pump": ["pump@3.0.4", "", { "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA=="], diff --git a/package.json b/package.json index d20a7583dd..2fceb51a72 100644 --- a/package.json +++ b/package.json @@ -48,7 +48,8 @@ "bun": "^1.3.0" }, "overrides": { - "test-exclude": "^7.0.1" + "test-exclude": "^7.0.1", + "axios": "^1.15.0" }, "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.74" From 9ce26d991535ae5187f76583b6167d0e704db28b Mon Sep 17 00:00:00 2001 From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:15:24 +0300 Subject: [PATCH 10/14] fix(cli): surface stale-workspace registration error instead of fake "not a git repo" (#1332) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(cli): surface stale-workspace registration error instead of fake "not a git repo" When workflowRunCommand auto-registers an unregistered repo, a stale ~/.archon/workspaces///source symlink (pointing to an old checkout) causes createProjectSourceSymlink() in @archon/paths to throw: Source symlink at already points to , expected The CLI caught that in a try/catch, logged it at warn level, continued with `codebase = null`, and then the isolation / resume branches hit their "codebase missing" fallback and threw the generic: Cannot create worktree: not in a git repository. That message is false — the repo is valid; the Archon workspace entry is stale. It sends users down the wrong diagnostic path (checking git config, permissions, etc.) instead of pointing at the workspace dir. 
Fix: preserve the registration error on a new `codebaseRegistrationError` local, and at both fallback sites (resume + worktree-creation) check it before the generic "not a git repo" branch. When set, throw a truthful: Cannot {create worktree,resume}: repository registration failed. Error: [original registration error message] Hint: Remove the stale workspace entry at [workspace dir] and retry, or use --no-worktree to skip isolation. The hint's exact path comes from a small parser that extracts the workspace directory from the known "Source symlink at …" format; when the message shape doesn't match (future error text changes), the parser returns null and we fall back to a generic "check registration under [archon home]/workspaces" hint — safe degradation. Regression test in workflow.test.ts asserts the new error message and negatively asserts the old "not in a git repository" string is gone. Supersedes #1157 — that PR was draft + CONFLICTING against current dev, and also mentioned Windows test-compat changes that weren't in the diff (pruned scope). This is a fresh re-do focused strictly on #1146. Closes #1146. Co-authored-by: Bortlesboat * review: add resume-path test, null-fallback test, update troubleshooting docs Addresses multi-agent review feedback on this PR: - Add regression test for the --resume fallback site (the worktree-create site was already covered; the resume site had identical wiring but zero test coverage). - Add test for the unrecognized-error-shape branch of buildRegistrationFailureError so the generic workspace hint is pinned (prevents accidental inversion of the stale-entry vs generic-hint ternary). - Update the troubleshooting page to key on the new "Cannot create worktree: repository registration failed." message. Users hitting the new error won't find the page under the old heading, and the "In the future..." note is obsolete now that the error itself contains the cleanup path.
- Trim both new docblocks: keep the load-bearing cross-package error string contract in extractStaleWorkspaceEntry, drop narration of what the code already shows. Drop the "Before this helper existed..." paragraph from buildRegistrationFailureError — that's CHANGELOG material. Drop PR-reference suffix from the test section divider. * review: guard getArchonHome in hint + export parser for direct tests Two follow-up fixes to the multi-agent review commit (f32f002f): CodeRabbit finding — unguarded getArchonHome() in the fallback hint. If getArchonHome() ever throws (misconfigured env vars, permission issues on the resolution path), the registration-failure Error would never get constructed: we'd throw a secondary home-resolution error that masks the root cause. Wrap the fallback branch in try/catch — prefer losing the exact path in the hint over replacing the actionable registration error. A safe generic hint ("Check your Archon workspace registration and retry") takes over when getArchonHome() throws. The original error.message is always embedded verbatim in the re-thrown Error. S2 — export extractStaleWorkspaceEntry for direct table tests. The parser is where the cross-package string contract with @archon/paths actually lives; direct tests against it are cheaper than end-to-end CLI tests and pin the edge cases: - POSIX path with forward slashes (typical unix user) - Windows path with backslashes (verifies Math.max(lastIndexOf / , lastIndexOf \)) - Unrelated error message (no prefix) → null - Prefix matches but delimiter missing → null - Source path without any separator → null (guards against returning empty string, which would produce a nonsense "Remove the stale workspace entry at " hint) - Empty string → null Six new cases in the test file. The claim of Windows support in the PR description is now actually verified. 
* fix(test): make generic-hint assertion path-separator agnostic Windows test runner (CI) hit: Expected to contain: "Check your Archon workspace registration under /home/test/.archon/workspaces" Received: "... under \home\test\.archon\workspaces and retry, ..." path.join normalizes to `\` on Windows and `/` on POSIX. The test hardcoded forward slashes in the expected substring. Split into two separator-agnostic asserts: the prefix up to "under", then `/workspaces\b/` regex for the final path segment. Behavior doesn't change — the hint still gets the full path.join'd workspaces dir on either platform. --------- Co-authored-by: Bortlesboat (cherry picked from commit 056707d033e5276acc65ac773d614abc73ac582b) --- CHANGELOG.md | 1 + packages/cli/src/commands/workflow.test.ts | 156 ++++++++++++++++++ packages/cli/src/commands/workflow.ts | 60 +++++++ .../content/docs/getting-started/overview.md | 12 +- 4 files changed, 224 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ff712f205..9663bd5431 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **Bumped transitive `axios` to `^1.15.0` via root `overrides` to clear CVE-2025-62718** (NO_PROXY bypass via hostname normalization → potential SSRF). Archon pulls `axios` transitively through `@slack/bolt` and `@slack/web-api`; both semver ranges (`^1.12.0` and `^1.13.5`) accept the override cleanly, so no API surface changes. Credits @stefans71 for identifying and reporting the vulnerability in #1153. Closes #1053. +- **Stale workspace symlink no longer reported as "not in a git repository" by the CLI.** When `archon workflow run` (or `--resume`) is invoked from a valid git repo whose `~/.archon/workspaces///source` symlink points somewhere else (common after moving/renaming the checkout), auto-registration fails but the repo is fine. 
Previously both the worktree-creation and resume paths fell through to the generic `Cannot create worktree: not in a git repository` / `Cannot resume: Not in a git repository` errors — a lie that sent users down the wrong diagnostic path. Both sites now preserve the registration error and throw `Cannot {create worktree,resume}: repository registration failed.` with the original cause and a concrete cleanup hint (`Remove the stale workspace entry at and retry`) when the failure matches the `createProjectSourceSymlink()` shape. Credits @Bortlesboat for identifying the root cause and the parser approach in #1157. Closes #1146. - **Server startup no longer marks actively-running workflows as failed.** The `failOrphanedRuns()` call has been removed from `packages/server/src/index.ts` to match the CLI precedent (`packages/cli/src/cli.ts:256-258`). Per the new CLAUDE.md principle "No Autonomous Lifecycle Mutation Across Process Boundaries", a stuck `running` row is now transitioned explicitly by the user: via the per-row Cancel/Abandon buttons on the dashboard workflow card, or `archon workflow abandon ` from the CLI. (`archon workflow cleanup` is a separate command that deletes OLD terminal runs for disk hygiene — it does not handle stuck `running` rows.) Closes #1216. 
### Changed diff --git a/packages/cli/src/commands/workflow.test.ts b/packages/cli/src/commands/workflow.test.ts index d7a4030684..c6e08e8cd2 100644 --- a/packages/cli/src/commands/workflow.test.ts +++ b/packages/cli/src/commands/workflow.test.ts @@ -867,6 +867,114 @@ describe('workflowRunCommand', () => { expect(createCallsAfter).toBe(createCallsBefore); }); + // ------------------------------------------------------------------------- + // Stale workspace source-symlink → truthful CLI error + // ------------------------------------------------------------------------- + + it('surfaces auto-registration failures instead of claiming the repo is invalid', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { registerRepository } = await import('@archon/core'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const gitModule = await import('@archon/git'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (gitModule.findRepoRoot as ReturnType).mockResolvedValueOnce('/test/path'); + (registerRepository as ReturnType).mockRejectedValueOnce( + new Error( + 'Source symlink at /home/test/.archon/workspaces/acme/widget/source already points to ' + + '/home/test/.archon/workspaces/widget, expected /test/path' + ) + ); + + const error = await workflowRunCommand('/test/path', 'assist', 'hello', {}).catch( + err => err as Error + ); + + expect(error).toBeInstanceOf(Error); + expect(error.message).toContain('Cannot create worktree: repository registration failed.'); + expect(error.message).toContain( + 'Remove the 
stale workspace entry at /home/test/.archon/workspaces/acme/widget and retry' + ); + expect(error.message).not.toContain('not in a git repository'); + }); + + it('surfaces auto-registration failures on --resume instead of claiming the repo is invalid', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { registerRepository } = await import('@archon/core'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const gitModule = await import('@archon/git'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (gitModule.findRepoRoot as ReturnType).mockResolvedValueOnce('/test/path'); + (registerRepository as ReturnType).mockRejectedValueOnce( + new Error( + 'Source symlink at /home/test/.archon/workspaces/acme/widget/source already points to ' + + '/home/test/.archon/workspaces/widget, expected /test/path' + ) + ); + + const error = await workflowRunCommand('/test/path', 'assist', 'hello', { + resume: true, + }).catch(err => err as Error); + + expect(error).toBeInstanceOf(Error); + expect(error.message).toContain('Cannot resume: repository registration failed.'); + expect(error.message).toContain( + 'Remove the stale workspace entry at /home/test/.archon/workspaces/acme/widget and retry' + ); + expect(error.message).not.toContain('Not in a git repository'); + }); + + it('falls back to generic workspace hint when registration error has an unrecognized shape', async () => { + const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); + const { registerRepository } = await 
import('@archon/core'); + const conversationDb = await import('@archon/core/db/conversations'); + const codebaseDb = await import('@archon/core/db/codebases'); + const gitModule = await import('@archon/git'); + + (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ + workflows: [makeTestWorkflowWithSource({ name: 'assist', description: 'Help' })], + errors: [], + }); + (conversationDb.getOrCreateConversation as ReturnType).mockResolvedValueOnce({ + id: 'conv-123', + }); + (codebaseDb.findCodebaseByDefaultCwd as ReturnType).mockResolvedValueOnce(null); + (gitModule.findRepoRoot as ReturnType).mockResolvedValueOnce('/test/path'); + (registerRepository as ReturnType).mockRejectedValueOnce( + new Error("EACCES: permission denied, mkdir '/home/test/.archon/workspaces/acme'") + ); + + const error = await workflowRunCommand('/test/path', 'assist', 'hello', {}).catch( + err => err as Error + ); + + expect(error).toBeInstanceOf(Error); + expect(error.message).toContain('Cannot create worktree: repository registration failed.'); + expect(error.message).toContain('EACCES: permission denied'); + // Path-separator-agnostic check: on Windows path.join normalizes to `\`, + // on POSIX to `/`. Assert the hint prefix + the final segment separately. 
+ expect(error.message).toContain('Check your Archon workspace registration under'); + expect(error.message).toMatch(/workspaces\b/); + expect(error.message).not.toContain('Remove the stale workspace entry'); + }); + it('throws when isolation cannot be created due to missing codebase', async () => { const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); const conversationDb = await import('@archon/core/db/conversations'); @@ -2272,3 +2380,51 @@ describe('workflowRunCommand — progress rendering', () => { expect(stderrSpy).toHaveBeenCalledWith('[slow] Completed (1m30s)\n'); }); }); + +// --------------------------------------------------------------------------- +// extractStaleWorkspaceEntry — parser edge cases +// --------------------------------------------------------------------------- + +describe('extractStaleWorkspaceEntry', () => { + it('extracts the workspace dir from a POSIX source-symlink error', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry( + 'Source symlink at /home/user/.archon/workspaces/acme/widget/source already points to /other, expected /here' + ) + ).toBe('/home/user/.archon/workspaces/acme/widget'); + }); + + it('extracts the workspace dir from a Windows source-symlink error (backslash sep)', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry( + 'Source symlink at C:\\Users\\me\\.archon\\workspaces\\acme\\widget\\source already points to D:\\x, expected D:\\y' + ) + ).toBe('C:\\Users\\me\\.archon\\workspaces\\acme\\widget'); + }); + + it('returns null when the prefix does not match (unrelated error)', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect(extractStaleWorkspaceEntry('ENOENT: no such file or directory')).toBeNull(); + }); + + it('returns null when the prefix matches but the delimiter is missing', async () => { + 
const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry('Source symlink at /some/path (truncated message)') + ).toBeNull(); + }); + + it('returns null when the source path has no path separator at all', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect( + extractStaleWorkspaceEntry('Source symlink at bareword already points to /x, expected /y') + ).toBeNull(); + }); + + it('returns null on an empty input', async () => { + const { extractStaleWorkspaceEntry } = await import('./workflow'); + expect(extractStaleWorkspaceEntry('')).toBeNull(); + }); +}); diff --git a/packages/cli/src/commands/workflow.ts b/packages/cli/src/commands/workflow.ts index 4c28edcb65..4eb90e8731 100644 --- a/packages/cli/src/commands/workflow.ts +++ b/packages/cli/src/commands/workflow.ts @@ -11,6 +11,7 @@ import { import { WORKFLOW_EVENT_TYPES, type WorkflowEventType } from '@archon/workflows/store'; import { configureIsolation, getIsolationProvider } from '@archon/isolation'; import { createLogger, getArchonHome } from '@archon/paths'; +import { join } from 'node:path'; import { createWorkflowDeps } from '@archon/core/workflows/store-adapter'; import { discoverWorkflowsWithConfig } from '@archon/workflows/workflow-discovery'; import { resolveWorkflowName } from '@archon/workflows/router'; @@ -77,6 +78,57 @@ function generateConversationId(): string { return `cli-${String(timestamp)}-${random}`; } +/** + * Parses the "Source symlink at X already points to Y, expected Z" error + * thrown by `createProjectSourceSymlink` in @archon/paths. Cross-package + * string contract — if that throw site changes wording, this parser silently + * stops matching. Returns the workspace dir (parent of the `source` link) so + * the caller can emit an exact cleanup path, or null if unrecognized. 
+ */ +export function extractStaleWorkspaceEntry(message: string): string | null { + const prefix = 'Source symlink at '; + const delimiter = ' already points to '; + if (!message.startsWith(prefix)) return null; + + const remainder = message.slice(prefix.length); + const delimiterIndex = remainder.indexOf(delimiter); + if (delimiterIndex === -1) return null; + + const sourcePath = remainder.slice(0, delimiterIndex).trim(); + const lastSeparator = Math.max(sourcePath.lastIndexOf('/'), sourcePath.lastIndexOf('\\')); + return lastSeparator === -1 ? null : sourcePath.slice(0, lastSeparator); +} + +/** + * Wraps a codebase auto-registration failure for either the worktree-create or + * resume path. Preserves the original error message and delegates hint detail + * to `extractStaleWorkspaceEntry`; falls back to a workspace-root pointer when + * the error shape is unrecognized. + */ +function buildRegistrationFailureError(action: string, error: Error): Error { + const staleWorkspaceEntry = extractStaleWorkspaceEntry(error.message); + let hint: string; + if (staleWorkspaceEntry) { + hint = `Hint: Remove the stale workspace entry at ${staleWorkspaceEntry} and retry, or use --no-worktree to skip isolation.`; + } else { + // Guard against a throwing getArchonHome() (misconfigured env vars, etc.): + // the registration error we're wrapping is the load-bearing one — we'd + // rather lose the exact path in the hint than replace it with a secondary + // home-resolution error that masks the root cause. 
+ try { + const workspacesPath = join(getArchonHome(), 'workspaces'); + hint = `Hint: Check your Archon workspace registration under ${workspacesPath} and retry, or use --no-worktree to skip isolation.`; + } catch { + hint = + 'Hint: Check your Archon workspace registration and retry, or use --no-worktree to skip isolation.'; + } + } + + return new Error( + `Cannot ${action}: repository registration failed.\nError: ${error.message}\n${hint}` + ); +} + /** Render a workflow event to stderr as a progress line. Called only when --quiet is not set. */ function renderWorkflowEvent(event: WorkflowEmitterEvent, verbose: boolean): void { switch (event.type) { @@ -285,6 +337,7 @@ export async function workflowRunCommand( // Try to find a codebase for this directory let codebase = null; let codebaseLookupError: Error | null = null; + let codebaseRegistrationError: Error | null = null; try { codebase = await codebaseDb.findCodebaseByDefaultCwd(cwd); } catch (error) { @@ -330,6 +383,7 @@ export async function workflowRunCommand( } } catch (error) { const err = error as Error; + codebaseRegistrationError = err; getLog().warn( { err, errorType: err.constructor.name, repoRoot }, 'cli.codebase_auto_registration_failed' @@ -354,6 +408,9 @@ export async function workflowRunCommand( 'Hint: Check your database connection before using --resume.' ); } + if (codebaseRegistrationError) { + throw buildRegistrationFailureError('resume', codebaseRegistrationError); + } throw new Error( 'Cannot resume: Not in a git repository.\n' + 'Either run from a git repo or use /clone first.' @@ -507,6 +564,9 @@ export async function workflowRunCommand( 'Hint: Check your database connection, or use --no-worktree to skip isolation.' ); } + if (codebaseRegistrationError) { + throw buildRegistrationFailureError('create worktree', codebaseRegistrationError); + } throw new Error( 'Cannot create worktree: not in a git repository.\n' + 'Run from within a git repo, or use --no-worktree to skip isolation.' 
diff --git a/packages/docs-web/src/content/docs/getting-started/overview.md b/packages/docs-web/src/content/docs/getting-started/overview.md index cee57df09d..ca3690937d 100644 --- a/packages/docs-web/src/content/docs/getting-started/overview.md +++ b/packages/docs-web/src/content/docs/getting-started/overview.md @@ -482,17 +482,19 @@ The CLI is standalone, but if you also want to interact via Telegram, Slack, Dis ## Troubleshooting -### "Cannot create worktree: not in a git repository" (but the repo exists) +### "Cannot create worktree: repository registration failed" (stale workspace symlink) -The real cause is usually a stale symlink from a previous Archon run with a different path. Look for this in the error output: +This happens when `~/.archon/workspaces/<owner>/<repo>/source` is a symlink pointing at a previous checkout (common after moving or renaming the repo). The error message includes the exact cleanup path to follow: ``` -Source symlink at ~/.archon/workspaces/.../source already points to <old-path>, expected <new-path> +Cannot create worktree: repository registration failed. +Error: Source symlink at ~/.archon/workspaces/<owner>/<repo>/source already points to <old-path>, expected <new-path> +Hint: Remove the stale workspace entry at ~/.archon/workspaces/<owner>/<repo> and retry, or use --no-worktree to skip isolation. ``` -Fix it by manually deleting the stale workspace folder at `~/.archon/workspaces/<owner>/<repo>/` and retrying the command. +Follow the hint — delete the stale workspace folder and re-run, or pass `--no-worktree` to skip isolation for one run. -> In the future, `archon isolation cleanup` will handle this automatically. +> On Archon versions before this fix, the same root cause surfaced as the misleading "Cannot create worktree: not in a git repository" (even though the repo was valid). If you see that string, upgrade and you'll get the actionable message above.
--- From f7a043db9ba0813bd8b04bea238599fa64d72a53 Mon Sep 17 00:00:00 2001 From: Rasmus Widing <152263317+Wirasm@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:15:41 +0300 Subject: [PATCH 11/14] fix(server,web,workflows): web approval gates auto-resume + reject-with-reason dialog (#1329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(server,web,workflows): web approval gates auto-resume + reject-with-reason dialog Fixes three tightly-coupled bugs that made web approval gates unusable: 1. orchestrator-agent did not pass parentConversationId to executeWorkflow for any web-dispatched foreground / interactive / resumable run. Without that field, findResumableRunByParentConversation (the machinery the CLI relies on for resume) couldn't find the paused run from the same conversation on a follow-up message, and the approve/reject API handlers had no conversation to dispatch back to. 2. POST /api/workflows/runs/:runId/{approve,reject} recorded the decision and returned "Send a message to continue the workflow." — the workflow never actually resumed. Added tryAutoResumeAfterGate() that mirrors what workflowApproveCommand / workflowRejectCommand already do on the CLI: look up the parent conversation, dispatch `/workflow run <workflow_name> <user_message>` back through dispatchToOrchestrator. Failures are non-fatal — the user can still send a manual message as a fallback. 3. The during-streaming cancel-check in dag-executor aborted any streaming node whenever the run status left 'running', including the legitimate transition to 'paused' that an approval node performs. A concurrent AI node in the same DAG layer now tolerates 'paused' and finishes its own stream; only truly terminal / unknown states (null, cancelled, failed, completed) abort the in-flight stream. Web UI: ConfirmRunActionDialog gains an optional reasonInput prop (label + placeholder) that renders a textarea and passes the trimmed value to onConfirm. 
WorkflowRunCard (dashboard) and WorkflowProgressCard (chat) both use it for Reject now — the chat card was still on window.confirm, which was both inconsistent with the dashboard and couldn't collect a reason. The trimmed reason threads through to $REJECTION_REASON in the workflow's on_reject prompt. Supersedes #1147. @jonasvanderhaegen surfaced the root cause and shape of the fix; that PR was 87 commits stale and pre-dated the reject-UX upgrade (#1261 area), so this is a fresh re-do on current dev. Tests: - packages/server/src/routes/api.workflow-runs.test.ts — 5 new cases: approve with parent dispatches; approve without parent returns "Send a message"; approve with deleted parent conversation skips safely; reject dispatches on-reject flows; reject that cancels (no on_reject) does NOT dispatch. - packages/core/src/orchestrator/orchestrator.test.ts — updated the two synthesizedPrompt-dispatch tests for the new executeWorkflow arity. Closes #1131. Co-authored-by: Jonas Vanderhaegen <7755555+jonasvanderhaegen@users.noreply.github.com> * fix: address multi-agent review findings for web approval auto-resume C1 (critical) — cross-adapter misrouting guard tryAutoResumeAfterGate now checks parentConv.platform_type === 'web' before dispatching. Non-web parents (Slack/Telegram/GitHub/Discord) being approved from the dashboard skip auto-resume rather than dispatching a Slack thread_ts or Telegram chat_id through the web adapter's lock manager. C2 (critical) — fire-and-forget dispatch replaced with await void dispatchToOrchestrator() meant the "Resuming workflow." response fired before async work completed, and the outer try/catch couldn't observe dispatch failures. Changed to await; response now accurately reflects dispatch outcome. I1 — replaced logPrefix string-template (which produced 3-segment api.workflow_*.dispatched event names violating {domain}.{action}_{state}) with literal event names per action, branched inside the helper. 
Accepts action: 'approve' | 'reject' instead. I2 — corrected misleading "foreground/interactive" qualifier in the approve-endpoint comment; background web dispatches also set parent_conversation_id via the pre-created run, so they auto-resume too. I3 — extracted shouldContinueStreamingForStatus() as a small exported policy and added 7 unit tests covering running/paused/null/cancelled/ failed/completed/unknown. Full-integration coverage of the paused- tolerance invariant would require manipulating the 10s CANCEL_CHECK_INTERVAL_MS, which is flaky-prone; unit test of the policy function captures the same invariant deterministically. I4 — updated approval-nodes.md and authoring-workflows.md to reflect that Web UI approve/reject now auto-resumes (no "send a follow-up message" copy), documented the reject-with-reason dialog and $REJECTION_REASON flow, and called out the cross-platform caveat. S1 — rewrote streaming status check as positive shouldContinue safe-list via the extracted policy function, matching the inline comment. S2 — inlined handleReject on the dashboard rather than squeezing rejectWorkflowRun through runAction with a closure; keeps runAction narrow for the single-arg lifecycle actions. S5 — new regression test covering the non-web-parent skip path (slack-platform parent → dispatch skipped → response falls back to "Send a message to continue"). S6 — removed stale reference to runAction in ConfirmRunActionDialog's onConfirm JSDoc (no longer accurate now that WorkflowProgressCard calls the dialog without runAction). S7 — fixed misleading "user can resume manually by sending any message" docstring (resume is triggered by re-running the workflow command, not by an arbitrary message). 
Skipped as out-of-scope: S3 — cancelWorkflowRun rowCount check (pre-existing defect; separate PR) S4 — tightening expect.anything() to UUID regex (deferred) S8 — 12-positional-arg executeWorkflow → options-bag refactor (tracked follow-up) bun run validate green locally; 68 tests in api.workflow-runs.test.ts (up from 67), 173 in dag-executor.test.ts (up from 166). * review: close I1/I2/I3/I4/I6 — paused tolerance in loop + emitter, resume test, useId I1 (loop inter-iteration check) — dag-executor.ts:1715 Used `!== 'running'` in the loop node's between-iteration status check. A sibling approval node pausing the run in the same topological layer would abort the loop mid-iteration with "Loop node '' stopped at iteration N (paused)". Switched to the shared shouldContinueStreamingForStatus helper so paused is tolerated — same semantics the streaming check got. Extended inline comment explains the sibling-layer concurrency reason. I2 (skipIfStatusChanged emitter unregister) — dag-executor.ts:2886 At DAG-finalization writes the helper correctly skipped writing on any non-running state (paused included — don't mark a paused run complete), but it *also* called getWorkflowEventEmitter().unregisterRun() which broke SSE observability for a run that's still live (waiting for user approval). Split the two responsibilities: skip the write for all non-running states, but only unregister the emitter for terminal states (cancelled / deleted / completed / failed). `paused` keeps the emitter registered so resume stays visible on the dashboard. I3 (foreground_resume_detected branch untested) — orchestrator-agent.test.ts That branch was modified as part of the original fix (added parentConversationId as 11th positional arg) but no existing test configured mockFindResumableRunByParentConversation to return non-null. A positional mistake (e.g. accidentally swapping issueContext and parentConversationId) would silently break auto-resume with no failing test. 
New regression test configures the mock, asserts both the cwd comes from the resumable run's working_path AND parentConversationId is passed correctly at position 10. I4 (null-parent log level) — api.ts tryAutoResumeAfterGate `getConversationById` returning null is a data-integrity signal (the parent conversation was deleted while the run was paused) — worth surfacing at info level so operators notice, not hiding at debug. Missing platform_conversation_id on an existing row would be an unusual DB state and stays at debug. Added `parentDeleted: boolean` to the log context so the two cases are distinguishable in observability. I6 (hardcoded DOM id) — ConfirmRunActionDialog.tsx `id="confirm-run-action-reason"` collided when multiple dialog instances share the same page (Radix portals mitigate in practice but the code was fragile). Switched to React.useId() so each instance gets a unique id — htmlFor/id wiring preserved. S11 (arity-only assertion) — orchestrator-agent.test.ts:1092 area The interactive-workflow-on-web test asserted mockExecuteWorkflow was called, but nothing about the args. Added a specific assertion that position 10 (parentConversationId) equals 'conv-1' (the caller conversation id) — pins the wiring that I1/I2 depend on being correct. Deferred (from review S1-S10, I5, I7): - S1 (ExecuteWorkflowOptions bag) — tracked as standalone follow-up; 12 positional args with 2 adjacent optionals is a real maintenance hazard but the refactor deserves its own PR. - S7 (WHY comment on non-web else branch) — review text says the branch "correctly omits" parentConversationId but the code passes it; the combination with the web-parent guard in tryAutoResumeAfterGate is intentional. Not adding a justify-what-we-don't-do comment. - S2/S3/S4/S5/S8/S9/S10 — pure polish (event-map ternary, platformConvId inlining, shared constant for REJECTION_REASON_INPUT, onChange arrow shorthand, discriminated union, docblock trim, suffix comment drop) - I5 (soften "Resuming workflow." 
to "— check the dashboard for progress") — users clicking from the dashboard are already on the dashboard; the current text is accurate (enqueue completed) and concise. - I7 (test dispatch-throws path) — covered implicitly by the try/catch branch of tryAutoResumeAfterGate returning false; a direct test would require mocking handleMessage to throw and would couple to dispatchToOrchestrator internals. bun run validate green; 189 dag-executor tests, 98 orchestrator-agent tests, 68 api.workflow-runs tests — all the new cases pass. --------- Co-authored-by: Jonas Vanderhaegen <7755555+jonasvanderhaegen@users.noreply.github.com> (cherry picked from commit d5c1cd960546ea934a4bc9dcf5988e7ed75c3310) --- CHANGELOG.md | 1 + .../orchestrator/orchestrator-agent.test.ts | 36 ++++ .../src/orchestrator/orchestrator-agent.ts | 15 +- .../src/orchestrator/orchestrator.test.ts | 10 +- .../src/content/docs/guides/approval-nodes.md | 20 +- .../docs/guides/authoring-workflows.md | 8 +- packages/server/src/routes/api.ts | 115 ++++++++++- .../src/routes/api.workflow-runs.test.ts | 186 +++++++++++++++++- .../components/chat/WorkflowProgressCard.tsx | 40 ++-- .../dashboard/ConfirmRunActionDialog.tsx | 66 ++++++- .../components/dashboard/WorkflowRunCard.tsx | 15 +- .../components/dashboard/WorkflowRunGroup.tsx | 2 +- packages/web/src/routes/DashboardPage.tsx | 15 +- packages/workflows/src/dag-executor.test.ts | 46 +++++ packages/workflows/src/dag-executor.ts | 58 +++++- 15 files changed, 579 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9663bd5431..6d541d13da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Bumped transitive `axios` to `^1.15.0` via root `overrides` to clear CVE-2025-62718** (NO_PROXY bypass via hostname normalization → potential SSRF). 
Archon pulls `axios` transitively through `@slack/bolt` and `@slack/web-api`; both semver ranges (`^1.12.0` and `^1.13.5`) accept the override cleanly, so no API surface changes. Credits @stefans71 for identifying and reporting the vulnerability in #1153. Closes #1053. - **Stale workspace symlink no longer reported as "not in a git repository" by the CLI.** When `archon workflow run` (or `--resume`) is invoked from a valid git repo whose `~/.archon/workspaces/<owner>/<repo>/source` symlink points somewhere else (common after moving/renaming the checkout), auto-registration fails but the repo is fine. Previously both the worktree-creation and resume paths fell through to the generic `Cannot create worktree: not in a git repository` / `Cannot resume: Not in a git repository` errors — a lie that sent users down the wrong diagnostic path. Both sites now preserve the registration error and throw `Cannot {create worktree,resume}: repository registration failed.` with the original cause and a concrete cleanup hint (`Remove the stale workspace entry at <path> and retry`) when the failure matches the `createProjectSourceSymlink()` shape. Credits @Bortlesboat for identifying the root cause and the parser approach in #1157. Closes #1146. - **Server startup no longer marks actively-running workflows as failed.** The `failOrphanedRuns()` call has been removed from `packages/server/src/index.ts` to match the CLI precedent (`packages/cli/src/cli.ts:256-258`). Per the new CLAUDE.md principle "No Autonomous Lifecycle Mutation Across Process Boundaries", a stuck `running` row is now transitioned explicitly by the user: via the per-row Cancel/Abandon buttons on the dashboard workflow card, or `archon workflow abandon <run-id>` from the CLI. (`archon workflow cleanup` is a separate command that deletes OLD terminal runs for disk hygiene — it does not handle stuck `running` rows.) Closes #1216. 
+- **Web UI approval gates now auto-resume.** Previously, clicking Approve or Reject on a paused workflow from the Web UI only recorded the decision — the workflow never continued, and the user had to send a follow-up chat message (or use the CLI) to resume. Three fixes: (1) orchestrator-agent now threads `parentConversationId` through `executeWorkflow` for every web dispatch, (2) the `POST /approve` and `POST /reject` API handlers dispatch `/workflow run <workflow_name> <user_message>` back through the orchestrator when `parent_conversation_id` is set and points at a web-platform parent (mirrors `workflowApproveCommand`/`workflowRejectCommand` on the CLI; non-web parents skip the auto-resume to prevent cross-adapter misrouting), and (3) the during-streaming status check in the DAG executor tolerates the `paused` state so a concurrent AI node in the same topological layer finishes its own stream rather than being aborted when a sibling approval node pauses the run. The Web UI reject button uses the proper `ConfirmRunActionDialog` with an optional reason textarea (was `window.confirm` in the chat card, and lacked a reason input on the dashboard) — the trimmed reason propagates to `$REJECTION_REASON` in the workflow's `on_reject` prompt. Credits @jonasvanderhaegen for surfacing and diagnosing the bug in #1147 (that PR was 87 commits stale on a dev that had since refactored the reject UX; this is a fresh re-do on current `dev`). Closes #1131. 
### Changed diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index ab8165ca7e..3a4a1299c9 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -1099,6 +1099,42 @@ describe('workflow dispatch routing — interactive flag', () => { expect(mockExecuteWorkflow).toHaveBeenCalled(); expect(mockDispatchBackgroundWorkflow).not.toHaveBeenCalled(); + // Regression for the auto-resume plumbing: the interactive web dispatch + // must pass the caller conversation's DB id as parentConversationId + // (11th positional arg) so the approve/reject API handlers can dispatch + // resume back through the orchestrator. + const callArgs = mockExecuteWorkflow.mock.calls[0] as unknown[]; + expect(callArgs[10]).toBe('conv-1'); // parentConversationId = conversation.id + }); + + test('foreground_resume_detected: passes parentConversationId to executeWorkflow when a resumable run exists', async () => { + // Regression for the foreground-resume branch added as part of the + // auto-resume fix: when `findResumableRunByParentConversation` returns a + // paused run, the orchestrator picks the working_path from that run and + // must still carry parentConversationId forward so the API helpers can + // keep dispatching resume on subsequent approvals. 
+ mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(makeDispatchConversation())); + mockGetCodebase.mockReturnValueOnce(Promise.resolve(makeDispatchCodebase())); + mockHandleCommand.mockReturnValueOnce(Promise.resolve(makeWorkflowResult(true))); + mockFindResumableRunByParentConversation.mockReturnValueOnce( + Promise.resolve({ + id: 'resumable-run-1', + workflow_name: 'test-workflow', + working_path: '/repos/test-repo/worktrees/feature', + parent_conversation_id: 'conv-1', + status: 'failed', + }) + ); + + const platform = makePlatform(); // getPlatformType returns 'web' + await handleMessage(platform, 'conv-1', '/workflow run test-workflow'); + + expect(mockExecuteWorkflow).toHaveBeenCalled(); + const callArgs = mockExecuteWorkflow.mock.calls[0] as unknown[]; + // cwd (position 3) should come from the resumable run's working_path + expect(callArgs[3]).toBe('/repos/test-repo/worktrees/feature'); + // parentConversationId (position 10) should still be the caller conversation id + expect(callArgs[10]).toBe('conv-1'); }); test('calls dispatchBackgroundWorkflow for non-interactive workflow on web', async () => { diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index d5eb9397b3..292f0e0ad8 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -281,7 +281,10 @@ async function dispatchOrchestratorWorkflow( workflow, userMessage, conversation.id, - codebase.id + codebase.id, + undefined, // issueContext + undefined, // isolationContext + conversation.id // parentConversationId — enables approve/reject auto-resume ); } else if (workflow.interactive) { // Interactive workflows run in foreground so output stays in the user's conversation @@ -293,7 +296,10 @@ async function dispatchOrchestratorWorkflow( workflow, userMessage, conversation.id, - codebase.id + codebase.id, + undefined, // issueContext + undefined, // 
isolationContext + conversation.id // parentConversationId — enables approve/reject auto-resume ); } else { await dispatchBackgroundWorkflow( @@ -319,7 +325,10 @@ async function dispatchOrchestratorWorkflow( workflow, userMessage, conversation.id, - codebase.id + codebase.id, + undefined, // issueContext + undefined, // isolationContext + conversation.id // parentConversationId — enables approve/reject auto-resume ); } } diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts index f8f199a5de..bd0caf3bf8 100644 --- a/packages/core/src/orchestrator/orchestrator.test.ts +++ b/packages/core/src/orchestrator/orchestrator.test.ts @@ -1078,7 +1078,10 @@ describe('orchestrator-agent handleMessage', () => { expect.anything(), // workflow synthesized, // synthesizedPrompt, not original message expect.anything(), // conversation.id - expect.anything() // codebase.id + expect.anything(), // codebase.id + undefined, // issueContext + undefined, // isolationContext + expect.anything() // parentConversationId — web approval auto-resume ); }); @@ -1103,7 +1106,10 @@ describe('orchestrator-agent handleMessage', () => { expect.anything(), 'fix the login bug', // original message used as fallback expect.anything(), - expect.anything() + expect.anything(), + undefined, // issueContext + undefined, // isolationContext + expect.anything() // parentConversationId — web approval auto-resume ); }); diff --git a/packages/docs-web/src/content/docs/guides/approval-nodes.md b/packages/docs-web/src/content/docs/guides/approval-nodes.md index 42ebc48fec..c48f8c4856 100644 --- a/packages/docs-web/src/content/docs/guides/approval-nodes.md +++ b/packages/docs-web/src/content/docs/guides/approval-nodes.md @@ -55,9 +55,9 @@ to the user on whatever platform they're using (CLI, Slack, GitHub, etc.). On th block the worktree path guard (no other workflow can start on the same path). 4. 
**Approve**: The user approves, which writes a `node_completed` event for the approval node and transitions the run to resumable. Natural-language - messages (recommended) and the CLI auto-resume immediately. The explicit - `/workflow approve` command records the approval; send a follow-up message - to resume. + messages, the CLI, and the Web UI approve button all auto-resume the + workflow from the paused gate. (The explicit `/workflow approve <run-id>` + slash command also auto-resumes when issued in the originating conversation.) 5. **Reject**: The user rejects. - **Without `on_reject`**: The workflow is cancelled immediately. - **With `on_reject`**: The executor runs the `on_reject.prompt` via AI (with @@ -140,7 +140,19 @@ bun run cli workflow reject <run-id> --reason "Plan needs more test coverage" ### Web UI Paused workflows show an amber pulsing badge on the dashboard. Click **Approve** -or **Reject** directly on the workflow card. +or **Reject** directly on the workflow card. Both actions auto-resume the +workflow from the paused gate — no follow-up message required. + +**Reject with reason**: the Reject dialog includes an optional free-text +reason field. The trimmed value (empty after trim → omitted) is passed to +the workflow as `$REJECTION_REASON`, available in the `on_reject.prompt`. +Rejects on web and chat cards use the same confirmation dialog. + +**Cross-platform caveat**: auto-resume via the Web UI only applies when the +run was originally dispatched from the Web UI (parent conversation is a web +conversation). If you approve a Slack / Telegram / GitHub-dispatched run +from the dashboard, the decision is recorded, but the resume flow has to +happen in the originating platform (re-run the workflow there). 
### REST API diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md index c4fdfc7830..4fcb6d5238 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md +++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md @@ -977,12 +977,12 @@ nodes: When the workflow reaches `review-gate`, it pauses and notifies you. Approve or reject via: - **Natural language** (recommended): Just type your response in the conversation — the system detects the paused workflow and auto-resumes -- **CLI**: `bun run cli workflow approve <run-id>` or `bun run cli workflow reject <run-id>` -- **Explicit command**: `/workflow approve <run-id>` or `/workflow reject <run-id>` (records approval; send a follow-up message to resume) -- **Web UI**: Click the Approve/Reject buttons on the dashboard card +- **CLI**: `bun run cli workflow approve <run-id>` or `bun run cli workflow reject <run-id>` — auto-resumes +- **Explicit command**: `/workflow approve <run-id>` or `/workflow reject <run-id>` — auto-resumes when issued in the originating conversation +- **Web UI**: Click the Approve/Reject buttons on the dashboard card — auto-resumes for Web-UI-dispatched runs; the Reject dialog includes an optional reason field that flows to `$REJECTION_REASON` - **API**: `POST /api/workflows/runs/<run-id>/approve` or `/reject` -After approval via natural language or CLI, the workflow auto-resumes from the next node. The user's approval comment is available as `$review-gate.output` in downstream nodes only when `capture_response: true` is set on the approval node. +All four paths auto-resume the workflow from the next node. The user's approval comment is available as `$review-gate.output` in downstream nodes only when `capture_response: true` is set on the approval node. Cross-platform caveat: Web-UI approvals on Slack / Telegram / GitHub-dispatched runs record the decision but do not auto-resume — re-run from the originating platform to continue. 
Without `on_reject`: rejecting cancels the workflow. With `on_reject`: rejecting triggers an AI rework prompt and re-pauses for re-review. diff --git a/packages/server/src/routes/api.ts b/packages/server/src/routes/api.ts index 6448c77318..4832e06b61 100644 --- a/packages/server/src/routes/api.ts +++ b/packages/server/src/routes/api.ts @@ -51,7 +51,7 @@ import { RESUMABLE_WORKFLOW_STATUSES, TERMINAL_WORKFLOW_STATUSES, } from '@archon/workflows/schemas/workflow-run'; -import type { ApprovalContext } from '@archon/workflows/schemas/workflow-run'; +import type { ApprovalContext, WorkflowRun } from '@archon/workflows/schemas/workflow-run'; import { findMarkdownFilesRecursive } from '@archon/core/utils/commands'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -1051,6 +1051,95 @@ export function registerApiRoutes( return { accepted: true, status: result.status }; } + /** + * Re-enter the orchestrator after a paused approval gate is resolved, so a + * web-dispatched workflow continues (approve) or runs its on_reject prompt + * (reject) without the user having to re-run the workflow command. The CLI's + * `workflowApproveCommand` / `workflowRejectCommand` already auto-resume via + * `workflowRunCommand({ resume: true })`; this is the web-side equivalent. + * + * Returns `true` when a resume dispatch was initiated, `false` otherwise (no + * parent conversation on the run, parent conversation deleted, parent was on + * a non-web platform, or dispatch threw). Failures are non-fatal: the gate + * decision is recorded regardless; when this returns `false` the response + * text instructs the user to re-run the workflow command. + * + * **Cross-adapter guard**: only web-sourced parents qualify. + * `dispatchToOrchestrator` is wired to the web adapter + its lock manager, + * so a Slack / Telegram / GitHub / Discord run being approved from the + * dashboard must not route through it — the Slack thread would never see + * the resumed output. 
Non-web parents skip auto-resume and the originating + * platform's own re-run flow applies. + */ + async function tryAutoResumeAfterGate( + run: WorkflowRun, + action: 'approve' | 'reject' + ): Promise { + if (!run.parent_conversation_id) return false; + // Literal event names per action — greppable for ops tooling. Keeping the + // branch explicit rather than templating avoids the earlier 3-segment + // `api.workflow_*.dispatched` shape that broke `{domain}.{action}_{state}`. + const events = + action === 'approve' + ? { + dispatched: 'api.workflow_approve_auto_resume_dispatched' as const, + skippedNoPlatformConv: + 'api.workflow_approve_auto_resume_skipped_no_platform_conv' as const, + skippedNonWebParent: 'api.workflow_approve_auto_resume_skipped_non_web_parent' as const, + failed: 'api.workflow_approve_auto_resume_failed' as const, + } + : { + dispatched: 'api.workflow_reject_auto_resume_dispatched' as const, + skippedNoPlatformConv: + 'api.workflow_reject_auto_resume_skipped_no_platform_conv' as const, + skippedNonWebParent: 'api.workflow_reject_auto_resume_skipped_non_web_parent' as const, + failed: 'api.workflow_reject_auto_resume_failed' as const, + }; + try { + const parentConv = await conversationDb.getConversationById(run.parent_conversation_id); + const platformConvId = parentConv?.platform_conversation_id; + if (!platformConvId) { + // parentConv === null is a data-integrity signal (the parent + // conversation was deleted while the run was paused) — worth + // surfacing at info level so operators notice. Missing + // platform_conversation_id on an existing row shouldn't happen and + // stays at debug. + const logFn = + parentConv === null ? 
getLog().info.bind(getLog()) : getLog().debug.bind(getLog()); + logFn( + { + runId: run.id, + parentConversationId: run.parent_conversation_id, + parentDeleted: parentConv === null, + }, + events.skippedNoPlatformConv + ); + return false; + } + if (parentConv.platform_type !== 'web') { + getLog().debug( + { + runId: run.id, + parentConversationId: run.parent_conversation_id, + platformType: parentConv.platform_type, + }, + events.skippedNonWebParent + ); + return false; + } + const resumeMessage = `/workflow run ${run.workflow_name} ${run.user_message ?? ''}`.trim(); + await dispatchToOrchestrator(platformConvId, resumeMessage); + getLog().info( + { runId: run.id, workflowName: run.workflow_name, platformConvId }, + events.dispatched + ); + return true; + } catch (err) { + getLog().warn({ err: err as Error, runId: run.id }, events.failed); + return false; + } + } + // GET /api/conversations - List conversations registerOpenApiRoute(getConversationsRoute, async c => { try { @@ -1910,9 +1999,20 @@ export function registerApiRoutes( status: 'failed', metadata: metadataUpdate, }); + + // Auto-resume: dispatch to the orchestrator so the workflow continues + // without requiring the user to re-run the workflow command. Mirrors + // what `workflowApproveCommand` does in the CLI. Requires + // `parent_conversation_id` on the run (set by orchestrator-agent for any + // web-dispatched workflow — foreground, interactive, and background via + // the pre-created run) and a web-platform parent (guarded in the helper). + const autoResumed = await tryAutoResumeAfterGate(run, 'approve'); + return c.json({ success: true, - message: `Workflow approved: ${run.workflow_name}. Send a message to continue the workflow.`, + message: autoResumed + ? `Workflow approved: ${run.workflow_name}. Resuming workflow.` + : `Workflow approved: ${run.workflow_name}. 
Send a message to continue.`, }); } catch (error) { getLog().error({ err: error, runId }, 'api.workflow_run_approve_failed'); @@ -1956,9 +2056,18 @@ export function registerApiRoutes( status: 'failed', metadata: { rejection_reason: reason, rejection_count: currentCount + 1 }, }); + + // Auto-resume: dispatch to the orchestrator so the on_reject prompt runs + // without requiring the user to re-run the workflow command. Mirrors + // what `workflowRejectCommand` does in the CLI. Same cross-adapter + // guard as approve — only web parents auto-resume. + const autoResumed = await tryAutoResumeAfterGate(run, 'reject'); + return c.json({ success: true, - message: `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`, + message: autoResumed + ? `Workflow rejected: ${run.workflow_name}. Running on-reject prompt.` + : `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`, }); } diff --git a/packages/server/src/routes/api.workflow-runs.test.ts b/packages/server/src/routes/api.workflow-runs.test.ts index 41bee85003..8d837d3623 100644 --- a/packages/server/src/routes/api.workflow-runs.test.ts +++ b/packages/server/src/routes/api.workflow-runs.test.ts @@ -22,7 +22,8 @@ const mockGetWorkflowRunByWorkerPlatformId = mock( ); const mockListWorkflowEvents = mock(async (_runId: string) => [] as MockWorkflowEvent[]); const mockGetConversationById = mock( - async (_id: string) => null as null | { id: string; platform_conversation_id: string } + async (_id: string) => + null as null | { id: string; platform_conversation_id: string; platform_type: string } ); const mockFindConversationByPlatformId = mock( async (_id: string) => @@ -1362,3 +1363,186 @@ describe('POST /api/workflows/runs/:runId/reject', () => { expect(mockUpdateWorkflowRun).not.toHaveBeenCalled(); }); }); + +// --------------------------------------------------------------------------- +// Auto-resume: approve/reject endpoints dispatch to orchestrator when the run +// has 
parent_conversation_id set (web-dispatched foreground/interactive +// workflows). Mirrors what the CLI does in workflowApproveCommand/RejectCommand. +// --------------------------------------------------------------------------- + +describe('approve/reject auto-resume', () => { + beforeEach(() => { + mockGetWorkflowRun.mockReset(); + mockUpdateWorkflowRun.mockReset(); + mockCreateWorkflowEvent.mockReset(); + mockGetConversationById.mockReset(); + mockHandleMessage.mockReset(); + mockCancelWorkflowRun.mockReset(); + }); + + test('approve: dispatches resume when parent_conversation_id is set', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + id: 'run-auto-resume-approve', + parent_conversation_id: 'parent-conv-uuid', + user_message: 'Deploy feature X', + }); + mockGetConversationById.mockResolvedValueOnce({ + id: 'parent-conv-uuid', + platform_conversation_id: 'web-plat-abc', + platform_type: 'web', + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-auto-resume-approve/approve', { + method: 'POST', + body: JSON.stringify({ comment: 'LGTM' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Resuming workflow'); + + // dispatchToOrchestrator → lockManager → handleMessage + expect(mockHandleMessage).toHaveBeenCalled(); + const [, platformConvId, dispatchedMessage] = mockHandleMessage.mock.calls[0] as [ + unknown, + string, + string, + ]; + expect(platformConvId).toBe('web-plat-abc'); + expect(dispatchedMessage).toBe('/workflow run deploy Deploy feature X'); + }); + + test('approve: skips dispatch when parent_conversation_id is null (CLI-dispatched run)', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: null, + }); + + const { app } = makeApp(); + const response = await 
app.request('/api/workflows/runs/run-paused-1/approve', { + method: 'POST', + body: JSON.stringify({ comment: 'LGTM' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Send a message to continue'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + expect(mockGetConversationById).not.toHaveBeenCalled(); + }); + + test('approve: skips dispatch when parent conversation no longer exists', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: 'deleted-conv-uuid', + }); + mockGetConversationById.mockResolvedValueOnce(null); // conversation deleted + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/approve', { + method: 'POST', + body: JSON.stringify({}), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Send a message to continue'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('approve: skips dispatch when parent conversation is on a non-web platform', async () => { + // A Slack/Telegram/GitHub-sourced run being approved via the dashboard + // must not route through dispatchToOrchestrator — that helper is wired + // to the web adapter + lock manager, so dispatching a Slack thread_ts + // or Telegram chat_id would misroute through the wrong adapter. 
+ mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: 'slack-parent-conv-uuid', + }); + mockGetConversationById.mockResolvedValueOnce({ + id: 'slack-parent-conv-uuid', + platform_conversation_id: '1234567890.123456', // a Slack thread_ts + platform_type: 'slack', + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/approve', { + method: 'POST', + body: JSON.stringify({ comment: 'LGTM' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + // Same fallback text as no-parent case — user re-runs from the originating platform. + expect(body.message).toContain('Send a message to continue'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('reject: dispatches resume for on_reject flows when parent is set', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + id: 'run-auto-resume-reject', + parent_conversation_id: 'parent-conv-uuid', + user_message: 'Review PR', + metadata: { + approval: { + type: 'approval', + nodeId: 'review-gate', + message: 'Approve?', + onRejectPrompt: 'Fix: $REJECTION_REASON', + onRejectMaxAttempts: 3, + }, + rejection_count: 0, + }, + }); + mockGetConversationById.mockResolvedValueOnce({ + id: 'parent-conv-uuid', + platform_conversation_id: 'web-plat-xyz', + platform_type: 'web', + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-auto-resume-reject/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'tests missing' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Running on-reject prompt'); + expect(mockHandleMessage).toHaveBeenCalled(); + const [, platformConvId, dispatchedMessage] = 
mockHandleMessage.mock.calls[0] as [ + unknown, + string, + string, + ]; + expect(platformConvId).toBe('web-plat-xyz'); + expect(dispatchedMessage).toBe('/workflow run deploy Review PR'); + }); + + test('reject: does NOT dispatch when the run is being cancelled (no on_reject configured)', async () => { + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: 'parent-conv-uuid', // set, but doesn't matter — reject cancels + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'no' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + // Cancellation path doesn't auto-resume — nothing to resume to. + expect(mockHandleMessage).not.toHaveBeenCalled(); + expect(mockCancelWorkflowRun).toHaveBeenCalledWith('run-paused-1'); + }); +}); diff --git a/packages/web/src/components/chat/WorkflowProgressCard.tsx b/packages/web/src/components/chat/WorkflowProgressCard.tsx index bb65471f3b..44eb70af74 100644 --- a/packages/web/src/components/chat/WorkflowProgressCard.tsx +++ b/packages/web/src/components/chat/WorkflowProgressCard.tsx @@ -5,6 +5,7 @@ import { CheckCircle, ChevronRight, Loader2, Pause, XCircle } from 'lucide-react import { cn } from '@/lib/utils'; import { approveWorkflowRun, getWorkflowRunByWorker, rejectWorkflowRun } from '@/lib/api'; import { useWorkflowStore } from '@/stores/workflow-store'; +import { ConfirmRunActionDialog } from '@/components/dashboard/ConfirmRunActionDialog'; import { StatusIcon } from '@/components/workflows/StatusIcon'; import { formatDurationMs } from '@/lib/format'; import { isTerminalStatus } from '@/lib/workflow-utils'; @@ -87,7 +88,7 @@ export function WorkflowProgressCard({ mutationFn: () => approveWorkflowRun(runId ?? ''), }); const rejectMutation = useMutation({ - mutationFn: () => rejectWorkflowRun(runId ?? 
''), + mutationFn: (reason?: string) => rejectWorkflowRun(runId ?? '', reason), }); const mutationError = approveMutation.error ?? rejectMutation.error; @@ -220,18 +221,33 @@ export function WorkflowProgressCard({ Approve - + } + title="Reject workflow?" + description={ + <> + Reject the paused workflow {workflowName}. If the approval + node defines an on_reject prompt, it runs with your reason as{' '} + $REJECTION_REASON; otherwise the run is cancelled. + + } + confirmLabel="Reject" + reasonInput={{ + label: 'Reason (optional)', + placeholder: 'Why are you rejecting? Visible to the on_reject prompt.', }} - disabled={!runId || approveMutation.isPending || rejectMutation.isPending} - className="flex items-center gap-1 rounded-md px-2 py-1 text-xs text-error/80 hover:bg-error/10 hover:text-error transition-colors disabled:opacity-50" - > - - Reject - + onConfirm={(reason): void => { + rejectMutation.mutate(reason); + }} + />
{(approveMutation.isError || rejectMutation.isError) && (

diff --git a/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx b/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx index 2292aef3ce..4de85ce2bf 100644 --- a/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx +++ b/packages/web/src/components/dashboard/ConfirmRunActionDialog.tsx @@ -1,4 +1,4 @@ -import type { ReactNode } from 'react'; +import { useId, useState, type ReactNode } from 'react'; import { AlertDialog, AlertDialogAction, @@ -11,6 +11,16 @@ import { AlertDialogTrigger, } from '@/components/ui/alert-dialog'; +/** + * Optional free-text input rendered below the description. Used for the + * reject flow so reviewers can attach a reason that propagates to the + * workflow's `on_reject` prompt as `$REJECTION_REASON`. + */ +interface ReasonInputConfig { + label: string; + placeholder?: string; +} + interface Props { /** The element that opens the dialog when clicked (typically a button). */ trigger: ReactNode; @@ -20,11 +30,17 @@ interface Props { description: ReactNode; /** Confirm-button label (e.g. "Abandon", "Delete"). */ confirmLabel: string; - /** Invoked when the user confirms. The current callsites are all - * fire-and-forget wrappers around React Query mutations whose error - * handling lives at the page level (`runAction` in `DashboardPage.tsx`). - * Widen to `Promise` only if a caller needs to await the action. */ - onConfirm: () => void; + /** + * When provided, renders a textarea below the description. The trimmed + * value is passed to `onConfirm` — empty after trim becomes `undefined` + * so callers can distinguish "no reason given" from "empty string given". + */ + reasonInput?: ReasonInputConfig; + /** Invoked when the user confirms. Fire-and-forget; callers own error + * surfacing. Widen to `Promise` only if a future caller needs to + * await the action. `reason` is only non-`undefined` when `reasonInput` + * is supplied and the user typed something after trimming. 
*/ + onConfirm: (reason?: string) => void; } /** @@ -36,6 +52,10 @@ interface Props { * `@/components/ui/alert-dialog`), which is appropriate for every workflow * lifecycle action this is used for (Abandon, Cancel, Delete, Reject). * + * For reject flows, pass `reasonInput` to collect a trimmed free-text reason + * that propagates to `$REJECTION_REASON` inside the workflow's `on_reject` + * prompt. + * * Replaces previous use of `window.confirm()` for these actions to match the * codebase-delete UX in `sidebar/ProjectSelector.tsx`. */ @@ -44,10 +64,22 @@ export function ConfirmRunActionDialog({ title, description, confirmLabel, + reasonInput, onConfirm, }: Props): React.ReactElement { + const [reason, setReason] = useState(''); + // useId() so multiple dialog instances on the same page (e.g. side-by-side + // run cards) don't collide on a shared DOM id. + const reasonInputId = useId(); + return ( - + { + // Reset the textarea every time the dialog closes so a previous + // reason doesn't bleed into the next reject action on the same card. + if (!open) setReason(''); + }} + > {trigger} @@ -56,6 +88,23 @@ export function ConfirmRunActionDialog({

{description}
+ {reasonInput && ( +
+ +