diff --git a/packages/server/src/routes/api.ts b/packages/server/src/routes/api.ts index 928a8f35cd..cda9f98272 100644 --- a/packages/server/src/routes/api.ts +++ b/packages/server/src/routes/api.ts @@ -1035,31 +1035,41 @@ export function registerApiRoutes( return { accepted: true, status: result.status }; } + /** + * Result of an auto-resume attempt. The `reason` discriminator on the + * non-resumed branch tells the caller which guard fired so the response + * message can name the right manual remedy (CLI command vs. send-a-message + * vs. dispatch retry). + */ + type AutoResumeResult = + | { resumed: true } + | { + resumed: false; + reason: 'no_parent' | 'no_platform_conv' | 'non_web_parent' | 'dispatch_failed'; + }; + /** * Re-enter the orchestrator after a paused approval gate is resolved, so a * web-dispatched workflow continues (approve) or runs its on_reject prompt - * (reject) without the user having to re-run the workflow command. The CLI's - * `workflowApproveCommand` / `workflowRejectCommand` already auto-resume via - * `workflowRunCommand({ resume: true })`; this is the web-side equivalent. + * (reject) without the user having to re-run the workflow command. * - * Returns `true` when a resume dispatch was initiated, `false` otherwise (no - * parent conversation on the run, parent conversation deleted, parent was on - * a non-web platform, or dispatch threw). Failures are non-fatal: the gate - * decision is recorded regardless; when this returns `false` the response - * text instructs the user to re-run the workflow command. + * Returns a structured result so callers can construct an actionable + * response when auto-resume is skipped — historically the response text + * said only "send a message to continue" / "will run on resume", which is + * meaningless to a web-UI user whose run was started from the CLI. * * **Cross-adapter guard**: only web-sourced parents qualify. * `dispatchToOrchestrator` is wired to the web adapter + its lock manager, * so a Slack / Telegram / GitHub / Discord run being approved from the * dashboard must not route through it — the Slack thread would never see - * the resumed output. Non-web parents skip auto-resume and the originating - * platform's own re-run flow applies. + * the resumed output. Non-web parents skip auto-resume and the caller + * surfaces a manual resume hint instead. */ async function tryAutoResumeAfterGate( run: WorkflowRun, action: 'approve' | 'reject' - ): Promise { - if (!run.parent_conversation_id) return false; + ): Promise { + if (!run.parent_conversation_id) return { resumed: false, reason: 'no_parent' }; // Literal event names per action — greppable for ops tooling. Keeping the // branch explicit rather than templating avoids the earlier 3-segment // `api.workflow_*.dispatched` shape that broke `{domain}.{action}_{state}`. @@ -1098,7 +1108,7 @@ export function registerApiRoutes( }, events.skippedNoPlatformConv ); - return false; + return { resumed: false, reason: 'no_platform_conv' }; } if (parentConv.platform_type !== 'web') { getLog().debug( @@ -1109,7 +1119,7 @@ export function registerApiRoutes( }, events.skippedNonWebParent ); - return false; + return { resumed: false, reason: 'non_web_parent' }; } const resumeMessage = `/workflow run ${run.workflow_name} ${run.user_message ?? ''}`.trim(); await dispatchToOrchestrator(platformConvId, resumeMessage); @@ -1117,10 +1127,43 @@ export function registerApiRoutes( { runId: run.id, workflowName: run.workflow_name, platformConvId }, events.dispatched ); - return true; + return { resumed: true }; } catch (err) { getLog().warn({ err: err as Error, runId: run.id }, events.failed); - return false; + return { resumed: false, reason: 'dispatch_failed' }; + } + } + + /** + * Build the user-facing manual-resume hint for the cases where + * `tryAutoResumeAfterGate` did not dispatch. The message is shaped to the + * specific guard that fired so the user gets an actionable next step + * instead of vague text. `archon workflow resume ` is the universal + * fallback — it works regardless of how the run was started. + */ + function manualResumeMessage( + runId: string, + reason: Extract['reason'], + action: 'approve' | 'reject' + ): string { + const verb = action === 'approve' ? 'continue' : 'apply the on-reject prompt'; + const cliCommand = `\`archon workflow resume ${runId}\``; + switch (reason) { + case 'no_parent': + // CLI-originated run (no parent conversation linked). + return `Run ${cliCommand} from the working directory to ${verb}.`; + case 'non_web_parent': + // Slack / Telegram / GitHub / Discord parent — re-running on the + // originating platform is the natural path; the CLI is a fallback. + return `Send a message in the originating thread, or run ${cliCommand} from a terminal to ${verb}.`; + case 'no_platform_conv': + // Parent conversation existed but is gone (deleted) — only the CLI + // path remains. + return `Run ${cliCommand} from a terminal to ${verb} (the originating conversation is no longer available).`; + case 'dispatch_failed': + // Dispatch threw; the gate decision was recorded but the resume + // didn't kick off. User retries via CLI. + return `Auto-resume dispatch failed. Run ${cliCommand} from a terminal to ${verb}.`; } } @@ -1907,11 +1950,13 @@ export function registerApiRoutes( if (!RESUMABLE_WORKFLOW_STATUSES.includes(run.status)) { return apiError(c, 400, `Cannot resume workflow in '${run.status}' status`); } - // Run is already failed — the next invocation on the same path auto-resumes + // Run is already failed — the next invocation on the same path auto-resumes. + // The endpoint itself does not execute the resume; that's still a CLI/orchestrator + // responsibility. Surface the exact command so a web-UI user isn't left guessing. const pathInfo = run.working_path ? ` at \`${run.working_path}\`` : ''; return c.json({ success: true, - message: `Workflow run ready to resume: ${run.workflow_name}${pathInfo}. Re-run the workflow to auto-resume from completed nodes.`, + message: `Workflow run ready to resume: ${run.workflow_name}${pathInfo}. Run \`archon workflow resume ${runId}\` from the working directory to continue from completed nodes.`, }); } catch (error) { getLog().error({ err: error, runId }, 'api.workflow_run_resume_failed'); @@ -1990,13 +2035,13 @@ export function registerApiRoutes( // `parent_conversation_id` on the run (set by orchestrator-agent for any // web-dispatched workflow — foreground, interactive, and background via // the pre-created run) and a web-platform parent (guarded in the helper). - const autoResumed = await tryAutoResumeAfterGate(run, 'approve'); + const auto = await tryAutoResumeAfterGate(run, 'approve'); return c.json({ success: true, - message: autoResumed + message: auto.resumed ? `Workflow approved: ${run.workflow_name}. Resuming workflow.` - : `Workflow approved: ${run.workflow_name}. Send a message to continue.`, + : `Workflow approved: ${run.workflow_name}. ${manualResumeMessage(runId, auto.reason, 'approve')}`, }); } catch (error) { getLog().error({ err: error, runId }, 'api.workflow_run_approve_failed'); @@ -2045,13 +2090,13 @@ export function registerApiRoutes( // without requiring the user to re-run the workflow command. Mirrors // what `workflowRejectCommand` does in the CLI. Same cross-adapter // guard as approve — only web parents auto-resume. - const autoResumed = await tryAutoResumeAfterGate(run, 'reject'); + const auto = await tryAutoResumeAfterGate(run, 'reject'); return c.json({ success: true, - message: autoResumed + message: auto.resumed ? `Workflow rejected: ${run.workflow_name}. Running on-reject prompt.` - : `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`, + : `Workflow rejected: ${run.workflow_name}. ${manualResumeMessage(runId, auto.reason, 'reject')}`, }); } diff --git a/packages/server/src/routes/api.workflow-runs.test.ts b/packages/server/src/routes/api.workflow-runs.test.ts index 8d837d3623..e62d97a065 100644 --- a/packages/server/src/routes/api.workflow-runs.test.ts +++ b/packages/server/src/routes/api.workflow-runs.test.ts @@ -1061,6 +1061,8 @@ describe('POST /api/workflows/runs/:runId/resume', () => { const body = (await response.json()) as { success: boolean; message: string }; expect(body.success).toBe(true); expect(body.message).toContain('ready to resume'); + // Surface the exact CLI command so the web-UI user isn't left guessing. + expect(body.message).toContain('archon workflow resume run-uuid-4'); }); }); @@ -1326,7 +1328,10 @@ describe('POST /api/workflows/runs/:runId/reject', () => { expect(response.status).toBe(200); const body = (await response.json()) as { success: boolean; message: string }; expect(body.success).toBe(true); - expect(body.message).toContain('On-reject prompt'); + // MOCK_PAUSED_RUN has parent_conversation_id: null, so this run takes the + // CLI-hint branch — the message names the explicit resume command. + expect(body.message).toContain('on-reject prompt'); + expect(body.message).toContain('archon workflow resume run-on-reject'); expect(mockUpdateWorkflowRun).toHaveBeenCalledWith('run-on-reject', { status: 'failed', metadata: { rejection_reason: 'needs more tests', rejection_count: 1 }, @@ -1415,7 +1420,7 @@ describe('approve/reject auto-resume', () => { expect(dispatchedMessage).toBe('/workflow run deploy Deploy feature X'); }); - test('approve: skips dispatch when parent_conversation_id is null (CLI-dispatched run)', async () => { + test('approve: skips dispatch and surfaces CLI hint when parent_conversation_id is null (CLI-dispatched run)', async () => { mockGetWorkflowRun.mockResolvedValueOnce({ ...MOCK_PAUSED_RUN, parent_conversation_id: null, @@ -1430,12 +1435,14 @@ describe('approve/reject auto-resume', () => { expect(response.status).toBe(200); const body = (await response.json()) as { message: string }; - expect(body.message).toContain('Send a message to continue'); + // CLI-originated runs get a direct CLI command — no parent thread to send a message to. + expect(body.message).toContain('archon workflow resume run-paused-1'); + expect(body.message).not.toContain('originating thread'); expect(mockHandleMessage).not.toHaveBeenCalled(); expect(mockGetConversationById).not.toHaveBeenCalled(); }); - test('approve: skips dispatch when parent conversation no longer exists', async () => { + test('approve: skips dispatch and surfaces CLI hint when parent conversation no longer exists', async () => { mockGetWorkflowRun.mockResolvedValueOnce({ ...MOCK_PAUSED_RUN, parent_conversation_id: 'deleted-conv-uuid', @@ -1451,11 +1458,12 @@ describe('approve/reject auto-resume', () => { expect(response.status).toBe(200); const body = (await response.json()) as { message: string }; - expect(body.message).toContain('Send a message to continue'); + expect(body.message).toContain('archon workflow resume run-paused-1'); + expect(body.message).toContain('no longer available'); expect(mockHandleMessage).not.toHaveBeenCalled(); }); - test('approve: skips dispatch when parent conversation is on a non-web platform', async () => { + test('approve: skips dispatch and surfaces both options when parent is a non-web platform', async () => { // A Slack/Telegram/GitHub-sourced run being approved via the dashboard // must not route through dispatchToOrchestrator — that helper is wired // to the web adapter + lock manager, so dispatching a Slack thread_ts @@ -1479,8 +1487,38 @@ describe('approve/reject auto-resume', () => { expect(response.status).toBe(200); const body = (await response.json()) as { message: string }; - // Same fallback text as no-parent case — user re-runs from the originating platform. - expect(body.message).toContain('Send a message to continue'); + // Chat-platform parents get both routes — re-run on origin or use the CLI. + expect(body.message).toContain('originating thread'); + expect(body.message).toContain('archon workflow resume run-paused-1'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('approve: surfaces dispatch-failed hint when an unexpected error occurs in the resume path', async () => { + // The catch branch inside tryAutoResumeAfterGate is the safety net for + // any throw on the resume path — DB lookups, the dispatch call, or any + // future addition. Trigger it via a parent-conversation lookup throw, + // which is the most reproducible path in tests. + // (handleMessage throws are swallowed inside dispatchToOrchestrator's + // own try/catch, so they never reach this branch.) + mockGetWorkflowRun.mockResolvedValueOnce({ + ...MOCK_PAUSED_RUN, + parent_conversation_id: 'web-parent-uuid', + }); + mockGetConversationById.mockImplementationOnce(async () => { + throw new Error('database connection lost'); + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/approve', { + method: 'POST', + body: JSON.stringify({ comment: 'LGTM' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Auto-resume dispatch failed'); + expect(body.message).toContain('archon workflow resume run-paused-1'); expect(mockHandleMessage).not.toHaveBeenCalled(); }); @@ -1545,4 +1583,107 @@ describe('approve/reject auto-resume', () => { expect(mockHandleMessage).not.toHaveBeenCalled(); expect(mockCancelWorkflowRun).toHaveBeenCalledWith('run-paused-1'); }); + + // --------------------------------------------------------------------- + // The four non-resumed branches on reject. Mirror the approve tests but + // for the on_reject path — the user-visible message must name the next + // step concretely (`archon workflow resume `), not vague "will run + // on resume" text. Regression coverage for #1522. + // --------------------------------------------------------------------- + + function pausedRunWithOnReject(parentConversationId: string | null): MockWorkflowRun { + return { + ...MOCK_PAUSED_RUN, + parent_conversation_id: parentConversationId, + metadata: { + approval: { + type: 'approval', + nodeId: 'review-gate', + message: 'Approve?', + onRejectPrompt: 'Fix: $REJECTION_REASON', + onRejectMaxAttempts: 3, + }, + rejection_count: 0, + }, + }; + } + + test('reject: skips dispatch and surfaces CLI hint when parent_conversation_id is null', async () => { + mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject(null)); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'tests missing' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('archon workflow resume run-paused-1'); + expect(body.message).toContain('apply the on-reject prompt'); + expect(body.message).not.toContain('originating thread'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('reject: surfaces both options when parent is a non-web platform', async () => { + mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject('telegram-parent-uuid')); + mockGetConversationById.mockResolvedValueOnce({ + id: 'telegram-parent-uuid', + platform_conversation_id: '12345', + platform_type: 'telegram', + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'tests missing' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('originating thread'); + expect(body.message).toContain('archon workflow resume run-paused-1'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('reject: surfaces "no longer available" hint when parent conversation is deleted', async () => { + mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject('deleted-parent-uuid')); + mockGetConversationById.mockResolvedValueOnce(null); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'tests missing' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('no longer available'); + expect(body.message).toContain('archon workflow resume run-paused-1'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); + + test('reject: surfaces dispatch-failed hint when an unexpected error occurs in the resume path', async () => { + // Mirror of the approve test — exercise the catch via a DB throw. + mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject('web-parent-uuid')); + mockGetConversationById.mockImplementationOnce(async () => { + throw new Error('database connection lost'); + }); + + const { app } = makeApp(); + const response = await app.request('/api/workflows/runs/run-paused-1/reject', { + method: 'POST', + body: JSON.stringify({ reason: 'tests missing' }), + headers: { 'Content-Type': 'application/json' }, + }); + + expect(response.status).toBe(200); + const body = (await response.json()) as { message: string }; + expect(body.message).toContain('Auto-resume dispatch failed'); + expect(body.message).toContain('archon workflow resume run-paused-1'); + expect(mockHandleMessage).not.toHaveBeenCalled(); + }); });