Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 69 additions & 24 deletions packages/server/src/routes/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1035,31 +1035,41 @@ export function registerApiRoutes(
return { accepted: true, status: result.status };
}

/**
* Result of an auto-resume attempt. The `reason` discriminator on the
* non-resumed branch tells the caller which guard fired so the response
* message can name the right manual remedy (CLI command vs. send-a-message
* vs. dispatch retry).
*/
type AutoResumeResult =
| { resumed: true }
| {
resumed: false;
reason: 'no_parent' | 'no_platform_conv' | 'non_web_parent' | 'dispatch_failed';
};

/**
* Re-enter the orchestrator after a paused approval gate is resolved, so a
* web-dispatched workflow continues (approve) or runs its on_reject prompt
* (reject) without the user having to re-run the workflow command. The CLI's
* `workflowApproveCommand` / `workflowRejectCommand` already auto-resume via
* `workflowRunCommand({ resume: true })`; this is the web-side equivalent.
* (reject) without the user having to re-run the workflow command.
*
* Returns `true` when a resume dispatch was initiated, `false` otherwise (no
* parent conversation on the run, parent conversation deleted, parent was on
* a non-web platform, or dispatch threw). Failures are non-fatal: the gate
* decision is recorded regardless; when this returns `false` the response
* text instructs the user to re-run the workflow command.
* Returns a structured result so callers can construct an actionable
* response when auto-resume is skipped — historically the response text
* said only "send a message to continue" / "will run on resume", which is
* meaningless to a web-UI user whose run was started from the CLI.
*
* **Cross-adapter guard**: only web-sourced parents qualify.
* `dispatchToOrchestrator` is wired to the web adapter + its lock manager,
* so a Slack / Telegram / GitHub / Discord run being approved from the
* dashboard must not route through it — the Slack thread would never see
* the resumed output. Non-web parents skip auto-resume and the originating
* platform's own re-run flow applies.
* the resumed output. Non-web parents skip auto-resume and the caller
* surfaces a manual resume hint instead.
*/
async function tryAutoResumeAfterGate(
run: WorkflowRun,
action: 'approve' | 'reject'
): Promise<boolean> {
if (!run.parent_conversation_id) return false;
): Promise<AutoResumeResult> {
if (!run.parent_conversation_id) return { resumed: false, reason: 'no_parent' };
// Literal event names per action — greppable for ops tooling. Keeping the
// branch explicit rather than templating avoids the earlier 3-segment
// `api.workflow_*.dispatched` shape that broke `{domain}.{action}_{state}`.
Expand Down Expand Up @@ -1098,7 +1108,7 @@ export function registerApiRoutes(
},
events.skippedNoPlatformConv
);
return false;
return { resumed: false, reason: 'no_platform_conv' };
}
if (parentConv.platform_type !== 'web') {
getLog().debug(
Expand All @@ -1109,18 +1119,51 @@ export function registerApiRoutes(
},
events.skippedNonWebParent
);
return false;
return { resumed: false, reason: 'non_web_parent' };
}
const resumeMessage = `/workflow run ${run.workflow_name} ${run.user_message ?? ''}`.trim();
await dispatchToOrchestrator(platformConvId, resumeMessage);
getLog().info(
{ runId: run.id, workflowName: run.workflow_name, platformConvId },
events.dispatched
);
return true;
return { resumed: true };
} catch (err) {
getLog().warn({ err: err as Error, runId: run.id }, events.failed);
return false;
return { resumed: false, reason: 'dispatch_failed' };
}
}

/**
* Build the user-facing manual-resume hint for the cases where
* `tryAutoResumeAfterGate` did not dispatch. The message is shaped to the
* specific guard that fired so the user gets an actionable next step
* instead of vague text. `archon workflow resume <id>` is the universal
* fallback — it works regardless of how the run was started.
*/
function manualResumeMessage(
runId: string,
reason: Extract<AutoResumeResult, { resumed: false }>['reason'],
action: 'approve' | 'reject'
): string {
const verb = action === 'approve' ? 'continue' : 'apply the on-reject prompt';
const cliCommand = `\`archon workflow resume ${runId}\``;
switch (reason) {
case 'no_parent':
// CLI-originated run (no parent conversation linked).
return `Run ${cliCommand} from the working directory to ${verb}.`;
case 'non_web_parent':
// Slack / Telegram / GitHub / Discord parent — re-running on the
// originating platform is the natural path; the CLI is a fallback.
return `Send a message in the originating thread, or run ${cliCommand} from a terminal to ${verb}.`;
case 'no_platform_conv':
// Parent conversation existed but is gone (deleted) — only the CLI
// path remains.
return `Run ${cliCommand} from a terminal to ${verb} (the originating conversation is no longer available).`;
case 'dispatch_failed':
// Dispatch threw; the gate decision was recorded but the resume
// didn't kick off. User retries via CLI.
return `Auto-resume dispatch failed. Run ${cliCommand} from a terminal to ${verb}.`;
}
}

Expand Down Expand Up @@ -1907,11 +1950,13 @@ export function registerApiRoutes(
if (!RESUMABLE_WORKFLOW_STATUSES.includes(run.status)) {
return apiError(c, 400, `Cannot resume workflow in '${run.status}' status`);
}
// Run is already failed — the next invocation on the same path auto-resumes
// Run is already failed — the next invocation on the same path auto-resumes.
// The endpoint itself does not execute the resume; that's still a CLI/orchestrator
// responsibility. Surface the exact command so a web-UI user isn't left guessing.
const pathInfo = run.working_path ? ` at \`${run.working_path}\`` : '';
return c.json({
success: true,
message: `Workflow run ready to resume: ${run.workflow_name}${pathInfo}. Re-run the workflow to auto-resume from completed nodes.`,
message: `Workflow run ready to resume: ${run.workflow_name}${pathInfo}. Run \`archon workflow resume ${runId}\` from the working directory to continue from completed nodes.`,
});
} catch (error) {
getLog().error({ err: error, runId }, 'api.workflow_run_resume_failed');
Expand Down Expand Up @@ -1990,13 +2035,13 @@ export function registerApiRoutes(
// `parent_conversation_id` on the run (set by orchestrator-agent for any
// web-dispatched workflow — foreground, interactive, and background via
// the pre-created run) and a web-platform parent (guarded in the helper).
const autoResumed = await tryAutoResumeAfterGate(run, 'approve');
const auto = await tryAutoResumeAfterGate(run, 'approve');

return c.json({
success: true,
message: autoResumed
message: auto.resumed
? `Workflow approved: ${run.workflow_name}. Resuming workflow.`
: `Workflow approved: ${run.workflow_name}. Send a message to continue.`,
: `Workflow approved: ${run.workflow_name}. ${manualResumeMessage(runId, auto.reason, 'approve')}`,
});
} catch (error) {
getLog().error({ err: error, runId }, 'api.workflow_run_approve_failed');
Expand Down Expand Up @@ -2045,13 +2090,13 @@ export function registerApiRoutes(
// without requiring the user to re-run the workflow command. Mirrors
// what `workflowRejectCommand` does in the CLI. Same cross-adapter
// guard as approve — only web parents auto-resume.
const autoResumed = await tryAutoResumeAfterGate(run, 'reject');
const auto = await tryAutoResumeAfterGate(run, 'reject');

return c.json({
success: true,
message: autoResumed
message: auto.resumed
? `Workflow rejected: ${run.workflow_name}. Running on-reject prompt.`
: `Workflow rejected: ${run.workflow_name}. On-reject prompt will run on resume.`,
: `Workflow rejected: ${run.workflow_name}. ${manualResumeMessage(runId, auto.reason, 'reject')}`,
});
}

Expand Down
157 changes: 149 additions & 8 deletions packages/server/src/routes/api.workflow-runs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,8 @@ describe('POST /api/workflows/runs/:runId/resume', () => {
const body = (await response.json()) as { success: boolean; message: string };
expect(body.success).toBe(true);
expect(body.message).toContain('ready to resume');
// Surface the exact CLI command so the web-UI user isn't left guessing.
expect(body.message).toContain('archon workflow resume run-uuid-4');
});
});

Expand Down Expand Up @@ -1326,7 +1328,10 @@ describe('POST /api/workflows/runs/:runId/reject', () => {
expect(response.status).toBe(200);
const body = (await response.json()) as { success: boolean; message: string };
expect(body.success).toBe(true);
expect(body.message).toContain('On-reject prompt');
// MOCK_PAUSED_RUN has parent_conversation_id: null, so this run takes the
// CLI-hint branch — the message names the explicit resume command.
expect(body.message).toContain('on-reject prompt');
expect(body.message).toContain('archon workflow resume run-on-reject');
expect(mockUpdateWorkflowRun).toHaveBeenCalledWith('run-on-reject', {
status: 'failed',
metadata: { rejection_reason: 'needs more tests', rejection_count: 1 },
Expand Down Expand Up @@ -1415,7 +1420,7 @@ describe('approve/reject auto-resume', () => {
expect(dispatchedMessage).toBe('/workflow run deploy Deploy feature X');
});

test('approve: skips dispatch when parent_conversation_id is null (CLI-dispatched run)', async () => {
test('approve: skips dispatch and surfaces CLI hint when parent_conversation_id is null (CLI-dispatched run)', async () => {
mockGetWorkflowRun.mockResolvedValueOnce({
...MOCK_PAUSED_RUN,
parent_conversation_id: null,
Expand All @@ -1430,12 +1435,14 @@ describe('approve/reject auto-resume', () => {

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
expect(body.message).toContain('Send a message to continue');
// CLI-originated runs get a direct CLI command — no parent thread to send a message to.
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(body.message).not.toContain('originating thread');
expect(mockHandleMessage).not.toHaveBeenCalled();
expect(mockGetConversationById).not.toHaveBeenCalled();
});

test('approve: skips dispatch when parent conversation no longer exists', async () => {
test('approve: skips dispatch and surfaces CLI hint when parent conversation no longer exists', async () => {
mockGetWorkflowRun.mockResolvedValueOnce({
...MOCK_PAUSED_RUN,
parent_conversation_id: 'deleted-conv-uuid',
Expand All @@ -1451,11 +1458,12 @@ describe('approve/reject auto-resume', () => {

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
expect(body.message).toContain('Send a message to continue');
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(body.message).toContain('no longer available');
expect(mockHandleMessage).not.toHaveBeenCalled();
});

test('approve: skips dispatch when parent conversation is on a non-web platform', async () => {
test('approve: skips dispatch and surfaces both options when parent is a non-web platform', async () => {
// A Slack/Telegram/GitHub-sourced run being approved via the dashboard
// must not route through dispatchToOrchestrator — that helper is wired
// to the web adapter + lock manager, so dispatching a Slack thread_ts
Expand All @@ -1479,8 +1487,38 @@ describe('approve/reject auto-resume', () => {

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
// Same fallback text as no-parent case — user re-runs from the originating platform.
expect(body.message).toContain('Send a message to continue');
// Chat-platform parents get both routes — re-run on origin or use the CLI.
expect(body.message).toContain('originating thread');
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(mockHandleMessage).not.toHaveBeenCalled();
});

test('approve: surfaces dispatch-failed hint when an unexpected error occurs in the resume path', async () => {
// The catch branch inside tryAutoResumeAfterGate is the safety net for
// any throw on the resume path — DB lookups, the dispatch call, or any
// future addition. Trigger it via a parent-conversation lookup throw,
// which is the most reproducible path in tests.
// (handleMessage throws are swallowed inside dispatchToOrchestrator's
// own try/catch, so they never reach this branch.)
mockGetWorkflowRun.mockResolvedValueOnce({
...MOCK_PAUSED_RUN,
parent_conversation_id: 'web-parent-uuid',
});
mockGetConversationById.mockImplementationOnce(async () => {
throw new Error('database connection lost');
});

const { app } = makeApp();
const response = await app.request('/api/workflows/runs/run-paused-1/approve', {
method: 'POST',
body: JSON.stringify({ comment: 'LGTM' }),
headers: { 'Content-Type': 'application/json' },
});

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
expect(body.message).toContain('Auto-resume dispatch failed');
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(mockHandleMessage).not.toHaveBeenCalled();
});

Expand Down Expand Up @@ -1545,4 +1583,107 @@ describe('approve/reject auto-resume', () => {
expect(mockHandleMessage).not.toHaveBeenCalled();
expect(mockCancelWorkflowRun).toHaveBeenCalledWith('run-paused-1');
});

// ---------------------------------------------------------------------
// The four non-resumed branches on reject. Mirror the approve tests but
// for the on_reject path — the user-visible message must name the next
// step concretely (`archon workflow resume <id>`), not vague "will run
// on resume" text. Regression coverage for #1522.
// ---------------------------------------------------------------------

function pausedRunWithOnReject(parentConversationId: string | null): MockWorkflowRun {
return {
...MOCK_PAUSED_RUN,
parent_conversation_id: parentConversationId,
metadata: {
approval: {
type: 'approval',
nodeId: 'review-gate',
message: 'Approve?',
onRejectPrompt: 'Fix: $REJECTION_REASON',
onRejectMaxAttempts: 3,
},
rejection_count: 0,
},
};
}

test('reject: skips dispatch and surfaces CLI hint when parent_conversation_id is null', async () => {
mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject(null));

const { app } = makeApp();
const response = await app.request('/api/workflows/runs/run-paused-1/reject', {
method: 'POST',
body: JSON.stringify({ reason: 'tests missing' }),
headers: { 'Content-Type': 'application/json' },
});

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(body.message).toContain('apply the on-reject prompt');
expect(body.message).not.toContain('originating thread');
expect(mockHandleMessage).not.toHaveBeenCalled();
});

test('reject: surfaces both options when parent is a non-web platform', async () => {
mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject('telegram-parent-uuid'));
mockGetConversationById.mockResolvedValueOnce({
id: 'telegram-parent-uuid',
platform_conversation_id: '12345',
platform_type: 'telegram',
});

const { app } = makeApp();
const response = await app.request('/api/workflows/runs/run-paused-1/reject', {
method: 'POST',
body: JSON.stringify({ reason: 'tests missing' }),
headers: { 'Content-Type': 'application/json' },
});

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
expect(body.message).toContain('originating thread');
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(mockHandleMessage).not.toHaveBeenCalled();
});

test('reject: surfaces "no longer available" hint when parent conversation is deleted', async () => {
mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject('deleted-parent-uuid'));
mockGetConversationById.mockResolvedValueOnce(null);

const { app } = makeApp();
const response = await app.request('/api/workflows/runs/run-paused-1/reject', {
method: 'POST',
body: JSON.stringify({ reason: 'tests missing' }),
headers: { 'Content-Type': 'application/json' },
});

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
expect(body.message).toContain('no longer available');
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(mockHandleMessage).not.toHaveBeenCalled();
});

test('reject: surfaces dispatch-failed hint when an unexpected error occurs in the resume path', async () => {
// Mirror of the approve test — exercise the catch via a DB throw.
mockGetWorkflowRun.mockResolvedValueOnce(pausedRunWithOnReject('web-parent-uuid'));
mockGetConversationById.mockImplementationOnce(async () => {
throw new Error('database connection lost');
});

const { app } = makeApp();
const response = await app.request('/api/workflows/runs/run-paused-1/reject', {
method: 'POST',
body: JSON.stringify({ reason: 'tests missing' }),
headers: { 'Content-Type': 'application/json' },
});

expect(response.status).toBe(200);
const body = (await response.json()) as { message: string };
expect(body.message).toContain('Auto-resume dispatch failed');
expect(body.message).toContain('archon workflow resume run-paused-1');
expect(mockHandleMessage).not.toHaveBeenCalled();
});
});