diff --git a/CHANGELOG.md b/CHANGELOG.md index 63d98f8264..9dabeac1d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **`$LOOP_PREV_OUTPUT` workflow variable (loop nodes only)** — exposes the previous iteration's cleaned output (after completion-tag stripping) to the current iteration's prompt. Empty on the first iteration and on the first iteration after resuming from an interactive approval gate. Enables `fresh_context: true` loops to reference what the prior pass said or did without carrying full session history. (#1367) + ## [0.3.9] - 2026-04-22 First release with working compiled binaries since v0.3.6. Both v0.3.7 and v0.3.8 were tagged but neither shipped release assets — v0.3.7 was blocked by two genuine binary-runtime bugs (Pi SDK's module-init crash + Bun `--bytecode` producing broken output), and v0.3.8 was blocked by an unrelated CI smoke-test regression where `release.yml`'s Claude resolver test required an `origin` remote that the fresh `git init` test repo didn't have. Both superseded tags remain for history; their GitHub Releases were deleted at the time of tagging so `releases/latest` fell back to v0.3.6 throughout, keeping `install.sh` and Homebrew safe. v0.3.9 is what users actually install. diff --git a/CLAUDE.md b/CLAUDE.md index f2afd41e9c..17c47b1f60 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -689,6 +689,7 @@ async function createSession(conversationId: string, codebaseId: string) { - `$DOCS_DIR` - Documentation directory path; configured via `docs.path` in `.archon/config.yaml`. Defaults to `docs/`. Never throws. - `$LOOP_USER_INPUT` - User feedback provided via `/workflow approve ` at an interactive loop gate. Only populated on the first iteration of a resumed interactive loop; empty string on all other iterations. - `$REJECTION_REASON` - Reviewer feedback provided via `/workflow reject ` at an approval gate.
Only populated in `on_reject` prompts; empty string elsewhere. +- `$LOOP_PREV_OUTPUT` - Cleaned output of the previous loop iteration (loop nodes only). Empty string on the first iteration (no prior output exists) and on the first iteration after resuming from an interactive approval gate. Useful for `fresh_context: true` loops that need to reference what the previous pass produced or why it failed without carrying full session history. **Command Types:** diff --git a/packages/docs-web/src/content/docs/guides/loop-nodes.md b/packages/docs-web/src/content/docs/guides/loop-nodes.md index 0e9e3eebc3..1420c9670a 100644 --- a/packages/docs-web/src/content/docs/guides/loop-nodes.md +++ b/packages/docs-web/src/content/docs/guides/loop-nodes.md @@ -90,10 +90,13 @@ substitution: | `$WORKFLOW_ID` | Current workflow run ID | | `$nodeId.output` | Output from upstream nodes | | `$LOOP_USER_INPUT` | User feedback provided via `/workflow approve ` at an interactive loop gate. Only populated on the first iteration of a resumed interactive loop; empty string on all other iterations. | +| `$LOOP_PREV_OUTPUT` | Cleaned output of the previous loop iteration. Empty string on the first iteration and on the first iteration after resuming from an interactive approval gate. Useful for `fresh_context: true` loops that need to reference what the previous pass produced or why it failed. | `$USER_MESSAGE` is particularly important for `fresh_context: true` loops — the agent has no memory of prior iterations, so the prompt must include all -context needed to continue the work. +context needed to continue the work. `$LOOP_PREV_OUTPUT` complements this by +exposing the previous iteration's own output without forcing the engine to +thread the session. ### `until` @@ -177,6 +180,39 @@ The prompt tells the agent it has no memory and must bootstrap from files. window exhaustion is a risk. The agent reads `.archon/ralph/*/prd.json` or similar tracking files to know what's done and what's next.
+### Retry-on-failure with `$LOOP_PREV_OUTPUT` + +When `fresh_context: true` is needed (to keep each iteration's context window +small) but the agent still benefits from knowing what the previous pass said — +typical of implement→validate or generate→review loops — inject the previous +iteration's output via `$LOOP_PREV_OUTPUT`: + +```yaml +- id: implement-and-qa + loop: + prompt: | + Implement the plan, then run `bun run validate`. + If checks fail, fix the failures. + + Previous iteration output (empty on first pass): + $LOOP_PREV_OUTPUT + + Use the above to focus your fixes. When all checks pass output: + QA_PASS + until: QA_PASS + fresh_context: true + max_iterations: 3 +``` + +In a continuous run, the first iteration sees `$LOOP_PREV_OUTPUT` substituted +to an empty string; iterations 2+ see the previous iteration's cleaned output +(after completion tags are stripped). + +When a loop resumes from an interactive approval gate, the first executed +iteration after the resume also receives an empty `$LOOP_PREV_OUTPUT` even if +its numeric iteration is 2+ — the prior output lived in a different run and is +not carried across the gate. + ### Accumulating context The agent builds on its own prior work across iterations. Good for iterative diff --git a/packages/docs-web/src/content/docs/reference/variables.md b/packages/docs-web/src/content/docs/reference/variables.md index f32779cb6c..c5cf879bed 100644 --- a/packages/docs-web/src/content/docs/reference/variables.md +++ b/packages/docs-web/src/content/docs/reference/variables.md @@ -27,6 +27,7 @@ These variables are substituted by the workflow executor in all node types (`com | `$ISSUE_CONTEXT` | Same as `$CONTEXT` | Alias | | `$LOOP_USER_INPUT` | User feedback from an interactive loop approval gate | Only populated on the first iteration of a resumed interactive loop. Empty string on all other iterations | | `$REJECTION_REASON` | Reviewer feedback from an approval node rejection | Only available in `on_reject` prompts.
Empty string elsewhere | +| `$LOOP_PREV_OUTPUT` | Cleaned output of the previous loop iteration (loop nodes only) | Empty string on the first iteration and on the first iteration after resuming from an interactive approval gate. Useful for `fresh_context: true` loops that need to reference the prior pass without carrying the full session history | ### Context Variable Behavior @@ -88,7 +89,7 @@ nodes: Variables are substituted in a defined order: -1. **Workflow variables** -- `$WORKFLOW_ID`, `$USER_MESSAGE`, `$ARGUMENTS`, `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, `$LOOP_USER_INPUT`, `$REJECTION_REASON` +1. **Workflow variables** -- `$WORKFLOW_ID`, `$USER_MESSAGE`, `$ARGUMENTS`, `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, `$LOOP_USER_INPUT`, `$REJECTION_REASON`, `$LOOP_PREV_OUTPUT` 2. **Context variables** -- `$CONTEXT`, `$EXTERNAL_CONTEXT`, `$ISSUE_CONTEXT` 3. **Node output references** -- `$nodeId.output`, `$nodeId.output.field` @@ -107,4 +108,5 @@ Positional arguments (`$1` through `$9`) are substituted separately by the comma | `$CONTEXT` / aliases | Yes | No | No | | `$LOOP_USER_INPUT` | Yes (loop nodes) | No | No | | `$REJECTION_REASON` | Yes (`on_reject` only) | No | No | +| `$LOOP_PREV_OUTPUT` | Yes (loop nodes) | No | No | | `$nodeId.output` | Yes (DAG nodes) | No | Yes | diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index b4717e9565..0e57552548 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -3140,6 +3140,266 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { expect(mockSendQueryDag.mock.calls.length).toBe(3); }); + it('substitutes $LOOP_PREV_OUTPUT with previous iteration output (empty on iter 1)', async () => { + // Iteration 1 emits a distinctive output, iteration 2 emits the completion signal. + // We then assert the prompt sent to the AI: iteration 1 strips $LOOP_PREV_OUTPUT + // to empty, iteration 2 receives iteration 1's cleaned output.
+ let callCount = 0; + mockSendQueryDag.mockImplementation(function* () { + callCount++; + if (callCount === 1) { + yield { type: 'assistant', content: 'Iter1 output: 2 type errors in users.ts' }; + yield { type: 'result', sessionId: 'loop-session-1' }; + } else { + yield { type: 'assistant', content: 'All fixed. COMPLETE' }; + yield { type: 'result', sessionId: 'loop-session-2' }; + } + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-prev-output', + nodes: [ + { + id: 'fix-loop', + loop: { + prompt: 'Previous output: <<$LOOP_PREV_OUTPUT>>. Fix and emit COMPLETE.', + until: 'COMPLETE', + max_iterations: 5, + fresh_context: true, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(2); + const promptIter1 = mockSendQueryDag.mock.calls[0][0] as string; + const promptIter2 = mockSendQueryDag.mock.calls[1][0] as string; + // Iteration 1: $LOOP_PREV_OUTPUT substitutes to empty string. + expect(promptIter1).toContain('Previous output: <<>>.'); + // Iteration 2: receives iteration 1's cleaned output. + expect(promptIter2).toContain( + 'Previous output: <>.' + ); + }); + + it('strips tags from $LOOP_PREV_OUTPUT (uses cleaned output)', async () => { + let callCount = 0; + mockSendQueryDag.mockImplementation(function* () { + callCount++; + if (callCount === 1) { + // Iteration 1 includes a non-completion XML tag in its output. The cleaned + // output (after stripCompletionTags) drops ... blocks. + // We use a non-matching signal here so iteration 1 does NOT complete. + yield { + type: 'assistant', + content: 'Real work output. 
NOT_DONE_YET', + }; + yield { type: 'result', sessionId: 'loop-session-1' }; + } else { + yield { type: 'assistant', content: 'Done. COMPLETE' }; + yield { type: 'result', sessionId: 'loop-session-2' }; + } + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-prev-clean', + nodes: [ + { + id: 'fix-loop', + loop: { + prompt: 'PREV=[$LOOP_PREV_OUTPUT]', + until: 'COMPLETE', + max_iterations: 5, + fresh_context: true, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(2); + const promptIter2 = mockSendQueryDag.mock.calls[1][0] as string; + // The previous-output payload must be the *cleaned* output — no tags. + expect(promptIter2).toContain('PREV=[Real work output.'); + expect(promptIter2).not.toContain(''); + }); + + it('$LOOP_PREV_OUTPUT is empty on the first iteration after interactive resume', async () => { + // Regression guard for the resume-from-approval path: when an interactive + // loop pauses at the approval gate, the prior `lastIterationOutput` lives + // in a separate process and is not persisted. On resume, the executor must + // substitute $LOOP_PREV_OUTPUT to '' on the first resumed iteration — + // never to whatever the paused run produced. + // + // Wirasm-suggested shape (PR #1367 review): two executeDagWorkflow calls. + // The first call pauses at the gate after iteration 1; the second call + // resumes with metadata.approval populated and runs iteration 2. 
+ + // ---- Call 1: fresh run, iteration 1 emits no completion → pauses at gate + mockSendQueryDag.mockImplementationOnce(function* () { + yield { type: 'assistant', content: 'Iter1 output: 2 type errors in users.ts' }; + yield { type: 'result', sessionId: 'loop-session-1' }; + }); + const mockDeps1 = createMockDeps(); + const platform1 = createMockPlatform(); + const freshRun = makeWorkflowRun('resume-prev-fresh-run'); + + await executeDagWorkflow( + mockDeps1, + platform1, + 'conv-dag', + testDir, + { + name: 'interactive-loop-resume-prev-output', + nodes: [ + { + id: 'refine', + loop: { + prompt: + 'User: $LOOP_USER_INPUT. PREV=<<$LOOP_PREV_OUTPUT>>. Continue or emit COMPLETE.', + until: 'COMPLETE', + max_iterations: 10, + interactive: true, + gate_message: 'Review and provide feedback.', + }, + }, + ], + }, + freshRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // First iteration of a fresh interactive loop: $LOOP_PREV_OUTPUT empty; + // $LOOP_USER_INPUT empty (no user has spoken yet). + expect(mockSendQueryDag.mock.calls.length).toBe(1); + const promptIter1 = mockSendQueryDag.mock.calls[0][0] as string; + expect(promptIter1).toContain('PREV=<<>>.'); + expect(promptIter1).toContain('User: .'); + // Fresh interactive loop must pause at the gate, not return early. + const pauseCalls1 = ( + mockDeps1.store.pauseWorkflowRun as Mock< + (id: string, ctx: Record) => Promise + > + ).mock.calls; + expect(pauseCalls1.length).toBe(1); + expect(pauseCalls1[0][1]).toMatchObject({ + type: 'interactive_loop', + nodeId: 'refine', + iteration: 1, + }); + + // ---- Call 2: resumed run — metadata carries iter 1 + user input. + // iter 2 emits the completion signal so the loop exits cleanly. + mockSendQueryDag.mockImplementationOnce(function* () { + yield { type: 'assistant', content: 'All clear. 
COMPLETE' }; + yield { type: 'result', sessionId: 'loop-session-2' }; + }); + const mockDeps2 = createMockDeps(); + const platform2 = createMockPlatform(); + const resumedRun = makeWorkflowRun('resume-prev-resume-run', { + metadata: { + approval: { + type: 'interactive_loop', + nodeId: 'refine', + iteration: 1, + sessionId: 'loop-session-1', + message: 'Review and provide feedback.', + }, + loop_user_input: 'looks good, ship it', + }, + }); + + await executeDagWorkflow( + mockDeps2, + platform2, + 'conv-dag', + testDir, + { + name: 'interactive-loop-resume-prev-output', + nodes: [ + { + id: 'refine', + loop: { + prompt: + 'User: $LOOP_USER_INPUT. PREV=<<$LOOP_PREV_OUTPUT>>. Continue or emit COMPLETE.', + until: 'COMPLETE', + max_iterations: 10, + interactive: true, + gate_message: 'Review and provide feedback.', + }, + }, + ], + }, + resumedRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // Second executeDagWorkflow call started a fresh sendQuery generator (mock + // call index 1 across the two runs). The resumed iteration must NOT carry + // the prior process's iter-1 output through $LOOP_PREV_OUTPUT — it must + // substitute to ''. + expect(mockSendQueryDag.mock.calls.length).toBe(2); + const promptResumeIter = mockSendQueryDag.mock.calls[1][0] as string; + expect(promptResumeIter).toContain('PREV=<<>>.'); + expect(promptResumeIter).not.toContain('Iter1 output: 2 type errors'); + // The resume's user input flows through on the first resumed iteration. + expect(promptResumeIter).toContain('User: looks good, ship it.'); + // Resume call exits via completion, not via a second pause at the gate. 
+ const pauseCalls2 = ( + mockDeps2.store.pauseWorkflowRun as Mock< + (id: string, ctx: Record) => Promise + > + ).mock.calls; + expect(pauseCalls2.length).toBe(0); + }); + it('fails when max_iterations exceeded', async () => { mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'Still working...' }; diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index 419a9066f6..4f2f845f68 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -1766,6 +1766,10 @@ async function executeLoopNode( // Build prompt — substituteWorkflowVariables throws if $BASE_BRANCH referenced but empty // Pass loopUserInput on the first resumed iteration; '' on all others (non-interactive // or subsequent iterations) so $LOOP_USER_INPUT substitutes to empty string explicitly. + // $LOOP_PREV_OUTPUT carries the previous iteration's cleaned output and is empty on + // the first iteration (no prior output exists). Across an interactive resume, the + // executor starts a fresh `lastIterationOutput` variable, so the first iteration of + // the resume also receives an empty $LOOP_PREV_OUTPUT. const { prompt: substitutedPrompt } = substituteWorkflowVariables( loop.prompt, workflowRun.id, @@ -1774,7 +1778,9 @@ async function executeLoopNode( baseBranch, docsDir, issueContext, - i === startIteration ? loopUserInput : '' + i === startIteration ? loopUserInput : '', + undefined, // rejectionReason + i === startIteration ? 
'' : lastIterationOutput ); const finalPrompt = substituteNodeOutputRefs(substitutedPrompt, nodeOutputs); diff --git a/packages/workflows/src/executor-shared.test.ts b/packages/workflows/src/executor-shared.test.ts index 17c93cc605..85d6211a37 100644 --- a/packages/workflows/src/executor-shared.test.ts +++ b/packages/workflows/src/executor-shared.test.ts @@ -252,6 +252,50 @@ describe('substituteWorkflowVariables', () => { ); expect(prompt).toBe('Fix: '); }); + + it('replaces $LOOP_PREV_OUTPUT with the previous iteration output', () => { + const { prompt } = substituteWorkflowVariables( + 'Last pass said:\n$LOOP_PREV_OUTPUT', + 'run-1', + 'msg', + '/tmp', + 'main', + 'docs/', + undefined, + undefined, + undefined, + 'QA failed: 2 type errors in users.ts' + ); + expect(prompt).toBe('Last pass said:\nQA failed: 2 type errors in users.ts'); + }); + + it('clears $LOOP_PREV_OUTPUT when not provided (first iteration)', () => { + const { prompt } = substituteWorkflowVariables( + 'Previous output: $LOOP_PREV_OUTPUT (end)', + 'run-1', + 'msg', + '/tmp', + 'main', + 'docs/' + ); + expect(prompt).toBe('Previous output: (end)'); + }); + + it('does not affect prompts that omit $LOOP_PREV_OUTPUT', () => { + const { prompt } = substituteWorkflowVariables( + 'Plain prompt with no loop variable.', + 'run-1', + 'msg', + '/tmp', + 'main', + 'docs/', + undefined, + undefined, + undefined, + 'unused previous output' + ); + expect(prompt).toBe('Plain prompt with no loop variable.'); + }); }); describe('buildPromptWithContext', () => { diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts index e88700d9cb..ff4d3836de 100644 --- a/packages/workflows/src/executor-shared.ts +++ b/packages/workflows/src/executor-shared.ts @@ -275,6 +275,9 @@ export const CONTEXT_VAR_PATTERN_STR = * - $LOOP_USER_INPUT - User feedback from interactive loop approval. 
Only populated on the * first iteration of a resumed interactive loop; empty string on all other iterations. * - $REJECTION_REASON - Reviewer feedback from approval node rejection (on_reject prompts only). + * - $LOOP_PREV_OUTPUT - Cleaned output of the previous loop iteration. Empty string on the + * first iteration (no prior output exists). Useful for fresh_context loops that need + * to reference what the previous pass produced or why it failed. * * When issueContext is undefined, context variables are replaced with empty string * to avoid sending literal "$CONTEXT" to the AI. @@ -288,7 +291,8 @@ export function substituteWorkflowVariables( docsDir: string, issueContext?: string, loopUserInput?: string, - rejectionReason?: string + rejectionReason?: string, + loopPrevOutput?: string ): { prompt: string; contextSubstituted: boolean } { // Fail fast if the prompt references $BASE_BRANCH but no base branch could be resolved if (!baseBranch && prompt.includes('$BASE_BRANCH')) { @@ -310,7 +314,8 @@ export function substituteWorkflowVariables( .replace(/\$BASE_BRANCH/g, baseBranch) .replace(/\$DOCS_DIR/g, resolvedDocsDir) .replace(/\$LOOP_USER_INPUT/g, loopUserInput ?? '') - .replace(/\$REJECTION_REASON/g, rejectionReason ?? ''); + .replace(/\$REJECTION_REASON/g, rejectionReason ?? '') + .replace(/\$LOOP_PREV_OUTPUT/g, loopPrevOutput ?? ''); // Check if context variables exist (use fresh regex to avoid lastIndex issues) const hasContextVariables = new RegExp(CONTEXT_VAR_PATTERN_STR).test(result);