diff --git a/get-shit-done/workflows/plan-review-convergence.md b/get-shit-done/workflows/plan-review-convergence.md index e593191539..e672589c26 100644 --- a/get-shit-done/workflows/plan-review-convergence.md +++ b/get-shit-done/workflows/plan-review-convergence.md @@ -120,7 +120,29 @@ Agent( Execute: Skill(skill='gsd-review', args='--phase {PHASE} {REVIEWER_FLAGS} {GSD_WS}') -Complete the full review workflow. Do NOT return until REVIEWS.md is committed.", +Complete the full review workflow. Do NOT return until REVIEWS.md is committed. + +--- RETURN CONTRACT (REQUIRED) --- + +Your final return message MUST end with these two sections, exactly as specified. +This is the contract the orchestrator relies on — do NOT skip or reformat. + +Section 1 — Machine-readable summary line (exact format, single line): + +CYCLE_SUMMARY: current_high= current_medium= + +Where counts HIGH-severity concerns that REMAIN UNRESOLVED in this cycle's findings: + - INCLUDE: newly raised HIGHs; PARTIALLY RESOLVED HIGHs; previously raised and still unresolved HIGHs + - EXCLUDE: explicitly RESOLVED/FULLY RESOLVED HIGHs; HIGH mentions in retrospective/summary tables comparing cycles; quoted excerpts from prior reviews + follows the same definition for MEDIUM severity. + +Section 2 — Current HIGH Concerns (for display on escalation): + +## Current HIGH Concerns +- +(If is 0, write exactly: None.) + +--- END RETURN CONTRACT ---", mode="auto" ) ``` @@ -132,13 +154,16 @@ REVIEWS_FILE=$(ls ${phase_dir}/${padded_phase}-REVIEWS.md 2>/dev/null) If REVIEWS_FILE is empty: Error — review agent did not produce REVIEWS.md. Exit. -### 5b. Check for HIGH Concerns +### 5b. Extract HIGH Count from Agent Return -```bash -HIGH_COUNT=$(grep -c '\*\*HIGH' "${REVIEWS_FILE}" 2>/dev/null || true) -HIGH_COUNT=${HIGH_COUNT:-0} -HIGH_LINES=$(grep -B0 -A1 '\*\*HIGH' "${REVIEWS_FILE}" 2>/dev/null) -``` +**Do NOT grep REVIEWS.md.** REVIEWS.md accumulates historical content across cycles; raw text counts are unreliable (a cycle 2 review that re-lists resolved HIGHs for audit would inflate the count and falsely trigger stall detection). Instead, extract the count from the review agent's RETURN MESSAGE using the CYCLE_SUMMARY contract. + +Parse the agent's final return message: + +- `HIGH_COUNT`: the integer matched by regex `CYCLE_SUMMARY:\s+current_high=(\d+)`. If no match, abort with: `Review agent did not honor the CYCLE_SUMMARY contract — cannot determine HIGH count. Retry or switch reviewer.` +- `HIGH_LINES`: the bullets under the `## Current HIGH Concerns` section of the return message (up to the next `##` heading or end of message). If the section contains only `None.`, set `HIGH_LINES=""`. + +Rationale: `gsd-plan-phase` uses the same data-flow pattern — it parses `gsd-plan-checker`'s structured return rather than re-reading PLAN.md. This keeps the count source-of-truth aligned with what the reviewer actually judged (current cycle only), avoiding false stalls from historical accumulation. **If HIGH_COUNT == 0 (converged):** @@ -246,7 +271,7 @@ After agent returns → go back to **step 5a** (review again). - [ ] Initial planning via Agent → Skill("gsd-plan-phase") if no plans exist - [ ] Review via Agent → Skill("gsd-review") — isolated, not inline - [ ] Replan via Agent → Skill("gsd-plan-phase --reviews") — isolated, not inline -- [ ] Orchestrator only does: init, loop control, grep HIGHs, stall detection, escalation +- [ ] Orchestrator only does: init, loop control, parse CYCLE_SUMMARY for HIGH count, stall detection, escalation - [ ] Each Agent fully completes its Skill before returning - [ ] Loop exits on: no HIGH concerns (converged) OR max cycles (escalation) - [ ] Stall detection reported when HIGH count not decreasing diff --git a/tests/plan-review-convergence.test.cjs b/tests/plan-review-convergence.test.cjs index b022d3df09..0ed3b526a3 100644 --- a/tests/plan-review-convergence.test.cjs +++ b/tests/plan-review-convergence.test.cjs @@ -3,8 +3,9 @@ * * Validates that the command source and workflow contain the key structural * elements required for correct cross-AI plan convergence loop behavior: - * initial planning gate, review agent spawning, HIGH count detection, - * stall detection, escalation gate, and STATE.md update on convergence. + * initial planning gate, review agent spawning, HIGH count extraction via + * CYCLE_SUMMARY contract, stall detection (behavioral), escalation gate + * (including --max-cycles 1 edge case), and STATE.md update on convergence. */ const { test, describe } = require('node:test'); @@ -107,6 +108,18 @@ describe('plan-review-convergence workflow: initialization (#2306)', () => { 'workflow must display a startup banner' ); }); + + test('workflow parses ARGUMENTS before calling gsd-tools init (PHASE must be known)', () => { + const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8'); + const parseIdx = workflow.search(/Parse (and Normalize )?Arguments/i); + const initIdx = workflow.search(/node\s+"?\$HOME[^"]*gsd-tools\.cjs"?\s+init\s+plan-phase/); + assert.ok(parseIdx > -1, 'workflow must have an ARGUMENTS parsing section'); + assert.ok(initIdx > -1, 'workflow must call gsd-tools.cjs init plan-phase'); + assert.ok( + parseIdx < initIdx, + 'Arguments parsing must come BEFORE init plan-phase so $PHASE is populated' + ); + }); }); // ─── Workflow: initial planning gate ────────────────────────────────────── @@ -148,10 +161,49 @@ describe('plan-review-convergence workflow: convergence loop (#2306)', () => { ); }); - test('workflow detects HIGH concerns by grepping REVIEWS.md', () => { + test('review agent spawn forwards --ws via {GSD_WS} (symmetric with replan agent)', () => { + // Extract the review agent spawn block (between "Cross-AI review" description and its closing mode="auto") + const reviewMatch = workflow.match(/description="Cross-AI review[\s\S]*?mode="auto"/); + assert.ok(reviewMatch, 'review agent spawn block must exist'); + assert.ok( + reviewMatch[0].includes('{GSD_WS}'), + 'review agent spawn must include {GSD_WS} in the skill args — asymmetry with replan agent is a correctness bug when user passes --ws' + ); + }); + + test('replan agent spawn forwards --ws via {GSD_WS}', () => { + const replanMatch = workflow.match(/description="Replan[\s\S]*?mode="auto"/); + assert.ok(replanMatch, 'replan agent spawn block must exist'); + assert.ok( + replanMatch[0].includes('{GSD_WS}'), + 'replan agent must include {GSD_WS} in the skill args' + ); + }); + + test('workflow extracts HIGH_COUNT via CYCLE_SUMMARY contract (not raw grep of REVIEWS.md)', () => { + assert.ok( + workflow.includes('CYCLE_SUMMARY'), + 'workflow must use CYCLE_SUMMARY contract — raw grep of REVIEWS.md double-counts historical references across cycles' + ); + assert.ok( + workflow.includes('current_high='), + 'CYCLE_SUMMARY contract must use current_high= format' + ); + assert.ok( + workflow.match(/CYCLE_SUMMARY:\s*\\s\+\s*current_high=\(\\d\+\)/) || + workflow.match(/CYCLE_SUMMARY:\\s\+current_high=\(\\d\+\)/), + 'workflow must extract HIGH_COUNT via regex matching CYCLE_SUMMARY: current_high=' + ); + }); + + test('workflow instructs reviewer to exclude resolved/historical HIGH from count', () => { assert.ok( - workflow.includes('HIGH_COUNT') || workflow.includes('grep'), - 'workflow must grep REVIEWS.md for HIGH concerns to determine convergence' + workflow.includes('EXCLUDE') && workflow.includes('RESOLVED'), + 'CYCLE_SUMMARY contract must exclude resolved HIGHs from the count' + ); + assert.ok( + workflow.match(/retrospective|summary tables/i), + 'contract must exclude HIGH mentions in retrospective/summary tables (cycle-comparison artifacts)' ); }); @@ -186,9 +238,9 @@ describe('plan-review-convergence workflow: convergence loop (#2306)', () => { }); }); -// ─── Workflow: stall detection ───────────────────────────────────────────── +// ─── Workflow: stall detection (behavioral) ─────────────────────────────── -describe('plan-review-convergence workflow: stall detection (#2306)', () => { +describe('plan-review-convergence workflow: stall detection — behavioral (#2306)', () => { const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8'); test('workflow tracks previous HIGH count to detect stalls', () => { @@ -198,15 +250,50 @@ describe('plan-review-convergence workflow: stall detection (#2306)', () => { ); }); - test('workflow warns when HIGH count is not decreasing', () => { + test('stall condition uses >= (covers both "stable" AND "increasing" cases)', () => { + // The exact spec: "If HIGH_COUNT >= prev_high_count" + // A bug would be using > alone (misses the stable-count case) or <= (inverted logic) + assert.ok( + workflow.match(/HIGH_COUNT\s*>=\s*prev_high_count/), + 'stall condition must be HIGH_COUNT >= prev_high_count — using > alone would miss the "count stable across cycles" case (real stall)' + ); + assert.ok( + !workflow.match(/HIGH_COUNT\s*<=\s*prev_high_count/), + 'stall condition must NOT be <= (that would be inverted logic — decreasing counts would falsely trigger stall)' + ); + }); + + // Behavioral simulation: verify the condition `HIGH_COUNT >= prev_high_count` + // produces correct stall/no-stall decisions across the three cycle transitions. + const stallCondition = (current, previous) => current >= previous; + + test('BEHAVIORAL: decreasing HIGH count (progress) does NOT trigger stall', () => { + // 5 → 3: real progress, cycle should continue to replan + assert.strictEqual(stallCondition(3, 5), false, 'decreasing 5→3 must NOT be stall'); + assert.strictEqual(stallCondition(1, 10), false, 'decreasing 10→1 must NOT be stall'); + }); + + test('BEHAVIORAL: stable HIGH count (no progress) DOES trigger stall', () => { + // 5 → 5: the original stall case from revision-loop.md + assert.strictEqual(stallCondition(5, 5), true, 'stable 5→5 MUST be stall (revision not converging)'); + assert.strictEqual(stallCondition(0, 0), true, 'stable 0→0 is stall but HIGH_COUNT==0 exits loop first — trap prevents false alarm'); + }); + + test('BEHAVIORAL: increasing HIGH count (regression) DOES trigger stall', () => { + // Regression case: replan made things worse + assert.strictEqual(stallCondition(7, 5), true, 'increasing 5→7 MUST be stall (regression)'); + assert.strictEqual(stallCondition(13, 3), true, 'increasing 3→13 MUST be stall'); + }); + + test('workflow emits stall warning text when condition triggers', () => { assert.ok( - workflow.includes('stall') || workflow.includes('Stall') || workflow.includes('not decreasing'), - 'workflow must warn user when HIGH count is not decreasing between cycles' + workflow.match(/stalled|Stall|not decreasing/), + 'workflow must emit a stall warning message when condition triggers (for user visibility)' ); }); }); -// ─── Workflow: escalation gate ──────────────────────────────────────────── +// ─── Workflow: escalation gate (including --max-cycles 1 edge) ──────────── describe('plan-review-convergence workflow: escalation gate (#2306)', () => { const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8'); @@ -219,6 +306,28 @@ describe('plan-review-convergence workflow: escalation gate (#2306)', () => { ); }); + test('max-cycles condition uses >= (so --max-cycles 1 triggers after cycle 1)', () => { + assert.ok( + workflow.match(/cycle\s*>=\s*MAX_CYCLES/), + 'max-cycles check must use cycle >= MAX_CYCLES — using > alone would require cycle=MAX_CYCLES+1 before escalation' + ); + }); + + // Behavioral simulation: --max-cycles 1 corner case + const shouldEscalate = (cycle, maxCycles, highCount) => highCount > 0 && cycle >= maxCycles; + + test('BEHAVIORAL: --max-cycles 1 with HIGHs in cycle 1 immediately escalates (no replan)', () => { + // User sets --max-cycles 1 as "review-only / dry-run" mode + assert.strictEqual(shouldEscalate(1, 1, 3), true, 'cycle=1, max=1, HIGHs=3 MUST escalate immediately'); + assert.strictEqual(shouldEscalate(1, 1, 0), false, 'cycle=1, max=1, HIGHs=0 converges instead (HIGH_COUNT==0 branch exits earlier)'); + }); + + test('BEHAVIORAL: --max-cycles 3 (default) only escalates after 3 cycles', () => { + assert.strictEqual(shouldEscalate(1, 3, 5), false, 'cycle=1 of 3: must NOT escalate yet — replan next'); + assert.strictEqual(shouldEscalate(2, 3, 5), false, 'cycle=2 of 3: must NOT escalate yet — replan next'); + assert.strictEqual(shouldEscalate(3, 3, 5), true, 'cycle=3 of 3: MUST escalate (max reached)'); + }); + test('escalation offers "Proceed anyway" option', () => { assert.ok( workflow.includes('Proceed anyway'), @@ -241,59 +350,6 @@ describe('plan-review-convergence workflow: escalation gate (#2306)', () => { }); }); -// ─── Workflow: stall detection — behavioral ─────────────────────────────── - -describe('plan-review-convergence workflow: stall detection behavioral (#2306)', () => { - const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8'); - - test('workflow surfaces stall warning when prev_high_count equals current HIGH_COUNT', () => { - // Behavioral test: two consecutive cycles with the same HIGH count must trigger - // the stall warning. The workflow must compare HIGH_COUNT >= prev_high_count and - // emit a warning string that would appear in output. - assert.ok( - workflow.includes('prev_high_count') || workflow.includes('prev_HIGH'), - 'workflow must track prev_high_count across cycles' - ); - // The comparison that detects the stall - assert.ok( - workflow.includes('HIGH_COUNT >= prev_high_count') || - workflow.includes('HIGH_COUNT >= prev_HIGH') || - workflow.includes('not decreasing'), - 'workflow must compare current HIGH count against previous to detect stall' - ); - // The stall warning text that appears in output - assert.ok( - workflow.includes('stall') || workflow.includes('Stall') || workflow.includes('not decreasing'), - 'workflow must emit a stall warning when HIGH count is not decreasing' - ); - }); -}); - -// ─── Workflow: --max-cycles 1 immediate escalation — behavioral ──────────── - -describe('plan-review-convergence workflow: --max-cycles 1 immediate escalation behavioral (#2306)', () => { - const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8'); - - test('workflow escalates immediately after cycle 1 when --max-cycles 1 and HIGH > 0', () => { - // Behavioral test: when max_cycles=1, after the first review cycle, if HIGH_COUNT > 0 - // the workflow must trigger the escalation gate (cycle >= MAX_CYCLES check fires on - // cycle 1 itself). Verify the workflow contains the logic for this edge case. - assert.ok( - workflow.includes('cycle >= MAX_CYCLES') || - workflow.includes('cycle >= max_cycles') || - (workflow.includes('MAX_CYCLES') && workflow.includes('AskUserQuestion')), - 'workflow must check cycle >= MAX_CYCLES so --max-cycles 1 triggers escalation after first cycle' - ); - // Escalation gate must fire when HIGH > 0 (not just at exactly max_cycles) - assert.ok( - workflow.includes('HIGH_COUNT > 0') || - workflow.includes('HIGH concerns remain') || - workflow.includes('Proceed anyway'), - 'escalation gate must be reachable when HIGH_COUNT > 0 after a single cycle' - ); - }); -}); - // ─── Workflow: REVIEWS.md verification ──────────────────────────────────── describe('plan-review-convergence workflow: artifact verification (#2306)', () => { @@ -312,4 +368,11 @@ describe('plan-review-convergence workflow: artifact verification (#2306)', () = 'workflow must error if the review agent fails to produce REVIEWS.md' ); }); + + test('workflow aborts if review agent omits CYCLE_SUMMARY contract', () => { + assert.ok( + workflow.match(/did not honor the CYCLE_SUMMARY contract/i), + 'workflow must fail-loud (not silently default) if review agent omits the CYCLE_SUMMARY summary line' + ); + }); });