gsd-build · caius-kong · Apr 17, 2026 · Apr 17, 2026 · Apr 20, 2026 · Apr 24, 2026
@@ -120,7 +120,29 @@ Agent(
 
 Execute: Skill(skill='gsd-review', args='--phase {PHASE} {REVIEWER_FLAGS} {GSD_WS}')
 
-Complete the full review workflow. Do NOT return until REVIEWS.md is committed.",
+Complete the full review workflow. Do NOT return until REVIEWS.md is committed.
+
+--- RETURN CONTRACT (REQUIRED) ---
+
+Your final return message MUST end with these two sections, exactly as specified.
+This is the contract the orchestrator relies on — do NOT skip or reformat.
+
+Section 1 — Machine-readable summary line (exact format, single line):
+
+CYCLE_SUMMARY: current_high=<N> current_medium=<M>
+
+Where <N> counts HIGH-severity concerns that REMAIN UNRESOLVED in this cycle's findings:
+  - INCLUDE: newly raised HIGHs; PARTIALLY RESOLVED HIGHs; previously raised and still unresolved HIGHs
+  - EXCLUDE: explicitly RESOLVED/FULLY RESOLVED HIGHs; HIGH mentions in retrospective/summary tables comparing cycles; quoted excerpts from prior reviews
+<M> follows the same definition for MEDIUM severity.
+
+Section 2 — Current HIGH Concerns (for display on escalation):
+
+## Current HIGH Concerns
+- <one bullet per currently unresolved HIGH, one sentence each>
+(If <N> is 0, write exactly: None.)
+
+--- END RETURN CONTRACT ---",
   mode="auto"
 )
 ```
@@ -132,13 +154,16 @@ REVIEWS_FILE=$(ls ${phase_dir}/${padded_phase}-REVIEWS.md 2>/dev/null)
 
 If REVIEWS_FILE is empty: Error — review agent did not produce REVIEWS.md. Exit.
 
-### 5b. Check for HIGH Concerns
+### 5b. Extract HIGH Count from Agent Return
 
-```bash
-HIGH_COUNT=$(grep -c '\*\*HIGH' "${REVIEWS_FILE}" 2>/dev/null || true)
-HIGH_COUNT=${HIGH_COUNT:-0}
-HIGH_LINES=$(grep -B0 -A1 '\*\*HIGH' "${REVIEWS_FILE}" 2>/dev/null)
-```
+**Do NOT grep REVIEWS.md.** REVIEWS.md accumulates historical content across cycles; raw text counts are unreliable (a cycle 2 review that re-lists resolved HIGHs for audit would inflate the count and falsely trigger stall detection). Instead, extract the count from the review agent's RETURN MESSAGE using the CYCLE_SUMMARY contract.
+
+Parse the agent's final return message:
+
+- `HIGH_COUNT`: the integer matched by regex `CYCLE_SUMMARY:\s+current_high=(\d+)`. If no match, abort with: `Review agent did not honor the CYCLE_SUMMARY contract — cannot determine HIGH count. Retry or switch reviewer.`
+- `HIGH_LINES`: the bullets under the `## Current HIGH Concerns` section of the return message (up to the next `##` heading or end of message). If the section contains only `None.`, set `HIGH_LINES=""`.
+
+Rationale: `gsd-plan-phase` uses the same data-flow pattern — it parses `gsd-plan-checker`'s structured return rather than re-reading PLAN.md. This keeps the count source-of-truth aligned with what the reviewer actually judged (current cycle only), avoiding false stalls from historical accumulation.
 
 **If HIGH_COUNT == 0 (converged):**
 
@@ -246,7 +271,7 @@ After agent returns → go back to **step 5a** (review again).
 - [ ] Initial planning via Agent → Skill("gsd-plan-phase") if no plans exist
 - [ ] Review via Agent → Skill("gsd-review") — isolated, not inline
 - [ ] Replan via Agent → Skill("gsd-plan-phase --reviews") — isolated, not inline
-- [ ] Orchestrator only does: init, loop control, grep HIGHs, stall detection, escalation
+- [ ] Orchestrator only does: init, loop control, parse CYCLE_SUMMARY for HIGH count, stall detection, escalation
 - [ ] Each Agent fully completes its Skill before returning
 - [ ] Loop exits on: no HIGH concerns (converged) OR max cycles (escalation)
 - [ ] Stall detection reported when HIGH count not decreasing

@@ -3,8 +3,9 @@
  *
  * Validates that the command source and workflow contain the key structural
  * elements required for correct cross-AI plan convergence loop behavior:
- * initial planning gate, review agent spawning, HIGH count detection,
- * stall detection, escalation gate, and STATE.md update on convergence.
+ * initial planning gate, review agent spawning, HIGH count extraction via
+ * CYCLE_SUMMARY contract, stall detection (behavioral), escalation gate
+ * (including --max-cycles 1 edge case), and STATE.md update on convergence.
  */
 
 const { test, describe } = require('node:test');
@@ -107,6 +108,18 @@ describe('plan-review-convergence workflow: initialization (#2306)', () => {
       'workflow must display a startup banner'
     );
   });
+
+  test('workflow parses ARGUMENTS before calling gsd-tools init (PHASE must be known)', () => {
+    const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8');
+    const parseIdx = workflow.search(/Parse (and Normalize )?Arguments/i);
+    const initIdx = workflow.search(/node\s+"?\$HOME[^"]*gsd-tools\.cjs"?\s+init\s+plan-phase/);
+    assert.ok(parseIdx > -1, 'workflow must have an ARGUMENTS parsing section');
+    assert.ok(initIdx > -1, 'workflow must call gsd-tools.cjs init plan-phase');
+    assert.ok(
+      parseIdx < initIdx,
+      'Arguments parsing must come BEFORE init plan-phase so $PHASE is populated'
+    );
+  });
 });
 
 // ─── Workflow: initial planning gate ──────────────────────────────────────
@@ -148,10 +161,49 @@ describe('plan-review-convergence workflow: convergence loop (#2306)', () => {
     );
   });
 
-  test('workflow detects HIGH concerns by grepping REVIEWS.md', () => {
+  test('review agent spawn forwards --ws via {GSD_WS} (symmetric with replan agent)', () => {
+    // Extract the review agent spawn block (between "Cross-AI review" description and its closing mode="auto")
+    const reviewMatch = workflow.match(/description="Cross-AI review[\s\S]*?mode="auto"/);
+    assert.ok(reviewMatch, 'review agent spawn block must exist');
+    assert.ok(
+      reviewMatch[0].includes('{GSD_WS}'),
+      'review agent spawn must include {GSD_WS} in the skill args — asymmetry with replan agent is a correctness bug when user passes --ws'
+    );
+  });
+
+  test('replan agent spawn forwards --ws via {GSD_WS}', () => {
+    const replanMatch = workflow.match(/description="Replan[\s\S]*?mode="auto"/);
+    assert.ok(replanMatch, 'replan agent spawn block must exist');
+    assert.ok(
+      replanMatch[0].includes('{GSD_WS}'),
+      'replan agent must include {GSD_WS} in the skill args'
+    );
+  });
+
+  test('workflow extracts HIGH_COUNT via CYCLE_SUMMARY contract (not raw grep of REVIEWS.md)', () => {
+    assert.ok(
+      workflow.includes('CYCLE_SUMMARY'),
+      'workflow must use CYCLE_SUMMARY contract — raw grep of REVIEWS.md double-counts historical references across cycles'
+    );
+    assert.ok(
+      workflow.includes('current_high='),
+      'CYCLE_SUMMARY contract must use current_high=<N> format'
+    );
+    assert.ok(
+      workflow.match(/CYCLE_SUMMARY:\s*\\s\+\s*current_high=\(\\d\+\)/) ||
+      workflow.match(/CYCLE_SUMMARY:\\s\+current_high=\(\\d\+\)/),
+      'workflow must extract HIGH_COUNT via regex matching CYCLE_SUMMARY: current_high=<N>'
+    );
+  });
+
+  test('workflow instructs reviewer to exclude resolved/historical HIGH from count', () => {
     assert.ok(
-      workflow.includes('HIGH_COUNT') || workflow.includes('grep'),
-      'workflow must grep REVIEWS.md for HIGH concerns to determine convergence'
+      workflow.includes('EXCLUDE') && workflow.includes('RESOLVED'),
+      'CYCLE_SUMMARY contract must exclude resolved HIGHs from the count'
+    );
+    assert.ok(
+      workflow.match(/retrospective|summary tables/i),
+      'contract must exclude HIGH mentions in retrospective/summary tables (cycle-comparison artifacts)'
     );
   });
 
@@ -186,9 +238,9 @@ describe('plan-review-convergence workflow: convergence loop (#2306)', () => {
   });
 });
 
-// ─── Workflow: stall detection ─────────────────────────────────────────────
+// ─── Workflow: stall detection (behavioral) ───────────────────────────────
 
-describe('plan-review-convergence workflow: stall detection (#2306)', () => {
+describe('plan-review-convergence workflow: stall detection — behavioral (#2306)', () => {
   const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8');
 
   test('workflow tracks previous HIGH count to detect stalls', () => {
@@ -198,15 +250,50 @@ describe('plan-review-convergence workflow: stall detection (#2306)', () => {
     );
   });
 
-  test('workflow warns when HIGH count is not decreasing', () => {
+  test('stall condition uses >= (covers both "stable" AND "increasing" cases)', () => {
+    // The exact spec: "If HIGH_COUNT >= prev_high_count"
+    // A bug would be using > alone (misses the stable-count case) or <= (inverted logic)
+    assert.ok(
+      workflow.match(/HIGH_COUNT\s*>=\s*prev_high_count/),
+      'stall condition must be HIGH_COUNT >= prev_high_count — using > alone would miss the "count stable across cycles" case (real stall)'
+    );
+    assert.ok(
+      !workflow.match(/HIGH_COUNT\s*<=\s*prev_high_count/),
+      'stall condition must NOT be <= (that would be inverted logic — decreasing counts would falsely trigger stall)'
+    );
+  });
+
+  // Behavioral simulation: verify the condition `HIGH_COUNT >= prev_high_count`
+  // produces correct stall/no-stall decisions across the three cycle transitions.
+  const stallCondition = (current, previous) => current >= previous;
+
+  test('BEHAVIORAL: decreasing HIGH count (progress) does NOT trigger stall', () => {
+    // 5 → 3: real progress, cycle should continue to replan
+    assert.strictEqual(stallCondition(3, 5), false, 'decreasing 5→3 must NOT be stall');
+    assert.strictEqual(stallCondition(1, 10), false, 'decreasing 10→1 must NOT be stall');
+  });
+
+  test('BEHAVIORAL: stable HIGH count (no progress) DOES trigger stall', () => {
+    // 5 → 5: the original stall case from revision-loop.md
+    assert.strictEqual(stallCondition(5, 5), true, 'stable 5→5 MUST be stall (revision not converging)');
+    assert.strictEqual(stallCondition(0, 0), true, 'stable 0→0 is stall but HIGH_COUNT==0 exits loop first — trap prevents false alarm');
+  });
+
+  test('BEHAVIORAL: increasing HIGH count (regression) DOES trigger stall', () => {
+    // Regression case: replan made things worse
+    assert.strictEqual(stallCondition(7, 5), true, 'increasing 5→7 MUST be stall (regression)');
+    assert.strictEqual(stallCondition(13, 3), true, 'increasing 3→13 MUST be stall');
+  });
+
+  test('workflow emits stall warning text when condition triggers', () => {
     assert.ok(
-      workflow.includes('stall') || workflow.includes('Stall') || workflow.includes('not decreasing'),
-      'workflow must warn user when HIGH count is not decreasing between cycles'
+      workflow.match(/stalled|Stall|not decreasing/),
+      'workflow must emit a stall warning message when condition triggers (for user visibility)'
     );
   });
 });
 
-// ─── Workflow: escalation gate ────────────────────────────────────────────
+// ─── Workflow: escalation gate (including --max-cycles 1 edge) ────────────
 
 describe('plan-review-convergence workflow: escalation gate (#2306)', () => {
   const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8');
@@ -219,6 +306,28 @@ describe('plan-review-convergence workflow: escalation gate (#2306)', () => {
     );
   });
 
+  test('max-cycles condition uses >= (so --max-cycles 1 triggers after cycle 1)', () => {
+    assert.ok(
+      workflow.match(/cycle\s*>=\s*MAX_CYCLES/),
+      'max-cycles check must use cycle >= MAX_CYCLES — using > alone would require cycle=MAX_CYCLES+1 before escalation'
+    );
+  });
+
+  // Behavioral simulation: --max-cycles 1 corner case
+  const shouldEscalate = (cycle, maxCycles, highCount) => highCount > 0 && cycle >= maxCycles;
+
+  test('BEHAVIORAL: --max-cycles 1 with HIGHs in cycle 1 immediately escalates (no replan)', () => {
+    // User sets --max-cycles 1 as "review-only / dry-run" mode
+    assert.strictEqual(shouldEscalate(1, 1, 3), true, 'cycle=1, max=1, HIGHs=3 MUST escalate immediately');
+    assert.strictEqual(shouldEscalate(1, 1, 0), false, 'cycle=1, max=1, HIGHs=0 converges instead (HIGH_COUNT==0 branch exits earlier)');
+  });
+
+  test('BEHAVIORAL: --max-cycles 3 (default) only escalates after 3 cycles', () => {
+    assert.strictEqual(shouldEscalate(1, 3, 5), false, 'cycle=1 of 3: must NOT escalate yet — replan next');
+    assert.strictEqual(shouldEscalate(2, 3, 5), false, 'cycle=2 of 3: must NOT escalate yet — replan next');
+    assert.strictEqual(shouldEscalate(3, 3, 5), true, 'cycle=3 of 3: MUST escalate (max reached)');
+  });
+
   test('escalation offers "Proceed anyway" option', () => {
     assert.ok(
       workflow.includes('Proceed anyway'),
@@ -241,59 +350,6 @@ describe('plan-review-convergence workflow: escalation gate (#2306)', () => {
   });
 });
 
-// ─── Workflow: stall detection — behavioral ───────────────────────────────
-
-describe('plan-review-convergence workflow: stall detection behavioral (#2306)', () => {
-  const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8');
-
-  test('workflow surfaces stall warning when prev_high_count equals current HIGH_COUNT', () => {
-    // Behavioral test: two consecutive cycles with the same HIGH count must trigger
-    // the stall warning. The workflow must compare HIGH_COUNT >= prev_high_count and
-    // emit a warning string that would appear in output.
-    assert.ok(
-      workflow.includes('prev_high_count') || workflow.includes('prev_HIGH'),
-      'workflow must track prev_high_count across cycles'
-    );
-    // The comparison that detects the stall
-    assert.ok(
-      workflow.includes('HIGH_COUNT >= prev_high_count') ||
-      workflow.includes('HIGH_COUNT >= prev_HIGH') ||
-      workflow.includes('not decreasing'),
-      'workflow must compare current HIGH count against previous to detect stall'
-    );
-    // The stall warning text that appears in output
-    assert.ok(
-      workflow.includes('stall') || workflow.includes('Stall') || workflow.includes('not decreasing'),
-      'workflow must emit a stall warning when HIGH count is not decreasing'
-    );
-  });
-});
-
-// ─── Workflow: --max-cycles 1 immediate escalation — behavioral ────────────
-
-describe('plan-review-convergence workflow: --max-cycles 1 immediate escalation behavioral (#2306)', () => {
-  const workflow = fs.readFileSync(WORKFLOW_PATH, 'utf8');
-
-  test('workflow escalates immediately after cycle 1 when --max-cycles 1 and HIGH > 0', () => {
-    // Behavioral test: when max_cycles=1, after the first review cycle, if HIGH_COUNT > 0
-    // the workflow must trigger the escalation gate (cycle >= MAX_CYCLES check fires on
-    // cycle 1 itself). Verify the workflow contains the logic for this edge case.
-    assert.ok(
-      workflow.includes('cycle >= MAX_CYCLES') ||
-      workflow.includes('cycle >= max_cycles') ||
-      (workflow.includes('MAX_CYCLES') && workflow.includes('AskUserQuestion')),
-      'workflow must check cycle >= MAX_CYCLES so --max-cycles 1 triggers escalation after first cycle'
-    );
-    // Escalation gate must fire when HIGH > 0 (not just at exactly max_cycles)
-    assert.ok(
-      workflow.includes('HIGH_COUNT > 0') ||
-      workflow.includes('HIGH concerns remain') ||
-      workflow.includes('Proceed anyway'),
-      'escalation gate must be reachable when HIGH_COUNT > 0 after a single cycle'
-    );
-  });
-});
-
 // ─── Workflow: REVIEWS.md verification ────────────────────────────────────
 
 describe('plan-review-convergence workflow: artifact verification (#2306)', () => {
@@ -312,4 +368,11 @@ describe('plan-review-convergence workflow: artifact verification (#2306)', () =
       'workflow must error if the review agent fails to produce REVIEWS.md'
     );
   });
+
+  test('workflow aborts if review agent omits CYCLE_SUMMARY contract', () => {
+    assert.ok(
+      workflow.match(/did not honor the CYCLE_SUMMARY contract/i),
+      'workflow must fail-loud (not silently default) if review agent omits the CYCLE_SUMMARY summary line'
+    );
+  });
 });