diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 815b1702d4..9e2586da3a 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -2930,6 +2930,75 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { ).toBe(1); }); + it('completes on final iteration with XML-wrapped signal (SIGNAL)', async () => { + let callCount = 0; + mockSendQueryDag.mockImplementation(function* () { + callCount++; + if (callCount < 3) { + yield { type: 'assistant', content: `Iteration ${String(callCount)} progress` }; + yield { type: 'result', sessionId: `loop-session-${String(callCount)}` }; + } else { + // Final iteration uses tag instead of + yield { type: 'assistant', content: 'All clean! ALL_CLEAN' }; + yield { type: 'result', sessionId: `loop-session-${String(callCount)}` }; + } + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-xml-tag', + nodes: [ + { + id: 'fix-and-review', + loop: { + prompt: 'Fix and review. When done, output ALL_CLEAN.', + until: 'ALL_CLEAN', + max_iterations: 3, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // 3 iterations run, signal found on iteration 3 → completed, NOT failed + expect(mockSendQueryDag.mock.calls.length).toBe(3); + expect( + ( + mockDeps.store.completeWorkflowRun as Mock< + (id: string, metadata?: Record) => Promise + > + ).mock.calls.length + ).toBe(1); + expect( + (mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise>).mock + .calls.length + ).toBe(0); + // Verify stripping: raw XML completion tags must not appear in user-visible output + const allSentMessages = ( + platform.sendMessage as Mock<(...args: unknown[]) => Promise> + ).mock.calls + .map((call: unknown[]) => call[1] as string) + .join(''); + expect(allSentMessages).not.toContain(''); + expect(allSentMessages).not.toContain(''); + }); + it('loop node output available to downstream nodes via $nodeId.output', async () => { let loopCallCount = 0; mockSendQueryDag.mockImplementation(function* (prompt: string) { diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index b2488a70f2..538bdb9949 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -1594,7 +1594,7 @@ async function executeLoopNode( })) { if (msg.type === 'assistant') { fullOutput += msg.content; - const cleaned = stripCompletionTags(msg.content); + const cleaned = stripCompletionTags(msg.content, loop.until); cleanOutput += cleaned; if (platform.getStreamingMode() === 'stream' && cleaned) { await safeSendMessage(platform, conversationId, cleaned, msgContext); diff --git a/packages/workflows/src/executor-shared.test.ts b/packages/workflows/src/executor-shared.test.ts index 84346f131e..b3a3fdd899 100644 --- a/packages/workflows/src/executor-shared.test.ts +++ b/packages/workflows/src/executor-shared.test.ts @@ -22,6 +22,8 @@ import { substituteWorkflowVariables, buildPromptWithContext, detectCreditExhaustion, + detectCompletionSignal, + stripCompletionTags, isInlineScript, } from './executor-shared'; @@ -330,3 +332,65 @@ describe('isInlineScript', () => { expect(isInlineScript('')).toBe(false); }); }); + +describe('detectCompletionSignal', () => { + it('detects SIGNAL format', () => { + expect(detectCompletionSignal('COMPLETE', 'COMPLETE')).toBe(true); + }); + + it('detects signal in custom XML tags: SIGNAL', () => { + expect(detectCompletionSignal('ALL_CLEAN', 'ALL_CLEAN')).toBe(true); + }); + + it('detects signal in other XML tag names', () => { + expect(detectCompletionSignal('COMPLETE', 'COMPLETE')).toBe(true); + expect(detectCompletionSignal('DONE', 'DONE')).toBe(true); + }); + + it('detects plain signal at end of output', () => { + expect(detectCompletionSignal('Work done. COMPLETE', 'COMPLETE')).toBe(true); + }); + + it('detects plain signal on its own line', () => { + expect(detectCompletionSignal('Work done.\nCOMPLETE\nExtra text', 'COMPLETE')).toBe(true); + }); + + it('does not detect signal embedded in prose', () => { + expect(detectCompletionSignal('The status is not COMPLETE yet.', 'COMPLETE')).toBe(false); + }); + + it('does not detect signal when wrong value is in tags', () => { + expect(detectCompletionSignal('WRONG', 'ALL_CLEAN')).toBe(false); + }); + + it('does NOT detect signal when XML tag names do not match (strict)', () => { + // Open/close tag names must agree — guards against AI prose that + // interleaves tags (e.g. "ALL_CLEAN") being + // treated as a completion. + expect(detectCompletionSignal('ALL_CLEAN', 'ALL_CLEAN')).toBe(false); + }); + + it('detects signal when tag names match case-insensitively', () => { + expect(detectCompletionSignal('ALL_CLEAN', 'ALL_CLEAN')).toBe(true); + }); +}); + +describe('stripCompletionTags', () => { + it('strips tags', () => { + expect(stripCompletionTags('Done. COMPLETE')).toBe('Done.'); + }); + + it('strips XML-wrapped signal when until is provided', () => { + expect(stripCompletionTags('Done. ALL_CLEAN', 'ALL_CLEAN')).toBe('Done.'); + }); + + it('does not strip XML tags when until is not provided', () => { + const input = 'Done. ALL_CLEAN'; + expect(stripCompletionTags(input)).toBe(input.trim()); + }); + + it('strips both and XML-tagged signal when until is provided', () => { + const input = 'Done. ALL_CLEAN ALL_CLEAN'; + expect(stripCompletionTags(input, 'ALL_CLEAN')).toBe('Done.'); + }); +}); diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts index e1978ae106..b5e8a7e05a 100644 --- a/packages/workflows/src/executor-shared.ts +++ b/packages/workflows/src/executor-shared.ts @@ -370,18 +370,26 @@ function escapeRegExp(str: string): string { /** * Detect whether the AI output contains a completion signal. * - * Supports two formats: + * Supports three formats, checked in order: * 1. SIGNAL - Recommended; prevents false positives in prose - * 2. Plain SIGNAL - Backwards compatibility; only at end of output or on own line + * 2. SIGNAL - Any XML-wrapped tag; case-insensitive on tag names + * 3. Plain SIGNAL - Backwards compatibility; only at end of output or on own line * - * The tag format uses case-insensitive matching for the tags. - * Plain signal detection is restrictive to prevent false positives. + * Tag matching uses a backreference (\1) so opening and closing tag names must + * agree — `X` is not treated as a completion, which avoids + * false positives when the AI interleaves tags in prose. + * + * Plain signal detection is restrictive to prevent false positives like "not SIGNAL yet". */ export function detectCompletionSignal(output: string, signal: string): boolean { - // Check for SIGNAL format (recommended - prevents false positives) - // Case-insensitive for tags - const promisePattern = new RegExp(`\\s*${escapeRegExp(signal)}\\s*`, 'i'); - if (promisePattern.test(output)) { + // Check for XML-like tag wrapping with matching open/close names: SIGNAL. + // Catches COMPLETE, ALL_CLEAN, X. + // The `([a-zA-Z][\w-]*)` capture plus `` backreference requires tag names to match. + const xmlWrappedPattern = new RegExp( + `<([a-zA-Z][\\w-]*)[^>]*>\\s*${escapeRegExp(signal)}\\s*`, + 'i' + ); + if (xmlWrappedPattern.test(output)) { return true; } // Plain signal detection - restrictive to prevent false positives like "not COMPLETE yet" @@ -393,9 +401,24 @@ export function detectCompletionSignal(output: string, signal: string): boolean return endPattern.test(output) || ownLinePattern.test(output); } -/** Strip internal completion signal tags before sending to user-facing output. */ -export function stripCompletionTags(content: string): string { - return content.replace(/[\s\S]*?<\/promise>/gi, '').trim(); +/** + * Strip internal completion signal tags before sending to user-facing output. + * Always strips `` (any content). When `until` is provided, + * also strips any XML-wrapped form of that signal with matching tag names + * (e.g. `ALL_CLEAN`). Mismatched tag names are left alone + * so regular prose (`ALL_CLEAN`) isn't accidentally rewritten. + */ +export function stripCompletionTags(content: string, until?: string): string { + let result = content.replace(/[\s\S]*?<\/promise>/gi, ''); + if (until) { + // Strip XML-tagged completion signals with matching open/close tag names. + const escapedSignal = escapeRegExp(until); + result = result.replace( + new RegExp(`<([a-zA-Z][\\w-]*)[^>]*>\\s*${escapedSignal}\\s*`, 'gi'), + '' + ); + } + return result.trim(); } /**