Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions packages/workflows/src/dag-executor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2930,6 +2930,75 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => {
).toBe(1);
});

it('completes on final iteration with XML-wrapped signal (<COMPLETE>SIGNAL</COMPLETE>)', async () => {
  // Scripted AI responses: the first two iterations only report progress; the
  // third one emits the signal wrapped in <COMPLETE> rather than <promise>.
  let iteration = 0;
  mockSendQueryDag.mockImplementation(function* () {
    iteration += 1;
    const content =
      iteration < 3
        ? `Iteration ${String(iteration)} progress`
        : 'All clean! <COMPLETE>ALL_CLEAN</COMPLETE>';
    yield { type: 'assistant', content };
    yield { type: 'result', sessionId: `loop-session-${String(iteration)}` };
  });

  const mockDeps = createMockDeps();
  const platform = createMockPlatform();
  const workflowRun = makeWorkflowRun();

  await executeDagWorkflow(
    mockDeps,
    platform,
    'conv-dag',
    testDir,
    {
      name: 'dag-loop-xml-tag',
      nodes: [
        {
          id: 'fix-and-review',
          loop: {
            prompt: 'Fix and review. When done, output <COMPLETE>ALL_CLEAN</COMPLETE>.',
            until: 'ALL_CLEAN',
            max_iterations: 3,
          },
        },
      ],
    },
    workflowRun,
    'claude',
    undefined,
    join(testDir, 'artifacts'),
    join(testDir, 'logs'),
    'main',
    'docs/',
    minimalConfig
  );

  // Typed views over the store/platform mocks that the assertions inspect.
  const completeRun = mockDeps.store.completeWorkflowRun as Mock<
    (id: string, metadata?: Record<string, unknown>) => Promise<void>
  >;
  const failRun = mockDeps.store.failWorkflowRun as Mock<
    (id: string, error: string) => Promise<void>
  >;
  const sendMessage = platform.sendMessage as Mock<(...args: unknown[]) => Promise<void>>;

  // All 3 iterations ran and the signal on the last one completed the run
  // instead of tripping the max_iterations failure path.
  expect(mockSendQueryDag.mock.calls.length).toBe(3);
  expect(completeRun.mock.calls.length).toBe(1);
  expect(failRun.mock.calls.length).toBe(0);

  // Stripping check: the raw completion tags must never reach user-visible output.
  const userVisibleText = sendMessage.mock.calls
    .map((args: unknown[]) => args[1] as string)
    .join('');
  for (const rawTag of ['<COMPLETE>', '</COMPLETE>']) {
    expect(userVisibleText).not.toContain(rawTag);
  }
});

it('loop node output available to downstream nodes via $nodeId.output', async () => {
let loopCallCount = 0;
mockSendQueryDag.mockImplementation(function* (prompt: string) {
Expand Down
2 changes: 1 addition & 1 deletion packages/workflows/src/dag-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1594,7 +1594,7 @@ async function executeLoopNode(
})) {
if (msg.type === 'assistant') {
fullOutput += msg.content;
const cleaned = stripCompletionTags(msg.content);
const cleaned = stripCompletionTags(msg.content, loop.until);
cleanOutput += cleaned;
if (platform.getStreamingMode() === 'stream' && cleaned) {
await safeSendMessage(platform, conversationId, cleaned, msgContext);
Expand Down
64 changes: 64 additions & 0 deletions packages/workflows/src/executor-shared.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import {
substituteWorkflowVariables,
buildPromptWithContext,
detectCreditExhaustion,
detectCompletionSignal,
stripCompletionTags,
isInlineScript,
} from './executor-shared';

Expand Down Expand Up @@ -330,3 +332,65 @@ describe('isInlineScript', () => {
expect(isInlineScript('')).toBe(false);
});
});

describe('detectCompletionSignal', () => {
  // --- Tag-wrapped signals ---

  it('detects <promise>SIGNAL</promise> format', () => {
    const output = '<promise>COMPLETE</promise>';
    expect(detectCompletionSignal(output, 'COMPLETE')).toBe(true);
  });

  it('detects signal in custom XML tags: <COMPLETE>SIGNAL</COMPLETE>', () => {
    const output = '<COMPLETE>ALL_CLEAN</COMPLETE>';
    expect(detectCompletionSignal(output, 'ALL_CLEAN')).toBe(true);
  });

  it('detects signal in other XML tag names', () => {
    const samples: [output: string, signal: string][] = [
      ['<done>COMPLETE</done>', 'COMPLETE'],
      ['<status>DONE</status>', 'DONE'],
    ];
    for (const [output, signal] of samples) {
      expect(detectCompletionSignal(output, signal)).toBe(true);
    }
  });

  // --- Plain (untagged) signals ---

  it('detects plain signal at end of output', () => {
    const output = 'Work done. COMPLETE';
    expect(detectCompletionSignal(output, 'COMPLETE')).toBe(true);
  });

  it('detects plain signal on its own line', () => {
    const output = 'Work done.\nCOMPLETE\nExtra text';
    expect(detectCompletionSignal(output, 'COMPLETE')).toBe(true);
  });

  it('does not detect signal embedded in prose', () => {
    const output = 'The status is not COMPLETE yet.';
    expect(detectCompletionSignal(output, 'COMPLETE')).toBe(false);
  });

  // --- Negative and edge cases for tag matching ---

  it('does not detect signal when wrong value is in tags', () => {
    const output = '<COMPLETE>WRONG</COMPLETE>';
    expect(detectCompletionSignal(output, 'ALL_CLEAN')).toBe(false);
  });

  it('does NOT detect signal when XML tag names do not match (strict)', () => {
    // The opening and closing names have to be the same tag. Otherwise AI prose
    // that interleaves tags (e.g. "<COMPLETE>ALL_CLEAN</other-tag>") could be
    // mistaken for a completion.
    const output = '<COMPLETE>ALL_CLEAN</done>';
    expect(detectCompletionSignal(output, 'ALL_CLEAN')).toBe(false);
  });

  it('detects signal when tag names match case-insensitively', () => {
    const output = '<Complete>ALL_CLEAN</complete>';
    expect(detectCompletionSignal(output, 'ALL_CLEAN')).toBe(true);
  });
});

describe('stripCompletionTags', () => {
  it('strips <promise> tags', () => {
    const stripped = stripCompletionTags('Done. <promise>COMPLETE</promise>');
    expect(stripped).toBe('Done.');
  });

  it('strips XML-wrapped signal when until is provided', () => {
    const stripped = stripCompletionTags('Done. <COMPLETE>ALL_CLEAN</COMPLETE>', 'ALL_CLEAN');
    expect(stripped).toBe('Done.');
  });

  it('does not strip XML tags when until is not provided', () => {
    // Without a signal to look for, only <promise> tags are removed, so this
    // text comes back unchanged apart from trimming.
    const input = 'Done. <COMPLETE>ALL_CLEAN</COMPLETE>';
    const stripped = stripCompletionTags(input);
    expect(stripped).toBe(input.trim());
  });

  it('strips both <promise> and XML-tagged signal when until is provided', () => {
    const input = 'Done. <promise>ALL_CLEAN</promise> <COMPLETE>ALL_CLEAN</COMPLETE>';
    const stripped = stripCompletionTags(input, 'ALL_CLEAN');
    expect(stripped).toBe('Done.');
  });
});
45 changes: 34 additions & 11 deletions packages/workflows/src/executor-shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -370,18 +370,26 @@ function escapeRegExp(str: string): string {
/**
* Detect whether the AI output contains a completion signal.
*
* Supports two formats:
* Supports three formats, checked in order:
* 1. <promise>SIGNAL</promise> - Recommended; prevents false positives in prose
* 2. Plain SIGNAL - Backwards compatibility; only at end of output or on own line
* 2. <anytag>SIGNAL</anytag> - Any XML-wrapped tag; case-insensitive on tag names
* 3. Plain SIGNAL - Backwards compatibility; only at end of output or on own line
*
* The <promise> tag format uses case-insensitive matching for the tags.
* Plain signal detection is restrictive to prevent false positives.
* Tag matching uses a backreference (\1) so opening and closing tag names must
* agree — `<COMPLETE>X</done>` is not treated as a completion, which avoids
* false positives when the AI interleaves tags in prose.
*
* Plain signal detection is restrictive to prevent false positives like "not SIGNAL yet".
*/
export function detectCompletionSignal(output: string, signal: string): boolean {
// Check for <promise>SIGNAL</promise> format (recommended - prevents false positives)
// Case-insensitive for tags
const promisePattern = new RegExp(`<promise>\\s*${escapeRegExp(signal)}\\s*</promise>`, 'i');
if (promisePattern.test(output)) {
// Check for XML-like tag wrapping with matching open/close names: <tag>SIGNAL</tag>.
// Catches <promise>COMPLETE</promise>, <COMPLETE>ALL_CLEAN</COMPLETE>, <done>X</done>.
// The `([a-zA-Z][\w-]*)` capture plus `</\1>` backreference requires tag names to match.
const xmlWrappedPattern = new RegExp(
`<([a-zA-Z][\\w-]*)[^>]*>\\s*${escapeRegExp(signal)}\\s*</\\1>`,
'i'
);
if (xmlWrappedPattern.test(output)) {
return true;
}
// Plain signal detection - restrictive to prevent false positives like "not COMPLETE yet"
Expand All @@ -393,9 +401,24 @@ export function detectCompletionSignal(output: string, signal: string): boolean
return endPattern.test(output) || ownLinePattern.test(output);
}

/** Strip internal completion signal tags before sending to user-facing output. */
export function stripCompletionTags(content: string): string {
return content.replace(/<promise>[\s\S]*?<\/promise>/gi, '').trim();
/**
 * Strip internal completion signal tags before sending to user-facing output.
 *
 * Always removes `<promise>…</promise>` blocks, whatever their content (tag
 * name matched case-insensitively). When `until` is a non-empty string, also
 * removes every XML-wrapped occurrence of that signal whose opening and closing
 * tag names agree, e.g. `<COMPLETE>ALL_CLEAN</COMPLETE>` or
 * `<status code="1">ALL_CLEAN</status>`. Mismatched tag names are left alone so
 * regular prose (`<note>ALL_CLEAN</warning>`) isn't accidentally rewritten.
 *
 * @param content - Raw assistant output that may contain completion tags.
 * @param until - Completion signal to look for inside XML-style tags. When
 *   omitted or empty, only `<promise>` blocks are stripped.
 * @returns `content` without the completion tags, trimmed of leading and
 *   trailing whitespace.
 */
export function stripCompletionTags(content: string, until?: string): string {
  const withoutPromise = content.replace(/<promise>[\s\S]*?<\/promise>/gi, '');
  if (!until) {
    return withoutPromise.trim();
  }

  // The tag name must be followed by whitespace (start of attributes) or `>`.
  // Without that boundary the name capture can backtrack to a prefix while the
  // attribute part absorbs the remainder, so `<COMPLETEX>X</COMPLETE>` would be
  // treated as a matching pair despite the differing tag names.
  const wrappedSignal = new RegExp(
    `<([a-zA-Z][\\w-]*)(?:\\s[^>]*)?>\\s*${escapeRegExp(until)}\\s*</\\1>`,
    'gi'
  );
  return withoutPromise.replace(wrappedSignal, '').trim();
}

/**
Expand Down
Loading