coleam00 · Wirasm · Apr 22, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts
@@ -2930,6 +2930,75 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => {
       ).toBe(1);
     });
 
+    it('completes on final iteration with XML-wrapped signal (<COMPLETE>SIGNAL</COMPLETE>)', async () => {
+      let callCount = 0;
+      mockSendQueryDag.mockImplementation(function* () {
+        callCount++;
+        if (callCount < 3) {
+          yield { type: 'assistant', content: `Iteration ${String(callCount)} progress` };
+          yield { type: 'result', sessionId: `loop-session-${String(callCount)}` };
+        } else {
+          // Third and last permitted iteration: signal wrapped in <COMPLETE>, not <promise>
+          yield { type: 'assistant', content: 'All clean! <COMPLETE>ALL_CLEAN</COMPLETE>' };
+          yield { type: 'result', sessionId: `loop-session-${String(callCount)}` };
+        }
+      });
+
+      const mockDeps = createMockDeps();
+      const platform = createMockPlatform();
+      const workflowRun = makeWorkflowRun();
+
+      await executeDagWorkflow(
+        mockDeps,
+        platform,
+        'conv-dag',
+        testDir,
+        {
+          name: 'dag-loop-xml-tag',
+          nodes: [
+            {
+              id: 'fix-and-review',
+              loop: {
+                prompt: 'Fix and review. When done, output <COMPLETE>ALL_CLEAN</COMPLETE>.',
+                until: 'ALL_CLEAN',
+                max_iterations: 3,
+              },
+            },
+          ],
+        },
+        workflowRun,
+        'claude',
+        undefined,
+        join(testDir, 'artifacts'),
+        join(testDir, 'logs'),
+        'main',
+        'docs/',
+        minimalConfig
+      );
+
+      // Signal arrives on the 3rd (final) iteration → run is completed, not failed
+      expect(mockSendQueryDag.mock.calls.length).toBe(3);
+      expect(
+        (
+          mockDeps.store.completeWorkflowRun as Mock<
+            (id: string, metadata?: Record<string, unknown>) => Promise<void>
+          >
+        ).mock.calls.length
+      ).toBe(1);
+      expect(
+        (mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise<void>>).mock
+          .calls.length
+      ).toBe(0);
+      // Stripping check: sendMessage's 2nd arg (assumed to be the text) must hold no raw tags
+      const allSentMessages = (
+        platform.sendMessage as Mock<(...args: unknown[]) => Promise<void>>
+      ).mock.calls
+        .map((call: unknown[]) => call[1] as string)
+        .join('');
+      expect(allSentMessages).not.toContain('<COMPLETE>');
+      expect(allSentMessages).not.toContain('</COMPLETE>');
+    });
+
     it('loop node output available to downstream nodes via $nodeId.output', async () => {
       let loopCallCount = 0;
       mockSendQueryDag.mockImplementation(function* (prompt: string) {

diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts
@@ -1594,7 +1594,7 @@ async function executeLoopNode(
       })) {
         if (msg.type === 'assistant') {
           fullOutput += msg.content;
-          const cleaned = stripCompletionTags(msg.content);
+          const cleaned = stripCompletionTags(msg.content, loop.until);
           cleanOutput += cleaned;
           if (platform.getStreamingMode() === 'stream' && cleaned) {
             await safeSendMessage(platform, conversationId, cleaned, msgContext);

diff --git a/packages/workflows/src/executor-shared.test.ts b/packages/workflows/src/executor-shared.test.ts
@@ -22,6 +22,8 @@ import {
   substituteWorkflowVariables,
   buildPromptWithContext,
   detectCreditExhaustion,
+  detectCompletionSignal,
+  stripCompletionTags,
   isInlineScript,
 } from './executor-shared';
 
@@ -330,3 +332,65 @@ describe('isInlineScript', () => {
     expect(isInlineScript('')).toBe(false);
   });
 });
+
+describe('detectCompletionSignal', () => {
+  it('detects <promise>SIGNAL</promise> format', () => {
+    expect(detectCompletionSignal('<promise>COMPLETE</promise>', 'COMPLETE')).toBe(true);
+  });
+
+  it('detects signal in custom XML tags: <COMPLETE>SIGNAL</COMPLETE>', () => {
+    expect(detectCompletionSignal('<COMPLETE>ALL_CLEAN</COMPLETE>', 'ALL_CLEAN')).toBe(true);
+  });
+
+  it('detects signal in other XML tag names', () => {
+    expect(detectCompletionSignal('<done>COMPLETE</done>', 'COMPLETE')).toBe(true);
+    expect(detectCompletionSignal('<status>DONE</status>', 'DONE')).toBe(true);
+  });
+
+  it('detects plain signal at end of output', () => {
+    expect(detectCompletionSignal('Work done. COMPLETE', 'COMPLETE')).toBe(true);
+  });
+
+  it('detects plain signal on its own line', () => {
+    expect(detectCompletionSignal('Work done.\nCOMPLETE\nExtra text', 'COMPLETE')).toBe(true);
+  });
+
+  it('does not detect signal embedded in prose', () => {
+    expect(detectCompletionSignal('The status is not COMPLETE yet.', 'COMPLETE')).toBe(false);
+  });
+
+  it('does not detect signal when wrong value is in tags', () => {
+    expect(detectCompletionSignal('<COMPLETE>WRONG</COMPLETE>', 'ALL_CLEAN')).toBe(false);
+  });
+
+  it('does NOT detect signal when XML tag names do not match (strict)', () => {
+    // Open/close names must agree (the pattern backreferences the opening name), so a
+    // signal wrapped as "<COMPLETE>ALL_CLEAN</done>" is not a completion. NOTE(review):
+    // a closing name that is only a prefix of the opening one is not covered — confirm.
+    expect(detectCompletionSignal('<COMPLETE>ALL_CLEAN</done>', 'ALL_CLEAN')).toBe(false);
+  });
+
+  it('detects signal when tag names match case-insensitively', () => {
+    expect(detectCompletionSignal('<Complete>ALL_CLEAN</complete>', 'ALL_CLEAN')).toBe(true);
+  });
+});
+
+describe('stripCompletionTags', () => {
+  it('strips <promise> tags', () => {
+    expect(stripCompletionTags('Done. <promise>COMPLETE</promise>')).toBe('Done.');
+  });
+
+  it('strips XML-wrapped signal when until is provided', () => {
+    expect(stripCompletionTags('Done. <COMPLETE>ALL_CLEAN</COMPLETE>', 'ALL_CLEAN')).toBe('Done.');
+  });
+
+  it('does not strip XML tags when until is not provided', () => {
+    const input = 'Done. <COMPLETE>ALL_CLEAN</COMPLETE>';
+    expect(stripCompletionTags(input)).toBe(input.trim()); // no `until`: custom tags stay
+  });
+
+  it('strips both <promise> and XML-tagged signal when until is provided', () => {
+    const input = 'Done. <promise>ALL_CLEAN</promise> <COMPLETE>ALL_CLEAN</COMPLETE>';
+    expect(stripCompletionTags(input, 'ALL_CLEAN')).toBe('Done.'); // leftover spaces trimmed
+  });
+});
diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts
@@ -370,18 +370,26 @@ function escapeRegExp(str: string): string {
 /**
  * Detect whether the AI output contains a completion signal.
  *
- * Supports two formats:
+ * Supports three formats, checked in order:
  * 1. <promise>SIGNAL</promise> - Recommended; prevents false positives in prose
- * 2. Plain SIGNAL - Backwards compatibility; only at end of output or on own line
+ * 2. <anytag>SIGNAL</anytag> - Any XML-wrapped tag; case-insensitive on tag names
+ * 3. Plain SIGNAL - Backwards compatibility; only at end of output or on own line
  *
- * The <promise> tag format uses case-insensitive matching for the tags.
- * Plain signal detection is restrictive to prevent false positives.
+ * Tag matching uses a backreference (\1) so opening and closing tag names must
+ * agree — `<COMPLETE>X</done>` is not treated as a completion, which avoids
+ * false positives when the AI interleaves tags in prose.
+ *
+ * Plain signal detection is restrictive to prevent false positives like "not SIGNAL yet".
  */
 export function detectCompletionSignal(output: string, signal: string): boolean {
-  // Check for <promise>SIGNAL</promise> format (recommended - prevents false positives)
-  // Case-insensitive for tags
-  const promisePattern = new RegExp(`<promise>\\s*${escapeRegExp(signal)}\\s*</promise>`, 'i');
-  if (promisePattern.test(output)) {
+  // Check for XML-like tag wrapping with matching open/close names: <tag>SIGNAL</tag>.
+  // Catches <promise>COMPLETE</promise>, <COMPLETE>ALL_CLEAN</COMPLETE>, <done>X</done>.
+  // The `([a-zA-Z][\w-]*)` capture plus `</\1>` backreference requires tag names to match.
+  const xmlWrappedPattern = new RegExp(
+    `<([a-zA-Z][\\w-]*)[^>]*>\\s*${escapeRegExp(signal)}\\s*</\\1>`,
+    'i'
+  );
+  if (xmlWrappedPattern.test(output)) {
     return true;
   }
   // Plain signal detection - restrictive to prevent false positives like "not COMPLETE yet"
@@ -393,9 +401,24 @@ export function detectCompletionSignal(output: string, signal: string): boolean
   return endPattern.test(output) || ownLinePattern.test(output);
 }
 
-/** Strip internal completion signal tags before sending to user-facing output. */
-export function stripCompletionTags(content: string): string {
-  return content.replace(/<promise>[\s\S]*?<\/promise>/gi, '').trim();
+/**
+ * Strip internal completion-signal markup before text reaches user-facing output.
+ * `<promise>…</promise>` is always removed, whatever it contains. When `until` is given,
+ * `<tag>until</tag>` is removed as well, but only if the closing name is the same as the
+ * opening name (case-insensitive); attributes after the opening name are tolerated.
+ * @param content - Text to clean.
+ * @param until - Completion signal to look for inside tags; omit to strip only `<promise>`.
+ * @returns The text without those spans, with leading/trailing whitespace trimmed.
+ */
+export function stripCompletionTags(content: string, until?: string): string {
+  let result = content.replace(/<promise>[\s\S]*?<\/promise>/gi, '');
+  if (until) {
+    // The name must be followed by `>` or whitespace, so `<notes>X</note>` cannot match by prefix.
+    const sig = escapeRegExp(until);
+    const wrapped = new RegExp(`<([a-zA-Z][\\w-]*)(?:\\s[^>]*)?>\\s*${sig}\\s*</\\1>`, 'gi');
+    result = result.replace(wrapped, '');
+  }
+  return result.trim();
 }
 
 /**