livekit · toubatbrian · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/.changeset/mocktools-async-local-storage.md b/.changeset/mocktools-async-local-storage.md
@@ -0,0 +1,5 @@
+---
+'@livekit/agents': patch
+---
+
+Isolate the `withMockTools` test utility per async context. The active mock registry now lives in an `AsyncLocalStorage` instead of a module-level mutable global, so overlapping/concurrent tests no longer clobber each other's mock maps. This matches Python's per-async-context `ContextVar`. The public `using withMockTools(...)` Disposable API is unchanged.
diff --git a/agents/src/voice/testing/run_result.test.ts b/agents/src/voice/testing/run_result.test.ts
@@ -2,14 +2,20 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import { ReadableStream } from 'node:stream/web';
-import { describe, expect, it } from 'vitest';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
 import { z } from 'zod';
 import { FunctionCall } from '../../llm/chat_context.js';
 import { ToolContext, tool } from '../../llm/tool_context.js';
+import { initializeLogger } from '../../log.js';
+import { Future } from '../../utils.js';
 import { Agent } from '../agent.js';
+import { AgentSession } from '../agent_session.js';
 import { performToolExecutions } from '../generation.js';
 import { SpeechHandle } from '../speech_handle.js';
-import { activeMockTools, withMockTools } from './run_result.js';
+import { FakeLLM } from './fake_llm.js';
+import { getActiveMockTools, getMockTool, withMockTools } from './run_result.js';
+
+initializeLogger({ pretty: false, level: 'silent' });
 
 class AgentA extends Agent {
   constructor() {
@@ -23,17 +29,44 @@ class AgentB extends Agent {
   }
 }
 
+// Probes for the activity-loop tests below: which implementation actually executed.
+let realToolRan = false;
+let mockRan = false;
+
+class ProbeAgent extends Agent {
+  constructor() {
+    super({
+      instructions: 'You are a probe agent.',
+      tools: [
+        tool({
+          name: 'theTool',
+          description: 'A real tool whose execution we can detect.',
+          parameters: z.object({}),
+          execute: async () => {
+            realToolRan = true;
+            return 'REAL';
+          },
+        }),
+      ],
+    });
+  }
+}
+
+function makeFakeLLM(): FakeLLM {
+  return new FakeLLM([{ input: 'order', toolCalls: [{ name: 'theTool', args: {} }] }]);
+}
+
 describe('withMockTools', () => {
   it('sets the mock registry for the given agent inside the block', () => {
     const mock = () => 'mocked';
 
     {
       using _mock = withMockTools(AgentA, { tool1: mock });
-      expect(activeMockTools).toBeDefined();
-      expect(activeMockTools?.get(AgentA)?.tool1).toBe(mock);
+      expect(getActiveMockTools()).toBeDefined();
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mock);
     }
 
-    expect(activeMockTools).toBeUndefined();
+    expect(getActiveMockTools()).toBeUndefined();
   });
 
   it('merges mocks across nested blocks and isolates per agent', () => {
@@ -44,12 +77,12 @@ describe('withMockTools', () => {
       using _mockA = withMockTools(AgentA, { toolA: mockA });
       {
         using _mockB = withMockTools(AgentB, { toolB: mockB });
-        expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA);
-        expect(activeMockTools?.get(AgentB)?.toolB).toBe(mockB);
+        expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA);
+        expect(getActiveMockTools()?.get(AgentB)?.toolB).toBe(mockB);
       }
 
-      expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA);
-      expect(activeMockTools?.get(AgentB)).toBeUndefined();
+      expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA);
+      expect(getActiveMockTools()?.get(AgentB)).toBeUndefined();
     }
   });
 
@@ -61,15 +94,15 @@ describe('withMockTools', () => {
       using _outer = withMockTools(AgentA, { tool1: outer });
       {
         using _inner = withMockTools(AgentA, { tool1: inner });
-        expect(activeMockTools?.get(AgentA)?.tool1).toBe(inner);
+        expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(inner);
       }
-      expect(activeMockTools?.get(AgentA)?.tool1).toBe(outer);
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(outer);
     }
   });
 
   it('exposes the mock for invocation within the block', async () => {
     using _mock = withMockTools(AgentA, { tool1: async () => 42 });
-    const mock = activeMockTools?.get(AgentA)?.tool1;
+    const mock = getActiveMockTools()?.get(AgentA)?.tool1;
     expect(await mock?.()).toBe(42);
   });
 
@@ -179,4 +212,128 @@ describe('withMockTools', () => {
     expect(output.output[0]?.rawException?.message).toBe('test failure');
     expect(output.output[0]?.toolCallOutput?.isError).toBe(true);
   });
+
+  it('propagates the mock registry to child async tasks started within the block', async () => {
+    const mock = () => 'child-visible';
+    using _mock = withMockTools(AgentA, { tool1: mock });
+
+    // A child async task started after withMockTools should inherit the registry.
+    const childSaw = await (async () => {
+      await Promise.resolve();
+      return getActiveMockTools()?.get(AgentA)?.tool1;
+    })();
+
+    expect(childSaw).toBe(mock);
+    expect(getMockTool(new AgentA(), 'tool1')).toBe(mock);
+  });
+
+  it('isolates mock registries across overlapping async contexts', async () => {
+    const mockA = () => 'a';
+    const mockB = () => 'b';
+
+    const aEntered = new Future<void>();
+    const bEntered = new Future<void>();
+
+    // Scope A installs its mock first, then stays alive while scope B installs a
+    // conflicting mock for the SAME agent/tool. With a module-level global, B would
+    // clobber A's registry; with AsyncLocalStorage each scope keeps its own view.
+    const scopeA = async () => {
+      // Detach into this scope's own async context before installing the mock.
+      await Promise.resolve();
+      using _mockA = withMockTools(AgentA, { tool1: mockA });
+      aEntered.resolve();
+      await bEntered.await;
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockA);
+      expect(getMockTool(new AgentA(), 'tool1')).toBe(mockA);
+    };
+
+    const scopeB = async () => {
+      await aEntered.await;
+      using _mockB = withMockTools(AgentA, { tool1: mockB });
+      bEntered.resolve();
+      await Promise.resolve();
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockB);
+      expect(getMockTool(new AgentA(), 'tool1')).toBe(mockB);
+    };
+
+    await Promise.all([scopeA(), scopeB()]);
+
+    // Both scopes have exited: nothing leaks into the outer context.
+    expect(getActiveMockTools()).toBeUndefined();
+  });
+});
+
+describe('withMockTools reaches the agent-activity loop', () => {
+  let session: AgentSession;
+
+  beforeAll(async () => {
+    // Start the activity loop in the setup async context, before any mock exists,
+    // mirroring the real `session.start()` (e.g. drive-thru) usage pattern.
+    session = new AgentSession({ llm: makeFakeLLM() });
+    await session.start({ agent: new ProbeAgent() });
+  }, 30_000);
+
+  afterAll(async () => {
+    await session?.close();
+  });
+
+  it('routes the activity-loop tool execution to a mock installed in the test body', async () => {
+    realToolRan = false;
+    mockRan = false;
+
+    using _mock = withMockTools(ProbeAgent, {
+      theTool: () => {
+        mockRan = true;
+        return 'MOCKED';
+      },
+    });
+
+    const result = session.run({ userInput: 'order' });
+    await result.wait();
+
+    result.expect.containsFunctionCall({ name: 'theTool' });
+    expect(mockRan).toBe(true);
+    expect(realToolRan).toBe(false);
+    // The tool output is JSON-serialized, so the raw string 'MOCKED' surfaces as '"MOCKED"'.
+    result.expect.containsFunctionCallOutput({ output: '"MOCKED"' });
+  }, 30_000);
+
+  it('executes the real tool when no mock is installed (harness sanity)', async () => {
+    realToolRan = false;
+    mockRan = false;
+
+    const result = session.run({ userInput: 'order' });
+    await result.wait();
+
+    result.expect.containsFunctionCall({ name: 'theTool' });
+    expect(realToolRan).toBe(true);
+    expect(mockRan).toBe(false);
+    result.expect.containsFunctionCallOutput({ output: '"REAL"' });
+  }, 30_000);
+});
+
+describe('withMockTools caller-leak inside an async helper (known limitation)', () => {
+  it('leaks the mock into the caller continuation after the using block', async () => {
+    // No mock active at the outer scope.
+    expect(getActiveMockTools()).toBeUndefined();
+
+    async function helper(): Promise<void> {
+      using _mock = withMockTools(ProbeAgent, { theTool: () => 'X' });
+      // The mock is visible inside the helper.
+      expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined();
+      await Promise.resolve();
+      await new Promise((r) => setTimeout(r, 1));
+    }
+
+    await helper();
+
+    // KNOWN LIMITATION: `withMockTools` uses `AsyncLocalStorage.enterWith`, which mutates the
+    // caller's context synchronously; the `using` dispose runs in the helper's post-await child
+    // context and restores that context rather than the caller's, so the caller still observes
+    // the mock after `await helper()`. The canonical synchronous `using` usage in a test body is
+    // unaffected. Flip these to `toBeUndefined()` if the leak is fixed (e.g. scope via
+    // `mockToolsStorage.run(...)` instead of `enterWith`).
+    expect(getActiveMockTools()).toBeDefined();
+    expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined();
+  });
 });
diff --git a/agents/src/voice/testing/run_result.ts b/agents/src/voice/testing/run_result.ts
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
+import { AsyncLocalStorage } from 'node:async_hooks';
 import { z } from 'zod';
 import type { AgentHandoffItem, ChatItem, ChatRole } from '../../llm/chat_context.js';
 import { ChatContext } from '../../llm/chat_context.js';
@@ -961,14 +962,30 @@ export type MockToolFn = (...args: any[]) => any;
 /** Map from agent constructor to a record of mocked tools by name. */
 export type MockToolsMap = Map<AgentConstructor, Record<string, MockToolFn>>;
 
-/** @internal */
-export let activeMockTools: MockToolsMap | undefined;
+/**
+ * Per-async-context storage for the active mock tool registry. Using
+ * {@link AsyncLocalStorage} (rather than a module-level mutable global) isolates the
+ * registry to the async context that installed it, so overlapping/concurrent tests
+ * cannot clobber each other's mock maps. This mirrors Python's per-async-context
+ * `ContextVar` (`_MockToolsContextVar`).
+ */
+const mockToolsStorage = new AsyncLocalStorage<MockToolsMap>();
+
+/**
+ * Returns the mock tool registry active in the current async context, if any.
+ *
+ * @internal
+ */
+export function getActiveMockTools(): MockToolsMap | undefined {
+  return mockToolsStorage.getStore();
+}
 
 /** @internal */
 export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefined {
-  if (!activeMockTools) return undefined;
+  const active = getActiveMockTools();
+  if (!active) return undefined;
 
-  for (const [agentConstructor, mocks] of activeMockTools) {
+  for (const [agentConstructor, mocks] of active) {
     if (agent.constructor === agentConstructor) {
       return mocks[toolName];
     }
@@ -984,7 +1001,9 @@ export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefi
  * the enclosing block exits.
  *
  * Mirrors the Python `mock_tools` context manager, adapted to JS via the explicit
- * resource management `using` syntax (Python uses `with`).
+ * resource management `using` syntax (Python uses `with`). The registry is stored in
+ * an {@link AsyncLocalStorage}, so the binding is isolated to the current async
+ * context — matching the per-async-context isolation Python gets from `ContextVar`.
  *
  * @param agent - The Agent constructor whose tools should be mocked.
  * @param mocks - A record mapping tool name to a mock implementation.
@@ -1006,14 +1025,16 @@ export function withMockTools(
   agent: AgentConstructor,
   mocks: Record<string, MockToolFn>,
 ): Disposable {
-  const previous = activeMockTools;
+  const previous = getActiveMockTools();
   const updated: MockToolsMap = new Map(previous ?? []);
   updated.set(agent, mocks);
-  activeMockTools = updated;
+  // `enterWith` mutates the current async context in place, preserving the synchronous
+  // enter/exit ergonomics of `using` while still isolating the registry per async context.
+  mockToolsStorage.enterWith(updated);
 
   return {
     [Symbol.dispose]() {
-      activeMockTools = previous;
+      mockToolsStorage.enterWith(previous as MockToolsMap);
-      mockToolsStorage.enterWith(previous as MockToolsMap);
+      if (previous) {
+        mockToolsStorage.enterWith(previous);
+      } else {
+        mockToolsStorage.disable();
+      }
+
-      mockToolsStorage.enterWith(previous as MockToolsMap);
+      if (previous) {
+        mockToolsStorage.enterWith(previous);
+      } else {
+        mockToolsStorage.disable();
+      }
+
     },
   };
 }