diff --git a/.changeset/mocktools-async-local-storage.md b/.changeset/mocktools-async-local-storage.md
new file mode 100644
index 000000000..e2a6b956a
--- /dev/null
+++ b/.changeset/mocktools-async-local-storage.md
@@ -0,0 +1,5 @@
+---
+'@livekit/agents': patch
+---
+
+Isolate the `withMockTools` test utility per async context. The active mock registry now lives in an `AsyncLocalStorage` instead of a module-level mutable global, so overlapping/concurrent tests no longer clobber each other's mock maps. This matches Python's per-async-context `ContextVar`. The public `using withMockTools(...)` Disposable API is unchanged.
diff --git a/agents/src/voice/testing/run_result.test.ts b/agents/src/voice/testing/run_result.test.ts
index 3aa01aba0..0ce780420 100644
--- a/agents/src/voice/testing/run_result.test.ts
+++ b/agents/src/voice/testing/run_result.test.ts
@@ -2,14 +2,20 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import { ReadableStream } from 'node:stream/web';
-import { describe, expect, it } from 'vitest';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
 import { z } from 'zod';
 import { FunctionCall } from '../../llm/chat_context.js';
 import { ToolContext, tool } from '../../llm/tool_context.js';
+import { initializeLogger } from '../../log.js';
+import { Future } from '../../utils.js';
 import { Agent } from '../agent.js';
+import { AgentSession } from '../agent_session.js';
 import { performToolExecutions } from '../generation.js';
 import { SpeechHandle } from '../speech_handle.js';
-import { activeMockTools, withMockTools } from './run_result.js';
+import { FakeLLM } from './fake_llm.js';
+import { getActiveMockTools, getMockTool, withMockTools } from './run_result.js';
+
+initializeLogger({ pretty: false, level: 'silent' });
 
 class AgentA extends Agent {
   constructor() {
@@ -23,17 +29,44 @@ class AgentB extends Agent {
   }
 }
 
+// Probes for the activity-loop tests below: which implementation actually executed.
+let realToolRan = false;
+let mockRan = false;
+
+class ProbeAgent extends Agent {
+  constructor() {
+    super({
+      instructions: 'You are a probe agent.',
+      tools: [
+        tool({
+          name: 'theTool',
+          description: 'A real tool whose execution we can detect.',
+          parameters: z.object({}),
+          execute: async () => {
+            realToolRan = true;
+            return 'REAL';
+          },
+        }),
+      ],
+    });
+  }
+}
+
+function makeFakeLLM(): FakeLLM {
+  return new FakeLLM([{ input: 'order', toolCalls: [{ name: 'theTool', args: {} }] }]);
+}
+
 describe('withMockTools', () => {
   it('sets the mock registry for the given agent inside the block', () => {
     const mock = () => 'mocked';
 
     {
       using _mock = withMockTools(AgentA, { tool1: mock });
-      expect(activeMockTools).toBeDefined();
-      expect(activeMockTools?.get(AgentA)?.tool1).toBe(mock);
+      expect(getActiveMockTools()).toBeDefined();
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mock);
     }
 
-    expect(activeMockTools).toBeUndefined();
+    expect(getActiveMockTools()).toBeUndefined();
   });
 
   it('merges mocks across nested blocks and isolates per agent', () => {
@@ -44,12 +77,12 @@ describe('withMockTools', () => {
       using _mockA = withMockTools(AgentA, { toolA: mockA });
       {
         using _mockB = withMockTools(AgentB, { toolB: mockB });
-        expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA);
-        expect(activeMockTools?.get(AgentB)?.toolB).toBe(mockB);
+        expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA);
+        expect(getActiveMockTools()?.get(AgentB)?.toolB).toBe(mockB);
       }
 
-      expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA);
-      expect(activeMockTools?.get(AgentB)).toBeUndefined();
+      expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA);
+      expect(getActiveMockTools()?.get(AgentB)).toBeUndefined();
     }
   });
 
@@ -61,15 +94,15 @@ describe('withMockTools', () => {
       using _outer = withMockTools(AgentA, { tool1: outer });
       {
         using _inner = withMockTools(AgentA, { tool1: inner });
-        expect(activeMockTools?.get(AgentA)?.tool1).toBe(inner);
+        expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(inner);
       }
-      expect(activeMockTools?.get(AgentA)?.tool1).toBe(outer);
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(outer);
     }
   });
 
   it('exposes the mock for invocation within the block', async () => {
     using _mock = withMockTools(AgentA, { tool1: async () => 42 });
-    const mock = activeMockTools?.get(AgentA)?.tool1;
+    const mock = getActiveMockTools()?.get(AgentA)?.tool1;
     expect(await mock?.()).toBe(42);
   });
 
@@ -179,4 +212,128 @@ describe('withMockTools', () => {
     expect(output.output[0]?.rawException?.message).toBe('test failure');
     expect(output.output[0]?.toolCallOutput?.isError).toBe(true);
   });
+
+  it('propagates the mock registry to child async tasks started within the block', async () => {
+    const mock = () => 'child-visible';
+    using _mock = withMockTools(AgentA, { tool1: mock });
+
+    // A child async task started after withMockTools should inherit the registry.
+    const childSaw = await (async () => {
+      await Promise.resolve();
+      return getActiveMockTools()?.get(AgentA)?.tool1;
+    })();
+
+    expect(childSaw).toBe(mock);
+    expect(getMockTool(new AgentA(), 'tool1')).toBe(mock);
+  });
+
+  it('isolates mock registries across overlapping async contexts', async () => {
+    const mockA = () => 'a';
+    const mockB = () => 'b';
+
+    const aEntered = new Future();
+    const bEntered = new Future();
+
+    // Scope A installs its mock first, then stays alive while scope B installs a
+    // conflicting mock for the SAME agent/tool. With a module-level global, B would
+    // clobber A's registry; with AsyncLocalStorage each scope keeps its own view.
+    const scopeA = async () => {
+      // Detach into this scope's own async context before installing the mock.
+      await Promise.resolve();
+      using _mockA = withMockTools(AgentA, { tool1: mockA });
+      aEntered.resolve();
+      await bEntered.await;
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockA);
+      expect(getMockTool(new AgentA(), 'tool1')).toBe(mockA);
+    };
+
+    const scopeB = async () => {
+      await aEntered.await;
+      using _mockB = withMockTools(AgentA, { tool1: mockB });
+      bEntered.resolve();
+      await Promise.resolve();
+      expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockB);
+      expect(getMockTool(new AgentA(), 'tool1')).toBe(mockB);
+    };
+
+    await Promise.all([scopeA(), scopeB()]);
+
+    // Both scopes have exited: nothing leaks into the outer context.
+    expect(getActiveMockTools()).toBeUndefined();
+  });
+});
+
+describe('withMockTools reaches the agent-activity loop', () => {
+  let session: AgentSession;
+
+  beforeAll(async () => {
+    // Start the activity loop in the setup async context, before any mock exists,
+    // mirroring the real `session.start()` (e.g. drive-thru) usage pattern.
+    session = new AgentSession({ llm: makeFakeLLM() });
+    await session.start({ agent: new ProbeAgent() });
+  }, 30_000);
+
+  afterAll(async () => {
+    await session?.close();
+  });
+
+  it('routes the activity-loop tool execution to a mock installed in the test body', async () => {
+    realToolRan = false;
+    mockRan = false;
+
+    using _mock = withMockTools(ProbeAgent, {
+      theTool: () => {
+        mockRan = true;
+        return 'MOCKED';
+      },
+    });
+
+    const result = session.run({ userInput: 'order' });
+    await result.wait();
+
+    result.expect.containsFunctionCall({ name: 'theTool' });
+    expect(mockRan).toBe(true);
+    expect(realToolRan).toBe(false);
+    // The tool output is JSON-serialized, so the raw string 'MOCKED' surfaces as '"MOCKED"'.
+    result.expect.containsFunctionCallOutput({ output: '"MOCKED"' });
+  }, 30_000);
+
+  it('executes the real tool when no mock is installed (harness sanity)', async () => {
+    realToolRan = false;
+    mockRan = false;
+
+    const result = session.run({ userInput: 'order' });
+    await result.wait();
+
+    result.expect.containsFunctionCall({ name: 'theTool' });
+    expect(realToolRan).toBe(true);
+    expect(mockRan).toBe(false);
+    result.expect.containsFunctionCallOutput({ output: '"REAL"' });
+  }, 30_000);
+});
+
+describe('withMockTools caller-leak inside an async helper (known limitation)', () => {
+  it('leaks the mock into the caller continuation after the using block', async () => {
+    // No mock active at the outer scope.
+    expect(getActiveMockTools()).toBeUndefined();
+
+    async function helper(): Promise<void> {
+      using _mock = withMockTools(ProbeAgent, { theTool: () => 'X' });
+      // The mock is visible inside the helper.
+      expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined();
+      await Promise.resolve();
+      await new Promise((r) => setTimeout(r, 1));
+    }
+
+    await helper();
+
+    // KNOWN LIMITATION: `withMockTools` uses `AsyncLocalStorage.enterWith`, which mutates the
+    // caller's context synchronously; the `using` dispose runs in the helper's post-await child
+    // context and restores that context rather than the caller's, so the caller still observes
+    // the mock after `await helper()`. The canonical synchronous `using` usage in a test body is
+    // unaffected. Flip these to `toBeUndefined()` if the leak is fixed (e.g. scope via
+    // `mockToolsStorage.run(...)` instead of `enterWith`).
+    expect(getActiveMockTools()).toBeDefined();
+    expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined();
+  });
 });
diff --git a/agents/src/voice/testing/run_result.ts b/agents/src/voice/testing/run_result.ts
index 21c3a9e11..b1c32e1b0 100644
--- a/agents/src/voice/testing/run_result.ts
+++ b/agents/src/voice/testing/run_result.ts
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
+import { AsyncLocalStorage } from 'node:async_hooks';
 import { z } from 'zod';
 import type { AgentHandoffItem, ChatItem, ChatRole } from '../../llm/chat_context.js';
 import { ChatContext } from '../../llm/chat_context.js';
@@ -961,14 +962,30 @@ export type MockToolFn = (...args: any[]) => any;
 /** Map from agent constructor to a record of mocked tools by name. */
 export type MockToolsMap = Map<AgentConstructor, Record<string, MockToolFn>>;
 
-/** @internal */
-export let activeMockTools: MockToolsMap | undefined;
+/**
+ * Per-async-context storage for the active mock tool registry. Using
+ * {@link AsyncLocalStorage} (rather than a module-level mutable global) isolates the
+ * registry to the async context that installed it, so overlapping/concurrent tests
+ * cannot clobber each other's mock maps. This mirrors Python's per-async-context
+ * `ContextVar` (`_MockToolsContextVar`).
+ */
+const mockToolsStorage = new AsyncLocalStorage<MockToolsMap | undefined>();
+
+/**
+ * Returns the mock tool registry active in the current async context, if any.
+ *
+ * @internal
+ */
+export function getActiveMockTools(): MockToolsMap | undefined {
+  return mockToolsStorage.getStore();
+}
 
 /** @internal */
 export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefined {
-  if (!activeMockTools) return undefined;
+  const active = getActiveMockTools();
+  if (!active) return undefined;
 
-  for (const [agentConstructor, mocks] of activeMockTools) {
+  for (const [agentConstructor, mocks] of active) {
     if (agent.constructor === agentConstructor) {
       return mocks[toolName];
     }
@@ -984,7 +1001,9 @@ export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefi
  * the enclosing block exits.
  *
  * Mirrors the Python `mock_tools` context manager, adapted to JS via the explicit
- * resource management `using` syntax (Python uses `with`).
+ * resource management `using` syntax (Python uses `with`). The registry is stored in
+ * an {@link AsyncLocalStorage}, so the binding is isolated to the current async
+ * context — matching the per-async-context isolation Python gets from `ContextVar`.
  *
  * @param agent - The Agent constructor whose tools should be mocked.
  * @param mocks - A record mapping tool name to a mock implementation.
@@ -1006,14 +1025,20 @@ export function withMockTools(
   agent: AgentConstructor,
   mocks: Record<string, MockToolFn>,
 ): Disposable {
-  const previous = activeMockTools;
+  const previous = getActiveMockTools();
   const updated: MockToolsMap = new Map(previous ?? []);
   updated.set(agent, mocks);
-  activeMockTools = updated;
+  // `enterWith` mutates the current async context in place, preserving the synchronous
+  // enter/exit ergonomics of `using` while still isolating the registry per async context.
+  // Caveat: if the `using` scope spans an `await` inside an async helper, disposal restores
+  // the helper's post-await context, so the helper's caller may still observe the mock.
+  mockToolsStorage.enterWith(updated);
 
   return {
     [Symbol.dispose]() {
-      activeMockTools = previous;
+      // Restore the registry that was active on entry; `undefined` is a valid store
+      // value and means "no mocks installed" for the outermost scope.
+      mockToolsStorage.enterWith(previous);
     },
   };
 }