-
Notifications
You must be signed in to change notification settings - Fork 311
fix(testing): isolate withMockTools per async context (AsyncLocalStorage) #1865
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 1.5.0
Are you sure you want to change the base?
Changes from all commits
f6fa6b8
d40eb2d
e8a93ec
c6785e5
225520a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| '@livekit/agents': patch | ||
| --- | ||
|
|
||
| Isolate the `withMockTools` test utility per async context. The active mock registry now lives in an `AsyncLocalStorage` instead of a module-level mutable global, so overlapping/concurrent tests no longer clobber each other's mock maps. This matches Python's per-async-context `ContextVar`. The public `using withMockTools(...)` Disposable API is unchanged. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,14 +2,20 @@ | |
| // | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
| import { ReadableStream } from 'node:stream/web'; | ||
| import { describe, expect, it } from 'vitest'; | ||
| import { afterAll, beforeAll, describe, expect, it } from 'vitest'; | ||
| import { z } from 'zod'; | ||
| import { FunctionCall } from '../../llm/chat_context.js'; | ||
| import { ToolContext, tool } from '../../llm/tool_context.js'; | ||
| import { initializeLogger } from '../../log.js'; | ||
| import { Future } from '../../utils.js'; | ||
| import { Agent } from '../agent.js'; | ||
| import { AgentSession } from '../agent_session.js'; | ||
| import { performToolExecutions } from '../generation.js'; | ||
| import { SpeechHandle } from '../speech_handle.js'; | ||
| import { activeMockTools, withMockTools } from './run_result.js'; | ||
| import { FakeLLM } from './fake_llm.js'; | ||
| import { getActiveMockTools, getMockTool, withMockTools } from './run_result.js'; | ||
|
|
||
| initializeLogger({ pretty: false, level: 'silent' }); | ||
|
|
||
| class AgentA extends Agent { | ||
| constructor() { | ||
|
|
@@ -23,17 +29,44 @@ class AgentB extends Agent { | |
| } | ||
| } | ||
|
|
||
| // Probes for the activity-loop tests below: which implementation actually executed. | ||
| let realToolRan = false; | ||
| let mockRan = false; | ||
|
|
||
| class ProbeAgent extends Agent { | ||
| constructor() { | ||
| super({ | ||
| instructions: 'You are a probe agent.', | ||
| tools: [ | ||
| tool({ | ||
| name: 'theTool', | ||
| description: 'A real tool whose execution we can detect.', | ||
| parameters: z.object({}), | ||
| execute: async () => { | ||
| realToolRan = true; | ||
| return 'REAL'; | ||
| }, | ||
| }), | ||
| ], | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| function makeFakeLLM(): FakeLLM { | ||
| return new FakeLLM([{ input: 'order', toolCalls: [{ name: 'theTool', args: {} }] }]); | ||
| } | ||
|
|
||
| describe('withMockTools', () => { | ||
| it('sets the mock registry for the given agent inside the block', () => { | ||
| const mock = () => 'mocked'; | ||
|
|
||
| { | ||
| using _mock = withMockTools(AgentA, { tool1: mock }); | ||
| expect(activeMockTools).toBeDefined(); | ||
| expect(activeMockTools?.get(AgentA)?.tool1).toBe(mock); | ||
| expect(getActiveMockTools()).toBeDefined(); | ||
| expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mock); | ||
| } | ||
|
|
||
| expect(activeMockTools).toBeUndefined(); | ||
| expect(getActiveMockTools()).toBeUndefined(); | ||
| }); | ||
|
|
||
| it('merges mocks across nested blocks and isolates per agent', () => { | ||
|
|
@@ -44,12 +77,12 @@ describe('withMockTools', () => { | |
| using _mockA = withMockTools(AgentA, { toolA: mockA }); | ||
| { | ||
| using _mockB = withMockTools(AgentB, { toolB: mockB }); | ||
| expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA); | ||
| expect(activeMockTools?.get(AgentB)?.toolB).toBe(mockB); | ||
| expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA); | ||
| expect(getActiveMockTools()?.get(AgentB)?.toolB).toBe(mockB); | ||
| } | ||
|
|
||
| expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA); | ||
| expect(activeMockTools?.get(AgentB)).toBeUndefined(); | ||
| expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA); | ||
| expect(getActiveMockTools()?.get(AgentB)).toBeUndefined(); | ||
| } | ||
| }); | ||
|
|
||
|
|
@@ -61,15 +94,15 @@ describe('withMockTools', () => { | |
| using _outer = withMockTools(AgentA, { tool1: outer }); | ||
| { | ||
| using _inner = withMockTools(AgentA, { tool1: inner }); | ||
| expect(activeMockTools?.get(AgentA)?.tool1).toBe(inner); | ||
| expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(inner); | ||
| } | ||
| expect(activeMockTools?.get(AgentA)?.tool1).toBe(outer); | ||
| expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(outer); | ||
| } | ||
| }); | ||
|
|
||
| it('exposes the mock for invocation within the block', async () => { | ||
| using _mock = withMockTools(AgentA, { tool1: async () => 42 }); | ||
| const mock = activeMockTools?.get(AgentA)?.tool1; | ||
| const mock = getActiveMockTools()?.get(AgentA)?.tool1; | ||
| expect(await mock?.()).toBe(42); | ||
| }); | ||
|
|
||
|
|
@@ -179,4 +212,128 @@ describe('withMockTools', () => { | |
| expect(output.output[0]?.rawException?.message).toBe('test failure'); | ||
| expect(output.output[0]?.toolCallOutput?.isError).toBe(true); | ||
| }); | ||
|
|
||
| it('propagates the mock registry to child async tasks started within the block', async () => { | ||
| const mock = () => 'child-visible'; | ||
| using _mock = withMockTools(AgentA, { tool1: mock }); | ||
|
|
||
| // A child async task started after withMockTools should inherit the registry. | ||
| const childSaw = await (async () => { | ||
| await Promise.resolve(); | ||
| return getActiveMockTools()?.get(AgentA)?.tool1; | ||
| })(); | ||
|
|
||
| expect(childSaw).toBe(mock); | ||
| expect(getMockTool(new AgentA(), 'tool1')).toBe(mock); | ||
| }); | ||
|
|
||
| it('isolates mock registries across overlapping async contexts', async () => { | ||
| const mockA = () => 'a'; | ||
| const mockB = () => 'b'; | ||
|
|
||
| const aEntered = new Future<void>(); | ||
| const bEntered = new Future<void>(); | ||
|
|
||
| // Scope A installs its mock first, then stays alive while scope B installs a | ||
| // conflicting mock for the SAME agent/tool. With a module-level global, B would | ||
| // clobber A's registry; with AsyncLocalStorage each scope keeps its own view. | ||
| const scopeA = async () => { | ||
| // Detach into this scope's own async context before installing the mock. | ||
| await Promise.resolve(); | ||
| using _mockA = withMockTools(AgentA, { tool1: mockA }); | ||
| aEntered.resolve(); | ||
| await bEntered.await; | ||
| expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockA); | ||
| expect(getMockTool(new AgentA(), 'tool1')).toBe(mockA); | ||
| }; | ||
|
|
||
| const scopeB = async () => { | ||
| await aEntered.await; | ||
| using _mockB = withMockTools(AgentA, { tool1: mockB }); | ||
| bEntered.resolve(); | ||
| await Promise.resolve(); | ||
| expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockB); | ||
| expect(getMockTool(new AgentA(), 'tool1')).toBe(mockB); | ||
| }; | ||
|
|
||
| await Promise.all([scopeA(), scopeB()]); | ||
|
|
||
| // Both scopes have exited: nothing leaks into the outer context. | ||
| expect(getActiveMockTools()).toBeUndefined(); | ||
| }); | ||
| }); | ||
|
|
||
| describe('withMockTools reaches the agent-activity loop', () => { | ||
| let session: AgentSession; | ||
|
|
||
| beforeAll(async () => { | ||
| // Start the activity loop in the setup async context, before any mock exists, | ||
| // mirroring the real `session.start()` (e.g. drive-thru) usage pattern. | ||
| session = new AgentSession({ llm: makeFakeLLM() }); | ||
| await session.start({ agent: new ProbeAgent() }); | ||
| }, 30_000); | ||
|
|
||
| afterAll(async () => { | ||
| await session?.close(); | ||
| }); | ||
|
|
||
| it('routes the activity-loop tool execution to a mock installed in the test body', async () => { | ||
| realToolRan = false; | ||
| mockRan = false; | ||
|
|
||
| using _mock = withMockTools(ProbeAgent, { | ||
| theTool: () => { | ||
| mockRan = true; | ||
| return 'MOCKED'; | ||
| }, | ||
| }); | ||
|
|
||
| const result = session.run({ userInput: 'order' }); | ||
| await result.wait(); | ||
|
|
||
| result.expect.containsFunctionCall({ name: 'theTool' }); | ||
| expect(mockRan).toBe(true); | ||
| expect(realToolRan).toBe(false); | ||
| // The tool output is JSON-serialized, so the raw string 'MOCKED' surfaces as '"MOCKED"'. | ||
| result.expect.containsFunctionCallOutput({ output: '"MOCKED"' }); | ||
| }, 30_000); | ||
|
Comment on lines
+280
to
+299
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🚩 Mock visibility in activity loop depends on async context inheritance from session.run() The test at Was this helpful? React with 👍 or 👎 to provide feedback. |
||
|
|
||
| it('executes the real tool when no mock is installed (harness sanity)', async () => { | ||
| realToolRan = false; | ||
| mockRan = false; | ||
|
|
||
| const result = session.run({ userInput: 'order' }); | ||
| await result.wait(); | ||
|
|
||
| result.expect.containsFunctionCall({ name: 'theTool' }); | ||
| expect(realToolRan).toBe(true); | ||
| expect(mockRan).toBe(false); | ||
| result.expect.containsFunctionCallOutput({ output: '"REAL"' }); | ||
| }, 30_000); | ||
| }); | ||
|
|
||
| describe('withMockTools caller-leak inside an async helper (known limitation)', () => { | ||
| it('leaks the mock into the caller continuation after the using block', async () => { | ||
| // No mock active at the outer scope. | ||
| expect(getActiveMockTools()).toBeUndefined(); | ||
|
|
||
| async function helper(): Promise<void> { | ||
| using _mock = withMockTools(ProbeAgent, { theTool: () => 'X' }); | ||
| // The mock is visible inside the helper. | ||
| expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined(); | ||
| await Promise.resolve(); | ||
| await new Promise((r) => setTimeout(r, 1)); | ||
| } | ||
|
|
||
| await helper(); | ||
|
|
||
| // KNOWN LIMITATION: `withMockTools` uses `AsyncLocalStorage.enterWith`, which mutates the | ||
| // caller's context synchronously; the `using` dispose runs in the helper's post-await child | ||
| // context and restores that context rather than the caller's, so the caller still observes | ||
| // the mock after `await helper()`. The canonical synchronous `using` usage in a test body is | ||
| // unaffected. Flip these to `toBeUndefined()` if the leak is fixed (e.g. scope via | ||
| // `mockToolsStorage.run(...)` instead of `enterWith`). | ||
| expect(getActiveMockTools()).toBeDefined(); | ||
| expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined(); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,6 +1,7 @@ | ||||||||||||||||
| // SPDX-FileCopyrightText: 2025 LiveKit, Inc. | ||||||||||||||||
| // | ||||||||||||||||
| // SPDX-License-Identifier: Apache-2.0 | ||||||||||||||||
| import { AsyncLocalStorage } from 'node:async_hooks'; | ||||||||||||||||
| import { z } from 'zod'; | ||||||||||||||||
| import type { AgentHandoffItem, ChatItem, ChatRole } from '../../llm/chat_context.js'; | ||||||||||||||||
| import { ChatContext } from '../../llm/chat_context.js'; | ||||||||||||||||
|
|
@@ -961,14 +962,30 @@ export type MockToolFn = (...args: any[]) => any; | |||||||||||||||
| /** Map from agent constructor to a record of mocked tools by name. */ | ||||||||||||||||
| export type MockToolsMap = Map<AgentConstructor, Record<string, MockToolFn>>; | ||||||||||||||||
|
|
||||||||||||||||
| /** @internal */ | ||||||||||||||||
| export let activeMockTools: MockToolsMap | undefined; | ||||||||||||||||
| /** | ||||||||||||||||
| * Per-async-context storage for the active mock tool registry. Using | ||||||||||||||||
| * {@link AsyncLocalStorage} (rather than a module-level mutable global) isolates the | ||||||||||||||||
| * registry to the async context that installed it, so overlapping/concurrent tests | ||||||||||||||||
| * cannot clobber each other's mock maps. This mirrors Python's per-async-context | ||||||||||||||||
| * `ContextVar` (`_MockToolsContextVar`). | ||||||||||||||||
| */ | ||||||||||||||||
| const mockToolsStorage = new AsyncLocalStorage<MockToolsMap>(); | ||||||||||||||||
|
|
||||||||||||||||
| /** | ||||||||||||||||
| * Returns the mock tool registry active in the current async context, if any. | ||||||||||||||||
| * | ||||||||||||||||
| * @internal | ||||||||||||||||
| */ | ||||||||||||||||
| export function getActiveMockTools(): MockToolsMap | undefined { | ||||||||||||||||
| return mockToolsStorage.getStore(); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| /** @internal */ | ||||||||||||||||
| export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefined { | ||||||||||||||||
| if (!activeMockTools) return undefined; | ||||||||||||||||
| const active = getActiveMockTools(); | ||||||||||||||||
| if (!active) return undefined; | ||||||||||||||||
|
|
||||||||||||||||
| for (const [agentConstructor, mocks] of activeMockTools) { | ||||||||||||||||
| for (const [agentConstructor, mocks] of active) { | ||||||||||||||||
| if (agent.constructor === agentConstructor) { | ||||||||||||||||
| return mocks[toolName]; | ||||||||||||||||
| } | ||||||||||||||||
|
|
@@ -984,7 +1001,9 @@ export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefi | |||||||||||||||
| * the enclosing block exits. | ||||||||||||||||
| * | ||||||||||||||||
| * Mirrors the Python `mock_tools` context manager, adapted to JS via the explicit | ||||||||||||||||
| * resource management `using` syntax (Python uses `with`). | ||||||||||||||||
| * resource management `using` syntax (Python uses `with`). The registry is stored in | ||||||||||||||||
| * an {@link AsyncLocalStorage}, so the binding is isolated to the current async | ||||||||||||||||
| * context — matching the per-async-context isolation Python gets from `ContextVar`. | ||||||||||||||||
| * | ||||||||||||||||
| * @param agent - The Agent constructor whose tools should be mocked. | ||||||||||||||||
| * @param mocks - A record mapping tool name to a mock implementation. | ||||||||||||||||
|
|
@@ -1006,14 +1025,16 @@ export function withMockTools( | |||||||||||||||
| agent: AgentConstructor, | ||||||||||||||||
| mocks: Record<string, MockToolFn>, | ||||||||||||||||
| ): Disposable { | ||||||||||||||||
| const previous = activeMockTools; | ||||||||||||||||
| const previous = getActiveMockTools(); | ||||||||||||||||
| const updated: MockToolsMap = new Map(previous ?? []); | ||||||||||||||||
| updated.set(agent, mocks); | ||||||||||||||||
| activeMockTools = updated; | ||||||||||||||||
| // `enterWith` mutates the current async context in place, preserving the synchronous | ||||||||||||||||
| // enter/exit ergonomics of `using` while still isolating the registry per async context. | ||||||||||||||||
| mockToolsStorage.enterWith(updated); | ||||||||||||||||
|
toubatbrian marked this conversation as resolved.
|
||||||||||||||||
|
|
||||||||||||||||
| return { | ||||||||||||||||
| [Symbol.dispose]() { | ||||||||||||||||
| activeMockTools = previous; | ||||||||||||||||
| mockToolsStorage.enterWith(previous as MockToolsMap); | ||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Unsafe cast of When
Suggested change
Was this helpful? React with 👍 or 👎 to provide feedback. |
||||||||||||||||
| }, | ||||||||||||||||
|
devin-ai-integration[bot] marked this conversation as resolved.
|
||||||||||||||||
| }; | ||||||||||||||||
|
toubatbrian marked this conversation as resolved.
|
||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.