Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/mocktools-async-local-storage.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@livekit/agents': patch
---

Isolate the `withMockTools` test utility per async context. The active mock registry now lives in an `AsyncLocalStorage` instead of a module-level mutable global, so overlapping/concurrent tests no longer clobber each other's mock maps. This matches Python's per-async-context `ContextVar`. The public `using withMockTools(...)` Disposable API is unchanged.
181 changes: 169 additions & 12 deletions agents/src/voice/testing/run_result.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@
//
// SPDX-License-Identifier: Apache-2.0
import { ReadableStream } from 'node:stream/web';
import { describe, expect, it } from 'vitest';
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { z } from 'zod';
import { FunctionCall } from '../../llm/chat_context.js';
import { ToolContext, tool } from '../../llm/tool_context.js';
import { initializeLogger } from '../../log.js';
import { Future } from '../../utils.js';
import { Agent } from '../agent.js';
import { AgentSession } from '../agent_session.js';
import { performToolExecutions } from '../generation.js';
import { SpeechHandle } from '../speech_handle.js';
import { activeMockTools, withMockTools } from './run_result.js';
import { FakeLLM } from './fake_llm.js';
import { getActiveMockTools, getMockTool, withMockTools } from './run_result.js';

// Configure the logger once at module load, before any test below runs: pretty-printing off,
// level 'silent' (presumably to keep session/activity log output out of the test report).
initializeLogger({ pretty: false, level: 'silent' });

class AgentA extends Agent {
constructor() {
Expand All @@ -23,17 +29,44 @@ class AgentB extends Agent {
}
}

// Probes for the activity-loop tests below: which implementation actually executed.
// `realToolRan` is set to true by the real `theTool` implementation registered on ProbeAgent;
// `mockRan` is set to true by the mock installed in a test body. Tests that read these flags
// reset both to false first, because they are shared module-level state.
let realToolRan = false;
let mockRan = false;

/**
 * Agent fixture that registers a single real tool, `theTool`. Running the real
 * implementation flips the module-level `realToolRan` flag and yields 'REAL', which lets
 * the tests tell a real execution apart from a mocked one.
 */
class ProbeAgent extends Agent {
  constructor() {
    // Build the tool up front (no `this` access, so this is legal before `super`).
    const theTool = tool({
      name: 'theTool',
      description: 'A real tool whose execution we can detect.',
      parameters: z.object({}),
      execute: async () => {
        // Record that the real implementation ran, not a mock.
        realToolRan = true;
        return 'REAL';
      },
    });

    super({ instructions: 'You are a probe agent.', tools: [theTool] });
  }
}

/**
 * Creates the FakeLLM used by the activity-loop tests, configured with a single scripted
 * entry: `input: 'order'` paired with one `theTool` tool call that carries empty args.
 */
function makeFakeLLM(): FakeLLM {
  const theToolCall = { name: 'theTool', args: {} };
  const scriptedResponses = [{ input: 'order', toolCalls: [theToolCall] }];
  return new FakeLLM(scriptedResponses);
}

describe('withMockTools', () => {
it('sets the mock registry for the given agent inside the block', () => {
const mock = () => 'mocked';

{
using _mock = withMockTools(AgentA, { tool1: mock });
expect(activeMockTools).toBeDefined();
expect(activeMockTools?.get(AgentA)?.tool1).toBe(mock);
expect(getActiveMockTools()).toBeDefined();
expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mock);
}

expect(activeMockTools).toBeUndefined();
expect(getActiveMockTools()).toBeUndefined();
});

it('merges mocks across nested blocks and isolates per agent', () => {
Expand All @@ -44,12 +77,12 @@ describe('withMockTools', () => {
using _mockA = withMockTools(AgentA, { toolA: mockA });
{
using _mockB = withMockTools(AgentB, { toolB: mockB });
expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA);
expect(activeMockTools?.get(AgentB)?.toolB).toBe(mockB);
expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA);
expect(getActiveMockTools()?.get(AgentB)?.toolB).toBe(mockB);
}

expect(activeMockTools?.get(AgentA)?.toolA).toBe(mockA);
expect(activeMockTools?.get(AgentB)).toBeUndefined();
expect(getActiveMockTools()?.get(AgentA)?.toolA).toBe(mockA);
expect(getActiveMockTools()?.get(AgentB)).toBeUndefined();
}
});

Expand All @@ -61,15 +94,15 @@ describe('withMockTools', () => {
using _outer = withMockTools(AgentA, { tool1: outer });
{
using _inner = withMockTools(AgentA, { tool1: inner });
expect(activeMockTools?.get(AgentA)?.tool1).toBe(inner);
expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(inner);
}
expect(activeMockTools?.get(AgentA)?.tool1).toBe(outer);
expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(outer);
}
});

it('exposes the mock for invocation within the block', async () => {
using _mock = withMockTools(AgentA, { tool1: async () => 42 });
const mock = activeMockTools?.get(AgentA)?.tool1;
const mock = getActiveMockTools()?.get(AgentA)?.tool1;
expect(await mock?.()).toBe(42);
});

Expand Down Expand Up @@ -179,4 +212,128 @@ describe('withMockTools', () => {
expect(output.output[0]?.rawException?.message).toBe('test failure');
expect(output.output[0]?.toolCallOutput?.isError).toBe(true);
});

it('propagates the mock registry to child async tasks started within the block', async () => {
const mock = () => 'child-visible';
using _mock = withMockTools(AgentA, { tool1: mock });

// A child async task started after withMockTools should inherit the registry.
const childSaw = await (async () => {
await Promise.resolve();
return getActiveMockTools()?.get(AgentA)?.tool1;
})();

expect(childSaw).toBe(mock);
expect(getMockTool(new AgentA(), 'tool1')).toBe(mock);
});

it('isolates mock registries across overlapping async contexts', async () => {
const mockA = () => 'a';
const mockB = () => 'b';

const aEntered = new Future<void>();
const bEntered = new Future<void>();

// Scope A installs its mock first, then stays alive while scope B installs a
// conflicting mock for the SAME agent/tool. With a module-level global, B would
// clobber A's registry; with AsyncLocalStorage each scope keeps its own view.
const scopeA = async () => {
// Detach into this scope's own async context before installing the mock.
await Promise.resolve();
using _mockA = withMockTools(AgentA, { tool1: mockA });
aEntered.resolve();
await bEntered.await;
expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockA);
expect(getMockTool(new AgentA(), 'tool1')).toBe(mockA);
};

const scopeB = async () => {
await aEntered.await;
using _mockB = withMockTools(AgentA, { tool1: mockB });
bEntered.resolve();
await Promise.resolve();
expect(getActiveMockTools()?.get(AgentA)?.tool1).toBe(mockB);
expect(getMockTool(new AgentA(), 'tool1')).toBe(mockB);
};

await Promise.all([scopeA(), scopeB()]);

// Both scopes have exited: nothing leaks into the outer context.
expect(getActiveMockTools()).toBeUndefined();
});
Comment thread
toubatbrian marked this conversation as resolved.
});

describe('withMockTools reaches the agent-activity loop', () => {
let session: AgentSession;

beforeAll(async () => {
// Start the activity loop in the setup async context, before any mock exists,
// mirroring the real `session.start()` (e.g. drive-thru) usage pattern.
session = new AgentSession({ llm: makeFakeLLM() });
await session.start({ agent: new ProbeAgent() });
}, 30_000);

afterAll(async () => {
await session?.close();
});

it('routes the activity-loop tool execution to a mock installed in the test body', async () => {
realToolRan = false;
mockRan = false;

using _mock = withMockTools(ProbeAgent, {
theTool: () => {
mockRan = true;
return 'MOCKED';
},
});

const result = session.run({ userInput: 'order' });
await result.wait();

result.expect.containsFunctionCall({ name: 'theTool' });
expect(mockRan).toBe(true);
expect(realToolRan).toBe(false);
// The tool output is JSON-serialized, so the raw string 'MOCKED' surfaces as '"MOCKED"'.
result.expect.containsFunctionCallOutput({ output: '"MOCKED"' });
}, 30_000);
Comment on lines +280 to +299

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Mock visibility in activity loop depends on async context inheritance from session.run()

The test at run_result.test.ts:280-299 expects mocks installed via enterWith in the test body to be visible during tool execution in the agent activity loop. This works because session.run() (agent_session.ts:1155) spawns an async IIFE synchronously from the test body's execution context, inheriting the mock store. The IIFE then calls generateReply → createSpeechTask → Task.from, which starts the task in the same inherited context. In generation.ts:1171, getMockTool() reads the store. However, this relies on the entire chain NOT introducing an async boundary that breaks the inheritance before getMockTool is called. If the internal scheduling changes (e.g., dispatching through an event emitter or setTimeout), the mock visibility could break silently.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


it('executes the real tool when no mock is installed (harness sanity)', async () => {
realToolRan = false;
mockRan = false;

const result = session.run({ userInput: 'order' });
await result.wait();

result.expect.containsFunctionCall({ name: 'theTool' });
expect(realToolRan).toBe(true);
expect(mockRan).toBe(false);
result.expect.containsFunctionCallOutput({ output: '"REAL"' });
}, 30_000);
});

// Pins down a known limitation rather than desired behavior: when `withMockTools` is bound
// with `using` inside an async helper that awaits before returning, the caller still observes
// the mock after the helper has resolved. The final assertions document today's behavior.
describe('withMockTools caller-leak inside an async helper (known limitation)', () => {
it('leaks the mock into the caller continuation after the using block', async () => {
// No mock active at the outer scope.
expect(getActiveMockTools()).toBeUndefined();

// The helper installs the mock synchronously (before its first await), then suspends twice
// so that its `using` binding is disposed only after the helper has resumed from an await.
async function helper(): Promise<void> {
using _mock = withMockTools(ProbeAgent, { theTool: () => 'X' });
// The mock is visible inside the helper.
expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined();
await Promise.resolve();
await new Promise((r) => setTimeout(r, 1));
}

await helper();

// KNOWN LIMITATION: `withMockTools` uses `AsyncLocalStorage.enterWith`, which mutates the
// caller's context synchronously; the `using` dispose runs in the helper's post-await child
// context and restores that context rather than the caller's, so the caller still observes
// the mock after `await helper()`. The canonical synchronous `using` usage in a test body is
// unaffected. Flip these to `toBeUndefined()` if the leak is fixed (e.g. scope via
// `mockToolsStorage.run(...)` instead of `enterWith`).
expect(getActiveMockTools()).toBeDefined();
expect(getActiveMockTools()?.get(ProbeAgent)?.theTool).toBeDefined();
});
});
37 changes: 29 additions & 8 deletions agents/src/voice/testing/run_result.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { AsyncLocalStorage } from 'node:async_hooks';
import { z } from 'zod';
import type { AgentHandoffItem, ChatItem, ChatRole } from '../../llm/chat_context.js';
import { ChatContext } from '../../llm/chat_context.js';
Expand Down Expand Up @@ -961,14 +962,30 @@ export type MockToolFn = (...args: any[]) => any;
/** Map from agent constructor to a record of mocked tools by name. */
export type MockToolsMap = Map<AgentConstructor, Record<string, MockToolFn>>;

/** @internal */
export let activeMockTools: MockToolsMap | undefined;
/**
 * Per-async-context storage for the active mock tool registry. Using
 * {@link AsyncLocalStorage} (rather than a module-level mutable global) isolates the
 * registry to the async context that installed it, so overlapping/concurrent tests
 * cannot clobber each other's mock maps. This mirrors Python's per-async-context
 * `ContextVar` (`_MockToolsContextVar`).
 *
 * The store type explicitly includes `undefined`: disposing the outermost
 * `withMockTools` scope restores the "no registry" state by re-entering the storage with
 * the previous value, which is `undefined` when no registry was active before. Declaring
 * that in the type keeps the `enterWith` call within the storage's typed contract.
 */
const mockToolsStorage = new AsyncLocalStorage<MockToolsMap | undefined>();

/**
 * Looks up the mock tool registry bound to the current async context.
 *
 * @returns The registry currently held by the storage, or `undefined` when none is set.
 *
 * @internal
 */
export function getActiveMockTools(): MockToolsMap | undefined {
  const registry = mockToolsStorage.getStore();
  return registry;
}

/** @internal */
export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefined {
if (!activeMockTools) return undefined;
const active = getActiveMockTools();
if (!active) return undefined;

for (const [agentConstructor, mocks] of activeMockTools) {
for (const [agentConstructor, mocks] of active) {
if (agent.constructor === agentConstructor) {
return mocks[toolName];
}
Expand All @@ -984,7 +1001,9 @@ export function getMockTool(agent: Agent, toolName: string): MockToolFn | undefi
* the enclosing block exits.
*
* Mirrors the Python `mock_tools` context manager, adapted to JS via the explicit
* resource management `using` syntax (Python uses `with`).
* resource management `using` syntax (Python uses `with`). The registry is stored in
* an {@link AsyncLocalStorage}, so the binding is isolated to the current async
* context — matching the per-async-context isolation Python gets from `ContextVar`.
*
* @param agent - The Agent constructor whose tools should be mocked.
* @param mocks - A record mapping tool name to a mock implementation.
Expand All @@ -1006,14 +1025,16 @@ export function withMockTools(
agent: AgentConstructor,
mocks: Record<string, MockToolFn>,
): Disposable {
const previous = activeMockTools;
const previous = getActiveMockTools();
const updated: MockToolsMap = new Map(previous ?? []);
updated.set(agent, mocks);
activeMockTools = updated;
// `enterWith` mutates the current async context in place, preserving the synchronous
// enter/exit ergonomics of `using` while still isolating the registry per async context.
mockToolsStorage.enterWith(updated);
Comment thread
toubatbrian marked this conversation as resolved.

return {
[Symbol.dispose]() {
activeMockTools = previous;
mockToolsStorage.enterWith(previous as MockToolsMap);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Unsafe cast of undefined to MockToolsMap in dispose can pass undefined to enterWith

When withMockTools is called without a pre-existing store, previous is undefined. In the Symbol.dispose handler at line 1037, mockToolsStorage.enterWith(previous as MockToolsMap) passes undefined (cast to MockToolsMap) to enterWith. While AsyncLocalStorage<T>.enterWith() expects a value of type T (here MockToolsMap), it receives undefined. At runtime this happens to work because getStore() returns undefined in both cases, but it violates the enterWith API contract and could break if Node.js ever adds runtime validation. The fix should use mockToolsStorage.disable() when previous is undefined, or change the storage type to AsyncLocalStorage<MockToolsMap | undefined>.

Suggested change
mockToolsStorage.enterWith(previous as MockToolsMap);
if (previous) {
mockToolsStorage.enterWith(previous);
} else {
mockToolsStorage.disable();
}
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

},
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
};
Comment thread
toubatbrian marked this conversation as resolved.
}
Expand Down