diff --git a/README.md b/README.md
index ef9fbea..a36e491 100644
--- a/README.md
+++ b/README.md
@@ -1,219 +1,312 @@
 # @metamask/client-mcp-core
 
-MCP (Model Context Protocol) server for MetaMask Extension visual testing with LLM agents.
+HTTP daemon and CLI architecture for agent-driven browser extension testing with Playwright.
 
 ## Overview
 
-This package provides the core MCP server infrastructure for enabling LLM agents to interact with the MetaMask browser extension through Playwright.
+This package provides the core infrastructure for enabling LLM agents to interact with browser extensions through Playwright. It ships a persistent HTTP daemon that manages browser lifecycle and a unified `mm` CLI that agents (and developers) use to drive sessions.
+
+The design is **consumer-agnostic**: the core handles protocol, tooling, and knowledge — consumers provide extension-specific logic by implementing the `ISessionManager` interface and injecting capabilities.
+
+```text
+                         ┌─────────────────────────────────┐
+                         │         LLM Agent / Dev         │
+                         └────────────┬────────────────────┘
+                                      │  mm CLI commands
+                                      ▼
+                         ┌─────────────────────────────────┐
+                         │     mm CLI  (src/cli/mm.ts)     │
+                         │  discover / auto-start daemon   │
+                         └────────────┬────────────────────┘
+                                      │  HTTP (127.0.0.1)
+                                      ▼
+  ┌───────────────────────────────────────────────────────────────────┐
+  │                    HTTP Daemon (createServer)                     │
+  │                                                                   │
+  │  ┌──────────┐  ┌──────────────┐  ┌────────────┐  ┌────────────┐ │
+  │  │  Routes   │  │ RequestQueue │  │   Tool     │  │ Knowledge  │ │
+  │  │ /health   │  │ (async mutex)│  │  Registry  │  │   Store    │ │
+  │  │ /status   │  │              │  │  25+ tools │  │            │ │
+  │  │ /launch   │  └──────────────┘  └─────┬──────┘  └────────────┘ │
+  │  │ /cleanup  │                          │                         │
+  │  │ /tool/:n  │                          ▼                         │
+  │  └──────────┘               ┌──────────────────┐                 │
+  │                             │   ToolContext     │                 │
+  │                             │  sessionManager   │                 │
+  │                             │  page / refMap    │                 │
+  │                             │  workflowContext  │                 │
+  │                             │  knowledgeStore   │                 │
+  │                             └────────┬─────────┘                 │
+  └──────────────────────────────────────┼───────────────────────────┘
+                                         │
+                   ┌─────────────────────┼─────────────────────┐
+                   │          ISessionManager                   │
+                   │       (consumer implementation)            │
+                   │                                            │
+                   │  Session lifecycle   Page management       │
+                   │  Extension state     A11y reference map    │
+                   │  Navigation          Screenshots           │
+                   │  Capabilities (opt)  Environment config    │
+                   └─────────────────────┬─────────────────────┘
+                                         │
+                   ┌─────────────────────┼─────────────────────┐
+                   │          WorkflowContext                   │
+                   │                                            │
+                   │  build?            fixture?                │
+                   │  chain?            contractSeeding?        │
+                   │  stateSnapshot?    mockServer?             │
+                   │  config: EnvironmentConfig                 │
+                   └─────────────────────┬─────────────────────┘
+                                         │
+                                         ▼
+                   ┌───────────────────────────────────────────┐
+                   │        Playwright  →  Chrome Browser      │
+                   │            Browser Extension               │
+                   └───────────────────────────────────────────┘
+```
 
 ## Requirements
 
-- **Node.js ^20 || ^22 || >=24** (required)
-- **TypeScript >=5.0** (for consumer type definitions)
-- Playwright `^1.49.0` (peer dependency)
+- **Node.js** `^20 || ^22 || >=24`
+- **TypeScript** `>=5.0` (for consumer type definitions)
+- **Playwright** `^1.49.0` (peer dependency)
 
 ## Installation
 
+As a project dependency (the CLI is available via `npx mm` or `yarn mm`):
+
 ```bash
 yarn add @metamask/client-mcp-core
 ```
 
-## Architecture
+As a global CLI (puts `mm` directly on your PATH — recommended for LLM agents):
+
+```bash
+npm install -g @metamask/client-mcp-core
+```
+
+The global CLI can target any project via `--project` or `MM_PROJECT` (see [Project Targeting](#project-targeting)).
+
+## Getting Started
 
-### High-Level Overview
+Consuming this package requires two things: a **daemon entry point** and a **configuration file**. Once both are in place, you drive sessions through the `mm` CLI.
+
+### 1. Create a daemon entry point
+
+```typescript
+// daemon.ts
+import { createServer, allocatePort } from '@metamask/client-mcp-core';
+import { MySessionManager } from './my-session-manager';
+import { createMyContext } from './my-context';
+
+const server = createServer({
+  sessionManager: new MySessionManager(),
+  contextFactory: async () => {
+    // Consumer owns port allocation — use the allocatePort() helper
+    // or any other strategy that fits your infrastructure.
+    const anvil = await allocatePort();
+    const fixture = await allocatePort();
+    await Promise.all([
+      new Promise<void>((r) => anvil.server.close(() => r())),
+      new Promise<void>((r) => fixture.server.close(() => r())),
+    ]);
+
+    return createMyContext({
+      ports: { anvil: anvil.port, fixture: fixture.port },
+    });
+  },
+});
 
+server.start().then((state) => {
+  console.error(`Daemon started on port ${state.port}`);
+});
 ```
-┌─────────────────────────────────────────────────────────────────────────┐
-│                           LLM Agent                                     │
-│                    (Claude, GPT, etc.)                                  │
-└─────────────────────────────────────────────────────────────────────────┘
-                                  │
-                                  │ MCP Protocol (stdio)
-                                  ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                    @metamask/client-mcp-core                     │
-│                                                                         │
-│  Core MCP Server + Generic Tools                                        │
-│  - Session management                                                   │
-│  - Element interaction (click, type, wait)                              │
-│  - Discovery (testIds, accessibility tree)                              │
-│  - Screenshots                                                          │
-│  - Knowledge store (cross-session learning)                             │
-└─────────────────────────────────────────────────────────────────────────┘
-                                  │
-                                  │ Capability Injection
-                                  ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                   MetaMask Extension Provider                           │
-│                                                                         │
-│  - Build capability (yarn build:test)                                   │
-│  - Fixture/state management                                             │
-│  - Anvil blockchain integration                                         │
-│  - Contract seeding                                                     │
-└─────────────────────────────────────────────────────────────────────────┘
-                                  │
-                                  │ Playwright
-                                  ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│                    Headed Chrome Browser                                │
-│                    + MetaMask Extension                                 │
-└─────────────────────────────────────────────────────────────────────────┘
+
+### 2. Create a configuration file
+
+Create `mm-client-cli.config.ts` in your project root:
+
+```typescript
+export default {
+  daemon: 'path/to/daemon.ts',
+  runtime: 'tsx',
+};
 ```
 
-### Detailed Architecture
+The `daemon` field tells the CLI where the daemon entry point lives. The `runtime` field specifies the TypeScript runner (defaults to `tsx`).
 
-The package follows a **capability-based dependency injection** pattern that separates concerns between:
+The CLI uses [cosmiconfig](https://github.com/cosmiconfig/cosmiconfig) for config discovery, so you can also use `mm-client-cli.config.js`, `.mm-client-clirc.json`, or other supported formats.
 
-1. **Core MCP Server** - Protocol handling, tool routing, and generic browser interactions
-2. **Session Manager Interface** - Abstract contract for extension-specific session management
-3. **Capabilities** - Optional features injected by consumer implementations
+### 3. Use the CLI
 
+```bash
+mm launch              # auto-starts daemon, opens browser session
+mm describe-screen     # get element references
+mm click e3            # interact using a11y refs
+mm cleanup --shutdown  # stop browser and daemon
 ```
-┌─────────────────────────────────────────────────────────────────────────┐
-│                         createMcpServer()                               │
-│                                                                         │
-│  ┌─────────────────────┐    ┌─────────────────────────────────────┐    │
-│  │   Tool Definitions  │───▶│         Tool Handlers               │    │
-│  │   (mm_click, etc.)  │    │   (registry.ts + individual tools)  │    │
-│  └─────────────────────┘    └──────────────┬──────────────────────┘    │
-│                                            │                            │
-│                                            ▼                            │
-│  ┌─────────────────────────────────────────────────────────────────┐   │
-│  │                    ISessionManager Interface                     │   │
-│  │  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌───────────┐  │   │
-│  │  │ Page Mgmt   │ │ Navigation  │ │ Screenshots │ │ A11y Refs │  │   │
-│  │  └─────────────┘ └─────────────┘ └─────────────┘ └───────────┘  │   │
-│  │  ┌──────────────────────────────────────────────────────────┐   │   │
-│  │  │              Optional Capabilities                        │   │   │
-│  │  │  • BuildCapability      • FixtureCapability              │   │   │
-│  │  │  • ChainCapability      • ContractSeedingCapability      │   │   │
-│  │  │  • StateSnapshotCapability                               │   │   │
-│  │  └──────────────────────────────────────────────────────────┘   │   │
-│  └─────────────────────────────────────────────────────────────────┘   │
-└─────────────────────────────────────────────────────────────────────────┘
-                                  │
-                                  │ setSessionManager()
-                                  ▼
-┌─────────────────────────────────────────────────────────────────────────┐
-│              Consumer Implementation (e.g., MetaMask)                   │
-│                                                                         │
-│  class MetaMaskSessionManager implements ISessionManager {              │
-│    // Browser context, page tracking, extension-specific logic          │
-│    // Capability implementations for build, fixtures, chain, etc.       │
-│  }                                                                      │
-└─────────────────────────────────────────────────────────────────────────┘
+
+If you are running the CLI from outside the project directory (e.g., a parent folder containing multiple repos), point it at the project with `--project` or `MM_PROJECT`:
+
+```bash
+mm --project ./my-extension launch
+mm --project ./my-extension describe-screen
+
+# Or set once via environment variable
+export MM_PROJECT=/path/to/my-extension
+mm launch
 ```
 
-### Core Components
+## Core Concepts
 
-| Component             | Description                                                    |
-| --------------------- | -------------------------------------------------------------- |
-| `createMcpServer()`   | Factory function that creates the MCP server instance          |
-| `ISessionManager`     | Interface that consumers must implement for session management |
-| `setSessionManager()` | Injects the consumer's session manager into the core           |
-| `WorkflowContext`     | Container for browser capability and optional capabilities     |
-| `EnvironmentConfig`   | Configuration discriminated by `'e2e'` or `'prod'` mode        |
+### Daemon Model
 
-### Capability System
+The architecture relies on a persistent background HTTP daemon that manages the browser lifecycle:
 
-The package defines several capabilities that consumers can provide.
+- **Worktree Isolation**: Each git worktree runs its own daemon instance, tracked via a `.mm-server` state file in the project root. This allows parallel work across branches.
+- **Port Allocation**: The daemon allocates its own HTTP port automatically. Sub-service ports (Anvil, fixture server, etc.) are allocated by the consumer's `contextFactory` and reported back via `allocatedPorts`. The `allocatePort()` helper is exported for convenience.
+- **Auto-Start**: The daemon starts automatically on `mm launch` if not already running, and shuts down after a period of inactivity (default: 30 minutes).
+- **Request Serialization**: A `RequestQueue` (async mutex) ensures only one tool executes at a time, preventing race conditions on shared browser state.
+- **Health Checks**: Each daemon generates a unique nonce on startup. The CLI verifies daemon identity via `GET /health` to detect stale `.mm-server` files from crashed processes.
+- **Logs**: Daemon activity is logged to `.mm-daemon.log`.
 
-#### BuildCapability (Optional)
+### Session Manager Interface
 
-Enables the `mm_build` tool. Implement this to allow LLM agents to build the extension from source.
+`ISessionManager` is the core abstraction boundary between this package and consumer implementations. Consumers must implement this interface to provide extension-specific browser control.
 
 ```typescript
-type BuildCapability = {
-  // Build the extension (e.g., yarn build:test)
-  build(options?: BuildOptions): Promise<BuildResult>;
+type ISessionManager = {
+  // Session Lifecycle
+  hasActiveSession(): boolean;
+  getSessionId(): string | undefined;
+  launch(input: SessionLaunchInput): Promise<SessionLaunchResult>;
+  cleanup(): Promise<boolean>;
 
-  // Get path to built extension directory
-  getExtensionPath(): string;
+  // Page Management
+  getPage(): Page;
+  setActivePage(page: Page): void;
+  getTrackedPages(): TrackedPage[];
+  classifyPageRole(page: Page, extensionId?: string): TabRole;
+  getContext(): BrowserContext;
 
-  // Check if extension is already built
-  isBuilt(): Promise<boolean>;
-};
+  // Extension State
+  getExtensionState(): Promise<ExtensionState>;
+
+  // A11y Reference Map
+  setRefMap(map: Map<string, string>): void;
+  getRefMap(): Map<string, string>;
+  resolveA11yRef(ref: string): string | undefined;
 
-type BuildOptions = {
-  buildType?: string; // e.g., "build:test"
-  force?: boolean; // Force rebuild even if exists
+  // Navigation
+  navigateToHome(): Promise<void>;
+  navigateToSettings(): Promise<void>;
+  navigateToUrl(url: string): Promise<Page>;
+  navigateToNotification(): Promise<Page>;
+  waitForNotificationPage(timeoutMs: number): Promise<Page>;
+
+  // Screenshots
+  screenshot(options: SessionScreenshotOptions): Promise<ScreenshotResult>;
+
+  // Capabilities (optional, extension-specific)
+  getBuildCapability(): BuildCapability | undefined;
+  getFixtureCapability(): FixtureCapability | undefined;
+  getChainCapability(): ChainCapability | undefined;
+  getContractSeedingCapability(): ContractSeedingCapability | undefined;
+  getStateSnapshotCapability(): StateSnapshotCapability | undefined;
+
+  // Environment
+  getEnvironmentMode(): EnvironmentMode;
+  setContext(context: 'e2e' | 'prod', options?: Record<string, unknown>): void;
+  getContextInfo(): { currentContext: 'e2e' | 'prod'; /* other fields omitted */ };
 };
+```
 
-type BuildResult = {
-  success: boolean;
-  extensionPath: string;
-  durationMs: number;
-  error?: string;
+### Workflow Context & Capabilities
+
+The `WorkflowContext` aggregates optional capabilities that consumers inject through the `contextFactory`. The tool system checks for capabilities at runtime — tools that depend on missing capabilities return clear errors.
+
+```typescript
+type WorkflowContext = {
+  build?: BuildCapability;
+  fixture?: FixtureCapability;
+  chain?: ChainCapability;
+  contractSeeding?: ContractSeedingCapability;
+  stateSnapshot?: StateSnapshotCapability;
+  mockServer?: MockServerCapability;
+  config: EnvironmentConfig;
+  allocatedPorts?: PortMap; // reported to /status and persisted in .mm-server
 };
 ```
 
----
+Capabilities are created by the consumer's `contextFactory` function. The factory is responsible for allocating any sub-service ports it needs (the `allocatePort()` helper is exported for convenience):
 
-#### FixtureCapability (Optional)
+```typescript
+async function createMyContext(options: {
+  ports: { anvil: number; fixture: number };
+}): Promise<WorkflowContext> {
+  return {
+    build: new MyBuildCapability(),
+    fixture: new MyFixtureCapability(options.ports.fixture),
+    chain: new MyChainCapability(options.ports.anvil),
+    allocatedPorts: {
+      anvil: options.ports.anvil,
+      fixture: options.ports.fixture,
+    },
+    config: {
+      environment: 'e2e',
+      extensionName: 'MyExtension',
+      defaultPassword: 'test-password',
+      artifactsDir: './test-artifacts',
+      defaultChainId: 1337,
+      ports: {
+        anvil: options.ports.anvil,
+        fixtureServer: options.ports.fixture,
+      },
+    },
+  };
+}
+```
 
-Enables wallet state management through fixtures. Essential for E2E testing where you need reproducible wallet states.
+### Capability Reference
+
+| Capability                  | Purpose                                 | Enables Tools                                                               |
+| --------------------------- | --------------------------------------- | --------------------------------------------------------------------------- |
+| `BuildCapability`           | Build extension from source             | `build`                                                                     |
+| `FixtureCapability`         | Manage wallet state via fixtures        | `launch` (state modes)                                                      |
+| `ChainCapability`           | Local blockchain (Anvil) lifecycle      | Chain interactions                                                          |
+| `ContractSeedingCapability` | Deploy smart contracts to Anvil         | `seed_contract`, `seed_contracts`, `get_contract_address`, `list_contracts` |
+| `StateSnapshotCapability`   | Read extension state and detect screens | `get_state`                                                                 |
+| `MockServerCapability`      | HTTP mock server for API stubbing       | Mock-dependent tests                                                        |
+
+Each capability interface is defined in `src/capabilities/types.ts`:
 
 ```typescript
+type BuildCapability = {
+  build(options?: BuildOptions): Promise<BuildResult>;
+  getExtensionPath(): string;
+  isBuilt(): Promise<boolean>;
+};
+
 type FixtureCapability = {
-  // Start fixture server with given wallet state
   start(state: WalletState): Promise<void>;
-
-  // Stop fixture server
   stop(): Promise<void>;
-
-  // Get default pre-onboarded wallet state (25 ETH, unlocked)
   getDefaultState(): WalletState;
-
-  // Get fresh onboarding state (no wallet configured)
   getOnboardingState(): WalletState;
-
-  // Resolve a named preset to fixture data
   resolvePreset(presetName: string): WalletState;
 };
 
-type WalletState = {
-  data: Record<string, unknown>; // Extension storage state
-  meta?: { version: number };
-};
-```
-
----
-
-#### ChainCapability (Optional)
-
-Manages local blockchain (Anvil) for E2E testing. Required for contract interactions.
-
-```typescript
 type ChainCapability = {
-  // Start the local Anvil node
   start(): Promise<void>;
-
-  // Stop the Anvil node
   stop(): Promise<void>;
-
-  // Check if Anvil is running
   isRunning(): boolean;
-
-  // Set the port for the Anvil node
   setPort(port: number): void;
 };
-```
-
----
 
-#### ContractSeedingCapability (Optional)
-
-Enables smart contract deployment tools (`mm_seed_contract`, `mm_seed_contracts`, etc.).
-
-```typescript
 type ContractSeedingCapability = {
-  // Deploy a single contract
   deployContract(
     name: string,
     options?: DeployOptions,
   ): Promise<ContractDeployment>;
-
-  // Deploy multiple contracts in sequence
   deployContracts(
     names: string[],
     options?: DeployOptions,
@@ -221,1264 +314,317 @@ type ContractSeedingCapability = {
     deployed: ContractDeployment[];
     failed: { name: string; error: string }[];
   }>;
-
-  // Get deployed contract address by name
   getContractAddress(name: string): string | null;
-
-  // List all deployed contracts in this session
   listDeployedContracts(): ContractInfo[];
-
-  // Get available contract names
   getAvailableContracts(): string[];
-
-  // Clear the deployment registry
   clearRegistry(): void;
-
-  // Initialize the capability (called during session launch)
   initialize(): void;
 };
 
-type DeployOptions = {
-  hardfork?: string; // EVM hardfork (default: "prague")
-  deployerOptions?: {
-    fromAddress?: string; // Impersonate address
-    fromPrivateKey?: string; // Deploy from specific key
-  };
-};
-```
-
----
-
-#### StateSnapshotCapability (Optional)
-
-```typescript
 type StateSnapshotCapability = {
-  // Get detailed state snapshot
   getState(page: Page, options: StateOptions): Promise<StateSnapshot>;
-
-  // Detect current screen from page content
   detectCurrentScreen(page: Page): Promise<string>;
 };
 
-type StateOptions = {
-  extensionId?: string;
-  chainId?: number;
-};
-```
-
----
-
-#### MockServerCapability (Optional)
-
-Enables mock server for API testing scenarios.
-
-```typescript
 type MockServerCapability = {
-  // Start the mock server
   start(): Promise<void>;
-
-  // Stop the mock server
   stop(): Promise<void>;
-
-  // Check if mock server is running
   isRunning(): boolean;
-
-  // Get the server instance
   getServer(): unknown;
-
-  // Get the port the server is running on
   getPort(): number;
 };
 ```
 
-## Client Integration
-
-### How to Consume the Package
-
-Consumers must:
-
-1. **Implement `ISessionManager`** - The core interface for session management
-2. **Inject the session manager** - Call `setSessionManager()` before starting the server
-3. **Start the MCP server** - Call `server.start()`
-
-### McpServerConfig
-
-The `createMcpServer()` function accepts a configuration object:
+### Tool System
 
-```typescript
-export type McpServerConfig = {
-  name: string;
-  version: string;
-  onCleanup?: () => Promise<void>;
-  logger?: (message: string) => void;
-};
-```
-
-### Minimal Integration Example
+Tools are standalone functions registered in a central `toolRegistry`. Each tool receives a `ToolContext` and returns a `ToolResponse`.
 
 ```typescript
-import {
-  createMcpServer,
-  setSessionManager,
-  ISessionManager,
-  type McpServerConfig,
-} from '@metamask/client-mcp-core';
-
-// 1. Implement the ISessionManager interface
-class MyExtensionSessionManager implements ISessionManager {
-  // ... implement all required methods
-  // See ISessionManager interface for full contract
-}
-
-// 2. Create and inject your session manager
-const sessionManager = new MyExtensionSessionManager();
-setSessionManager(sessionManager);
+type ToolFunction<TParams, TResult> = (
+  params: TParams,
+  context: ToolContext,
+) => Promise<ToolResponse<TResult>>;
 
-// 3. Create and start the MCP server
-const config: McpServerConfig = {
-  name: 'my-extension-mcp',
-  version: '1.0.0',
-  onCleanup: async () => {
-    // Optional cleanup logic
-  },
+type ToolContext = {
+  sessionManager: ISessionManager;
+  page: Page;
+  refMap: Map<string, string>;
+  workflowContext: WorkflowContext;
+  knowledgeStore: KnowledgeStore;
 };
-
-const server = createMcpServer(config);
-await server.start();
 ```
 
-### Full Integration Example
-
-```typescript
-import {
-  createMcpServer,
-  setSessionManager,
-  ISessionManager,
-  SessionLaunchInput,
-  SessionLaunchResult,
-  TrackedPage,
-  type ExtensionState,
-  type BuildCapability,
-  type FixtureCapability,
-  type ChainCapability,
-  type ContractSeedingCapability,
-  type EnvironmentMode,
-} from '@metamask/client-mcp-core';
-import type { Page, BrowserContext } from '@playwright/test';
-
-class MetaMaskSessionManager implements ISessionManager {
-  private context?: BrowserContext;
-  private activePage?: Page;
-  private extensionId?: string;
-  private sessionId?: string;
-  private refMap = new Map<string, string>();
-
-  // Capabilities (inject via constructor or lazy-load)
-  private buildCapability?: BuildCapability;
-  private fixtureCapability?: FixtureCapability;
-  private chainCapability?: ChainCapability;
-  private contractSeedingCapability?: ContractSeedingCapability;
-
-  // Session Lifecycle
-  hasActiveSession(): boolean {
-    return this.context !== undefined;
-  }
-
-  getSessionId(): string | undefined {
-    return this.sessionId;
-  }
-
-  async launch(input: SessionLaunchInput): Promise<SessionLaunchResult> {
-    // 1. Start local chain if needed
-    if (this.chainCapability) {
-      await this.chainCapability.start();
-    }
-
-    // 2. Start fixture server if needed
-    if (this.fixtureCapability && input.stateMode !== 'onboarding') {
-      const fixture = input.fixture ?? this.fixtureCapability.getDefaultState();
-      await this.fixtureCapability.start(fixture);
-    }
-
-    // 3. Launch browser with extension
-    // ... Playwright browser launch logic
-
-    // 4. Return session info
-    return {
-      sessionId: this.sessionId!,
-      extensionId: this.extensionId!,
-      state: await this.getExtensionState(),
-    };
-  }
-
-  async cleanup(): Promise<boolean> {
-    if (!this.hasActiveSession()) return false;
-
-    // Close browser, stop services
-    await this.context?.close();
-    await this.chainCapability?.stop();
-    await this.fixtureCapability?.stop();
-
-    this.context = undefined;
-    this.activePage = undefined;
-    return true;
-  }
-
-  // Page Management
-  getPage(): Page {
-    if (!this.activePage) throw new Error('No active session');
-    return this.activePage;
-  }
-
-  setActivePage(page: Page): void {
-    this.activePage = page;
-  }
-
-  getTrackedPages(): TrackedPage[] {
-    // Return all tracked pages with roles
-    return [];
-  }
+The daemon routes `POST /tool/:name` requests through the registry, validates inputs with Zod, executes the tool through the request queue, and captures observations (extension state, test IDs, a11y snapshot) after execution. Tools categorized as read-only, such as `get_text`, return no observations in the response.
 
-  getContext(): BrowserContext {
-    if (!this.context) throw new Error('No active session');
-    return this.context;
-  }
+**Registered tools:**
 
-  // Extension State
-  async getExtensionState(): Promise<ExtensionState> {
-    // Query extension for current state
-    return {
-      isLoaded: true,
-      currentUrl: this.activePage?.url() ?? '',
-      extensionId: this.extensionId ?? '',
-      isUnlocked: false,
-      currentScreen: 'unknown',
-      accountAddress: null,
-      networkName: null,
-      chainId: null,
-      balance: null,
-    };
-  }
+| Tool                     | Description                                                                                                                                                                                                                                                                                                                                                                       |
+| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| **Lifecycle**            |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `build`                  | Triggers an extension build using the configured `BuildCapability`. Accepts build type and force options.                                                                                                                                                                                                                                                                         |
+| `launch`                 | Launches a new browser session with the configured extension. Supports state modes (`default`, `onboarding`, `custom`), fixture presets, goal/tag metadata, and optional contract seeding on start.                                                                                                                                                                               |
+| `cleanup`                | Tears down the active browser session and cleans up all resources (browser, services, fixtures).                                                                                                                                                                                                                                                                                  |
+| **Interaction**          |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `click`                  | Clicks an element identified by a11y ref, test ID, or CSS selector. Waits for the element to be visible before clicking. Supports `within` to scope the target inside a parent element.                                                                                                                                                                                           |
+| `type`                   | Types text into an input element identified by a11y ref, test ID, or CSS selector. Clears the field first, then sets the new value (uses Playwright's `fill()`). Supports `within` scoping.                                                                                                                                                                                       |
+| `wait_for`               | Waits for an element to become visible on the page within a configurable timeout. Supports `within` to scope the target inside a parent element.                                                                                                                                                                                                                                  |
+| `get_text`               | Reads the text content of an element identified by a11y ref, test ID, or CSS selector. Returns the text, target descriptor, and character length. Supports `within` scoping. Categorized as read-only (no observations in response).                                                                                                                                              |
+| `clipboard`              | Reads from or writes to the system clipboard via Chrome DevTools Protocol. Useful for pasting seed phrases or copying addresses.                                                                                                                                                                                                                                                  |
+| **Navigation**           |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `navigate`               | Navigates the browser to a named screen (`home`, `settings`, `notification`) or an arbitrary URL.                                                                                                                                                                                                                                                                                 |
+| `switch_to_tab`          | Switches the active page to a tab matching a given role (e.g., `extension`, `dapp`) or URL prefix.                                                                                                                                                                                                                                                                                |
+| `close_tab`              | Closes a browser tab matching a given role or URL. Falls back to the extension tab if the active tab is closed.                                                                                                                                                                                                                                                                   |
+| `wait_for_notification`  | Waits for the extension notification popup to appear within a timeout. Returns the notification page URL.                                                                                                                                                                                                                                                                         |
+| **Discovery**            |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `describe_screen`        | Captures a comprehensive screen snapshot: extension state, visible test IDs, trimmed a11y tree with refs, optional screenshot, and prior knowledge from historical sessions.                                                                                                                                                                                                      |
+| `accessibility_snapshot` | Captures a trimmed accessibility tree of the current page with deterministic refs (`e1`, `e2`, ...). Supports scoping to a root CSS selector.                                                                                                                                                                                                                                     |
+| `list_testids`           | Collects all visible `data-testid` attributes from the current page with text previews and visibility status.                                                                                                                                                                                                                                                                     |
+| **State**                |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `get_state`              | Retrieves the current extension state (URL, screen, network, balance, account) and tracked tab information.                                                                                                                                                                                                                                                                       |
+| `get_context`            | Returns the current environment context (`e2e` or `prod`), session status, available capabilities, and whether context switching is allowed.                                                                                                                                                                                                                                      |
+| `set_context`            | Switches the session environment between `e2e` and `prod` modes. Blocked while a session is active.                                                                                                                                                                                                                                                                               |
+| **Screenshots**          |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `screenshot`             | Captures a screenshot of the current page. Supports naming, full-page capture, scoping to a CSS selector, and optional base64 output.                                                                                                                                                                                                                                             |
+| **Knowledge**            |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `knowledge_last`         | Retrieves the N most recent step records from the knowledge store, with optional scope and filter parameters.                                                                                                                                                                                                                                                                     |
+| `knowledge_search`       | Searches step records by query string with token-based matching and synonym expansion. Scores results by relevance to screen, URL, test IDs, and a11y nodes.                                                                                                                                                                                                                      |
+| `knowledge_summarize`    | Generates a recipe-style summary of a session's tool invocations, showing the step sequence with targets and outcomes.                                                                                                                                                                                                                                                            |
+| `knowledge_sessions`     | Lists available knowledge sessions with metadata (goal, flow tags, timestamps), with optional filtering.                                                                                                                                                                                                                                                                          |
+| **Contracts**            |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `seed_contract`          | Deploys a single smart contract to the local Anvil chain by name. Requires `ContractSeedingCapability`.                                                                                                                                                                                                                                                                           |
+| `seed_contracts`         | Deploys multiple smart contracts in sequence. Returns both successful deployments and individual failures.                                                                                                                                                                                                                                                                        |
+| `get_contract_address`   | Looks up the deployed address of a contract by name from the session's deployment registry.                                                                                                                                                                                                                                                                                       |
+| `list_contracts`         | Lists all contracts deployed in the current session with addresses and deployment timestamps.                                                                                                                                                                                                                                                                                     |
+| **Batching**             |                                                                                                                                                                                                                                                                                                                                                                                   |
+| `run_steps`              | Executes a batch of tool invocations sequentially. Supports `stopOnError` to halt on first failure, `includeObservations` (`'all'`, `'none'`, `'failures'`) to control observations, and `batchTimeoutMs` to set an overall deadline (remaining steps are skipped on timeout). Accepts tool aliases like `navigate_home` / `navigate-home`. Returns per-step results with timing. |
 
-  // A11y Reference Map
-  setRefMap(map: Map<string, string>): void {
-    this.refMap = map;
-  }
+### Accessibility References
 
-  getRefMap(): Map<string, string> {
-    return this.refMap;
-  }
+The core uses Playwright's `ariaSnapshot()` to build a deterministic reference map of interactive elements. Each element gets a short ref (`e1`, `e2`, ...) that maps to an ARIA selector.
 
-  clearRefMap(): void {
-    this.refMap.clear();
-  }
-
-  resolveA11yRef(ref: string): string | undefined {
-    return this.refMap.get(ref);
-  }
-
-  // Navigation
-  async navigateToHome(): Promise<void> {
-    // Navigate to extension home page
-  }
+Agents call `describe_screen` to get the current reference map, then use refs for interaction:
 
-  async navigateToSettings(): Promise<void> {
-    // Navigate to extension settings page
-  }
+```
+mm describe-screen    → { ..., a11y: [{ ref: "e1", role: "button", name: "Submit" }, ...] }
+mm click e1           → clicks the "Submit" button
+mm type e3 "hello"    → types into the element mapped to e3
+```
 
-  async navigateToUrl(url: string): Promise<Page> {
-    // Open URL in new tab and return the page
-    return this.activePage!;
-  }
+This accessibility-first approach provides reliable element targeting that survives minor UI changes.
 
-  async navigateToNotification(): Promise<Page> {
-    // Navigate to notification page
-    return this.activePage!;
-  }
+### Knowledge Store
 
-  async waitForNotificationPage(timeoutMs: number): Promise<Page> {
-    // Wait for notification popup to appear
-    return this.activePage!;
-  }
+The `KnowledgeStore` provides cross-session learning by recording every tool execution as a structured step record:
 
-  // Screenshots
-  async screenshot(options: { name: string; fullPage?: boolean }) {
-    // ... screenshot logic
-    return { path: '', base64: '', width: 0, height: 0 };
-  }
-
-  // Capabilities
-  getBuildCapability() {
-    return this.buildCapability;
-  }
-  getFixtureCapability() {
-    return this.fixtureCapability;
-  }
-  getChainCapability() {
-    return this.chainCapability;
-  }
-  getContractSeedingCapability() {
-    return this.contractSeedingCapability;
-  }
-  getStateSnapshotCapability() {
-    return undefined;
-  }
+- **Step Recording**: Each tool invocation captures the tool name, input, outcome, observation (extension state, visible test IDs, a11y nodes), and timing.
+- **Session Metadata**: Sessions are tagged with goals, flow tags, and free-form tags for filtering.
+- **Prior Knowledge**: Before tool execution, the store can generate context from historical sessions — similar steps, suggested actions, and patterns to avoid — based on the current screen state.
+- **Search**: Token-based search with synonym expansion across sessions, scored by relevance to screen, URL, test IDs, and a11y nodes.
+- **Sensitive Data Handling**: Input text for password fields and other sensitive inputs is automatically redacted.
 
-  // Environment
-  getEnvironmentMode(): EnvironmentMode {
-    return 'e2e';
-  }
-
-  // Required by interface but implementation-specific
-  classifyPageRole(
-    page: Page,
-  ): 'extension' | 'notification' | 'dapp' | 'other' {
-    return 'extension';
-  }
-  getSessionState() {
-    return undefined;
-  }
-  getSessionMetadata() {
-    return undefined;
-  }
-
-  // Context Management
-  setContext(context: 'e2e' | 'prod', options?: Record<string, unknown>): void {
-    if (this.hasActiveSession()) {
-      throw new Error('Cannot switch context while session is active');
-    }
-    // Switch environment context and apply optional context-specific config
-    void options;
-  }
-
-  getContextInfo() {
-    return {
-      currentContext: this.getEnvironmentMode(),
-      hasActiveSession: this.hasActiveSession(),
-      sessionId: this.sessionId ?? null,
-      capabilities: {
-        available: [
-          this.buildCapability && 'build',
-          this.fixtureCapability && 'fixture',
-          this.chainCapability && 'chain',
-          this.contractSeedingCapability && 'contractSeeding',
-        ].filter(Boolean) as string[],
-      },
-      canSwitchContext: !this.hasActiveSession(),
-    };
-  }
-}
+Knowledge artifacts are stored on disk under `test-artifacts/llm-knowledge/`, organized by session ID.
 
-// Bootstrap the server
-async function main() {
-  const sessionManager = new MetaMaskSessionManager();
-  setSessionManager(sessionManager);
+### Environment Modes
 
-  const server = createMcpServer({
-    name: 'metamask-mcp',
-    version: '1.0.0',
-  });
+The package supports two environment modes, configured through a discriminated union keyed on the `environment` field:
 
-  await server.start();
-}
-
-main().catch(console.error);
-```
-
-### Environment Configuration
-
-The package supports two environment modes:
+**E2E Testing** — Full test infrastructure with local chain, fixtures, and contract seeding:
 
 ```typescript
-// E2E Testing Environment
 const e2eConfig: E2EEnvironmentConfig = {
   environment: 'e2e',
   extensionName: 'MetaMask',
   defaultPassword: 'password123',
-  toolPrefix: 'mm',
   artifactsDir: './test-artifacts',
   defaultChainId: 1337,
-  ports: {
-    anvil: 8545,
-    fixtureServer: 12345,
-  },
+  ports: { anvil: 8545, fixtureServer: 12345 },
 };
+```
+
+**Production-like** — Minimal configuration without test infrastructure:
 
-// Production-like Environment
+```typescript
 const prodConfig: ProdEnvironmentConfig = {
   environment: 'prod',
   extensionName: 'MetaMask',
-  toolPrefix: 'mm',
 };
 ```
 
-### Context Switching Options
+Use the `set_context` tool to switch between modes at runtime (allowed only while no session is active), and `get_context` to inspect the current mode and whether switching is currently allowed.
 
-`mm_set_context` supports an optional `options` payload that is forwarded to the session manager's `setContext(context, options)` implementation.
+## Server Configuration
 
-```typescript
-type SetContextInput = {
-  context: 'e2e' | 'prod';
-  options?: Record<string, unknown>;
-};
-
-// Example: switch to e2e and pass context-specific overrides
-await handleSetContext({
-  context: 'e2e',
-  options: {
-    mockServer: {
-      enabled: true,
-      port: 18000,
-    },
-  },
-});
-```
-
-Use `options` only for context-specific configuration your `ISessionManager` implementation understands.
-
-### Custom Tool Definitions
-
-The package provides a fixed set of tools prefixed with `mm_`. Custom tool injection is currently not supported. You can inspect the available tool definitions using `getToolDefinitions()`:
+The `createServer()` function accepts a `ServerConfig` object:
 
 ```typescript
-import { getToolDefinitions } from '@metamask/client-mcp-core';
-
-const tools = getToolDefinitions();
-console.log(`Available tools: ${tools.map((t) => t.name).join(', ')}`);
-```
-
-### Registering Custom Tool Handlers
-
-Custom tool handlers are not supported. The server uses a fixed set of handlers for the provided tools.
-
-## Available Tools
-
-All tools are prefixed with `mm_` and return a standardized response format:
-
-```typescript
-type ToolResponse<Result> =
-  | {
-      ok: true;
-      meta: {
-        timestamp: string; // ISO timestamp
-        sessionId?: string; // Current session ID
-        durationMs: number; // Operation duration
-      };
-      result: Result; // Success payload
-    }
-  | {
-      ok: false;
-      meta: {
-        timestamp: string;
-        sessionId?: string;
-        durationMs: number;
-      };
-      error: {
-        code: string;
-        message: string;
-        details?: Record<string, unknown>;
-      };
-    };
-```
-
----
-
-### Session Management Tools
-
-#### `mm_build`
-
-Build the extension from source. Requires `BuildCapability`.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `buildType` | `"build:test"` | `"build:test"` | Build script to run |
-| `force` | `boolean` | `false` | Force rebuild even if build exists |
-
-**Output:**
-
-```typescript
-{
-  buildType: 'build:test';
-  extensionPathResolved: string; // Absolute path to built extension
-}
-```
-
-**Example:**
-
-```json
-{ "buildType": "build:test", "force": true }
-```
-
----
-
-#### `mm_launch`
-
-Launch a headed Chrome browser with the extension loaded. This is typically the first tool called.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `autoBuild` | `boolean` | `true` | Auto-build if extension not found |
-| `stateMode` | `"default" \| "onboarding" \| "custom"` | `"default"` | Wallet initialization mode |
-| `fixturePreset` | `string` | - | Named preset when `stateMode="custom"` |
-| `fixture` | `object` | - | Direct fixture object when `stateMode="custom"` |
-| `ports.anvil` | `number` | `8545` | Anvil RPC port |
-| `ports.fixtureServer` | `number` | `12345` | Fixture server port |
-| `slowMo` | `number` | `0` | Slow down actions (ms) for debugging |
-| `extensionPath` | `string` | - | Custom extension directory path |
-| `goal` | `string` | - | Session goal for knowledge store |
-| `flowTags` | `string[]` | - | Flow categorization tags |
-| `tags` | `string[]` | - | Free-form tags |
-| `seedContracts` | `string[]` | - | Contracts to deploy on launch |
-
-**State Modes:**
-
-- `default` - Pre-onboarded wallet with 25 ETH, ready to use
-- `onboarding` - Fresh state, requires wallet setup flow
-- `custom` - Use provided fixture or preset
-
-**Output:**
-
-```typescript
-{
-  sessionId: string;        // Unique session identifier
-  extensionId: string;      // Extension's Chrome ID
-  state: ExtensionState;    // Initial extension state
-  prerequisites?: [{        // Steps taken before launch
-    step: string;
-    description: string;
-  }];
-}
-```
-
-**Example:**
-
-```json
-{
-  "stateMode": "default",
-  "goal": "Test send flow",
-  "flowTags": ["send"],
-  "seedContracts": ["hst"]
-}
-```
-
----
-
-#### `mm_cleanup`
-
-Stop the browser and all services (Anvil, fixture server). Always call when done.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `sessionId` | `string` | - | Optional session ID to clean up |
-
-**Output:**
-
-```typescript
-{
-  cleanedUp: boolean; // Whether cleanup was performed
-}
-```
-
----
-
-### Discovery Tools
-
-#### `mm_get_state`
-
-Get current extension state including screen, balance, network, and account.
-
-**Input:** None
-
-**Output:**
-
-```typescript
-{
-  state: {
-    isLoaded: boolean;
-    currentUrl: string;
-    extensionId: string;
-    isUnlocked: boolean;
-    currentScreen: ScreenName;
-    accountAddress: string | null;
-    networkName: string | null;
-    chainId: number | null;
-    balance: string | null;
-  };
-  tabs?: {
-    active: { role: TabRole; url: string };
-    tracked: { role: TabRole; url: string }[];
-  };
-}
-```
-
----
-
-#### `mm_list_testids`
-
-List all visible `data-testid` attributes on the current page. Use to discover interaction targets.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `limit` | `number` | `150` | Maximum items to return (1-500) |
-
-**Output:**
-
-```typescript
-{
-  items: [{
-    testId: string;    // The data-testid value
-    tag: string;       // HTML tag (button, input, div, etc.)
-    text?: string;     // Visible text content
-    visible: boolean;  // Whether element is visible
-  }];
-}
-```
-
-**Example Output:**
-
-```json
-{
-  "items": [
-    {
-      "testId": "account-menu-icon",
-      "tag": "button",
-      "text": "",
-      "visible": true
-    },
-    {
-      "testId": "eth-overview-send",
-      "tag": "button",
-      "text": "Send",
-      "visible": true
-    },
-    {
-      "testId": "token-balance",
-      "tag": "span",
-      "text": "25 ETH",
-      "visible": true
-    }
-  ]
-}
-```
-
----
-
-#### `mm_accessibility_snapshot`
-
-Get a trimmed accessibility tree with deterministic refs (e1, e2, ...). Refs can be used with `mm_click` and `mm_type`.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `rootSelector` | `string` | - | CSS selector to scope the snapshot |
-
-**Included Roles:**
-
-- **Actionable:** button, link, checkbox, radio, switch, textbox, combobox, menuitem
-- **Important:** dialog, alert, status, heading
-
-**Output:**
-
-```typescript
-{
-  nodes: [{
-    ref: string;       // Deterministic ref (e1, e2, e3, ...)
-    role: string;      // ARIA role
-    name: string;      // Accessible name
-    disabled?: boolean;
-    checked?: boolean;
-    expanded?: boolean;
-    path: string[];    // Ancestor path for context
-  }];
-}
-```
-
-**Example Output:**
-
-```json
-{
-  "nodes": [
-    { "ref": "e1", "role": "button", "name": "Send", "path": ["main", "div"] },
-    { "ref": "e2", "role": "button", "name": "Swap", "path": ["main", "div"] },
-    { "ref": "e3", "role": "textbox", "name": "Amount", "path": ["form"] }
-  ]
-}
-```
-
----
-
-#### `mm_describe_screen`
-
-Comprehensive screen state combining extension state, testIds, and accessibility snapshot. Optionally includes screenshot.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `includeScreenshot` | `boolean` | `false` | Capture screenshot |
-| `screenshotName` | `string` | - | Screenshot filename |
-| `includeScreenshotBase64` | `boolean` | `false` | Include base64 in response |
-
-**Output:**
-
-```typescript
-{
-  state: ExtensionState;
-  testIds: { items: TestIdItem[] };
-  a11y: { nodes: A11yNodeTrimmed[] };
-  screenshot: {
-    path: string;
-    width: number;
-    height: number;
-    base64?: string;
-  } | null;
-  priorKnowledge?: PriorKnowledgeV1;  // Past session hints
-}
-```
-
----
-
-### Interaction Tools
-
-#### `mm_click`
-
-Click an element. Specify exactly ONE of: `a11yRef`, `testId`, or `selector`.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `a11yRef` | `string` | - | Accessibility ref from `mm_accessibility_snapshot` (e.g., "e5") |
-| `testId` | `string` | - | `data-testid` attribute value |
-| `selector` | `string` | - | CSS selector |
-| `timeoutMs` | `number` | `15000` | Max wait time (0-60000) |
-
-**Output:**
-
-```typescript
-{
-  clicked: boolean;
-  target: string;                 // Resolved selector
-  pageClosedAfterClick?: boolean; // True if click caused page close
-}
-```
-
-**Examples:**
-
-```json
-{ "a11yRef": "e5" }
-{ "testId": "confirm-btn" }
-{ "selector": "button.primary" }
-```
-
----
-
-#### `mm_type`
-
-Type text into an input element. Specify exactly ONE of: `a11yRef`, `testId`, or `selector`.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `a11yRef` | `string` | - | Accessibility ref |
-| `testId` | `string` | - | `data-testid` value |
-| `selector` | `string` | - | CSS selector |
-| `text` | `string` | **required** | Text to type |
-| `timeoutMs` | `number` | `15000` | Max wait time |
-
-**Output:**
-
-```typescript
-{
-  typed: boolean;
-  target: string;
-  textLength: number;
-}
-```
-
-**Example:**
-
-```json
-{ "testId": "amount-input", "text": "0.5" }
-```
-
----
-
-#### `mm_wait_for`
-
-Wait for an element to become visible. Specify exactly ONE of: `a11yRef`, `testId`, or `selector`.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `a11yRef` | `string` | - | Accessibility ref |
-| `testId` | `string` | - | `data-testid` value |
-| `selector` | `string` | - | CSS selector |
-| `timeoutMs` | `number` | `15000` | Max wait time (100-120000) |
-
-**Output:**
-
-```typescript
-{
-  found: boolean;
-  target: string;
-}
-```
-
----
-
-#### `mm_clipboard`
-
-Read from or write to the browser clipboard. Useful for pasting content (e.g., Secret Recovery Phrase) into components that support paste functionality.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `action` | `"write" \| "read"` | **required** | Clipboard action |
-| `text` | `string` | - | Text to write (required when `action="write"`) |
-
-**Output:**
-
-```typescript
-{
-  action: "write" | "read";
-  success: boolean;
-  text?: string;  // Present when action="read" and successful
-}
-```
-
-**Examples:**
-
-```json
-{ "action": "write", "text": "word1 word2 word3 word4 word5 word6 word7 word8 word9 word10 word11 word12" }
-{ "action": "read" }
-```
-
-**Use Case - Fast SRP Entry:**
-
-```
-1. mm_clipboard { "action": "write", "text": "abandon abandon ... about" }
-2. mm_click { "testId": "srp-input-import__paste-button" }
-→ All 12 words populated instantly via paste
-```
-
----
-
-#### `mm_navigate`
-
-Navigate to a specific screen in the extension.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `screen` | `"home" \| "settings" \| "notification" \| "url"` | **required** | Target screen |
-| `url` | `string` | - | Required when `screen="url"` |
-
-**Output:**
-
-```typescript
-{
-  navigated: boolean;
-  currentUrl: string;
-}
-```
-
-**Examples:**
-
-```json
-{ "screen": "home" }
-{ "screen": "settings" }
-{ "screen": "url", "url": "https://app.uniswap.org" }
-```
-
----
-
-### Multi-Tab Tools
-
-#### `mm_wait_for_notification`
-
-Wait for a notification popup to appear (e.g., after dApp interaction). Sets the notification page as active.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `timeoutMs` | `number` | `15000` | Max wait time (1000-60000) |
-
-**Output:**
-
-```typescript
-{
-  found: boolean;
-  pageUrl: string;
-}
-```
-
----
-
-#### `mm_switch_to_tab`
-
-Switch the active page for subsequent interactions.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `role` | `"extension" \| "notification" \| "dapp" \| "other"` | - | Tab role to switch to |
-| `url` | `string` | - | URL prefix to match |
-
-**Output:**
-
-```typescript
-{
-  switched: boolean;
-  activeTab: {
-    role: TabRole;
-    url: string;
-  }
-}
-```
-
-**Example:**
-
-```json
-{ "role": "dapp" }
-{ "url": "https://app.uniswap.org" }
-```
-
----
-
-#### `mm_close_tab`
-
-Close a specific tab. Cannot close the extension home page.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `role` | `"notification" \| "dapp" \| "other"` | - | Tab role to close |
-| `url` | `string` | - | URL prefix to match |
-
-**Output:**
-
-```typescript
-{
-  closed: boolean;
-  closedUrl: string;
-}
-```
-
----
-
-### Screenshot Tools
-
-#### `mm_screenshot`
-
-Capture a screenshot and save to `test-artifacts/screenshots/`.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `name` | `string` | **required** | Filename (without extension) |
-| `fullPage` | `boolean` | `true` | Capture full page |
-| `selector` | `string` | - | Capture specific element only |
-| `includeBase64` | `boolean` | `false` | Include base64 in response |
-
-**Output:**
-
-```typescript
-{
-  path: string;      // File path
-  width: number;
-  height: number;
-  base64?: string;   // If includeBase64=true
-}
-```
-
----
-
-### Smart Contract Tools
-
-#### `mm_seed_contract`
-
-Deploy a smart contract to the local Anvil node. Requires `ContractSeedingCapability`.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `contractName` | `string` | **required** | Contract to deploy (see list below) |
-| `hardfork` | `string` | `"prague"` | EVM hardfork |
-| `deployerOptions.fromAddress` | `string` | - | Impersonate address |
-| `deployerOptions.fromPrivateKey` | `string` | - | Deploy from specific key |
-
-**Available Contracts:**
-| Name | Description |
-|------|-------------|
-| `hst` | ERC-20 TST token |
-| `nfts` | ERC-721 NFT collection |
-| `erc1155` | ERC-1155 multi-token |
-| `piggybank` | Simple ETH storage |
-| `failing` | Always reverts (error testing) |
-| `multisig` | Multi-signature wallet |
-| `entrypoint` | ERC-4337 EntryPoint |
-| `simpleAccountFactory` | ERC-4337 account factory |
-| `verifyingPaymaster` | ERC-4337 paymaster |
-
-**Output:**
-
-```typescript
-{
-  contractName: string;
-  contractAddress: string;
-  deployedAt: string; // ISO timestamp
-}
-```
-
----
-
-#### `mm_seed_contracts`
-
-Deploy multiple contracts in sequence.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `contracts` | `string[]` | **required** | Contracts to deploy (1-9) |
-| `hardfork` | `string` | `"prague"` | EVM hardfork |
-
-**Output:**
-
-```typescript
-{
-  deployed: [{ contractName, contractAddress, deployedAt }];
-  failed: [{ contractName, error }];
-}
-```
-
----
-
-#### `mm_get_contract_address`
-
-Get the deployed address of a contract.
-
-**Input:**
-| Parameter | Type | Description |
-|-----------|------|-------------|
-| `contractName` | `string` | Contract name to look up |
-
-**Output:**
-
-```typescript
-{
-  contractName: string;
-  contractAddress: string | null;
-}
-```
-
----
-
-#### `mm_list_contracts`
-
-List all contracts deployed in this session.
-
-**Input:** None
-
-**Output:**
-
-```typescript
-{
-  contracts: [{
-    contractName: string;
-    contractAddress: string;
-    deployedAt: string;
-  }];
-}
-```
-
----
-
-### Knowledge Store Tools
-
-The knowledge store enables cross-session learning by recording tool invocations and their context.
-
-#### `mm_knowledge_last`
-
-Get the last N step records from the knowledge store.
-
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `n` | `number` | `20` | Number of steps (1-200) |
-| `scope` | `"current" \| "all" \| { sessionId }` | `"current"` | Which sessions to query |
-| `filters.flowTag` | `string` | - | Filter by flow tag |
-| `filters.tag` | `string` | - | Filter by tag |
-| `filters.screen` | `string` | - | Filter by screen |
-| `filters.sinceHours` | `number` | - | Only steps from last N hours |
-
-**Output:**
-
-```typescript
-{
-  steps: [{
-    timestamp: string;
-    tool: string;
-    screen: ScreenName;
-    snippet: string;      // Human-readable summary
-    sessionId?: string;
-    matchedFields?: string[];
-    sessionGoal?: string;
-  }];
-}
+type ServerConfig = {
+  /** Session manager instance (required) */
+  sessionManager: ISessionManager;
+  /** Factory function to create workflow context (may be sync or async) */
+  contextFactory: () => WorkflowContext | Promise<WorkflowContext>;
+  /** Idle timeout in milliseconds (optional, defaults to 1_800_000 = 30 min) */
+  idleShutdownMs?: number;
+  /** Per-request execution timeout in milliseconds (optional, defaults to 30_000 = 30 s) */
+  requestTimeoutMs?: number;
+  /** Path to log file (optional) */
+  logFilePath?: string;
+};
 ```
 
----
-
-#### `mm_knowledge_search`
+The `contextFactory` is called once during `start()`. It is responsible for allocating any sub-service ports and returning a `WorkflowContext`. The core validates the returned shape at runtime — `config.environment` must be a string and every value in `allocatedPorts` (if provided) must be a finite number.
 
-Search step records by tool name, screen, testId, or accessibility names.
+The `allocatePort()` utility is exported as a convenience for consumers who need ephemeral port allocation inside their factory.
 
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `query` | `string` | **required** | Search query (1-200 chars) |
-| `limit` | `number` | `20` | Max results (1-100) |
-| `scope` | `"current" \| "all" \| { sessionId }` | `"all"` | Which sessions to search |
-| `filters` | `KnowledgeFilters` | - | Additional filters |
-
-**Output:**
-
-```typescript
-{
-  matches: KnowledgeStepSummary[];
-  query: string;
-}
-```
+The returned `ServerInstance` exposes:
 
----
+- `start(): Promise<DaemonState>` — Calls `contextFactory`, starts the HTTP server, writes the `.mm-server` state, and sets up the idle timeout and signal handlers.
+- `stop(): Promise<void>` — Stops accepting connections, cleans up the session, and removes the `.mm-server` state.
 
-#### `mm_knowledge_summarize`
+## HTTP API
 
-Generate a recipe-like summary of steps taken in a session.
+The daemon exposes the following endpoints on `127.0.0.1`:
 
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `scope` | `"current" \| { sessionId }` | `"current"` | Session to summarize |
+| Method | Path          | Description                                  |
+| ------ | ------------- | -------------------------------------------- |
+| `GET`  | `/health`     | Health check with nonce verification         |
+| `GET`  | `/status`     | Daemon status (PID, port, uptime, sub-ports) |
+| `POST` | `/launch`     | Start a browser session                      |
+| `POST` | `/cleanup`    | Stop the current browser session             |
+| `POST` | `/tool/:name` | Execute a registered tool with JSON body     |
 
-**Output:**
+All responses follow a consistent shape:
 
 ```typescript
-{
-  sessionId: string;
-  stepCount: number;
-  recipe: [{
-    stepNumber: number;
-    tool: string;
-    notes: string;
-  }];
-}
-```
-
----
-
-#### `mm_knowledge_sessions`
-
-List recent sessions with metadata.
+// Success
+{ ok: true, result: T, observations?: { state, testIds, a11y } }
 
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `limit` | `number` | `10` | Max sessions (1-50) |
-| `filters` | `KnowledgeFilters` | - | Filter options |
-
-**Output:**
-
-```typescript
-{
-  sessions: [{
-    sessionId: string;
-    createdAt: string;
-    goal?: string;
-    flowTags: string[];
-    tags: string[];
-  }];
-}
+// Error
+{ ok: false, error: { code: string, message: string } }
 ```
 
----
+The `observations` field is included for **mutating** tools (click, type, navigate, launch, cleanup, build, etc.) and for `run_steps` when its `includeObservations` parameter is `'all'` (default) or `'failures'`. **Read-only** and **discovery** tools omit observations from the response.
 
-### Batching Tools
+## CLI Reference
 
-#### `mm_run_steps`
+The `mm` CLI provides a unified interface for agents and developers. All commands communicate with the daemon over HTTP — the daemon is auto-started on `mm launch` if not already running.
 
-Execute multiple tools in sequence. Reduces round trips for multi-step flows.
+### Global Options
 
-**Input:**
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `steps` | `array` | **required** | Tool calls to execute (1-50) |
-| `steps[].tool` | `string` | **required** | Tool name (e.g., `mm_click`) |
-| `steps[].args` | `object` | `{}` | Tool arguments |
-| `stopOnError` | `boolean` | `false` | Stop on first error |
-| `includeObservations` | `"none" \| "failures" \| "all"` | `"all"` | When to include state observations |
+| Option             | Description                                                                                                 |
+| ------------------ | ----------------------------------------------------------------------------------------------------------- |
+| `--project <path>` | Target a specific project directory (absolute or relative). Overrides `MM_PROJECT` and git-based discovery. |
 
-**Output:**
+| Environment Variable | Description                                                                                              |
+| -------------------- | -------------------------------------------------------------------------------------------------------- |
+| `MM_PROJECT`         | Default project directory when `--project` is not provided. Falls back to the current git worktree root. |
 
-```typescript
-{
-  steps: [{
-    tool: string;
-    ok: boolean;
-    result?: unknown;
-    error?: { code: string; message: string; details?: unknown };
-    meta: { durationMs: number; timestamp: string };
-  }];
-  summary: {
-    ok: boolean;      // All steps succeeded
-    total: number;
-    succeeded: number;
-    failed: number;
-    durationMs: number;
-  };
-}
-```
+### Project Targeting
 
-**Example:**
+By default, the CLI resolves the target project from the current git worktree, which works when running from inside the project directory. To target a project from anywhere else, use one of the overrides below. The full resolution order is:
 
-```json
-{
-  "steps": [
-    { "tool": "mm_click", "args": { "testId": "send-button" } },
-    { "tool": "mm_type", "args": { "testId": "amount-input", "text": "0.1" } },
-    { "tool": "mm_click", "args": { "testId": "confirm-button" } }
-  ],
-  "stopOnError": true
-}
-```
-
-## Development
-
-### Building
+1. **`--project <path>`** — Explicit flag, highest priority. Accepts absolute or relative paths.
+2. **`MM_PROJECT`** — Environment variable. Useful for setting once in agent config or shell profile.
+3. **Git worktree** — `git rev-parse --show-toplevel` from the current working directory (existing behavior).
 
 ```bash
-yarn build
-```
+# From inside the project (unchanged)
+mm launch
+
+# From a parent folder containing multiple repos
+mm --project ./metamask-extension launch
+
+# Via environment variable
+export MM_PROJECT=/path/to/metamask-extension
+mm describe-screen
+```
+
+### Lifecycle
+
+| Command                                                                                                                                          | Description                                                                                                                                                                                                                                                                                                                                                                             |
+| ------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `mm launch [--context e2e\|prod] [--state default\|onboarding\|custom] [--extension-path <path>] [--goal <text>] [--force] [--flow-tags <tags>]` | Auto-starts the daemon if needed, then launches a headed Chrome session with the configured extension. Use `--context` to set the environment context before launching. Use `--state` to control wallet initialization. Use `--extension-path` to override the extension directory. Use `--goal` and `--flow-tags` for knowledge tagging. Use `--force` to replace an existing session. |
+| `mm cleanup [--shutdown]`                                                                                                                        | Stops the browser, tears down test services (fixture server, Anvil, mock server), and releases session resources. Add `--shutdown` to also terminate the daemon process.                                                                                                                                                                                                                |
+| `mm status`                                                                                                                                      | Displays the daemon's current status: PID, port, uptime, allocated sub-ports, and whether a browser session is active.                                                                                                                                                                                                                                                                  |
+| `mm serve [--background]`                                                                                                                        | Manually starts the HTTP daemon without launching a browser session. Use `--background` to detach the process. Fails if a daemon is already running for this worktree.                                                                                                                                                                                                                  |
+
+### Interaction
+
+| Command                                                                                    | Description                                                                                                                                                                                                                                                                              |
+| ------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `mm click <ref> [--selector <css>] [--testid <id>] [--within <scope>]`                     | Clicks an element by its accessibility reference (e.g., `e3`). The ref comes from a prior `describe-screen` call. Waits for the element to be visible before clicking. Use `--within` to scope the target inside a parent element (`testid:<id>`, `selector:<css>`, or a bare a11y ref). |
+| `mm type <ref> <text> [--selector <css>] [--testid <id>] [--within <scope>]`               | Types text into an input element identified by its accessibility reference. Clears the field first, then sets the new value (uses Playwright's `fill()`). Use `--within` to scope the target inside a parent element.                                                                    |
+| `mm get-text <ref> [--selector <css>] [--testid <id>] [--within <scope>]`                  | Reads the text content of an element. Returns the inner text, target descriptor, and character length. Useful for asserting visible values without screenshots.                                                                                                                          |
+| `mm describe-screen`                                                                       | Captures the full screen state: extension info, visible test IDs, a trimmed accessibility tree with deterministic refs (`e1`, `e2`, ...), and prior knowledge from historical sessions. This is the primary command for understanding what's on screen before interacting.               |
+| `mm screenshot [--name <name>]`                                                            | Takes a full-page screenshot of the current page. Saves to the artifacts directory. Use `--name` to set a descriptive filename.                                                                                                                                                          |
+| `mm wait-for <ref> [--timeout <ms>] [--selector <css>] [--testid <id>] [--within <scope>]` | Blocks until an element identified by its accessibility reference becomes visible, or the timeout expires. Default timeout is 15 seconds. Use `--within` to scope the target inside a parent element.                                                                                    |
+| `mm wait-for-notification [--timeout <ms>]`                                                | Waits for the extension notification popup to appear within a timeout. Returns the notification page URL.                                                                                                                                                                                |
+| `mm clipboard <read\|write> [text]`                                                        | Reads from or writes to the system clipboard via Chrome DevTools Protocol. Useful for pasting seed phrases or copying addresses.                                                                                                                                                         |
+
+### Navigation
+
+| Command                                                   | Description                                                                                                                                      |
+| --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `mm navigate <url>`                                       | Opens a new tab and navigates to the given URL. Useful for navigating to dApps or external pages.                                                |
+| `mm navigate-home`                                        | Navigates the extension tab to the wallet home screen.                                                                                           |
+| `mm navigate-settings`                                    | Navigates the extension tab to the settings page.                                                                                                |
+| `mm switch-to-tab <role> \| --role <role> \| --url <url>` | Switches the active page to a tab matching a given role (e.g., `extension`, `dapp`) or URL prefix. Supports a positional role as first argument. |
+| `mm close-tab --role <role> \| --url <url>`               | Closes a browser tab matching a given role or URL. Falls back to the extension tab if the active tab is closed.                                  |
+
+### State & Context
+
+| Command                      | Description                                                                                                                                                         |
+| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `mm get-state`               | Returns the current extension state: loaded status, current URL, screen name, network, chain ID, account address, and balance. Also lists all tracked browser tabs. |
+| `mm get-context`             | Returns the current environment context (`e2e` or `prod`), session status, available capabilities, and whether context switching is allowed.                        |
+| `mm set-context <e2e\|prod>` | Switches the session environment between `e2e` and `prod` modes. Blocked while a session is active — run `mm cleanup` first.                                        |
+
+### Knowledge
+
+| Command                                   | Description                                                                                                                                          |
+| ----------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `mm knowledge-search <query>`             | Searches the knowledge store for past tool invocations matching the query. Results are scored by relevance to screen, URL, test IDs, and a11y nodes. |
+| `mm knowledge-last`                       | Retrieves the most recent step records from the current session's knowledge store.                                                                   |
+| `mm knowledge-sessions`                   | Lists recent knowledge sessions with metadata (goal, flow tags, timestamps).                                                                         |
+| `mm knowledge-summarize [--session <id>]` | Generates a recipe-style summary of a session's tool invocations, showing the step sequence with targets and outcomes.                               |
+
+### Batching
+
+| Command               | Description                                                                                                              |
+| --------------------- | ------------------------------------------------------------------------------------------------------------------------ |
+| `mm run-steps <json>` | Executes a batch of tool invocations sequentially from a JSON definition. Each step specifies a tool name and arguments. |
+
+For the full agent-facing reference and workflow guidelines, see [SKILL.md](./SKILL.md).
+
+## Error Classification
+
+Tool errors are classified into specific error codes for structured handling:
+
+| Code                        | Meaning                                         |
+| --------------------------- | ----------------------------------------------- |
+| `MM_TARGET_NOT_FOUND`       | Element not found by ref, testId, or selector   |
+| `MM_WAIT_TIMEOUT`           | Timeout waiting for element or condition        |
+| `MM_CLICK_FAILED`           | Click operation failed                          |
+| `MM_TYPE_FAILED`            | Type operation failed                           |
+| `MM_NAVIGATION_FAILED`      | Navigation error or network failure             |
+| `MM_PAGE_CLOSED`            | Browser page was closed unexpectedly            |
+| `MM_NOTIFICATION_TIMEOUT`   | Notification popup did not appear               |
+| `MM_TAB_NOT_FOUND`          | Tab not found by role or URL                    |
+| `MM_DISCOVERY_FAILED`       | Discovery tool failure                          |
+| `MM_SCREENSHOT_FAILED`      | Screenshot capture failure                      |
+| `MM_BATCH_TIMEOUT`          | `batchTimeoutMs` deadline exceeded in run_steps |
+| `MM_CONTRACT_NOT_FOUND`     | Unknown contract name                           |
+| `MM_SEED_FAILED`            | Contract deployment failure                     |
+| `MM_CONTEXT_SWITCH_BLOCKED` | Context switch while session is active          |
 
-### Testing
-
-```bash
-yarn test
-```
-
-### Local Development with yalc
+## Development
 
 ```bash
-# In this repo
-yarn build && yalc publish
-
-# In consumer repo
-yalc add @metamask/client-mcp-core
+yarn build        # Build the package
+yarn test         # Run tests and type checks
+yarn lint         # Lint everything
+yarn lint:fix     # Auto-fix lint issues
 ```
 
 ## License
 
-MIT
+(MIT OR Apache-2.0)
diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..c0855a9
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,538 @@
+# mm CLI — Agent Reference
+
+You control a browser extension through the `mm` CLI. Every command talks to a local HTTP daemon that manages Playwright and the extension lifecycle. The daemon auto-starts when you run `mm launch`.
+
+If you are running outside the target project directory, use `--project <path>` or set the `MM_PROJECT` environment variable to point at the project root. All commands accept `--project` before the command name (e.g., `mm --project ../metamask-extension launch`).
+
+## Core Loop
+
+```
+mm launch                  # 1. Start browser + extension
+mm describe-screen         # 2. See what's on screen (ALWAYS do this before interacting)
+mm click <ref>             # 3. Interact using refs from describe-screen
+mm describe-screen         # 4. Re-describe after every action to get fresh refs
+mm cleanup --shutdown      # 5. Clean up when done
+```
+
+**Critical rules:**
+
+- **Always `describe-screen` before interacting.** Refs like `e1`, `e2` are ephemeral — they change after every action.
+- **Refresh refs after every interaction** — either call `describe-screen` again, or use the inline `observations` returned by mutating tools. Mutating tools (click, type, navigate, etc.) return an `observations` object with fresh `state`, `testIds`, and `a11y` refs. You can use these refs directly for the next interaction without calling `describe-screen`. Call `describe-screen` when you need `priorKnowledge` or screenshots.
+- **One target per command.** Specify exactly ONE of: a11y ref (`e5`), testId, or CSS selector.
+- **Errors are structured.** Check the `error.code` field to decide recovery strategy (see Error Codes below).
+
+## Observation Behavior
+
+Tool responses include different data based on the tool's category:
+
+| Category      | Examples                                                          | Observations in response?                      |
+| ------------- | ----------------------------------------------------------------- | ---------------------------------------------- |
+| **Mutating**  | click, type, navigate, launch, cleanup, build, clipboard          | Yes — `state` + `a11y` (compacted) + `testIds` |
+| **Read-only** | get_state, get_text, knowledge\_\*, get_context, set_context      | No — faster response                           |
+| **Discovery** | describe_screen, list_testids, accessibility_snapshot, screenshot | Data is already in `result`                    |
+| **Batch**     | run_steps                                                         | Controlled by `includeObservations` param      |
+
+**Observation Compaction:** Mutating tool observations are **compacted** before returning: option runs of 3 or more under a combobox or listbox are replaced with a single summary node (e.g., `"55 options (refs e2–e56)"`). The `describe-screen` tool always returns the **full, unfiltered** a11y tree — use it when you need the complete option list or `priorKnowledge`.
+
+**Diff-Based Observations:** After the first mutating tool call sets a baseline, subsequent mutations return **diff-based** observations. The `observations.a11y.diff` field (when present) shows what changed:
+
+```jsonc
+{
+  "added": ["e4", "e5"], // new node refs
+  "removed": ["e2"], // disappeared node refs
+  "unchanged": 3 // count of unchanged nodes
+}
+```
+
+The `observations.a11y.nodes` field contains **only the changed and new nodes** (not all nodes). The baseline resets after `describe-screen`, `launch`, or `cleanup` — the next mutation returns a full compacted observation (no `diff` field). When the diff would be larger than the full observation, the full compacted (option-filtered) observation is returned instead (no `diff` field).
+
+### Using inline observations (mutating tools)
+
+After a mutating action, the response includes fresh screen state:
+
+```json
+{
+  "ok": true,
+  "result": { ... },
+  "observations": {
+    "state": { "screen": "send", "url": "...", "balance": "1.5 ETH" },
+    "testIds": ["send-amount-input", "send-button"],
+    "a11y": {
+      "nodes": [
+        { "ref": "e1", "role": "textbox", "name": "Amount" },
+        { "ref": "e2", "role": "button", "name": "Send" }
+      ]
+    }
+  }
+}
+```
+
+You can use the `ref` values from `observations.a11y.nodes` for the next interaction — no `describe-screen` needed. Note that some entries in compacted observations are summary nodes rather than individual elements (e.g., `"55 options (refs e2–e56)"`) when there are 3+ options under a combobox or listbox.
+
+**Quick reference:**
+
+- Use `observations.state` for quick checks (screen name, loading status, balance, etc.)
+- Use `observations.a11y.nodes` with the compact refs for the next interaction
+- Call `describe-screen` only when you need the full tree or `priorKnowledge`
+
+```bash
+mm click e3                 # mutating: response includes fresh observations
+# observations.a11y.nodes has updated refs — use them directly:
+mm type e1 "0.01"           # use ref from previous response
+```
+
+Call `describe-screen` explicitly when you need:
+
+- `priorKnowledge` (historical actions for this screen)
+- A screenshot via `includeScreenshot`
+- Full context after unexpected navigation
+- The complete, unfiltered a11y tree (e.g., all options in a dropdown)
+
+### `run_steps` and `includeObservations`
+
+The `run_steps` tool collects observations once after all steps complete. Control inclusion with the `includeObservations` parameter:
+
+| Value             | Behavior                                      |
+| ----------------- | --------------------------------------------- |
+| `'all'` (default) | Always include final state observations       |
+| `'none'`          | Never include observations (fastest response) |
+| `'failures'`      | Include observations only if any step failed  |
+
+```json
+{
+  "steps": [
+    { "tool": "click", "args": { "a11yRef": "e3" } },
+    { "tool": "type", "args": { "a11yRef": "e5", "text": "0.01" } }
+  ],
+  "includeObservations": "failures"
+}
+```
+
+## Commands
+
+### Session Lifecycle
+
+#### `mm launch`
+
+Starts the daemon (if not running) and launches a headed Chrome session with the extension.
+
+```
+mm launch [--context e2e|prod] [--state default|onboarding|custom] [--extension-path <path>] [--goal <text>] [--force] [--flow-tags <tags>]
+```
+
+| Flag                      | Description                                                     |
+| ------------------------- | --------------------------------------------------------------- |
+| `--context e2e\|prod`     | Set the environment context before launching                    |
+| `--state default`         | Pre-onboarded wallet with 25 ETH on local Anvil chain (default) |
+| `--state onboarding`      | Fresh wallet requiring manual onboarding setup                  |
+| `--state custom`          | Use a custom fixture for wallet state                           |
+| `--extension-path <path>` | Override the extension build directory                          |
+| `--goal <text>`           | Tag the session with a goal for knowledge store                 |
+| `--force`                 | Replace an existing active session                              |
+| `--flow-tags <tags>`      | Comma-separated flow tags for cross-session knowledge           |
+
+Returns: `sessionId`, `extensionId`, `state` (current extension state).
+
+#### `mm cleanup`
+
+Stops the browser, tears down test services, and releases session resources.
+
+```
+mm cleanup [--shutdown]
+```
+
+| Flag         | Description                       |
+| ------------ | --------------------------------- |
+| `--shutdown` | Also terminate the daemon process |
+
+Without `--shutdown`, the daemon stays running for the next `mm launch`.
+
+#### `mm status`
+
+Shows daemon status: PID, port, uptime, allocated sub-ports.
+
+```
+mm status
+```
+
+#### `mm serve`
+
+Manually starts the daemon without launching a browser. Useful for debugging.
+
+```
+mm serve [--background]
+```
+
+### Screen Discovery
+
+#### `mm describe-screen`
+
+**Your primary observation tool.** Returns the complete screen state:
+
+- **Extension state**: current URL, screen name, network, account, balance
+- **Active tab**: the currently focused tab's role and URL (if tracked)
+- **Test IDs**: visible `data-testid` attributes with text previews
+- **A11y tree**: interactive elements with deterministic refs (`e1`, `e2`, ...)
+- **Prior knowledge**: suggested actions from past sessions on this screen
+
+```
+mm describe-screen
+```
+
+The a11y tree includes actionable roles: `button`, `link`, `checkbox`, `radio`, `switch`, `textbox`, `combobox`, `menuitem`; structural roles: `menu`, `listbox`, `option`, `tab`, `tabpanel`, `list`, `listitem`; and important roles: `dialog`, `alert`, `status`, `heading`.
+
+Each node looks like:
+
+```json
+{
+  "ref": "e3",
+  "role": "button",
+  "name": "Confirm",
+  "path": ["dialog:Transaction"],
+  "testId": "confirm-footer-button",
+  "textContent": "Confirm"
+}
+```
+
+The `testId` and `textContent` fields appear only on nodes with short or generic names — they provide extra context from the DOM to help identify ambiguous elements. Nodes with clear names omit these fields.
+
+When 3+ consecutive identical nodes appear (same role, name, and path), they are collapsed into a summary like `… 3 more "maskicon" (refs e2–e4)` to reduce token waste. Individual refs still work for targeting.
+
+Use the `ref` value (`e3`) for click/type/get-text/wait-for commands.
+
+#### `mm screenshot`
+
+Captures a screenshot of the current page.
+
+```
+mm screenshot [--name <name>]
+```
+
+Returns: file path, dimensions.
+
+### Element Interaction
+
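+Commands that take a `<ref>` accept an a11y ref from `describe-screen` (or from a mutating tool's inline `observations`); a `--testid` target can be used instead, as shown in the examples below.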
+
+#### `mm click <ref>`
+
+Clicks an element. Waits up to 15s for it to become visible.
+
+```
+mm click e3
+mm click --testid end-accessory --within "testid:account-list-item/0"
+```
+
+Use `--within` to scope the target inside a parent element. Values use the format `testid:<id>`, `selector:<css>`, or a bare a11y ref (`e5`).
+
+If the page closes after clicking (e.g., confirmation popup), the response includes `pageClosedAfterClick: true` — this is normal, not an error.
+
+#### `mm type <ref> <text>`
+
+Types text into an input field. **Clears the field first**, then sets the new value (uses Playwright's `fill()`). No `clearFirst` flag needed — clearing is always implicit.
+
+```
+mm type e5 "0x1234abcd..."
+```
+
+#### `mm get-text <ref>`
+
+Reads the text content of an element. Returns the inner text, target descriptor, and character length. Useful for asserting visible values without screenshots. Categorized as read-only (no observations in response).
+
+```
+mm get-text e5
+mm get-text --testid balance-amount
+mm get-text --testid amount --within "testid:tx-row"
+```
+
+Returns: `text` (string content), `target` (descriptor like `testId:balance-amount`), `length` (character count).
+
+#### `mm wait-for <ref>`
+
+Blocks until an element becomes visible. Default timeout: 15s.
+
+```
+mm wait-for e7 [--timeout <ms>]
+mm wait-for --testid confirm-btn --within "testid:dialog-container"
+```
+
+#### `mm wait-for-notification`
+
+Waits for the extension notification popup to appear within a timeout. Returns the notification page URL.
+
+```
+mm wait-for-notification [--timeout <ms>]
+```
+
+#### `mm clipboard`
+
+Reads from or writes to the system clipboard via Chrome DevTools Protocol. Useful for pasting seed phrases or copying addresses.
+
+```
+mm clipboard read
+mm clipboard write "0x1234abcd..."
+```
+
+### Navigation
+
+#### `mm navigate <url>`
+
+Opens a new tab and navigates to the given URL.
+
+```
+mm navigate https://app.uniswap.org
+```
+
+#### `mm navigate-home`
+
+Navigates the extension tab to the wallet home screen.
+
+```
+mm navigate-home
+```
+
+#### `mm navigate-settings`
+
+Navigates the extension tab to the settings page.
+
+```
+mm navigate-settings
+```
+
+#### `mm switch-to-tab`
+
+Switches the active page to a tab matching a given role or URL prefix. Supports a positional role as the first argument.
+
+```
+mm switch-to-tab dapp
+mm switch-to-tab --role extension
+mm switch-to-tab --url https://app.uniswap.org
+```
+
+#### `mm close-tab`
+
+Closes a browser tab matching a given role or URL. Falls back to the extension tab if the active tab is closed.
+
+```
+mm close-tab --role dapp
+mm close-tab --url https://app.uniswap.org
+```
+
+### State & Context
+
+#### `mm get-state`
+
+Returns extension state and tracked tabs without the full a11y tree.
+
+```
+mm get-state
+```
+
+Returns: `state` (extension state) and `tabs` (active + tracked tabs with roles and URLs).
+
+#### `mm get-context`
+
+Returns the current environment context (`e2e` or `prod`), session status, available capabilities, and whether context switching is allowed.
+
+```
+mm get-context
+```
+
+#### `mm set-context`
+
+Switches the session environment between `e2e` and `prod` modes. Blocked while a session is active — run `mm cleanup` first.
+
+```
+mm set-context <e2e|prod>
+```
+
+### Knowledge Store
+
+The knowledge store records every tool invocation and uses past sessions to suggest actions.
+
+#### `mm knowledge-search <query>`
+
+Searches past sessions for steps matching the query. Matches against tool names, screen names, test IDs, and a11y node names.
+
+```
+mm knowledge-search "confirm transaction"
+```
+
+#### `mm knowledge-last`
+
+Gets the most recent step records from the current session.
+
+```
+mm knowledge-last
+```
+
+#### `mm knowledge-sessions`
+
+Lists recent sessions with metadata (goal, flow tags, timestamps).
+
+```
+mm knowledge-sessions
+```
+
+### Batch Execution
+
+#### `mm run-steps <json>`
+
+Executes multiple tool invocations in sequence from a JSON object containing a `steps` array. Each step specifies a tool name and arguments.
+
+```
+mm run-steps '{"steps":[{"tool":"click","args":{"a11yRef":"e3"}},{"tool":"wait_for","args":{"a11yRef":"e5"}}]}'
+```
+
+Supports `stopOnError` (halt on first failure) and returns per-step results with timing. The `includeObservations` param controls whether final-state observations appear in the response: `'all'` (default), `'none'`, or `'failures'` (only if any step failed). Use `batchTimeoutMs` to set an overall deadline — if exceeded, remaining steps are marked as skipped and partial results are returned immediately. The summary includes a `skipped` count alongside `succeeded` and `failed`.
+
+Tool aliases are supported in steps: `navigate_home` / `navigate-home`, `navigate_settings` / `navigate-settings`, and `navigate_notification` / `navigate-notification` resolve to `navigate` with the appropriate `screen` argument. You can also use `ref` as shorthand for `a11yRef` in step args and within targets.
+
+## Element Targeting
+
+Every interaction command (`click`, `type`, `get-text`, `wait-for`) needs a target. You must provide exactly ONE of:
+
+| Method           | Format              | Stability                       | When to use                                          |
+| ---------------- | ------------------- | ------------------------------- | ---------------------------------------------------- |
+| **a11y ref**     | `e1`, `e2`, ...     | Ephemeral (per describe-screen) | Default — use refs from the latest `describe-screen` |
+| **testId**       | `data-testid` value | Stable across sessions          | When you know the testId from prior knowledge        |
+| **CSS selector** | Any CSS selector    | Fragile                         | Last resort fallback                                 |
+
+**Prefer a11y refs.** They come directly from the accessibility tree and map to ARIA selectors, making them the most reliable for the current screen state.
+
+## Prior Knowledge
+
+When you call `describe-screen`, the response may include a `priorKnowledge` section with:
+
+- **`similarSteps`**: Past tool invocations on the same screen with confidence scores
+- **`suggestedNextActions`**: Ranked actions based on historical success (e.g., "click confirm button")
+- **`avoid`**: Targets that frequently fail on this screen — skip these
+
+Use prior knowledge to guide your actions, but always verify against the current a11y tree.
+
+## Error Codes
+
+When a command fails, the response includes `error.code`. Use this to decide what to do:
+
+| Code                          | Meaning                                      | Recovery                                                  |
+| ----------------------------- | -------------------------------------------- | --------------------------------------------------------- |
+| `MM_NO_ACTIVE_SESSION`        | No browser session running                   | Run `mm launch` first                                     |
+| `MM_SESSION_ALREADY_RUNNING`  | Session already exists                       | Run `mm cleanup` first, or use `--force`                  |
+| `MM_TARGET_NOT_FOUND`         | Element ref/testId/selector not found        | Run `mm describe-screen` to get fresh refs                |
+| `MM_WAIT_TIMEOUT`             | Element didn't appear in time                | Increase timeout or verify you're on the right screen     |
+| `MM_CLICK_FAILED`             | Click failed after finding element           | Element may be obscured; try waiting or scrolling         |
+| `MM_TYPE_FAILED`              | Type failed after finding element            | Element may not be an input; verify with describe-screen  |
+| `MM_PAGE_CLOSED`              | Page was closed unexpectedly                 | Normal after some confirmations; run describe-screen      |
+| `MM_NAVIGATION_FAILED`        | Navigation error or network failure          | Check URL validity; retry once                            |
+| `MM_NOTIFICATION_TIMEOUT`     | Extension notification popup didn't appear   | Action may not have triggered a notification; check state |
+| `MM_TAB_NOT_FOUND`            | Tab role/URL not found                       | Run `mm get-state` to see available tabs                  |
+| `MM_CAPABILITY_NOT_AVAILABLE` | Feature requires a capability not configured | Check environment mode (e2e vs prod)                      |
+| `MM_CONTEXT_SWITCH_BLOCKED`   | Can't switch context with active session     | Run `mm cleanup` first                                    |
+| `MM_INVALID_INPUT`            | Bad parameters                               | Fix input and retry                                       |
+| `MM_BATCH_TIMEOUT`            | `batchTimeoutMs` deadline exceeded           | Remaining steps were skipped; check partial results       |
+| `MM_CONTRACT_NOT_FOUND`       | Unknown contract name for seeding            | See available contracts below                             |
+
+## Available Contracts (E2E only)
+
+These contracts can be deployed to the local Anvil chain via `seed_contract` / `seed_contracts`:
+
+| Name                   | Type                                                |
+| ---------------------- | --------------------------------------------------- |
+| `hst`                  | ERC-20 token                                        |
+| `nfts`                 | ERC-721 NFT                                         |
+| `erc1155`              | ERC-1155 multi-token                                |
+| `piggybank`            | Simple deposit contract                             |
+| `failing`              | Contract that always reverts (for testing failures) |
+| `multisig`             | Multi-signature wallet                              |
+| `entrypoint`           | ERC-4337 EntryPoint                                 |
+| `simpleAccountFactory` | ERC-4337 account factory                            |
+| `verifyingPaymaster`   | ERC-4337 paymaster                                  |
+
+## Flow Tags
+
+When launching, tag your session with flow tags for cross-session knowledge:
+
+| Tag               | Use for                        |
+| ----------------- | ------------------------------ |
+| `send`            | Token send flows               |
+| `swap`            | Token swap flows               |
+| `connect`         | dApp connection flows          |
+| `sign`            | Message/transaction signing    |
+| `onboarding`      | Wallet setup/onboarding        |
+| `settings`        | Settings configuration         |
+| `tx-confirmation` | Transaction confirmation flows |
+
+## Daemon Model
+
+- Daemon runs per project, state tracked in `.mm-server` at the project root
+- Auto-starts on `mm launch` if not running
+- Shuts down after 30 minutes of inactivity
+- Logs to `.mm-daemon.log`
+- One tool executes at a time (requests are queued)
+- Project resolution: `--project` flag → `MM_PROJECT` env var → current git worktree
+
+## Workflow Examples
+
+### Basic Interaction
+
+```bash
+mm launch --state default
+mm describe-screen
+# Response includes a11y nodes: [{ ref: "e1", role: "button", name: "Send" }, ...]
+mm click e1
+mm describe-screen
+# Now on send screen — get new refs
+mm type e3 "0.01"
+mm click e5
+mm cleanup --shutdown
+```
+
+### Transaction with Notification
+
+```bash
+mm launch --state default
+mm navigate https://app.uniswap.org
+mm describe-screen
+# Interact with dApp...
+mm click e4                    # triggers wallet popup
+mm wait-for e2 --timeout 10000 # wait for confirm button in notification
+mm click e2                    # confirm
+mm describe-screen             # check result
+mm cleanup --shutdown
+```
+
+### Running From a Parent Folder
+
+```bash
+# Set once — all subsequent mm commands target this project
+export MM_PROJECT=/path/to/metamask-extension
+
+mm launch --state default
+mm describe-screen
+mm click e1
+mm cleanup --shutdown
+
+# Or use --project per command
+mm --project ../metamask-extension launch
+mm --project ../metamask-extension describe-screen
+```
+
+### Using Prior Knowledge
+
+```bash
+mm launch --state default --goal "Test send flow" --flow-tags send
+mm describe-screen
+# Response includes priorKnowledge.suggestedNextActions:
+# [{ action: "click", preferredTarget: { type: "testId", value: "send-button" }, confidence: 0.85 }]
+# Use the suggestion but verify the target exists in the current a11y tree
+mm click e3
+mm cleanup --shutdown
+```
+
+## Project-Specific Commands
+
+<!-- Consumer repos extend this section -->
+
+## Project-Specific Workflow Examples
+
+<!-- Consumer repos add examples here -->
diff --git a/package.json b/package.json
index e7a0560..8221fe2 100644
--- a/package.json
+++ b/package.json
@@ -1,13 +1,13 @@
 {
   "name": "@metamask/client-mcp-core",
   "version": "0.1.1",
-  "description": "MCP server for MetaMask Extension visual testing with LLM agents",
+  "description": "HTTP daemon and CLI for agent-driven browser extension testing with Playwright",
   "keywords": [
-    "mcp",
     "playwright",
     "llm",
     "visual-testing",
-    "browser-extension"
+    "browser-extension",
+    "cli"
   ],
   "homepage": "https://github.com/MetaMask/client-mcp-core#readme",
   "bugs": {
@@ -35,6 +35,9 @@
   "main": "./dist/index.cjs",
   "module": "./dist/index.mjs",
   "types": "./dist/index.d.cts",
+  "bin": {
+    "mm": "./dist/cli/mm.cjs"
+  },
   "files": [
     "dist"
   ],
@@ -57,7 +60,8 @@
     "@isaacs/brace-expansion": "5.0.1"
   },
   "dependencies": {
-    "@modelcontextprotocol/sdk": "^1.26.0",
+    "cosmiconfig": "^9.0.0",
+    "express": "^5.2.1",
     "zod": "^4.3.5"
   },
   "devDependencies": {
@@ -71,6 +75,7 @@
     "@metamask/eslint-config-vitest": "^15.0.0",
     "@playwright/test": "^1.49.0",
     "@ts-bridge/cli": "^0.6.3",
+    "@types/express": "^5.0.6",
     "@types/node": "^20.0.0",
     "@typescript-eslint/utils": "^8.6.0",
     "@vitest/coverage-istanbul": "^3.0.7",
diff --git a/scripts/prepack.sh b/scripts/prepack.sh
index ad99af5..e741983 100755
--- a/scripts/prepack.sh
+++ b/scripts/prepack.sh
@@ -9,3 +9,5 @@ if [[ -n $SKIP_PREPACK ]]; then
 fi
 
 yarn build
+
+chmod +x dist/cli/mm.cjs
diff --git a/src/capabilities/context.test.ts b/src/capabilities/context.test.ts
index 25410cc..e953b93 100644
--- a/src/capabilities/context.test.ts
+++ b/src/capabilities/context.test.ts
@@ -20,7 +20,6 @@ describe('isE2EConfig', () => {
       environment: 'e2e',
       extensionName: 'MetaMask',
       defaultPassword: 'password123',
-      toolPrefix: 'mm',
       artifactsDir: './test-artifacts',
       defaultChainId: 1337,
       ports: {
@@ -54,7 +53,6 @@ describe('isE2EConfig', () => {
       environment: 'prod',
       extensionName: 'MetaMask',
       defaultPassword: 'password123',
-      toolPrefix: 'mm',
       defaultChainId: 1,
     };
 
@@ -93,7 +91,6 @@ describe('isProdConfig', () => {
       environment: 'prod',
       extensionName: 'MetaMask',
       defaultPassword: 'password123',
-      toolPrefix: 'mm',
       artifactsDir: './artifacts',
       defaultChainId: 1,
     };
@@ -119,7 +116,6 @@ describe('isProdConfig', () => {
       environment: 'e2e',
       extensionName: 'MetaMask',
       defaultPassword: 'password123',
-      toolPrefix: 'mm',
       artifactsDir: './test-artifacts',
       defaultChainId: 1337,
       ports: {
@@ -438,3 +434,34 @@ describe('hasCapability', () => {
     expect(hasCapability(context, 'mockServer')).toBe(true);
   });
 });
+
+describe('WorkflowContext with allocatedPorts', () => {
+  it('accepts allocatedPorts with port mappings', () => {
+    const context: WorkflowContext = {
+      config: {
+        environment: 'e2e',
+        extensionName: 'MetaMask',
+      },
+      allocatedPorts: {
+        anvil: 3000,
+        fixture: 4000,
+      },
+    };
+
+    expect(context.allocatedPorts).toStrictEqual({
+      anvil: 3000,
+      fixture: 4000,
+    });
+  });
+
+  it('allows WorkflowContext without allocatedPorts (field is optional)', () => {
+    const context: WorkflowContext = {
+      config: {
+        environment: 'e2e',
+        extensionName: 'MetaMask',
+      },
+    };
+
+    expect(context.allocatedPorts).toBeUndefined();
+  });
+});
diff --git a/src/capabilities/context.ts b/src/capabilities/context.ts
index a88fa1b..25b4d9e 100644
--- a/src/capabilities/context.ts
+++ b/src/capabilities/context.ts
@@ -7,6 +7,9 @@ import type {
   MockServerCapability,
 } from './types.js';
 
+/** Sparse port-name → port-number map. `Partial` ensures lookups resolve to `number | undefined`. */
+export type PortMap = Partial<Record<string, number>>;
+
 /**
  * Environment mode discriminator.
  * - 'e2e': End-to-end testing environment with local chain, fixtures, and contract seeding
@@ -22,8 +25,6 @@ export type BaseEnvironmentConfig = {
   extensionName: string;
   /** Default password for wallet unlock operations */
   defaultPassword?: string;
-  /** Prefix for MCP tool names (e.g., "mm" -> "mm_build", "mm_launch") */
-  toolPrefix?: string;
   /** Directory for storing screenshots and other artifacts */
   artifactsDir?: string;
 };
@@ -112,6 +113,8 @@ export type WorkflowContext = {
   stateSnapshot?: StateSnapshotCapability;
   mockServer?: MockServerCapability;
   config: EnvironmentConfig;
+  /** Port metadata reported back to core from the contextFactory. Used for DaemonState persistence and /status endpoint. */
+  allocatedPorts?: PortMap;
 };
 
 /**
diff --git a/src/capabilities/types.ts b/src/capabilities/types.ts
index 6ac77c5..8a15177 100644
--- a/src/capabilities/types.ts
+++ b/src/capabilities/types.ts
@@ -55,6 +55,11 @@ export type ExtensionState = {
   networkName: string | null;
   chainId: number | null;
   balance: string | null;
+  activeTab?: {
+    role: string;
+    url: string;
+    title?: string;
+  };
 };
 
 export type LaunchOptions = {
diff --git a/src/cli/mm.test.ts b/src/cli/mm.test.ts
new file mode 100644
index 0000000..a73a2b4
--- /dev/null
+++ b/src/cli/mm.test.ts
@@ -0,0 +1,1973 @@
+/* eslint-disable n/no-unsupported-features/node-builtins */
+/* eslint-disable n/no-process-env */
+/* eslint-disable n/no-sync */
+/* eslint-disable require-atomic-updates */
+import { cosmiconfig } from 'cosmiconfig';
+import { existsSync } from 'node:fs';
+import * as fs from 'node:fs/promises';
+import * as path from 'node:path';
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import type { MockInstance } from 'vitest';
+
+import {
+  extractProjectFlag,
+  resolveTargetFromArgs,
+  resolveWithinFromArgs,
+  getPositionalTarget,
+  isTransientError,
+  parseIntFlag,
+  parseStringFlag,
+  parseLaunchArgs,
+  printHelp,
+  resolveRuntime,
+  sendRequest,
+  routeCommand,
+  resolveWorktreeRoot,
+  readDaemonConfig,
+  shutdownDaemon,
+  waitForDaemon,
+  discoverDaemon,
+  autoStartDaemon,
+  handleServe,
+  sleep,
+  main,
+} from './mm.js';
+
+// Stub child_process: `execSync` returns a fixed worktree path and `spawn`
+// returns a fake child object instead of starting a process.
+vi.mock('node:child_process', () => ({
+  execSync: vi.fn(() => Buffer.from('/mock/worktree\n')),
+  spawn: vi.fn(() => {
+    const child = {
+      unref: vi.fn(),
+      // An 'exit' listener is scheduled to fire with exit code 0 after 10 ms;
+      // listeners registered for any other event are never invoked.
+      on: vi.fn(
+        (event: string, handler: (code: number | null) => void) =>
+          event === 'exit' && setTimeout(() => handler(0), 10),
+      ),
+    };
+    return child;
+  }),
+}));
+
+// Keep the real node:fs exports but make `existsSync` report true by default;
+// tests that need a missing file override the return value themselves.
+vi.mock('node:fs', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('node:fs')>();
+  return { ...actual, existsSync: vi.fn(() => true) };
+});
+
+// Keep the real node:fs/promises exports except: `realpath` echoes its input,
+// `stat` always describes a directory, and `readFile` is a bare mock.
+vi.mock('node:fs/promises', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('node:fs/promises')>();
+  return {
+    ...actual,
+    realpath: vi.fn(async (p: string) => p),
+    stat: vi.fn(async () => ({ isDirectory: () => true })),
+    readFile: vi.fn(),
+  };
+});
+
+// Daemon-state helpers default to "no state on disk, daemon not alive,
+// version matches, startup lock acquired"; removal and release are no-ops.
+vi.mock('../server/daemon-state.js', () => ({
+  readDaemonState: vi.fn(async () => null),
+  isDaemonAlive: vi.fn(async () => false),
+  isDaemonVersionMatch: vi.fn(() => true),
+  removeDaemonState: vi.fn(async () => {}),
+  acquireStartupLock: vi.fn(async () => true),
+  releaseStartupLock: vi.fn(async () => {}),
+}));
+
+// Shared `search` stub returned by every mocked cosmiconfig explorer. It is
+// only read when `cosmiconfig()` is called, not when the mock factory runs.
+const mockSearch = vi.fn();
+
+vi.mock('cosmiconfig', () => ({
+  cosmiconfig: vi.fn(() => ({
+    search: mockSearch,
+  })),
+}));
+
+// Spies shared by every suite in this file; re-created before each test.
+let exitSpy: MockInstance;
+let stderrSpy: MockInstance;
+let stdoutSpy: MockInstance;
+
+// Per-test setup: clear recorded mock calls, give the config search a valid
+// default result, make `process.exit` throw Error('process.exit') so exits are
+// observable as exceptions, and stub the stdout/stderr writers.
+// eslint-disable-next-line vitest/require-top-level-describe
+beforeEach(() => {
+  vi.clearAllMocks();
+  mockSearch.mockResolvedValue({
+    config: { daemon: './daemon.ts', runtime: 'tsx' },
+    filepath: '/mock/worktree/mm-client-cli.config.ts',
+    isEmpty: false,
+  });
+  exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {
+    throw new Error('process.exit');
+  }) as never);
+  stderrSpy = vi.spyOn(process.stderr, 'write').mockReturnValue(true);
+  stdoutSpy = vi.spyOn(process.stdout, 'write').mockReturnValue(true);
+});
+
+// Per-test teardown: restore the three process spies explicitly, then restore
+// any other mocks a test installed.
+// eslint-disable-next-line vitest/require-top-level-describe
+afterEach(() => {
+  exitSpy.mockRestore();
+  stderrSpy.mockRestore();
+  stdoutSpy.mockRestore();
+  vi.restoreAllMocks();
+});
+
+// Covers stripping `--project <path>` out of argv and rejecting a missing path.
+describe('extractProjectFlag', () => {
+  // Written to stderr whenever --project is not followed by a usable path.
+  const missingPathMessage = 'Error: --project requires a path value\n';
+
+  it('returns args unchanged when no --project flag', () => {
+    const expected = { args: ['launch', '--force'], projectPath: undefined };
+
+    expect(extractProjectFlag(['launch', '--force'])).toStrictEqual(expected);
+  });
+
+  it('extracts project path and removes flag from args', () => {
+    const argv = ['--project', '/path/to/project', 'launch'];
+
+    expect(extractProjectFlag(argv)).toStrictEqual({
+      args: ['launch'],
+      projectPath: '/path/to/project',
+    });
+  });
+
+  it('handles --project in the middle of args', () => {
+    const argv = ['launch', '--project', '/my/path', '--force'];
+
+    expect(extractProjectFlag(argv)).toStrictEqual({
+      args: ['launch', '--force'],
+      projectPath: '/my/path',
+    });
+  });
+
+  // Both a dangling flag and a flag followed by another flag must abort.
+  it.each([
+    { title: 'exits when --project has no value', argv: ['--project'] },
+    {
+      title: 'exits when --project value starts with --',
+      argv: ['--project', '--force'],
+    },
+  ])('$title', ({ argv }) => {
+    expect(() => extractProjectFlag(argv)).toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(missingPathMessage);
+  });
+});
+
+// Covers mapping CLI arguments onto a selector / testId / a11yRef target.
+describe('resolveTargetFromArgs', () => {
+  it('returns selector for --selector flag', () => {
+    const target = resolveTargetFromArgs(['--selector', '.my-button']);
+
+    expect(target).toStrictEqual({ selector: '.my-button' });
+  });
+
+  it('returns testId for --testid flag', () => {
+    const target = resolveTargetFromArgs(['--testid', 'my-btn']);
+
+    expect(target).toStrictEqual({ testId: 'my-btn' });
+  });
+
+  // Bare positionals shaped like e<digits> become accessibility refs ...
+  it('returns a11yRef for e-number patterns', () => {
+    for (const ref of ['e3', 'e123']) {
+      expect(resolveTargetFromArgs([ref])).toStrictEqual({ a11yRef: ref });
+    }
+  });
+
+  // ... while any other bare positional is treated as a test id.
+  it('returns testId for non-e-number strings', () => {
+    for (const id of ['submit-button', 'eabc']) {
+      expect(resolveTargetFromArgs([id])).toStrictEqual({ testId: id });
+    }
+  });
+
+  it('exits when --selector has no value', () => {
+    const attempt = () => resolveTargetFromArgs(['--selector']);
+
+    expect(attempt).toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      'Error: --selector requires a value\n',
+    );
+  });
+
+  it('exits when --selector value starts with --', () => {
+    const attempt = () => resolveTargetFromArgs(['--selector', '--other']);
+
+    expect(attempt).toThrowError('process.exit');
+  });
+
+  it('exits when --testid has no value', () => {
+    const attempt = () => resolveTargetFromArgs(['--testid']);
+
+    expect(attempt).toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      'Error: --testid requires a value\n',
+    );
+  });
+
+  it('exits when no target provided', () => {
+    const attempt = () => resolveTargetFromArgs([]);
+
+    expect(attempt).toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      'Error: element target is required\n',
+    );
+  });
+});
+
+// Covers parsing the optional `--within <scope>` flag into a scoping target.
+describe('resolveWithinFromArgs', () => {
+  it('returns undefined when --within is not present', () => {
+    const withoutFlag = resolveWithinFromArgs(['e1', '--timeout', '5000']);
+    const emptyArgv = resolveWithinFromArgs([]);
+
+    expect(withoutFlag).toBeUndefined();
+    expect(emptyArgv).toBeUndefined();
+  });
+
+  it('returns testId when value starts with "testid:"', () => {
+    const scope = resolveWithinFromArgs(['--within', 'testid:parent']);
+
+    expect(scope).toStrictEqual({ testId: 'parent' });
+  });
+
+  it('returns selector when value starts with "selector:"', () => {
+    const scope = resolveWithinFromArgs(['--within', 'selector:.container']);
+
+    expect(scope).toStrictEqual({ selector: '.container' });
+  });
+
+  it('returns a11yRef when value matches /^e\\d+$/', () => {
+    for (const ref of ['e1', 'e123']) {
+      expect(resolveWithinFromArgs(['--within', ref])).toStrictEqual({
+        a11yRef: ref,
+      });
+    }
+  });
+
+  // Unprefixed values that are not e<digits> fall back to a test id.
+  it('returns testId for bare non-ref value', () => {
+    for (const id of ['parent-id', 'eabc']) {
+      expect(resolveWithinFromArgs(['--within', id])).toStrictEqual({
+        testId: id,
+      });
+    }
+  });
+
+  it('exits when --within has no value', () => {
+    const attempt = () => resolveWithinFromArgs(['--within']);
+
+    expect(attempt).toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      'Error: --within requires a value\n',
+    );
+  });
+
+  it('exits when --within value starts with --', () => {
+    const attempt = () => resolveWithinFromArgs(['--within', '--other']);
+
+    expect(attempt).toThrowError('process.exit');
+  });
+});
+
+// Covers picking the first positional (non-flag) argument out of argv.
+describe('getPositionalTarget', () => {
+  // argv shapes that contain the positional `e1`.
+  it.each([
+    {
+      title: 'returns first non-flag argument',
+      argv: ['e1', '--timeout', '5000'],
+    },
+    { title: 'skips flag-value pairs', argv: ['--timeout', '5000', 'e1'] },
+  ])('$title', ({ argv }) => {
+    expect(getPositionalTarget(argv)).toBe('e1');
+  });
+
+  // argv shapes with no positional at all.
+  it.each([
+    { title: 'returns undefined for empty args', argv: [] as string[] },
+    {
+      title: 'returns undefined when only flags present',
+      argv: ['--timeout', '5000'],
+    },
+  ])('$title', ({ argv }) => {
+    expect(getPositionalTarget(argv)).toBeUndefined();
+  });
+});
+
+// Covers classification of network errors as retryable or not.
+describe('isTransientError', () => {
+  // Each of these error messages must be recognised as retryable.
+  it.each([
+    'ECONNREFUSED',
+    'ECONNRESET',
+    'EPIPE',
+    'UND_ERR_SOCKET',
+    'fetch failed',
+  ])('returns true for %s', (message) => {
+    expect(isTransientError(new Error(message))).toBe(true);
+  });
+
+  it('returns false for other errors', () => {
+    for (const message of ['timeout', '404 not found']) {
+      expect(isTransientError(new Error(message))).toBe(false);
+    }
+  });
+});
+
+// Covers reading an integer-valued `--flag <n>` pair from argv.
+describe('parseIntFlag', () => {
+  it('returns parsed integer value', () => {
+    const timeout = parseIntFlag(['--timeout', '5000'], '--timeout');
+
+    expect(timeout).toBe(5000);
+  });
+
+  // Missing flag, non-numeric value, and dangling flag all yield undefined.
+  it.each([
+    {
+      title: 'returns undefined when flag is absent',
+      argv: ['--other', '5000'],
+    },
+    { title: 'returns undefined for NaN values', argv: ['--timeout', 'abc'] },
+    {
+      title: 'returns undefined when no value follows flag',
+      argv: ['--timeout'],
+    },
+  ])('$title', ({ argv }) => {
+    expect(parseIntFlag(argv, '--timeout')).toBeUndefined();
+  });
+});
+
+// Covers reading a string-valued `--flag <value>` pair from argv.
+describe('parseStringFlag', () => {
+  it('returns string value', () => {
+    const role = parseStringFlag(['--role', 'extension'], '--role');
+
+    expect(role).toBe('extension');
+  });
+
+  // Missing flag, flag-like value, and dangling flag all yield undefined.
+  it.each([
+    { title: 'returns undefined when flag is absent', argv: ['--other', 'val'] },
+    {
+      title: 'returns undefined when value starts with --',
+      argv: ['--role', '--other'],
+    },
+    { title: 'returns undefined when no value follows', argv: ['--role'] },
+  ])('$title', ({ argv }) => {
+    expect(parseStringFlag(argv, '--role')).toBeUndefined();
+  });
+});
+
+// Covers translating `mm launch` flags into the launch request payload.
+describe('parseLaunchArgs', () => {
+  // argv → exact payload expected from a successful parse.
+  it.each([
+    {
+      title: 'returns empty object for no args',
+      argv: [] as string[],
+      expected: {},
+    },
+    { title: 'parses --force flag', argv: ['--force'], expected: { force: true } },
+    {
+      title: 'parses --state value',
+      argv: ['--state', 'onboarding'],
+      expected: { stateMode: 'onboarding' },
+    },
+    {
+      title: 'parses --extension-path value',
+      argv: ['--extension-path', '/ext'],
+      expected: { extensionPath: '/ext' },
+    },
+    {
+      title: 'parses --goal value',
+      argv: ['--goal', 'test swap'],
+      expected: { goal: 'test swap' },
+    },
+    {
+      title: 'parses --flow-tags as comma-separated array',
+      argv: ['--flow-tags', 'send, swap'],
+      expected: { flowTags: ['send', 'swap'] },
+    },
+    {
+      title: 'parses multiple flags together',
+      argv: ['--state', 'default', '--force', '--goal', 'test it'],
+      expected: { stateMode: 'default', force: true, goal: 'test it' },
+    },
+    {
+      title: 'parses --context value',
+      argv: ['--context', 'prod'],
+      expected: { context: 'prod' },
+    },
+    {
+      title: 'parses --context with other flags',
+      argv: ['--context', 'prod', '--state', 'onboarding'],
+      expected: { context: 'prod', stateMode: 'onboarding' },
+    },
+  ])('$title', ({ argv, expected }) => {
+    expect(parseLaunchArgs(argv)).toStrictEqual(expected);
+  });
+
+  // A value-taking flag with nothing after it exits with a specific message.
+  it.each([
+    {
+      title: 'exits for --state without value',
+      argv: ['--state'],
+      message: 'Error: --state requires a value (default|onboarding|custom)\n',
+    },
+    {
+      title: 'exits for --extension-path without value',
+      argv: ['--extension-path'],
+      message: 'Error: --extension-path requires a value\n',
+    },
+    {
+      title: 'exits for --goal without value',
+      argv: ['--goal'],
+      message: 'Error: --goal requires a value\n',
+    },
+    {
+      title: 'exits for --flow-tags without value',
+      argv: ['--flow-tags'],
+      message: 'Error: --flow-tags requires a comma-separated value\n',
+    },
+    {
+      title: 'exits for --context without value',
+      argv: ['--context'],
+      message: 'Error: --context requires a value (e2e|prod)\n',
+    },
+  ])('$title', ({ argv, message }) => {
+    expect(() => parseLaunchArgs(argv)).toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(message);
+  });
+
+  // Another flag in the value position is rejected as well.
+  it.each([
+    {
+      title: 'exits for --state with flag as value',
+      argv: ['--state', '--force'],
+    },
+    {
+      title: 'exits for --context with flag as value',
+      argv: ['--context', '--force'],
+    },
+  ])('$title', ({ argv }) => {
+    expect(() => parseLaunchArgs(argv)).toThrowError('process.exit');
+  });
+
+  it('writes warning for unknown flags', () => {
+    const unknownFlagWarning = "Warning: unknown launch flag '--unknown'\n";
+
+    parseLaunchArgs(['--unknown']);
+
+    expect(stderrSpy).toHaveBeenCalledWith(unknownFlagWarning);
+  });
+});
+
+// Covers the help banner: one stdout write containing the key usage lines.
+describe('printHelp', () => {
+  it('writes help text to stdout', () => {
+    printHelp();
+
+    expect(stdoutSpy).toHaveBeenCalledTimes(1);
+    const [helpText] = stdoutSpy.mock.calls[0] as string[];
+    for (const fragment of ['mm — MetaMask CLI', 'Usage:', 'mm launch']) {
+      expect(helpText).toContain(fragment);
+    }
+  });
+});
+
+// Covers mapping a configured runtime name onto an executable.
+describe('resolveRuntime', () => {
+  it('returns node for node runtime', () => {
+    const runtime = resolveRuntime('/root', 'node');
+
+    expect(runtime).toBe('node');
+  });
+
+  it('returns bin path when runtime exists', () => {
+    vi.mocked(existsSync).mockReturnValue(true);
+    const expectedBin = path.join('/root', 'node_modules', '.bin', 'tsx');
+
+    expect(resolveRuntime('/root', 'tsx')).toBe(expectedBin);
+  });
+
+  it('exits when runtime binary not found', () => {
+    vi.mocked(existsSync).mockReturnValue(false);
+    const attempt = () => resolveRuntime('/root', 'tsx');
+
+    expect(attempt).toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining("Runtime 'tsx' not found"),
+    );
+  });
+});
+
+// Covers the promise-based delay helper using vitest's fake clock.
+describe('sleep', () => {
+  it('resolves after delay', async () => {
+    vi.useFakeTimers();
+    try {
+      const promise = sleep(100);
+      vi.advanceTimersByTime(100);
+      expect(await promise).toBeUndefined();
+    } finally {
+      // Always hand real timers back, even when the assertion above fails;
+      // otherwise later tests in this file would keep running on fake timers.
+      vi.useRealTimers();
+    }
+  });
+});
+
+// Covers signalling the daemon process and clearing its persisted state.
+describe('shutdownDaemon', () => {
+  // Persisted daemon record used by every case; only the pid varies.
+  const daemonStateFor = (pid: number) => ({
+    port: 3000,
+    pid,
+    nonce: 'abc',
+    startedAt: '2024-01-01',
+    subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+  });
+
+  // Replaces process.kill with a spy running `behavior` (a no-op by default).
+  const spyOnKill = (behavior: () => void = vi.fn()) =>
+    vi
+      .spyOn(process, 'kill')
+      .mockImplementation(behavior as unknown as typeof process.kill);
+
+  it('sends SIGTERM and removes state', async () => {
+    const { removeDaemonState } = await import('../server/daemon-state.js');
+    const killSpy = spyOnKill();
+
+    await shutdownDaemon('/root', daemonStateFor(12345));
+
+    expect(killSpy).toHaveBeenCalledWith(12345, 'SIGTERM');
+    expect(removeDaemonState).toHaveBeenCalledWith('/root');
+    killSpy.mockRestore();
+  });
+
+  it('ignores kill errors for dead processes', async () => {
+    const killSpy = spyOnKill(() => {
+      throw new Error('ESRCH');
+    });
+
+    await shutdownDaemon('/root', daemonStateFor(12345));
+
+    expect(killSpy).toHaveBeenCalled();
+    killSpy.mockRestore();
+  });
+
+  it('skips kill when pid is falsy', async () => {
+    const killSpy = spyOnKill();
+
+    await shutdownDaemon('/root', daemonStateFor(0));
+
+    expect(killSpy).not.toHaveBeenCalled();
+    killSpy.mockRestore();
+  });
+});
+
+// Covers loading the daemon entry point and runtime through cosmiconfig.
+describe('readDaemonConfig', () => {
+  const configFile = '/project/mm-client-cli.config.ts';
+
+  // Queues a single cosmiconfig search hit carrying the given config payload.
+  const stubSearchHit = (config: unknown, isEmpty = false) =>
+    mockSearch.mockResolvedValueOnce({ config, filepath: configFile, isEmpty });
+
+  // Asserts that reading the config aborts with a stderr message containing `fragment`.
+  const expectExitWith = async (fragment: string) => {
+    await expect(readDaemonConfig('/project')).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining(fragment));
+  };
+
+  it('reads and parses config from cosmiconfig', async () => {
+    stubSearchHit({ daemon: './my-daemon.ts', runtime: 'tsx' });
+    const expectedSearchPlaces = [
+      'mm-client-cli.config.ts',
+      'mm-client-cli.config.js',
+      'mm-client-cli.config.cjs',
+      'mm-client-cli.config.mjs',
+      '.mm-client-clirc',
+      '.mm-client-clirc.json',
+      '.mm-client-clirc.yaml',
+      '.mm-client-clirc.yml',
+      '.mm-client-clirc.js',
+      '.mm-client-clirc.ts',
+      '.mm-client-clirc.cjs',
+    ];
+
+    const result = await readDaemonConfig('/project');
+
+    expect(result).toStrictEqual({
+      daemonPath: './my-daemon.ts',
+      runtime: 'tsx',
+    });
+    expect(cosmiconfig).toHaveBeenCalledWith('mm-client-cli', {
+      searchPlaces: expectedSearchPlaces,
+      stopDir: '/project',
+    });
+    expect(mockSearch).toHaveBeenCalledWith('/project');
+  });
+
+  it('defaults runtime to tsx when not specified', async () => {
+    stubSearchHit({ daemon: './d.ts' });
+
+    const { runtime } = await readDaemonConfig('/project');
+
+    expect(runtime).toBe('tsx');
+  });
+
+  it('exits when no config file is found', async () => {
+    mockSearch.mockResolvedValueOnce(null);
+
+    await expectExitWith('No mm-client-cli config found');
+  });
+
+  it('exits when config file is empty', async () => {
+    stubSearchHit(undefined, true);
+
+    await expectExitWith('No mm-client-cli config found');
+  });
+
+  it('exits when daemon is not configured', async () => {
+    stubSearchHit({ runtime: 'tsx' });
+
+    await expectExitWith('No daemon entry point configured');
+  });
+});
+
+// Covers project-root resolution from --project, MM_PROJECT, or the git worktree.
+describe('resolveWorktreeRoot', () => {
+  let savedProjectEnv: string | undefined;
+
+  // Snapshot MM_PROJECT so each test leaves the environment exactly as it
+  // found it, even when an assertion throws part-way through the test.
+  beforeEach(() => {
+    savedProjectEnv = process.env.MM_PROJECT;
+  });
+
+  afterEach(() => {
+    // Assigning `undefined` to a process.env key stores the string
+    // "undefined", so an originally-unset variable has to be deleted instead.
+    if (savedProjectEnv === undefined) {
+      delete process.env.MM_PROJECT;
+    } else {
+      process.env.MM_PROJECT = savedProjectEnv;
+    }
+  });
+
+  it('resolves path from --project flag', async () => {
+    vi.mocked(fs.realpath).mockResolvedValueOnce('/resolved/path');
+    vi.mocked(fs.stat).mockResolvedValueOnce({
+      isDirectory: () => true,
+    } as any);
+
+    const result = await resolveWorktreeRoot('/some/path');
+    expect(result).toBe('/resolved/path');
+  });
+
+  it('resolves path from MM_PROJECT env when no flag', async () => {
+    process.env.MM_PROJECT = '/env/path';
+
+    vi.mocked(fs.realpath).mockResolvedValueOnce('/env/path');
+    vi.mocked(fs.stat).mockResolvedValueOnce({
+      isDirectory: () => true,
+    } as any);
+
+    const result = await resolveWorktreeRoot(undefined);
+    expect(result).toBe('/env/path');
+  });
+
+  it('exits when path does not exist', async () => {
+    vi.mocked(fs.realpath).mockRejectedValueOnce(new Error('ENOENT'));
+
+    await expect(resolveWorktreeRoot('/bad/path')).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('project path does not exist'),
+    );
+  });
+
+  it('exits when path is not a directory', async () => {
+    vi.mocked(fs.realpath).mockResolvedValueOnce('/some/file.txt');
+    vi.mocked(fs.stat).mockResolvedValueOnce({
+      isDirectory: () => false,
+    } as any);
+
+    await expect(resolveWorktreeRoot('/some/file.txt')).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('project path is not a directory'),
+    );
+  });
+
+  it('exits when stat fails', async () => {
+    vi.mocked(fs.realpath).mockResolvedValueOnce('/some/path');
+    vi.mocked(fs.stat).mockRejectedValueOnce(new Error('EACCES'));
+
+    await expect(resolveWorktreeRoot('/some/path')).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('cannot access project path'),
+    );
+  });
+
+  it('falls back to git worktree when no explicit path', async () => {
+    // With neither a flag nor MM_PROJECT, the mocked git lookup supplies the root.
+    delete process.env.MM_PROJECT;
+
+    const { execSync } = await import('node:child_process');
+    vi.mocked(execSync).mockReturnValueOnce(Buffer.from('/git/root\n'));
+
+    const result = await resolveWorktreeRoot(undefined);
+    expect(result).toBe('/git/root');
+  });
+
+  it('exits when not in a git repository', async () => {
+    delete process.env.MM_PROJECT;
+
+    const { execSync } = await import('node:child_process');
+    vi.mocked(execSync).mockImplementation(() => {
+      throw new Error('not a git repo');
+    });
+
+    await expect(resolveWorktreeRoot(undefined)).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('not in a git repository'),
+    );
+  });
+});
+
+// Covers the HTTP client: request shape, output printing, retries, and exits.
+describe('sendRequest', () => {
+  const originalFetch = globalThis.fetch;
+
+  // Makes fetch resolve with a minimal Response stand-in whose json() yields `payload`.
+  const stubFetchJson = (ok: boolean, payload: unknown) =>
+    vi.spyOn(globalThis, 'fetch').mockResolvedValue({
+      ok,
+      json: async () => payload,
+    } as Response);
+
+  // Makes every fetch call reject with `error`.
+  const stubFetchRejection = (error: Error) =>
+    vi.spyOn(globalThis, 'fetch').mockRejectedValue(error);
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  it('sends GET request and prints JSON result', async () => {
+    stubFetchJson(true, { ok: true, result: { status: 'running' } });
+
+    await sendRequest(3000, 'GET', '/status', null);
+
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/status',
+      expect.objectContaining({ method: 'GET' }),
+    );
+    expect(stdoutSpy).toHaveBeenCalled();
+  });
+
+  it('sends POST request with JSON body', async () => {
+    stubFetchJson(true, { ok: true, result: 'launched' });
+    const expectedInit = expect.objectContaining({
+      method: 'POST',
+      body: '{"state":"default"}',
+      headers: { 'Content-Type': 'application/json' },
+    });
+
+    await sendRequest(3000, 'POST', '/launch', { state: 'default' });
+
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/launch',
+      expectedInit,
+    );
+  });
+
+  it('prints string results directly', async () => {
+    stubFetchJson(true, { ok: true, result: 'simple string' });
+
+    await sendRequest(3000, 'GET', '/status', null);
+
+    expect(stdoutSpy).toHaveBeenCalledWith('simple string\n');
+  });
+
+  it('exits on error response', async () => {
+    stubFetchJson(false, { ok: false, error: { message: 'No session' } });
+
+    const pending = sendRequest(3000, 'POST', '/tool/click', {});
+
+    await expect(pending).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith('Error: No session\n');
+  });
+
+  it('exits on ok:false in response body', async () => {
+    // HTTP-level success, but the envelope itself reports a failure.
+    stubFetchJson(true, { ok: false, error: { message: 'Tool failed' } });
+
+    const pending = sendRequest(3000, 'POST', '/tool/click', {});
+
+    await expect(pending).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith('Error: Tool failed\n');
+  });
+
+  it('retries transient errors', async () => {
+    // Two refused connections, then a successful response on the third call.
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
+      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
+      .mockResolvedValue({
+        ok: true,
+        json: async () => ({ ok: true, result: 'ok' }),
+      } as Response);
+
+    await sendRequest(3000, 'GET', '/health', null);
+
+    expect(fetchSpy).toHaveBeenCalledTimes(3);
+  });
+
+  it('exits after max retries for transient errors', async () => {
+    stubFetchRejection(new Error('ECONNREFUSED'));
+
+    const pending = sendRequest(3000, 'GET', '/health', null);
+
+    await expect(pending).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('ECONNREFUSED'),
+    );
+  });
+
+  it('exits immediately for non-transient errors', async () => {
+    stubFetchRejection(new Error('some other error'));
+
+    const pending = sendRequest(3000, 'GET', '/health', null);
+
+    await expect(pending).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('some other error'),
+    );
+  });
+
+  it('exits on request timeout (AbortError)', async () => {
+    const abortError = Object.assign(new Error('The operation was aborted'), {
+      name: 'AbortError',
+    });
+    stubFetchRejection(abortError);
+
+    const pending = sendRequest(3000, 'POST', '/launch', {});
+
+    await expect(pending).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('request timed out'),
+    );
+  });
+
+  it('falls back to data when no result key', async () => {
+    stubFetchJson(true, { ok: true, status: 'running' });
+
+    await sendRequest(3000, 'GET', '/status', null);
+
+    expect(stdoutSpy).toHaveBeenCalled();
+    const [printed] = stdoutSpy.mock.calls[0] as string[];
+    expect(printed).toContain('running');
+  });
+
+  it('falls back to "Request failed" when error has no message', async () => {
+    stubFetchJson(false, { ok: false });
+
+    const pending = sendRequest(3000, 'POST', '/tool/x', {});
+
+    await expect(pending).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith('Error: Request failed\n');
+  });
+
+  it('reaches the final fallback after repeated transient failures when exit does not throw', async () => {
+    stubFetchRejection(new Error('ECONNREFUSED'));
+    // Swap the throwing exit stub for one that returns, so control flows on
+    // past each exit call and the last-resort error path gets exercised.
+    exitSpy.mockRestore();
+    exitSpy = vi
+      .spyOn(process, 'exit')
+      .mockImplementation((() => undefined as never) as never);
+
+    await sendRequest(3000, 'GET', '/health', null);
+
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('request failed after 4 attempts'),
+    );
+    expect(process.exit).toHaveBeenCalledWith(1);
+  });
+});
+
+describe('routeCommand', () => {
+  const originalFetch = globalThis.fetch;
+
+  beforeEach(() => {
+    vi.spyOn(globalThis, 'fetch').mockResolvedValue({
+      ok: true,
+      json: async () => ({ ok: true, result: {} }),
+    } as Response);
+  });
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  it('routes status to GET /status', async () => {
+    await routeCommand('status', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/status',
+      expect.objectContaining({ method: 'GET' }),
+    );
+  });
+
+  it('routes click with a11y ref', async () => {
+    await routeCommand('click', ['e3'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/click',
+      expect.objectContaining({
+        body: JSON.stringify({ a11yRef: 'e3' }),
+      }),
+    );
+  });
+
+  it('routes click with --selector', async () => {
+    await routeCommand('click', ['--selector', '.btn'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/click',
+      expect.objectContaining({
+        body: JSON.stringify({ selector: '.btn' }),
+      }),
+    );
+  });
+
+  it('exits when click has no target', async () => {
+    await expect(routeCommand('click', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('Usage: mm click'),
+    );
+  });
+
+  it('routes type with ref and text', async () => {
+    await routeCommand('type', ['e1', 'hello'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/type',
+      expect.objectContaining({
+        body: JSON.stringify({ a11yRef: 'e1', text: 'hello' }),
+      }),
+    );
+  });
+
+  it('routes type with --testid', async () => {
+    await routeCommand('type', ['--testid', 'input', 'text'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/type',
+      expect.objectContaining({
+        body: JSON.stringify({ testId: 'input', text: 'text' }),
+      }),
+    );
+  });
+
+  it('exits when type has no target', async () => {
+    await expect(routeCommand('type', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('exits when type has no text', async () => {
+    await expect(routeCommand('type', ['e1'], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith('Usage: mm type <ref> <text>\n');
+  });
+
+  it('routes describe-screen', async () => {
+    await routeCommand('describe-screen', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/describe_screen',
+      expect.anything(),
+    );
+  });
+
+  it('routes screenshot', async () => {
+    await routeCommand('screenshot', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/screenshot',
+      expect.objectContaining({ body: JSON.stringify({}) }),
+    );
+  });
+
+  it('routes screenshot with --name', async () => {
+    await routeCommand('screenshot', ['--name', 'my-shot'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/screenshot',
+      expect.objectContaining({
+        body: JSON.stringify({ name: 'my-shot' }),
+      }),
+    );
+  });
+
+  it('routes wait-for with ref', async () => {
+    await routeCommand('wait-for', ['e5'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/wait_for',
+      expect.objectContaining({
+        body: JSON.stringify({ a11yRef: 'e5' }),
+      }),
+    );
+  });
+
+  it('routes wait-for with --timeout', async () => {
+    await routeCommand('wait-for', ['e5', '--timeout', '10000'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/wait_for',
+      expect.objectContaining({
+        body: JSON.stringify({ a11yRef: 'e5', timeoutMs: 10000 }),
+      }),
+    );
+  });
+
+  it('exits when wait-for has no target', async () => {
+    await expect(routeCommand('wait-for', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes navigate with url', async () => {
+    await routeCommand('navigate', ['http://example.com'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/navigate',
+      expect.objectContaining({
+        body: JSON.stringify({ screen: 'url', url: 'http://example.com' }),
+      }),
+    );
+  });
+
+  it('exits when navigate has no url', async () => {
+    await expect(routeCommand('navigate', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes navigate-home', async () => {
+    await routeCommand('navigate-home', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/navigate',
+      expect.objectContaining({
+        body: JSON.stringify({ screen: 'home' }),
+      }),
+    );
+  });
+
+  it('routes navigate-settings', async () => {
+    await routeCommand('navigate-settings', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/navigate',
+      expect.objectContaining({
+        body: JSON.stringify({ screen: 'settings' }),
+      }),
+    );
+  });
+
+  it('routes get-state', async () => {
+    await routeCommand('get-state', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/get_state',
+      expect.anything(),
+    );
+  });
+
+  it('routes get-context', async () => {
+    await routeCommand('get-context', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/get_context',
+      expect.anything(),
+    );
+  });
+
+  it('routes set-context with e2e', async () => {
+    await routeCommand('set-context', ['e2e'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/set_context',
+      expect.objectContaining({
+        body: JSON.stringify({ context: 'e2e' }),
+      }),
+    );
+  });
+
+  it('routes set-context with prod', async () => {
+    await routeCommand('set-context', ['prod'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/set_context',
+      expect.objectContaining({
+        body: JSON.stringify({ context: 'prod' }),
+      }),
+    );
+  });
+
+  it('exits when set-context has invalid value', async () => {
+    await expect(
+      routeCommand('set-context', ['other'], 3000),
+    ).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      'Usage: mm set-context <e2e|prod>\n',
+    );
+  });
+
+  it('exits when set-context has no value', async () => {
+    await expect(routeCommand('set-context', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes build', async () => {
+    await routeCommand('build', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/build',
+      expect.objectContaining({ body: JSON.stringify({}) }),
+    );
+  });
+
+  it('routes build with --force', async () => {
+    await routeCommand('build', ['--force'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/build',
+      expect.objectContaining({
+        body: JSON.stringify({ force: true }),
+      }),
+    );
+  });
+
+  it('routes wait-for-notification', async () => {
+    await routeCommand('wait-for-notification', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/wait_for_notification',
+      expect.objectContaining({ body: JSON.stringify({}) }),
+    );
+  });
+
+  it('routes wait-for-notification with --timeout', async () => {
+    await routeCommand('wait-for-notification', ['--timeout', '5000'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/wait_for_notification',
+      expect.objectContaining({
+        body: JSON.stringify({ timeoutMs: 5000 }),
+      }),
+    );
+  });
+
+  it('routes switch-to-tab with --role', async () => {
+    await routeCommand('switch-to-tab', ['--role', 'extension'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/switch_to_tab',
+      expect.objectContaining({
+        body: JSON.stringify({ role: 'extension' }),
+      }),
+    );
+  });
+
+  it('routes switch-to-tab with --url', async () => {
+    await routeCommand('switch-to-tab', ['--url', 'http://dapp.io'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/switch_to_tab',
+      expect.objectContaining({
+        body: JSON.stringify({ url: 'http://dapp.io' }),
+      }),
+    );
+  });
+
+  it('routes switch-to-tab with positional role', async () => {
+    await routeCommand('switch-to-tab', ['dapp'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/switch_to_tab',
+      expect.objectContaining({
+        body: JSON.stringify({ role: 'dapp' }),
+      }),
+    );
+  });
+
+  it('exits when switch-to-tab has no flags', async () => {
+    await expect(routeCommand('switch-to-tab', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes get-text with positional ref', async () => {
+    await routeCommand('get-text', ['e1'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/get_text',
+      expect.objectContaining({
+        body: JSON.stringify({ a11yRef: 'e1' }),
+      }),
+    );
+  });
+
+  it('routes get-text with --testid', async () => {
+    await routeCommand('get-text', ['--testid', 'result-box'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/get_text',
+      expect.objectContaining({
+        body: JSON.stringify({ testId: 'result-box' }),
+      }),
+    );
+  });
+
+  it('routes get-text with --selector', async () => {
+    await routeCommand('get-text', ['--selector', '#output'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/get_text',
+      expect.objectContaining({
+        body: JSON.stringify({ selector: '#output' }),
+      }),
+    );
+  });
+
+  it('routes get-text with --within scoping', async () => {
+    await routeCommand(
+      'get-text',
+      ['--testid', 'amount', '--within', 'testid:tx-row'],
+      3000,
+    );
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/get_text',
+      expect.objectContaining({
+        body: JSON.stringify({
+          testId: 'amount',
+          within: { testId: 'tx-row' },
+        }),
+      }),
+    );
+  });
+
+  it('exits when get-text has no target', async () => {
+    await expect(routeCommand('get-text', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes close-tab with --role', async () => {
+    await routeCommand('close-tab', ['--role', 'dapp'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/close_tab',
+      expect.objectContaining({
+        body: JSON.stringify({ role: 'dapp' }),
+      }),
+    );
+  });
+
+  it('routes close-tab with --url', async () => {
+    await routeCommand('close-tab', ['--url', 'http://x.io'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/close_tab',
+      expect.objectContaining({
+        body: JSON.stringify({ url: 'http://x.io' }),
+      }),
+    );
+  });
+
+  it('exits when close-tab has no flags', async () => {
+    await expect(routeCommand('close-tab', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes clipboard read', async () => {
+    await routeCommand('clipboard', ['read'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/clipboard',
+      expect.objectContaining({
+        body: JSON.stringify({ action: 'read' }),
+      }),
+    );
+  });
+
+  it('routes clipboard write with text', async () => {
+    await routeCommand('clipboard', ['write', 'hello'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/clipboard',
+      expect.objectContaining({
+        body: JSON.stringify({ action: 'write', text: 'hello' }),
+      }),
+    );
+  });
+
+  it('exits when clipboard has invalid action', async () => {
+    await expect(
+      routeCommand('clipboard', ['invalid'], 3000),
+    ).rejects.toThrowError('process.exit');
+  });
+
+  it('exits when clipboard has no action', async () => {
+    await expect(routeCommand('clipboard', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('exits when clipboard write has no text', async () => {
+    await expect(
+      routeCommand('clipboard', ['write'], 3000),
+    ).rejects.toThrowError('process.exit');
+  });
+
+  it('routes seed-contract', async () => {
+    await routeCommand('seed-contract', ['hst'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/seed_contract',
+      expect.objectContaining({
+        body: JSON.stringify({ contractName: 'hst' }),
+      }),
+    );
+  });
+
+  it('routes seed-contract with --hardfork', async () => {
+    await routeCommand(
+      'seed-contract',
+      ['hst', '--hardfork', 'shanghai'],
+      3000,
+    );
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/seed_contract',
+      expect.objectContaining({
+        body: JSON.stringify({ contractName: 'hst', hardfork: 'shanghai' }),
+      }),
+    );
+  });
+
+  it('exits when seed-contract has no name', async () => {
+    await expect(routeCommand('seed-contract', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes seed-contracts with multiple names', async () => {
+    await routeCommand('seed-contracts', ['hst', 'nfts'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/seed_contracts',
+      expect.objectContaining({
+        body: JSON.stringify({ contracts: ['hst', 'nfts'] }),
+      }),
+    );
+  });
+
+  it('routes seed-contracts with --hardfork', async () => {
+    await routeCommand(
+      'seed-contracts',
+      ['hst', '--hardfork', 'shanghai'],
+      3000,
+    );
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/seed_contracts',
+      expect.objectContaining({
+        body: expect.stringContaining('"hardfork":"shanghai"'),
+      }),
+    );
+  });
+
+  it('exits when seed-contracts has no names', async () => {
+    await expect(routeCommand('seed-contracts', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('routes get-contract-address', async () => {
+    await routeCommand('get-contract-address', ['hst'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/get_contract_address',
+      expect.objectContaining({
+        body: JSON.stringify({ contractName: 'hst' }),
+      }),
+    );
+  });
+
+  it('exits when get-contract-address has no name', async () => {
+    await expect(
+      routeCommand('get-contract-address', [], 3000),
+    ).rejects.toThrowError('process.exit');
+  });
+
+  it('routes list-contracts', async () => {
+    await routeCommand('list-contracts', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/list_contracts',
+      expect.anything(),
+    );
+  });
+
+  it('routes list-testids', async () => {
+    await routeCommand('list-testids', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/list_testids',
+      expect.objectContaining({ body: JSON.stringify({}) }),
+    );
+  });
+
+  it('routes list-testids with --limit', async () => {
+    await routeCommand('list-testids', ['--limit', '50'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/list_testids',
+      expect.objectContaining({
+        body: JSON.stringify({ limit: 50 }),
+      }),
+    );
+  });
+
+  it('routes accessibility-snapshot', async () => {
+    await routeCommand('accessibility-snapshot', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/accessibility_snapshot',
+      expect.objectContaining({ body: JSON.stringify({}) }),
+    );
+  });
+
+  it('routes accessibility-snapshot with --root', async () => {
+    await routeCommand('accessibility-snapshot', ['--root', '#main'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/accessibility_snapshot',
+      expect.objectContaining({
+        body: JSON.stringify({ rootSelector: '#main' }),
+      }),
+    );
+  });
+
+  it('routes knowledge-search', async () => {
+    await routeCommand('knowledge-search', ['send flow'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/knowledge_search',
+      expect.objectContaining({
+        body: JSON.stringify({ query: 'send flow' }),
+      }),
+    );
+  });
+
+  it('exits when knowledge-search has no query', async () => {
+    await expect(
+      routeCommand('knowledge-search', [], 3000),
+    ).rejects.toThrowError('process.exit');
+  });
+
+  it('routes knowledge-last', async () => {
+    await routeCommand('knowledge-last', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/knowledge_last',
+      expect.anything(),
+    );
+  });
+
+  it('routes knowledge-sessions', async () => {
+    await routeCommand('knowledge-sessions', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/knowledge_sessions',
+      expect.anything(),
+    );
+  });
+
+  it('routes knowledge-summarize', async () => {
+    await routeCommand('knowledge-summarize', [], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/knowledge_summarize',
+      expect.objectContaining({ body: JSON.stringify({}) }),
+    );
+  });
+
+  it('routes knowledge-summarize with --session', async () => {
+    await routeCommand('knowledge-summarize', ['--session', 'sid'], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/knowledge_summarize',
+      expect.objectContaining({
+        body: JSON.stringify({ scope: { sessionId: 'sid' } }),
+      }),
+    );
+  });
+
+  it('routes run-steps with JSON input', async () => {
+    const input = JSON.stringify({
+      steps: [{ tool: 'click', args: { a11yRef: 'e1' } }],
+    });
+    await routeCommand('run-steps', [input], 3000);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/run_steps',
+      expect.objectContaining({ body: input }),
+    );
+  });
+
+  it('exits when run-steps has no input', async () => {
+    await expect(routeCommand('run-steps', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+  });
+
+  it('exits when run-steps has invalid JSON', async () => {
+    await expect(
+      routeCommand('run-steps', ['{bad json}'], 3000),
+    ).rejects.toThrowError('process.exit');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('invalid JSON'),
+    );
+  });
+
+  it('exits for unknown command', async () => {
+    await expect(routeCommand('unknown-cmd', [], 3000)).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining("unknown command 'unknown-cmd'"),
+    );
+  });
+});
+
+describe('discoverDaemon', () => {
+  /**
+   * Builds the daemon-state fixture used throughout this suite.
+   *
+   * @param overrides - Optional fields (such as `version`) merged over the
+   * defaults.
+   * @returns A new daemon-state object on every call.
+   */
+  const makeDaemonState = (overrides: { version?: string } = {}) => ({
+    port: 3000,
+    pid: 123,
+    nonce: 'abc',
+    startedAt: '2024-01-01',
+    subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    ...overrides,
+  });
+
+  it('returns existing alive daemon with matching version', async () => {
+    const { readDaemonState, isDaemonAlive, isDaemonVersionMatch } =
+      await import('../server/daemon-state.js');
+    const mockState = makeDaemonState({ version: '1.0.0' });
+    vi.mocked(readDaemonState).mockResolvedValueOnce(mockState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+    vi.mocked(isDaemonVersionMatch).mockReturnValueOnce(true);
+
+    const result = await discoverDaemon('/root', 'click');
+    expect(result).toStrictEqual(mockState);
+  });
+
+  it('restarts daemon on version mismatch', async () => {
+    const {
+      readDaemonState,
+      isDaemonAlive,
+      isDaemonVersionMatch,
+      removeDaemonState,
+    } = await import('../server/daemon-state.js');
+
+    // Stub process.kill so the fixture pid can never signal a real process.
+    const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);
+
+    // try/finally guarantees the stub is removed even when an assertion
+    // below throws, so later tests always see the real process.kill.
+    try {
+      vi.mocked(readDaemonState).mockResolvedValueOnce(
+        makeDaemonState({ version: '0.0.1' }),
+      );
+      vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+      vi.mocked(isDaemonVersionMatch).mockReturnValueOnce(false);
+
+      await expect(discoverDaemon('/root', 'click')).rejects.toThrowError(
+        'process.exit',
+      );
+
+      expect(removeDaemonState).toHaveBeenCalledWith('/root');
+      expect(stderrSpy).toHaveBeenCalledWith(
+        expect.stringContaining('Daemon version mismatch'),
+      );
+    } finally {
+      killSpy.mockRestore();
+    }
+  });
+
+  it('auto-starts daemon for launch command when no daemon running', async () => {
+    const {
+      readDaemonState,
+      isDaemonAlive,
+      acquireStartupLock,
+      releaseStartupLock,
+    } = await import('../server/daemon-state.js');
+
+    // First read: no daemon state on disk.
+    vi.mocked(readDaemonState).mockResolvedValueOnce(null);
+
+    vi.mocked(acquireStartupLock).mockResolvedValueOnce(true);
+
+    // Second read: a live daemon state is now available.
+    const mockState = makeDaemonState();
+    vi.mocked(readDaemonState).mockResolvedValueOnce(mockState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+
+    const result = await discoverDaemon('/root', 'launch');
+
+    expect(result).toStrictEqual(mockState);
+    expect(releaseStartupLock).toHaveBeenCalledWith('/root');
+  });
+
+  it('removes stale daemon state when not alive', async () => {
+    const { readDaemonState, isDaemonAlive, removeDaemonState } =
+      await import('../server/daemon-state.js');
+    vi.mocked(readDaemonState).mockResolvedValueOnce(makeDaemonState());
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(false);
+
+    await expect(discoverDaemon('/root', 'click')).rejects.toThrowError(
+      'process.exit',
+    );
+
+    expect(removeDaemonState).toHaveBeenCalledWith('/root');
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('no daemon running'),
+    );
+  });
+
+  it('exits for non-auto-start commands when no daemon', async () => {
+    const { readDaemonState } = await import('../server/daemon-state.js');
+    vi.mocked(readDaemonState).mockResolvedValueOnce(null);
+
+    await expect(discoverDaemon('/root', 'status')).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('no daemon running'),
+    );
+  });
+});
+
+describe('waitForDaemon', () => {
+  // Restore real timers after every test so a failed assertion or rejected
+  // promise cannot leave fake timers installed for the suites that follow.
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it('returns daemon state when daemon becomes alive', async () => {
+    const { readDaemonState, isDaemonAlive } =
+      await import('../server/daemon-state.js');
+    const mockState = {
+      port: 3000,
+      pid: 123,
+      nonce: 'abc',
+      startedAt: '2024-01-01',
+      subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    };
+    // First poll sees no state; the second poll sees a live daemon.
+    vi.mocked(readDaemonState)
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(mockState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+
+    vi.useFakeTimers();
+    const promise = waitForDaemon('/root');
+    // Advance in 200 ms steps so each pending poll delay can elapse.
+    for (let i = 0; i < 3; i++) {
+      await vi.advanceTimersByTimeAsync(200);
+    }
+    const result = await promise;
+
+    expect(result).toStrictEqual(mockState);
+  });
+
+  it('throws when daemon fails to start within timeout', async () => {
+    const { readDaemonState } = await import('../server/daemon-state.js');
+    vi.mocked(readDaemonState).mockResolvedValue(null);
+
+    vi.useFakeTimers();
+    // Convert the rejection into a value up front so it is never reported as
+    // an unhandled rejection while the fake clock is being advanced.
+    const promise = waitForDaemon('/root').catch((error: Error) => error);
+    // 55 steps of 200 ms; presumably this exceeds the poll budget used by
+    // waitForDaemon (mm.ts declares 50 attempts at 200 ms).
+    for (let i = 0; i < 55; i++) {
+      await vi.advanceTimersByTimeAsync(200);
+    }
+    const result = await promise;
+    expect(result).toBeInstanceOf(Error);
+    expect((result as Error).message).toContain('Daemon failed to start');
+  });
+});
+
+describe('main', () => {
+  // process.argv is global state: snapshot it before each test and restore
+  // it afterwards in a hook, so a failing assertion cannot leak the
+  // test-specific argv into later tests.
+  let savedArgv: string[];
+
+  beforeEach(() => {
+    savedArgv = process.argv;
+  });
+
+  afterEach(() => {
+    process.argv = savedArgv;
+  });
+
+  it('prints help when no args', async () => {
+    process.argv = ['node', 'mm'];
+
+    await expect(main()).rejects.toThrowError('process.exit');
+    expect(stdoutSpy).toHaveBeenCalledWith(expect.stringContaining('mm —'));
+  });
+
+  it('prints help for --help flag', async () => {
+    process.argv = ['node', 'mm', '--help'];
+
+    await expect(main()).rejects.toThrowError('process.exit');
+    expect(stdoutSpy).toHaveBeenCalledWith(expect.stringContaining('Usage:'));
+  });
+
+  it('prints help for -h flag', async () => {
+    process.argv = ['node', 'mm', '-h'];
+
+    await expect(main()).rejects.toThrowError('process.exit');
+  });
+});
+
+describe('type command --selector/--testid text resolution', () => {
+  // Snapshot of fetch taken when the suite is collected; it is written back
+  // after every test.
+  const fetchBeforeSuite = globalThis.fetch;
+
+  beforeEach(() => {
+    // Minimal successful daemon reply; only `ok` and `json()` are stubbed.
+    const okResponse = {
+      ok: true,
+      json: async () => ({ ok: true, result: {} }),
+    } as Response;
+    vi.spyOn(globalThis, 'fetch').mockResolvedValue(okResponse);
+  });
+
+  afterEach(() => {
+    globalThis.fetch = fetchBeforeSuite;
+  });
+
+  it('routes type with --selector and text after selector', async () => {
+    const cliArgs = ['--selector', '.input', 'hello world'];
+    const expectedBody = JSON.stringify({
+      selector: '.input',
+      text: 'hello world',
+    });
+
+    await routeCommand('type', cliArgs, 3000);
+
+    // The positional after the selector value must be sent as the text.
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      'http://127.0.0.1:3000/tool/type',
+      expect.objectContaining({ body: expectedBody }),
+    );
+  });
+});
+
+describe('handleServe', () => {
+  // Restore real timers after every test so a failure inside a fake-timer
+  // test cannot leave the fake clock installed for later tests (the last
+  // test in this suite relies on real setTimeout).
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it('exits when daemon is already running', async () => {
+    const { readDaemonState, isDaemonAlive } =
+      await import('../server/daemon-state.js');
+    vi.mocked(readDaemonState).mockResolvedValueOnce({
+      port: 3000,
+      pid: 123,
+      nonce: 'abc',
+      startedAt: '2024-01-01',
+      subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    });
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+
+    await expect(handleServe('/root', false)).rejects.toThrowError(
+      'process.exit',
+    );
+    expect(stderrSpy).toHaveBeenCalledWith(
+      expect.stringContaining('daemon already running'),
+    );
+  });
+
+  it('starts daemon in background mode', async () => {
+    const { readDaemonState, isDaemonAlive } =
+      await import('../server/daemon-state.js');
+    const { spawn } = await import('node:child_process');
+
+    // Initial check: no daemon state exists yet.
+    vi.mocked(readDaemonState).mockResolvedValueOnce(null);
+
+    // Config resolution: the daemon entry exists and runs under `node`.
+    vi.mocked(existsSync).mockReturnValue(true);
+    mockSearch.mockResolvedValueOnce({
+      config: { daemon: './daemon.ts', runtime: 'node' },
+      filepath: '/root/mm-client-cli.config.ts',
+      isEmpty: false,
+    });
+
+    const mockState = {
+      port: 4000,
+      pid: 456,
+      nonce: 'xyz',
+      startedAt: '2024-01-01',
+      subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    };
+    // Subsequent reads: one more miss, then the started daemon's state.
+    vi.mocked(readDaemonState)
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(mockState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+
+    vi.useFakeTimers();
+    const promise = handleServe('/root', true);
+    // Advance in 200 ms steps so each pending poll delay can elapse.
+    for (let i = 0; i < 3; i++) {
+      await vi.advanceTimersByTimeAsync(200);
+    }
+    await promise;
+
+    expect(spawn).toHaveBeenCalledWith('node', ['./daemon.ts'], {
+      detached: true,
+      stdio: ['ignore', 'ignore', 'ignore'],
+      cwd: '/root',
+    });
+    expect(stdoutSpy).toHaveBeenCalledWith(
+      'Daemon started on port 4000 (PID 456)\n',
+    );
+  });
+
+  it('cleans stale state before starting', async () => {
+    const { readDaemonState, isDaemonAlive, removeDaemonState } =
+      await import('../server/daemon-state.js');
+    const staleState = {
+      port: 3000,
+      pid: 123,
+      nonce: 'abc',
+      startedAt: '2024-01-01',
+      subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    };
+    // State file exists but the daemon it describes is not alive.
+    vi.mocked(readDaemonState).mockResolvedValueOnce(staleState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(false);
+
+    vi.mocked(existsSync).mockReturnValue(true);
+    mockSearch.mockResolvedValueOnce({
+      config: { daemon: './d.ts', runtime: 'node' },
+      filepath: '/root/mm-client-cli.config.ts',
+      isEmpty: false,
+    });
+
+    const { spawn } = await import('node:child_process');
+    // Partial child-process stub: it only records listeners and reports a
+    // clean exit (code 0) shortly after an 'exit' listener is attached.
+    vi.mocked(spawn).mockReturnValue({
+      stdio: 'inherit',
+      on: vi.fn((event: string, handler: (code: number | null) => void) => {
+        if (event === 'exit') {
+          setTimeout(() => handler(0), 10);
+        }
+      }),
+    } as unknown as ReturnType<typeof spawn>);
+
+    const promise = handleServe('/root', false);
+    // Real-time wait that outlasts the 10 ms simulated exit above.
+    await new Promise((resolve) => setTimeout(resolve, 50));
+    await promise;
+
+    expect(removeDaemonState).toHaveBeenCalledWith('/root');
+  });
+});
+
+describe('autoStartDaemon', () => {
+  // Restore real timers after every test so a failed assertion or rejected
+  // promise cannot leave fake timers installed for later tests.
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it('returns existing daemon if one appeared after locking', async () => {
+    const {
+      acquireStartupLock,
+      readDaemonState,
+      isDaemonAlive,
+      releaseStartupLock,
+    } = await import('../server/daemon-state.js');
+
+    vi.mocked(acquireStartupLock).mockResolvedValueOnce(true);
+
+    const mockState = {
+      port: 3000,
+      pid: 123,
+      nonce: 'abc',
+      startedAt: '2024-01-01',
+      subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    };
+    // A live daemon is already recorded once the lock has been taken.
+    vi.mocked(readDaemonState).mockResolvedValueOnce(mockState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+
+    const result = await autoStartDaemon('/root');
+
+    expect(result).toStrictEqual(mockState);
+    expect(releaseStartupLock).toHaveBeenCalledWith('/root');
+  });
+
+  it('spawns daemon when no existing daemon is found', async () => {
+    const {
+      acquireStartupLock,
+      readDaemonState,
+      isDaemonAlive,
+      releaseStartupLock,
+    } = await import('../server/daemon-state.js');
+    const { spawn } = await import('node:child_process');
+
+    vi.mocked(acquireStartupLock).mockResolvedValueOnce(true);
+    // Read after locking: still no daemon state.
+    vi.mocked(readDaemonState).mockResolvedValueOnce(null);
+
+    // Config resolution: the daemon entry exists and runs under `node`.
+    vi.mocked(existsSync).mockReturnValue(true);
+    mockSearch.mockResolvedValueOnce({
+      config: { daemon: './daemon.ts', runtime: 'node' },
+      filepath: '/root/mm-client-cli.config.ts',
+      isEmpty: false,
+    });
+
+    const mockState = {
+      port: 3000,
+      pid: 123,
+      nonce: 'abc',
+      startedAt: '2024-01-01',
+      subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    };
+    // Subsequent reads: one more miss, then the spawned daemon's state.
+    vi.mocked(readDaemonState)
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(mockState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+
+    vi.useFakeTimers();
+    const promise = autoStartDaemon('/root');
+    // Advance in 200 ms steps so each pending poll delay can elapse.
+    for (let i = 0; i < 3; i++) {
+      await vi.advanceTimersByTimeAsync(200);
+    }
+    const result = await promise;
+
+    expect(spawn).toHaveBeenCalledWith('node', ['./daemon.ts'], {
+      detached: true,
+      stdio: ['ignore', 'ignore', 'ignore'],
+      cwd: '/root',
+    });
+    expect(releaseStartupLock).toHaveBeenCalledWith('/root');
+    expect(result).toStrictEqual(mockState);
+  });
+
+  it('waits when lock is held by another process', async () => {
+    const { acquireStartupLock, readDaemonState, isDaemonAlive } =
+      await import('../server/daemon-state.js');
+
+    // Lock not acquired: another process is assumed to be starting the daemon.
+    vi.mocked(acquireStartupLock).mockResolvedValueOnce(false);
+
+    const mockState = {
+      port: 3000,
+      pid: 123,
+      nonce: 'abc',
+      startedAt: '2024-01-01',
+      subPorts: { anvil: 8545, fixture: 8546, mock: 8547 },
+    };
+    // First poll misses; the second sees the daemon started elsewhere.
+    vi.mocked(readDaemonState)
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(mockState);
+    vi.mocked(isDaemonAlive).mockResolvedValueOnce(true);
+
+    vi.useFakeTimers();
+    const promise = autoStartDaemon('/root');
+    // Advance in 200 ms steps so each pending poll delay can elapse.
+    for (let i = 0; i < 3; i++) {
+      await vi.advanceTimersByTimeAsync(200);
+    }
+    const result = await promise;
+
+    expect(result).toStrictEqual(mockState);
+  });
+});
diff --git a/src/cli/mm.ts b/src/cli/mm.ts
new file mode 100644
index 0000000..ed8a896
--- /dev/null
+++ b/src/cli/mm.ts
@@ -0,0 +1,1139 @@
+#!/usr/bin/env node
+import { cosmiconfig } from 'cosmiconfig';
+import { execSync, spawn } from 'node:child_process';
+import { existsSync } from 'node:fs';
+import * as fs from 'node:fs/promises';
+import * as path from 'node:path';
+
+import pkg from '../../package.json';
+import {
+  acquireStartupLock,
+  isDaemonAlive,
+  isDaemonVersionMatch,
+  readDaemonState,
+  releaseStartupLock,
+  removeDaemonState,
+} from '../server/daemon-state.js';
+import type { DaemonState } from '../types/http.js';
+
+// Request timeouts in milliseconds, looked up by the last path segment of
+// the daemon endpoint; `default` is used when no entry matches.
+const COMMAND_TIMEOUTS_MS: Record<string, number> = {
+  launch: 120_000,
+  cleanup: 30_000,
+  default: 30_000,
+};
+
+// Commands for which discoverDaemon starts a daemon when none is running.
+// NOTE(review): main() handles `serve` before discovery — confirm that the
+// `serve` entry is still needed here.
+const AUTO_START_COMMANDS = new Set(['launch', 'serve']);
+
+// Poll interval and attempt cap used by waitForDaemon.
+const DAEMON_POLL_INTERVAL_MS = 200;
+const DAEMON_POLL_MAX_ATTEMPTS = 50; // 50 * 200ms = 10s
+// Retry budget and base delay for transient network failures in sendRequest.
+const SEND_MAX_RETRIES = 3;
+const SEND_RETRY_BASE_DELAY_MS = 200;
+// Module name handed to cosmiconfig; also the stem of the config file names.
+const CONFIG_MODULE_NAME = 'mm-client-cli';
+
+/**
+ * Configuration shape for mm-client-cli config files.
+ * Used in mm-client-cli.config.ts or equivalent.
+ */
+export type MmClientCliConfig = {
+  /** Path to the daemon entry point (TypeScript or JavaScript file). */
+  daemon: string;
+  /**
+   * Runner used to spawn the daemon. Defaults to 'tsx'. The value 'node' is
+   * spawned by name; any other value is looked up in the project's
+   * `node_modules/.bin` directory.
+   */
+  runtime?: string;
+};
+
+/**
+ * Normalized form of {@link MmClientCliConfig} returned by readDaemonConfig,
+ * with the runtime default already applied.
+ */
+type DaemonConfig = {
+  /** Daemon entry point, taken verbatim from the `daemon` config field. */
+  daemonPath: string;
+  /** Runtime name, `'tsx'` when the config does not set one. */
+  runtime: string;
+};
+
+/**
+ * Pulls the global `--project <path>` option out of an argument list.
+ *
+ * When the flag is absent the input array is returned untouched. When the
+ * flag has no usable value (missing, or another `--flag`), an error is
+ * written to stderr and the process exits with status 1.
+ *
+ * @param argv - Raw CLI arguments (after the node/script entries).
+ * @returns The arguments without the flag/value pair, plus the project path
+ * (undefined when the flag was not given).
+ */
+export function extractProjectFlag(argv: string[]): {
+  args: string[];
+  projectPath: string | undefined;
+} {
+  const flagAt = argv.indexOf('--project');
+  if (flagAt === -1) {
+    return { args: argv, projectPath: undefined };
+  }
+
+  const projectPath = argv[flagAt + 1];
+  if (!projectPath || projectPath.startsWith('--')) {
+    process.stderr.write('Error: --project requires a path value\n');
+    process.exit(1);
+  }
+
+  // Drop exactly two slots: the flag itself and the value that follows it.
+  const args = argv.filter(
+    (_, position) => position !== flagAt && position !== flagAt + 1,
+  );
+  return { args, projectPath };
+}
+
+/**
+ * Resolves the target project root directory using the following precedence:
+ *   1. `--project <path>` CLI flag
+ *   2. `MM_PROJECT` environment variable
+ *   3. `git rev-parse --show-toplevel` (current working directory)
+ *
+ * Both explicit sources accept absolute or relative paths (resolved from cwd)
+ * and are normalized via `fs.realpath` to handle symlinks. The git fallback
+ * returns git's output trimmed, without a further `realpath` call.
+ *
+ * Every failure (missing path, non-directory, inaccessible path, not inside a
+ * git repository) writes a message to stderr and exits with status 1.
+ *
+ * @param projectFlag - The value of `--project`, if provided.
+ * @returns The absolute path to the project root.
+ */
+export async function resolveWorktreeRoot(
+  projectFlag: string | undefined,
+): Promise<string> {
+  // The CLI flag wins over the environment variable.
+  const explicit = projectFlag ?? process.env.MM_PROJECT;
+
+  if (explicit) {
+    const resolved = path.resolve(process.cwd(), explicit);
+    let real: string;
+    try {
+      real = await fs.realpath(resolved);
+    } catch {
+      process.stderr.write(`Error: project path does not exist: ${resolved}\n`);
+      process.exit(1);
+    }
+
+    // realpath succeeding only proves the path exists; it may still be a file.
+    try {
+      const stat = await fs.stat(real);
+      if (!stat.isDirectory()) {
+        process.stderr.write(
+          `Error: project path is not a directory: ${real}\n`,
+        );
+        process.exit(1);
+      }
+    } catch {
+      process.stderr.write(`Error: cannot access project path: ${real}\n`);
+      process.exit(1);
+    }
+
+    return real;
+  }
+
+  try {
+    // stdio is fully piped so git's own error output is not echoed.
+    return execSync('git rev-parse --show-toplevel', {
+      stdio: ['pipe', 'pipe', 'pipe'],
+    })
+      .toString()
+      .trim();
+  } catch {
+    process.stderr.write(
+      'Error: not in a git repository. Use --project <path> or set MM_PROJECT to target a project.\n',
+    );
+    return process.exit(1);
+  }
+}
+
+/**
+ * CLI entry point: strips the global `--project` option, prints help when
+ * asked (or when no command is given), resolves the project root, and then
+ * dispatches the command.
+ *
+ * `serve` manages the daemon lifecycle itself; every other command first
+ * goes through daemon discovery. `launch` and `cleanup` are handled here and
+ * all remaining commands are forwarded to `routeCommand`.
+ */
+export async function main(): Promise<void> {
+  const parsed = extractProjectFlag(process.argv.slice(2));
+  const args = parsed.args;
+  const [command] = args;
+
+  const wantsHelp =
+    args.length === 0 || command === '--help' || command === '-h';
+  if (wantsHelp) {
+    printHelp();
+    process.exit(0);
+  }
+
+  const worktreeRoot = await resolveWorktreeRoot(parsed.projectPath);
+
+  // mm serve manages daemon lifecycle directly (no discovery needed)
+  if (command === 'serve') {
+    await handleServe(worktreeRoot, args.includes('--background'));
+    return;
+  }
+
+  // Discover existing daemon or auto-start for launch
+  const daemonState = await discoverDaemon(worktreeRoot, command);
+  const commandArgs = args.slice(1);
+
+  switch (command) {
+    case 'launch':
+      await sendRequest(
+        daemonState.port,
+        'POST',
+        '/launch',
+        parseLaunchArgs(commandArgs),
+      );
+      return;
+    case 'cleanup': {
+      const alsoShutdown = args.includes('--shutdown');
+      await sendRequest(daemonState.port, 'POST', '/cleanup', {});
+      if (alsoShutdown) {
+        await shutdownDaemon(worktreeRoot, daemonState);
+      }
+      return;
+    }
+    default:
+      await routeCommand(command, commandArgs, daemonState.port);
+  }
+}
+
+/**
+ * Reads the optional `--within <scope>` option and turns it into a scope
+ * target.
+ *
+ * Accepted value forms:
+ * - `testid:<id>`    → `{ testId }`
+ * - `selector:<css>` → `{ selector }`
+ * - `e<digits>`      → `{ a11yRef }`
+ * - anything else    → `{ testId }`
+ *
+ * A missing value (or one that looks like another flag) is reported on
+ * stderr and the process exits with status 1.
+ *
+ * @param args - The CLI arguments to scan.
+ * @returns A within target object, or undefined if `--within` is absent.
+ */
+export function resolveWithinFromArgs(
+  args: string[],
+): { a11yRef: string } | { testId: string } | { selector: string } | undefined {
+  const flagAt = args.indexOf('--within');
+  if (flagAt === -1) {
+    return undefined;
+  }
+
+  const scope = args[flagAt + 1];
+  if (!scope || scope.startsWith('--')) {
+    process.stderr.write('Error: --within requires a value\n');
+    process.exit(1);
+  }
+
+  // Explicit prefixes take priority over auto-detection.
+  const testIdPrefix = 'testid:';
+  if (scope.startsWith(testIdPrefix)) {
+    return { testId: scope.substring(testIdPrefix.length) };
+  }
+
+  const selectorPrefix = 'selector:';
+  if (scope.startsWith(selectorPrefix)) {
+    return { selector: scope.substring(selectorPrefix.length) };
+  }
+
+  if (/^e[0-9]+$/u.test(scope)) {
+    return { a11yRef: scope };
+  }
+  return { testId: scope };
+}
+
+/**
+ * Resolves element targeting from CLI arguments. Supports three targeting modes:
+ * --selector <css>  → CSS selector (explicit)
+ * --testid <id>     → data-testid value (explicit)
+ * positional arg    → a11yRef if /^e\d+$/, otherwise testId (auto-detected)
+ *
+ * Explicit flags win over the positional argument, and `--selector` wins over
+ * `--testid`. The positional argument is the first argument that is not part
+ * of a `--flag value` pair, so options such as `--within <scope>` may appear
+ * before the target. A flag without a value, or a missing target, is reported
+ * on stderr and the process exits with status 1.
+ *
+ * @param args - The CLI arguments after the command name.
+ * @returns An object with exactly one of `a11yRef`, `testId`, or `selector`.
+ */
+export function resolveTargetFromArgs(
+  args: string[],
+): { a11yRef: string } | { testId: string } | { selector: string } {
+  const selectorIdx = args.indexOf('--selector');
+  if (selectorIdx >= 0) {
+    const val = args[selectorIdx + 1];
+    if (!val || val.startsWith('--')) {
+      process.stderr.write('Error: --selector requires a value\n');
+      process.exit(1);
+    }
+    return { selector: val };
+  }
+
+  const testIdIdx = args.indexOf('--testid');
+  if (testIdIdx >= 0) {
+    const val = args[testIdIdx + 1];
+    if (!val || val.startsWith('--')) {
+      process.stderr.write('Error: --testid requires a value\n');
+      process.exit(1);
+    }
+    return { testId: val };
+  }
+
+  // Use the same positional lookup that routeCommand validates with, so
+  // `--within e5 e1` targets `e1` instead of treating `--within` as a testId.
+  const target = getPositionalTarget(args);
+  if (!target) {
+    process.stderr.write('Error: element target is required\n');
+    process.exit(1);
+  }
+  return /^e[0-9]+$/u.test(target) ? { a11yRef: target } : { testId: target };
+}
+
+/**
+ * Finds the first positional argument in a CLI args list.
+ *
+ * Every argument starting with `--` is assumed to be followed by a value, so
+ * the scan jumps two slots at a time over such pairs.
+ *
+ * @param args - The CLI arguments to scan.
+ * @returns The first non-flag argument, or undefined.
+ */
+export function getPositionalTarget(args: string[]): string | undefined {
+  let cursor = 0;
+  while (cursor < args.length) {
+    const candidate = args[cursor];
+    if (!candidate.startsWith('--')) {
+      return candidate;
+    }
+    // Skip the flag and the value that follows it.
+    cursor += 2;
+  }
+  return undefined;
+}
+
+/**
+ * Routes a CLI command to the appropriate daemon HTTP endpoint.
+ *
+ * Each case validates its own arguments, prints a usage line to stderr and
+ * exits with status 1 when they are unusable, and otherwise forwards a JSON
+ * payload through `sendRequest`. Optional values are only included in the
+ * payload when they were supplied on the command line.
+ *
+ * @param command - The CLI command to route.
+ * @param args - Additional arguments for the command.
+ * @param port - The daemon HTTP server port.
+ */
+export async function routeCommand(
+  command: string,
+  args: string[],
+  port: number,
+): Promise<void> {
+  switch (command) {
+    case 'status':
+      await sendRequest(port, 'GET', '/status', null);
+      break;
+    case 'click': {
+      const target = getPositionalTarget(args);
+      if (
+        !target &&
+        !args.includes('--selector') &&
+        !args.includes('--testid')
+      ) {
+        process.stderr.write(
+          'Usage: mm click <ref> [--selector <css>] [--testid <id>] [--within <scope>]\n',
+        );
+        process.exit(1);
+      }
+      const clickWithin = resolveWithinFromArgs(args);
+      await sendRequest(port, 'POST', '/tool/click', {
+        ...resolveTargetFromArgs(args),
+        ...(clickWithin ? { within: clickWithin } : {}),
+      });
+      break;
+    }
+    case 'type': {
+      const typeTarget = getPositionalTarget(args);
+      if (
+        !typeTarget &&
+        !args.includes('--selector') &&
+        !args.includes('--testid')
+      ) {
+        process.stderr.write(
+          'Usage: mm type <ref> <text> [--selector <css>] [--testid <id>] [--within <scope>]\n',
+        );
+        process.exit(1);
+      }
+      // With an explicit target flag the text follows the flag's value;
+      // otherwise it is the second argument, after the positional target.
+      let textArgIdx = 1;
+      if (args.includes('--selector')) {
+        textArgIdx = args.indexOf('--selector') + 2;
+      } else if (args.includes('--testid')) {
+        textArgIdx = args.indexOf('--testid') + 2;
+      }
+      const text = args[textArgIdx] ?? args[1];
+      if (text === undefined) {
+        process.stderr.write('Usage: mm type <ref> <text>\n');
+        process.exit(1);
+      }
+      const typeWithin = resolveWithinFromArgs(args);
+      await sendRequest(port, 'POST', '/tool/type', {
+        ...resolveTargetFromArgs(args),
+        text,
+        ...(typeWithin ? { within: typeWithin } : {}),
+      });
+      break;
+    }
+    case 'get-text': {
+      const getTextTarget = getPositionalTarget(args);
+      if (
+        !getTextTarget &&
+        !args.includes('--selector') &&
+        !args.includes('--testid')
+      ) {
+        process.stderr.write(
+          'Usage: mm get-text <ref> [--selector <css>] [--testid <id>] [--within <scope>]\n',
+        );
+        process.exit(1);
+      }
+      const getTextWithin = resolveWithinFromArgs(args);
+      await sendRequest(port, 'POST', '/tool/get_text', {
+        ...resolveTargetFromArgs(args),
+        ...(getTextWithin ? { within: getTextWithin } : {}),
+      });
+      break;
+    }
+    case 'describe-screen':
+      await sendRequest(port, 'POST', '/tool/describe_screen', {});
+      break;
+    case 'screenshot': {
+      const nameIdx = args.indexOf('--name');
+      const name = nameIdx >= 0 ? args[nameIdx + 1] : undefined;
+      await sendRequest(port, 'POST', '/tool/screenshot', name ? { name } : {});
+      break;
+    }
+    case 'wait-for': {
+      const waitTarget = getPositionalTarget(args);
+      if (
+        !waitTarget &&
+        !args.includes('--selector') &&
+        !args.includes('--testid')
+      ) {
+        process.stderr.write(
+          'Usage: mm wait-for <ref> [--timeout <ms>] [--selector <css>] [--testid <id>] [--within <scope>]\n',
+        );
+        process.exit(1);
+      }
+      const timeoutMs = parseIntFlag(args, '--timeout');
+      const waitWithin = resolveWithinFromArgs(args);
+      await sendRequest(port, 'POST', '/tool/wait_for', {
+        ...resolveTargetFromArgs(args),
+        ...(timeoutMs === undefined ? {} : { timeoutMs }),
+        ...(waitWithin ? { within: waitWithin } : {}),
+      });
+      break;
+    }
+    case 'navigate':
+      if (!args[0]) {
+        process.stderr.write('Usage: mm navigate <url>\n');
+        process.exit(1);
+      }
+      await sendRequest(port, 'POST', '/tool/navigate', {
+        screen: 'url',
+        url: args[0],
+      });
+      break;
+    case 'navigate-home':
+      await sendRequest(port, 'POST', '/tool/navigate', { screen: 'home' });
+      break;
+    case 'navigate-settings':
+      await sendRequest(port, 'POST', '/tool/navigate', {
+        screen: 'settings',
+      });
+      break;
+    case 'get-state':
+      await sendRequest(port, 'POST', '/tool/get_state', {});
+      break;
+    case 'get-context':
+      await sendRequest(port, 'POST', '/tool/get_context', {});
+      break;
+    case 'set-context':
+      if (!args[0] || (args[0] !== 'e2e' && args[0] !== 'prod')) {
+        process.stderr.write('Usage: mm set-context <e2e|prod>\n');
+        process.exit(1);
+      }
+      await sendRequest(port, 'POST', '/tool/set_context', {
+        context: args[0],
+      });
+      break;
+    case 'build': {
+      const buildForce = args.includes('--force');
+      await sendRequest(port, 'POST', '/tool/build', {
+        ...(buildForce ? { force: true } : {}),
+      });
+      break;
+    }
+    case 'wait-for-notification': {
+      const notifTimeout = parseIntFlag(args, '--timeout');
+      await sendRequest(port, 'POST', '/tool/wait_for_notification', {
+        ...(notifTimeout === undefined ? {} : { timeoutMs: notifTimeout }),
+      });
+      break;
+    }
+    case 'switch-to-tab': {
+      const tabRole = parseStringFlag(args, '--role');
+      const tabUrl = parseStringFlag(args, '--url');
+      // Support positional arg as role: mm switch-to-tab dapp
+      const positionalRole =
+        !tabRole && !tabUrl ? getPositionalTarget(args) : undefined;
+      const resolvedRole = tabRole ?? positionalRole;
+      if (!resolvedRole && !tabUrl) {
+        process.stderr.write(
+          'Usage: mm switch-to-tab <role> | --role <role> | --url <url>\n',
+        );
+        process.exit(1);
+      }
+      await sendRequest(port, 'POST', '/tool/switch_to_tab', {
+        ...(resolvedRole ? { role: resolvedRole } : {}),
+        ...(tabUrl ? { url: tabUrl } : {}),
+      });
+      break;
+    }
+    case 'close-tab': {
+      const closeRole = parseStringFlag(args, '--role');
+      const closeUrl = parseStringFlag(args, '--url');
+      if (!closeRole && !closeUrl) {
+        process.stderr.write(
+          'Usage: mm close-tab --role <role> | --url <url>\n',
+        );
+        process.exit(1);
+      }
+      await sendRequest(port, 'POST', '/tool/close_tab', {
+        ...(closeRole ? { role: closeRole } : {}),
+        ...(closeUrl ? { url: closeUrl } : {}),
+      });
+      break;
+    }
+    case 'clipboard': {
+      const clipAction = args[0];
+      if (!clipAction || (clipAction !== 'read' && clipAction !== 'write')) {
+        process.stderr.write('Usage: mm clipboard <read|write> [text]\n');
+        process.exit(1);
+      }
+      if (clipAction === 'write' && !args[1]) {
+        process.stderr.write('Usage: mm clipboard write <text>\n');
+        process.exit(1);
+      }
+      await sendRequest(port, 'POST', '/tool/clipboard', {
+        action: clipAction,
+        ...(clipAction === 'write' ? { text: args[1] } : {}),
+      });
+      break;
+    }
+    case 'seed-contract': {
+      if (!args[0]) {
+        process.stderr.write(
+          'Usage: mm seed-contract <name> [--hardfork <fork>]\n',
+        );
+        process.exit(1);
+      }
+      const hardfork = parseStringFlag(args, '--hardfork');
+      await sendRequest(port, 'POST', '/tool/seed_contract', {
+        contractName: args[0],
+        ...(hardfork ? { hardfork } : {}),
+      });
+      break;
+    }
+    case 'seed-contracts': {
+      // Classify each argument by its own position. Looking the value up
+      // with indexOf() finds the first occurrence, which misclassifies a
+      // --hardfork value that equals a contract name (and vice versa).
+      const contractNames = args.filter(
+        (a, position) =>
+          !a.startsWith('--') && args[position - 1] !== '--hardfork',
+      );
+      if (contractNames.length === 0) {
+        process.stderr.write(
+          'Usage: mm seed-contracts <name1> <name2> ... [--hardfork <fork>]\n',
+        );
+        process.exit(1);
+      }
+      const seedHardfork = parseStringFlag(args, '--hardfork');
+      await sendRequest(port, 'POST', '/tool/seed_contracts', {
+        contracts: contractNames,
+        ...(seedHardfork ? { hardfork: seedHardfork } : {}),
+      });
+      break;
+    }
+    case 'get-contract-address':
+      if (!args[0]) {
+        process.stderr.write('Usage: mm get-contract-address <name>\n');
+        process.exit(1);
+      }
+      await sendRequest(port, 'POST', '/tool/get_contract_address', {
+        contractName: args[0],
+      });
+      break;
+    case 'list-contracts':
+      await sendRequest(port, 'POST', '/tool/list_contracts', {});
+      break;
+    case 'list-testids': {
+      const testIdLimit = parseIntFlag(args, '--limit');
+      await sendRequest(port, 'POST', '/tool/list_testids', {
+        ...(testIdLimit === undefined ? {} : { limit: testIdLimit }),
+      });
+      break;
+    }
+    case 'accessibility-snapshot': {
+      const rootSelector = parseStringFlag(args, '--root');
+      await sendRequest(port, 'POST', '/tool/accessibility_snapshot', {
+        ...(rootSelector ? { rootSelector } : {}),
+      });
+      break;
+    }
+    case 'knowledge-search':
+      if (!args[0]) {
+        process.stderr.write('Usage: mm knowledge-search <query>\n');
+        process.exit(1);
+      }
+      await sendRequest(port, 'POST', '/tool/knowledge_search', {
+        query: args[0],
+      });
+      break;
+    case 'knowledge-last':
+      await sendRequest(port, 'POST', '/tool/knowledge_last', {});
+      break;
+    case 'knowledge-sessions':
+      await sendRequest(port, 'POST', '/tool/knowledge_sessions', {});
+      break;
+    case 'knowledge-summarize': {
+      const summarizeSession = parseStringFlag(args, '--session');
+      await sendRequest(port, 'POST', '/tool/knowledge_summarize', {
+        ...(summarizeSession ? { scope: { sessionId: summarizeSession } } : {}),
+      });
+      break;
+    }
+    case 'run-steps':
+      if (!args[0]) {
+        process.stderr.write(
+          'Usage: mm run-steps \'{"steps":[{"tool":"click","args":{"a11yRef":"e1"}}]}\'\n',
+        );
+        process.exit(1);
+      }
+      try {
+        await sendRequest(
+          port,
+          'POST',
+          '/tool/run_steps',
+          JSON.parse(args[0]) as Record<string, unknown>,
+        );
+      } catch (error) {
+        // JSON.parse failures are user input errors; anything else is
+        // rethrown to the caller.
+        if (error instanceof SyntaxError) {
+          process.stderr.write(`Error: invalid JSON — ${error.message}\n`);
+          process.exit(1);
+        }
+        /* istanbul ignore next -- non-SyntaxError path depends on delegated failures */
+        throw error;
+      }
+      break;
+    default:
+      process.stderr.write(
+        `Error: unknown command '${command}'. Run 'mm --help' for usage.\n`,
+      );
+      process.exit(1);
+  }
+}
+
+/**
+ * Decides whether a failed fetch attempt is worth retrying.
+ *
+ * The error is stringified and searched for a fixed set of network-level
+ * markers; any match counts as transient.
+ *
+ * @param error - The caught error from a fetch attempt.
+ * @returns Whether the error is transient.
+ */
+export function isTransientError(error: unknown): boolean {
+  const transientMarkers = [
+    'ECONNREFUSED',
+    'ECONNRESET',
+    'EPIPE',
+    'UND_ERR_SOCKET',
+    'fetch failed',
+  ];
+  const text = String(error);
+  return transientMarkers.some((marker) => text.includes(marker));
+}
+
+/**
+ * Sends an HTTP request to the daemon and prints the response.
+ * Retries transient network errors (ECONNREFUSED, ECONNRESET, etc.)
+ * with linear backoff up to SEND_MAX_RETRIES times.
+ *
+ * On success the `result` field of the response (or the whole response when
+ * there is none) is written to stdout, merged with `observations` when the
+ * response carries them. Strings are printed as-is; everything else is
+ * pretty-printed as JSON. Error responses, timeouts and non-transient
+ * failures are reported on stderr and the process exits with status 1.
+ *
+ * @param port - The daemon HTTP server port.
+ * @param method - The HTTP method to use.
+ * @param requestPath - The URL path for the request.
+ * @param body - The request body payload, or null for no body.
+ */
+export async function sendRequest(
+  port: number,
+  method: string,
+  requestPath: string,
+  body: unknown,
+): Promise<void> {
+  // The timeout is keyed by the last path segment (e.g. '/launch' → 'launch');
+  // segments without an entry fall back to the default.
+  const commandName = requestPath.split('/').pop() ?? '';
+  const timeout =
+    COMMAND_TIMEOUTS_MS[commandName] ?? COMMAND_TIMEOUTS_MS.default;
+
+  let lastError: unknown;
+
+  for (let attempt = 0; attempt <= SEND_MAX_RETRIES; attempt++) {
+    // Linear backoff: wait base * attempt before every retry.
+    if (attempt > 0) {
+      await sleep(SEND_RETRY_BASE_DELAY_MS * attempt);
+    }
+
+    // Each attempt gets its own abort controller and timer.
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), timeout);
+
+    try {
+      const headers: Record<string, string> = {};
+      if (body !== null) {
+        headers['Content-Type'] = 'application/json';
+      }
+      const options: RequestInit = {
+        method,
+        signal: controller.signal,
+        headers,
+        ...(body === null ? {} : { body: JSON.stringify(body) }),
+      };
+      const response = await fetch(
+        `http://127.0.0.1:${port}${requestPath}`,
+        options,
+      );
+      const data = (await response.json()) as Record<string, unknown>;
+
+      // A failure is either a non-2xx status or an explicit `ok: false`.
+      if (!response.ok || data.ok === false) {
+        const errorData = data.error as { message?: string } | undefined;
+        process.stderr.write(
+          `Error: ${errorData?.message ?? 'Request failed'}\n`,
+        );
+        process.exit(1);
+      }
+
+      const result = data.result ?? data;
+      const observations = data.observations as
+        | Record<string, unknown>
+        | undefined;
+      let output: unknown = result;
+      if (observations) {
+        // Non-object results are wrapped so observations can sit beside them.
+        const base =
+          typeof result === 'object' && result !== null
+            ? (result as Record<string, unknown>)
+            : { result };
+        output = { ...base, observations };
+      }
+      if (typeof output === 'string') {
+        process.stdout.write(`${output}\n`);
+      } else {
+        process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);
+      }
+      return;
+    } catch (error) {
+      // Timeouts are reported immediately and never retried.
+      if ((error as Error).name === 'AbortError') {
+        process.stderr.write(`Error: request timed out after ${timeout}ms\n`);
+        process.exit(1);
+      }
+
+      if (isTransientError(error) && attempt < SEND_MAX_RETRIES) {
+        lastError = error;
+        continue;
+      }
+
+      process.stderr.write(`Error: ${String(error)}\n`);
+      process.exit(1);
+    } finally {
+      // Runs on every exit from the try block, including `continue`.
+      clearTimeout(timer);
+    }
+  }
+
+  // NOTE(review): the final attempt either returns or exits above, so this
+  // tail looks reachable only when process.exit does not terminate (for
+  // example when it is stubbed) — confirm.
+  process.stderr.write(
+    `Error: request failed after ${SEND_MAX_RETRIES + 1} attempts: ${String(lastError)}\n`,
+  );
+  process.exit(1);
+}
+
+/**
+ * Finds the daemon for a project, starting one when the command allows it.
+ *
+ * A recorded daemon that is alive and matches the CLI version is returned
+ * directly. A live daemon with a different version is shut down; a dead one
+ * has its stale state removed. After that, commands outside
+ * AUTO_START_COMMANDS fail with an error and exit status 1, while the
+ * others trigger `autoStartDaemon`.
+ *
+ * @param worktreeRoot - The git worktree root directory.
+ * @param command - The CLI command being executed.
+ * @returns The daemon state with connection details.
+ */
+export async function discoverDaemon(
+  worktreeRoot: string,
+  command: string,
+): Promise<DaemonState> {
+  const recorded = await readDaemonState(worktreeRoot);
+
+  if (recorded) {
+    if (await isDaemonAlive(recorded)) {
+      if (isDaemonVersionMatch(recorded)) {
+        return recorded;
+      }
+
+      const runningVersion = recorded.version ?? 'unknown';
+      process.stderr.write(
+        `Daemon version mismatch (running: ${runningVersion}, cli: ${pkg.version}). Restarting...\n`,
+      );
+      await shutdownDaemon(worktreeRoot, recorded);
+    } else {
+      // Stale state file left behind by a daemon that is no longer running.
+      await removeDaemonState(worktreeRoot);
+    }
+  }
+
+  const mayAutoStart = AUTO_START_COMMANDS.has(command);
+  if (!mayAutoStart) {
+    process.stderr.write(
+      'Error: no daemon running. Run `mm launch` to start.\n',
+    );
+    process.exit(1);
+  }
+
+  return autoStartDaemon(worktreeRoot);
+}
+
+/**
+ * Spawns a new daemon process and waits for it to become ready.
+ *
+ * Startup is guarded by the startup lock. When the lock cannot be acquired
+ * this call does not spawn anything and only waits for the daemon state to
+ * appear. The lock is released in all cases once it has been acquired.
+ *
+ * @param worktreeRoot - The git worktree root directory.
+ * @returns The daemon state once it is alive.
+ */
+export async function autoStartDaemon(
+  worktreeRoot: string,
+): Promise<DaemonState> {
+  const locked = await acquireStartupLock(worktreeRoot);
+  if (!locked) {
+    // Lock not acquired: do not spawn, just poll for the daemon state.
+    return waitForDaemon(worktreeRoot);
+  }
+
+  try {
+    // Re-check under the lock: a daemon may have come up in the meantime.
+    const existingState = await readDaemonState(worktreeRoot);
+    if (existingState && (await isDaemonAlive(existingState))) {
+      return existingState;
+    }
+
+    const config = await readDaemonConfig(worktreeRoot);
+    const runtimeBin = resolveRuntime(worktreeRoot, config.runtime);
+
+    // Detached with ignored stdio and unref(), so this CLI process does not
+    // wait on the child.
+    const child = spawn(runtimeBin, [config.daemonPath], {
+      detached: true,
+      stdio: ['ignore', 'ignore', 'ignore'],
+      cwd: worktreeRoot,
+    });
+    child.unref();
+
+    // `return await` keeps the lock held until the daemon is ready or the
+    // wait fails; the finally block then releases it.
+    return await waitForDaemon(worktreeRoot);
+  } finally {
+    await releaseStartupLock(worktreeRoot);
+  }
+}
+
+/**
+ * Implements `mm serve`: runs the daemon attached to this terminal, or as a
+ * detached background process.
+ *
+ * Refuses to start (stderr message, exit status 1) when a live daemon is
+ * already recorded for the project; stale state is removed first. In the
+ * foreground the daemon inherits stdio and its exit code becomes this
+ * process's exit code (0 when the code is null). In the background the
+ * function waits until the daemon is responsive and prints its port and PID.
+ *
+ * @param worktreeRoot - The git worktree root directory.
+ * @param background - Whether to run the daemon as a detached background process.
+ */
+export async function handleServe(
+  worktreeRoot: string,
+  background: boolean,
+): Promise<void> {
+  const previous = await readDaemonState(worktreeRoot);
+  if (previous) {
+    if (await isDaemonAlive(previous)) {
+      process.stderr.write(
+        `Error: daemon already running on port ${previous.port} (PID ${previous.pid})\n`,
+      );
+      process.exit(1);
+    }
+    await removeDaemonState(worktreeRoot);
+  }
+
+  const { daemonPath, runtime } = await readDaemonConfig(worktreeRoot);
+  const runtimeBin = resolveRuntime(worktreeRoot, runtime);
+
+  if (!background) {
+    const attached = spawn(runtimeBin, [daemonPath], {
+      stdio: 'inherit',
+      cwd: worktreeRoot,
+    });
+
+    // Stay alive until the daemon exits and mirror its exit code.
+    await new Promise<void>((resolve) => {
+      attached.on('exit', (code) => {
+        process.exitCode = code ?? 0;
+        resolve();
+      });
+    });
+    return;
+  }
+
+  const detachedChild = spawn(runtimeBin, [daemonPath], {
+    detached: true,
+    stdio: ['ignore', 'ignore', 'ignore'],
+    cwd: worktreeRoot,
+  });
+  detachedChild.unref();
+
+  const { port, pid } = await waitForDaemon(worktreeRoot);
+  process.stdout.write(`Daemon started on port ${port} (PID ${pid})\n`);
+}
+
+/**
+ * Loads the daemon configuration through cosmiconfig.
+ *
+ * Looks for `mm-client-cli.config.{ts,js,cjs,mjs}` and the `.mm-client-clirc`
+ * family of files, starting at the worktree root and not searching above it.
+ * A missing or empty config, or a config without a `daemon` entry, is
+ * reported on stderr and the process exits with status 1.
+ *
+ * @param worktreeRoot - The git worktree root directory.
+ * @returns The daemon path and runtime configuration (runtime defaults to 'tsx').
+ */
+export async function readDaemonConfig(
+  worktreeRoot: string,
+): Promise<DaemonConfig> {
+  // Order matters: cosmiconfig tries the places in the order listed.
+  const configFiles = ['ts', 'js', 'cjs', 'mjs'].map(
+    (ext) => `${CONFIG_MODULE_NAME}.config.${ext}`,
+  );
+  const rcFiles = ['', '.json', '.yaml', '.yml', '.js', '.ts', '.cjs'].map(
+    (suffix) => `.${CONFIG_MODULE_NAME}rc${suffix}`,
+  );
+
+  const explorer = cosmiconfig(CONFIG_MODULE_NAME, {
+    searchPlaces: [...configFiles, ...rcFiles],
+    stopDir: worktreeRoot,
+  });
+
+  const found = await explorer.search(worktreeRoot);
+
+  if (!found || found.isEmpty) {
+    process.stderr.write(
+      `Error: No mm-client-cli config found. Create ${CONFIG_MODULE_NAME}.config.ts in your project root.\n`,
+    );
+    process.exit(1);
+  }
+
+  const { daemon, runtime } = found.config as MmClientCliConfig;
+  if (!daemon) {
+    process.stderr.write(
+      `Error: No daemon entry point configured. Add 'daemon' to ${found.filepath}.\n`,
+    );
+    process.exit(1);
+  }
+
+  return { daemonPath: daemon, runtime: runtime ?? 'tsx' };
+}
+
+/**
+ * Resolves the runtime binary used for spawning the daemon.
+ *
+ * `'node'` is returned as-is and left to PATH lookup at spawn time. Any other
+ * runtime must exist in the project's `node_modules/.bin`; when it does not,
+ * an error is written to stderr and the process exits with status 1.
+ *
+ * @param worktreeRoot - The git worktree root directory.
+ * @param runtime - The runtime name from configuration.
+ * @returns `'node'`, or the path to the runtime binary under
+ * `<worktreeRoot>/node_modules/.bin`.
+ */
+export function resolveRuntime(worktreeRoot: string, runtime: string): string {
+  if (runtime === 'node') {
+    return 'node';
+  }
+
+  const binPath = path.join(worktreeRoot, 'node_modules', '.bin', runtime);
+  if (!existsSync(binPath)) {
+    // The runtime comes from the mm-client-cli config file (see
+    // readDaemonConfig), not from package.json, so point the user there.
+    process.stderr.write(
+      `Error: Runtime '${runtime}' not found at ${binPath}. Install it or set "runtime" in your mm-client-cli config file.\n`,
+    );
+    process.exit(1);
+  }
+  return binPath;
+}
+
+/**
+ * Polls for daemon state until the daemon is alive or times out.
+ *
+ * Sleeps DAEMON_POLL_INTERVAL_MS before each of up to
+ * DAEMON_POLL_MAX_ATTEMPTS checks, so the first check happens one interval
+ * after the call.
+ *
+ * @param worktreeRoot - The git worktree root directory.
+ * @returns The daemon state once the daemon is responsive.
+ * @throws Error when no live daemon is observed within the polling budget.
+ */
+export async function waitForDaemon(
+  worktreeRoot: string,
+): Promise<DaemonState> {
+  for (let i = 0; i < DAEMON_POLL_MAX_ATTEMPTS; i++) {
+    await sleep(DAEMON_POLL_INTERVAL_MS);
+    const state = await readDaemonState(worktreeRoot);
+    if (state && (await isDaemonAlive(state))) {
+      return state;
+    }
+  }
+
+  // Derive the figure from the polling constants so the message cannot drift
+  // when they are tuned (currently 50 * 200ms = 10 seconds).
+  const timeoutSeconds =
+    (DAEMON_POLL_MAX_ATTEMPTS * DAEMON_POLL_INTERVAL_MS) / 1000;
+  throw new Error(`Daemon failed to start within ${timeoutSeconds} seconds`);
+}
+
+/**
+ * Stops the daemon and clears its recorded state.
+ *
+ * Sends SIGTERM to the recorded PID (when there is one), ignoring any error
+ * from the signal call, and then removes the daemon state for the project.
+ *
+ * @param worktreeRoot - The git worktree root directory.
+ * @param state - The current daemon state containing the PID.
+ */
+export async function shutdownDaemon(
+  worktreeRoot: string,
+  state: DaemonState,
+): Promise<void> {
+  const { pid } = state;
+
+  if (pid) {
+    try {
+      process.kill(pid, 'SIGTERM');
+    } catch {
+      /* already dead */
+    }
+  }
+
+  await removeDaemonState(worktreeRoot);
+}
+
+/**
+ * Pauses the calling async function for a fixed amount of time.
+ *
+ * @param ms - The number of milliseconds to wait.
+ * @returns A promise that resolves after the delay.
+ */
+export async function sleep(ms: number): Promise<void> {
+  await new Promise<void>((resolve) => {
+    setTimeout(resolve, ms);
+  });
+}
+
+/**
+ * Reads the base-10 integer that follows a flag in a CLI argument list.
+ *
+ * @param args - The raw CLI arguments to search.
+ * @param flag - The flag name to look for (e.g., '--timeout').
+ * @returns The parsed integer value, or undefined if the flag is absent or
+ * its value does not parse as an integer.
+ */
+export function parseIntFlag(args: string[], flag: string): number | undefined {
+  const flagAt = args.indexOf(flag);
+  if (flagAt === -1) {
+    return undefined;
+  }
+
+  const value = Number.parseInt(args[flagAt + 1], 10);
+  if (Number.isNaN(value)) {
+    return undefined;
+  }
+  return value;
+}
+
+/**
+ * Reads the string value that follows a flag in a CLI argument list.
+ *
+ * @param args - The raw CLI arguments to search.
+ * @param flag - The flag name to look for (e.g., '--role').
+ * @returns The string value, or undefined if the flag is absent, has no
+ * value, or is followed by another `--flag`.
+ */
+export function parseStringFlag(
+  args: string[],
+  flag: string,
+): string | undefined {
+  const flagAt = args.indexOf(flag);
+  if (flagAt === -1) {
+    return undefined;
+  }
+
+  const value = args[flagAt + 1];
+  if (!value) {
+    return undefined;
+  }
+  return value.startsWith('--') ? undefined : value;
+}
+
+/**
+ * Converts `mm launch` arguments into the launch request payload.
+ *
+ * `--force` is a boolean switch. `--context`, `--state`, `--extension-path`,
+ * `--goal` and `--flow-tags` each take a value, stored under `context`,
+ * `stateMode`, `extensionPath`, `goal` and `flowTags` respectively
+ * (`flowTags` is split on commas and trimmed). A value flag without a value
+ * is reported on stderr and the process exits with status 1. Unknown
+ * `--flags` only produce a warning; other arguments are ignored.
+ *
+ * @param args - The raw CLI arguments after the command name.
+ * @returns The parsed launch options.
+ */
+export function parseLaunchArgs(args: string[]): Record<string, unknown> {
+  type ValueFlag = {
+    key: string;
+    missing: string;
+    convert?: (raw: string) => unknown;
+  };
+
+  // One row per value-taking flag: payload key, error text, optional mapper.
+  const valueFlags = new Map<string, ValueFlag>([
+    [
+      '--context',
+      {
+        key: 'context',
+        missing: 'Error: --context requires a value (e2e|prod)\n',
+      },
+    ],
+    [
+      '--state',
+      {
+        key: 'stateMode',
+        missing: 'Error: --state requires a value (default|onboarding|custom)\n',
+      },
+    ],
+    [
+      '--extension-path',
+      {
+        key: 'extensionPath',
+        missing: 'Error: --extension-path requires a value\n',
+      },
+    ],
+    ['--goal', { key: 'goal', missing: 'Error: --goal requires a value\n' }],
+    [
+      '--flow-tags',
+      {
+        key: 'flowTags',
+        missing: 'Error: --flow-tags requires a comma-separated value\n',
+        convert: (raw) => raw.split(',').map((tag) => tag.trim()),
+      },
+    ],
+  ]);
+
+  const parsed: Record<string, unknown> = {};
+  let cursor = 0;
+
+  while (cursor < args.length) {
+    const arg = args[cursor];
+    const spec = valueFlags.get(arg);
+
+    if (arg === '--force') {
+      parsed.force = true;
+    } else if (spec) {
+      // Consume the slot after the flag as its value.
+      cursor += 1;
+      const raw = args[cursor];
+      if (!raw || raw.startsWith('--')) {
+        process.stderr.write(spec.missing);
+        process.exit(1);
+      }
+      parsed[spec.key] = spec.convert ? spec.convert(raw) : raw;
+    } else if (arg.startsWith('--')) {
+      process.stderr.write(`Warning: unknown launch flag '${arg}'\n`);
+    }
+
+    cursor += 1;
+  }
+
+  return parsed;
+}
+
+/**
+ * Prints CLI usage information to stdout.
+ *
+ * The text is a single template literal grouped by command family
+ * (lifecycle, interaction, navigation, discovery, state, knowledge,
+ * contracts, batching), followed by a few invocation examples.
+ */
+export function printHelp(): void {
+  process.stdout.write(`mm — MetaMask CLI
+
+Usage: mm [--project <path>] <command> [options]
+
+Global Options:
+  --project <path>    Target a specific project directory (absolute or relative).
+                      Overrides MM_PROJECT and git-based discovery.
+
+Environment Variables:
+  MM_PROJECT          Default project directory when --project is not provided.
+                      Falls back to the current git worktree root.
+
+Lifecycle:
+  mm launch [--context e2e|prod] [--state default|onboarding|custom] [--extension-path <path>] [--goal <text>] [--force] [--flow-tags <tags>]
+  mm cleanup [--shutdown]
+  mm status
+  mm serve [--background]
+  mm build [--force]
+
+Interaction:
+  mm click <ref> [--selector <css>] [--testid <id>] [--within <scope>]
+  mm type <ref> <text> [--selector <css>] [--testid <id>] [--within <scope>]
+  mm get-text <ref> [--selector <css>] [--testid <id>] [--within <scope>]
+  mm describe-screen
+  mm screenshot [--name <name>]
+  mm wait-for <ref> [--timeout <ms>] [--selector <css>] [--testid <id>] [--within <scope>]
+  mm wait-for-notification [--timeout <ms>]
+  mm clipboard <read|write> [text]
+
+Navigation:
+  mm navigate <url>
+  mm navigate-home
+  mm navigate-settings
+  mm switch-to-tab <role> | --role <role> | --url <url>
+  mm close-tab --role <role> | --url <url>
+
+Discovery:
+  mm list-testids [--limit <n>]
+  mm accessibility-snapshot [--root <selector>]
+
+State & Context:
+  mm get-state
+  mm get-context
+  mm set-context <e2e|prod>
+
+Knowledge:
+  mm knowledge-search <query>
+  mm knowledge-last
+  mm knowledge-sessions
+  mm knowledge-summarize [--session <id>]
+
+Contracts (E2E only):
+  mm seed-contract <name> [--hardfork <fork>]
+  mm seed-contracts <name1> <name2> ... [--hardfork <fork>]
+  mm get-contract-address <name>
+  mm list-contracts
+
+Batching:
+  mm run-steps <json>
+
+Examples:
+  mm launch                                          (from inside project)
+  mm --project ../metamask-extension launch          (from parent folder)
+  MM_PROJECT=/path/to/extension mm describe-screen   (via env var)
+`);
+}
+
+/**
+ * Last-resort handler for a rejected `main()` promise: writes the error to
+ * stderr with a `Fatal:` prefix and exits with status 1.
+ *
+ * @param error - The value `main()` rejected with.
+ */
+/* istanbul ignore next -- CLI entry point, tested via exported functions */
+/* istanbul ignore next -- top-level fatal handler is not exercised in tests */
+const handleFatalCliError = (error: unknown): void => {
+  process.stderr.write(`Fatal: ${String(error)}\n`);
+  process.exit(1);
+};
+
+// Run the CLI only when the VITEST environment variable is unset —
+// presumably so the test suite can import this module without executing
+// main(); confirm against the test setup.
+/* istanbul ignore next -- CLI entry point, tested via exported functions */
+if (process.env.VITEST === undefined) {
+  main().catch(handleFatalCliError);
+}
diff --git a/src/index.ts b/src/index.ts
index eaefc45..2275fbe 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -2,26 +2,36 @@
 export type * from './capabilities/types.js';
 export * from './capabilities/context.js';
 
-// MCP Server - Session Manager Interface
-export * from './mcp-server/session-manager.js';
-
-// MCP Server - Server
-export * from './mcp-server/server.js';
-
-// MCP Server - Core Components
-export * from './mcp-server/knowledge-store.js';
-export * from './mcp-server/discovery.js';
-export * from './mcp-server/schemas.js';
-export * from './mcp-server/tools/definitions.js';
-export * from './mcp-server/tokenization.js';
-
-// MCP Server - Types
-export * from './mcp-server/types';
-
-// MCP Server - Utils
-export * from './mcp-server/utils';
-
-// Shared utilities
+// Session Manager Interface (transport-agnostic)
+export type {
+  ISessionManager,
+  TrackedPage,
+  SessionLaunchInput,
+  SessionLaunchResult,
+  SessionScreenshotOptions,
+} from './server/session-manager.js';
+
+// Core Components
+export * from './knowledge-store/knowledge-store.js';
+export * from './tools/utils/discovery.js';
+export * from './validation/schemas.js';
+export * from './knowledge-store/tokenization.js';
+
+// Types
+export * from './tools/types';
+
+// HTTP Server Types
+export type * from './types/http.js';
+export type { MmClientCliConfig } from './cli/mm.js';
+export * from './tools/registry.js';
+
+// Server utilities
+export * from './server/request-queue.js';
+export * from './server/port-allocator.js';
+export * from './server/daemon-state.js';
+export * from './server/create-server.js';
+
+// Utils
 export * from './utils';
 
 // Launcher utilities
@@ -30,26 +40,5 @@ export * from './launcher/extension-readiness.js';
 export * from './launcher/console-error-buffer.js';
 export * from './launcher/retry.js';
 
-// MCP Server - Tool Handlers
-export * from './mcp-server/tools/build.js';
-export * from './mcp-server/tools/launch.js';
-export * from './mcp-server/tools/cleanup.js';
-export * from './mcp-server/tools/state.js';
-export * from './mcp-server/tools/seeding.js';
-export * from './mcp-server/tools/interaction.js';
-export * from './mcp-server/tools/navigation.js';
-export * from './mcp-server/tools/discovery-tools.js';
-export * from './mcp-server/tools/screenshot.js';
-export * from './mcp-server/tools/knowledge.js';
-export * from './mcp-server/tools/batch.js';
-export * from './mcp-server/tools/context.js';
-export * from './mcp-server/tools/clipboard.js';
-
-// Run tool utility
-export * from './mcp-server/tools/run-tool.js';
-
 // Error classification
-export * from './mcp-server/tools/error-classification.js';
-
-// Helpers
-export * from './mcp-server/tools/helpers.js';
+export * from './tools/error-classification.js';
diff --git a/src/mcp-server/knowledge-store.test.ts b/src/knowledge-store/knowledge-store.test.ts
similarity index 91%
rename from src/mcp-server/knowledge-store.test.ts
rename to src/knowledge-store/knowledge-store.test.ts
index ea984f6..40e75b3 100644
--- a/src/mcp-server/knowledge-store.test.ts
+++ b/src/knowledge-store/knowledge-store.test.ts
@@ -16,12 +16,12 @@ import {
   knowledgeStore,
 } from './knowledge-store.js';
 import type { KnowledgeStoreConfig } from './knowledge-store.js';
+import type { ExtensionState } from '../capabilities/types.js';
 import type {
   SessionMetadata,
   StepRecordOutcome,
   StepRecordObservation,
-} from './types';
-import type { ExtensionState } from '../capabilities/types.js';
+} from '../tools/types';
 
 vi.mock('fs', () => ({
   existsSync: vi.fn(),
@@ -101,21 +101,10 @@ describe('core', () => {
       expect(store).toBeDefined();
     });
 
-    it('accepts custom toolPrefix configuration', () => {
-      const config: KnowledgeStoreConfig = {
-        toolPrefix: 'custom',
-      };
-
-      const store = new KnowledgeStore(config);
-
-      expect(store).toBeDefined();
-    });
-
     it('accepts full configuration object', () => {
       const config: KnowledgeStoreConfig = {
         rootDir: '/custom/root',
         sessionIdPrefix: 'test-',
-        toolPrefix: 'test',
       };
 
       const store = new KnowledgeStore(config);
@@ -126,7 +115,9 @@ describe('core', () => {
 
   describe('writeSessionMetadata', () => {
     it('creates session directory and writes metadata file', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata: SessionMetadata = {
         schemaVersion: 1,
         sessionId: 'session-001',
@@ -152,7 +143,9 @@ describe('core', () => {
     });
 
     it('includes optional goal in metadata', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata: SessionMetadata = {
         schemaVersion: 1,
         sessionId: 'session-003',
@@ -244,13 +237,15 @@ describe('core', () => {
 
   describe('recordStep', () => {
     it('creates steps directory and writes step file', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation();
 
       const result = await store.recordStep({
         sessionId: 'session-step-001',
-        toolName: 'mm_click',
+        toolName: 'click',
         input: { testId: 'send-button' },
         outcome,
         observation,
@@ -264,17 +259,19 @@ describe('core', () => {
       expect(fs.writeFile).toHaveBeenCalled();
       expect(result).toContain('session-step-001');
       expect(result).toContain('steps');
-      expect(result).toContain('mm_click.json');
+      expect(result).toContain('click.json');
     });
 
     it('records step with screenshot artifact', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation({ currentScreen: 'send' });
 
       await store.recordStep({
         sessionId: 'session-step-002',
-        toolName: 'mm_screenshot',
+        toolName: 'screenshot',
         outcome,
         observation,
         screenshotPath: '/test/screenshots/screenshot-001.png',
@@ -294,7 +291,9 @@ describe('core', () => {
     });
 
     it('sanitizes sensitive input fields', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation({
         currentScreen: 'unlock',
@@ -303,7 +302,7 @@ describe('core', () => {
 
       await store.recordStep({
         sessionId: 'session-step-003',
-        toolName: 'mm_type',
+        toolName: 'type',
         input: { testId: 'password-input', text: 'my-secret-password' },
         outcome,
         observation,
@@ -318,13 +317,15 @@ describe('core', () => {
     });
 
     it('records step with target information', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation();
 
       await store.recordStep({
         sessionId: 'session-step-004',
-        toolName: 'mm_click',
+        toolName: 'click',
         input: { testId: 'confirm-btn' },
         target: {
           testId: 'confirm-btn',
@@ -344,13 +345,15 @@ describe('core', () => {
     });
 
     it('computes discovery label for discovery tools', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation();
 
       await store.recordStep({
         sessionId: 'session-step-005',
-        toolName: 'mm_describe_screen',
+        toolName: 'describe_screen',
         outcome,
         observation,
       });
@@ -362,13 +365,15 @@ describe('core', () => {
     });
 
     it('computes navigation label for navigation tools', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation({ currentScreen: 'settings' });
 
       await store.recordStep({
         sessionId: 'session-step-006',
-        toolName: 'mm_navigate',
+        toolName: 'navigate',
         outcome,
         observation,
       });
@@ -380,13 +385,15 @@ describe('core', () => {
     });
 
     it('computes interaction label for interaction tools', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation();
 
       await store.recordStep({
         sessionId: 'session-step-007',
-        toolName: 'mm_click',
+        toolName: 'click',
         input: { testId: 'send-button' },
         outcome,
         observation,
@@ -399,7 +406,9 @@ describe('core', () => {
     });
 
     it('computes confirmation label for confirmation-related targets', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation({
         currentScreen: 'confirm-transaction',
@@ -407,7 +416,7 @@ describe('core', () => {
 
       await store.recordStep({
         sessionId: 'session-step-008',
-        toolName: 'mm_click',
+        toolName: 'click',
         target: { testId: 'confirm-transaction-btn' },
         outcome,
         observation,
@@ -420,7 +429,9 @@ describe('core', () => {
     });
 
     it('computes error-recovery label for failed outcomes', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = {
         ok: false,
         error: { code: 'MM_TARGET_NOT_FOUND', message: 'Target not found' },
@@ -429,7 +440,7 @@ describe('core', () => {
 
       await store.recordStep({
         sessionId: 'session-step-009',
-        toolName: 'mm_click',
+        toolName: 'click',
         input: { testId: 'nonexistent-btn' },
         outcome,
         observation,
@@ -442,13 +453,15 @@ describe('core', () => {
     });
 
     it('records step with e2e context', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation();
 
       await store.recordStep({
         sessionId: 'session-step-011',
-        toolName: 'mm_click',
+        toolName: 'click',
         outcome,
         observation,
         context: 'e2e',
@@ -461,43 +474,24 @@ describe('core', () => {
     });
 
     it('records step with prod context', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
-      const outcome: StepRecordOutcome = { ok: true };
-      const observation = createObservation();
-
-      await store.recordStep({
-        sessionId: 'session-step-012',
-        toolName: 'mm_click',
-        outcome,
-        observation,
-        context: 'prod',
-      });
-
-      const writeCall = vi.mocked(fs.writeFile).mock.calls[0];
-      const writtenData = JSON.parse(writeCall[1] as string);
-
-      expect(writtenData.context).toBe('prod');
-    });
-
-    it('uses custom tool prefix for label computation', async () => {
       const store = new KnowledgeStore({
         rootDir: '/test/knowledge',
-        toolPrefix: 'custom',
       });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation();
 
       await store.recordStep({
-        sessionId: 'session-step-013',
-        toolName: 'custom_describe_screen',
+        sessionId: 'session-step-012',
+        toolName: 'click',
         outcome,
         observation,
+        context: 'prod',
       });
 
       const writeCall = vi.mocked(fs.writeFile).mock.calls[0];
       const writtenData = JSON.parse(writeCall[1] as string);
 
-      expect(writtenData.labels).toContain('discovery');
+      expect(writtenData.context).toBe('prod');
     });
   });
 
@@ -507,7 +501,9 @@ describe('core', () => {
     }
 
     it('returns empty array when no sessions exist', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       vi.mocked(fs.readdir).mockResolvedValueOnce([]);
 
       const result = await store.listSessions(10);
@@ -516,7 +512,9 @@ describe('core', () => {
     });
 
     it('returns sessions sorted by createdAt descending', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const oldMetadata: SessionMetadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-old',
@@ -549,7 +547,9 @@ describe('core', () => {
     });
 
     it('limits results to specified count', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-1'),
@@ -583,7 +583,9 @@ describe('core', () => {
     });
 
     it('filters by flowTag', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const sendMetadata: SessionMetadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-send',
@@ -616,7 +618,9 @@ describe('core', () => {
     });
 
     it('filters by tag', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const testMetadata: SessionMetadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-test',
@@ -649,7 +653,9 @@ describe('core', () => {
     });
 
     it('filters by sinceHours', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const now = new Date();
       const recentDate = new Date(now.getTime() - 12 * 60 * 60 * 1000);
       const oldDate = new Date(now.getTime() - 72 * 60 * 60 * 1000);
@@ -692,7 +698,9 @@ describe('core', () => {
     }
 
     it('returns current session ID for scope "current"', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       const result = await store.resolveSessionIds(
         'current',
@@ -703,7 +711,9 @@ describe('core', () => {
     });
 
     it('returns empty array for scope "current" without current session', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       const result = await store.resolveSessionIds('current', undefined);
 
@@ -711,7 +721,9 @@ describe('core', () => {
     });
 
     it('returns specific session ID for scope object', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       const result = await store.resolveSessionIds(
         { sessionId: 'specific-session-001' },
@@ -722,7 +734,9 @@ describe('core', () => {
     });
 
     it('returns all session IDs for scope "all"', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-1'),
         createDirent('mm-session-2'),
@@ -739,7 +753,9 @@ describe('core', () => {
     });
 
     it('filters session IDs by filters for scope "all"', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const sendMetadata: SessionMetadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-send',
@@ -773,7 +789,9 @@ describe('core', () => {
     });
 
     it('includes sessions without metadata when filtering', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-with-metadata'),
@@ -803,7 +821,9 @@ describe('core', () => {
 
   describe('extractPathTokens', () => {
     it('extracts tokens from URL hash fragment', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const outcome: StepRecordOutcome = { ok: true };
       const observation = createObservation({
         currentScreen: 'confirm-transaction',
@@ -812,7 +832,7 @@ describe('core', () => {
 
       await store.recordStep({
         sessionId: 'session-path-001',
-        toolName: 'mm_click',
+        toolName: 'click',
         outcome,
         observation,
       });
@@ -855,7 +875,7 @@ describe('similarity', () => {
     } = {},
   ) {
     const baseTool = {
-      name: 'mm_click',
+      name: 'click',
       input: { testId: 'test-btn' },
       target: { testId: 'test-btn' },
     };
@@ -904,9 +924,11 @@ describe('similarity', () => {
 
   describe('searchSteps scoring', () => {
     it('scores steps matching tool name in query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
-        tool: { name: 'mm_click', input: {} },
+        tool: { name: 'click', input: {} },
       });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
@@ -928,11 +950,13 @@ describe('similarity', () => {
       const results = await store.searchSteps('click', 10, 'all', undefined);
 
       expect(results.length).toBeGreaterThan(0);
-      expect(results[0].tool).toBe('mm_click');
+      expect(results[0].tool).toBe('click');
     });
 
     it('scores steps matching screen name in query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
         observation: {
           state: { currentScreen: 'send' },
@@ -964,10 +988,12 @@ describe('similarity', () => {
     });
 
     it('scores steps matching target testId in query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
         tool: {
-          name: 'mm_click',
+          name: 'click',
           input: { testId: 'confirm-button' },
           target: { testId: 'confirm-button' },
         },
@@ -995,7 +1021,9 @@ describe('similarity', () => {
     });
 
     it('scores steps matching labels in query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
         labels: ['navigation', 'confirmation'],
       });
@@ -1027,7 +1055,9 @@ describe('similarity', () => {
     });
 
     it('scores steps matching observed testIds in query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
         observation: {
           state: { currentScreen: 'home' },
@@ -1061,7 +1091,9 @@ describe('similarity', () => {
     });
 
     it('scores steps matching a11y node names in query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
         observation: {
           state: { currentScreen: 'home' },
@@ -1097,7 +1129,9 @@ describe('similarity', () => {
     });
 
     it('scores steps matching a11y node roles in query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
         observation: {
           state: { currentScreen: 'home' },
@@ -1130,7 +1164,9 @@ describe('similarity', () => {
     });
 
     it('returns empty results for empty query', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       const results = await store.searchSteps('', 10, 'all', undefined);
 
@@ -1138,9 +1174,11 @@ describe('similarity', () => {
     });
 
     it('calculates token coverage ratio bonus', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const stepRecord = createStepRecord({
-        tool: { name: 'mm_click', input: {} },
+        tool: { name: 'click', input: {} },
         observation: {
           state: { currentScreen: 'send' },
           testIds: [],
@@ -1177,7 +1215,9 @@ describe('similarity', () => {
 
   describe('session scoring', () => {
     it('scores sessions with matching flowTags higher', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const sendMetadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-send',
@@ -1219,7 +1259,9 @@ describe('similarity', () => {
     });
 
     it('scores sessions with matching goal tokens', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1245,7 +1287,9 @@ describe('similarity', () => {
     });
 
     it('scores sessions with matching tags', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1275,7 +1319,9 @@ describe('similarity', () => {
     });
 
     it('gives recency bonus to recent sessions (< 24 hours)', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const recentDate = new Date(
         Date.now() - 12 * 60 * 60 * 1000,
       ).toISOString();
@@ -1296,7 +1342,7 @@ describe('similarity', () => {
       vi.mocked(fs.readFile).mockResolvedValueOnce(
         JSON.stringify(
           createStepRecord({
-            tool: { name: 'mm_click', input: {} },
+            tool: { name: 'click', input: {} },
           }),
         ),
       );
@@ -1307,7 +1353,9 @@ describe('similarity', () => {
     });
 
     it('gives smaller recency bonus to moderately recent sessions (24-72 hours)', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const moderateDate = new Date(
         Date.now() - 48 * 60 * 60 * 1000,
       ).toISOString();
@@ -1328,7 +1376,7 @@ describe('similarity', () => {
       vi.mocked(fs.readFile).mockResolvedValueOnce(
         JSON.stringify(
           createStepRecord({
-            tool: { name: 'mm_click', input: {} },
+            tool: { name: 'click', input: {} },
           }),
         ),
       );
@@ -1339,7 +1387,9 @@ describe('similarity', () => {
     });
 
     it('sorts sessions by score then by createdAt', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata1 = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1372,7 +1422,7 @@ describe('similarity', () => {
           JSON.stringify(
             createStepRecord({
               sessionId: 'mm-session-1',
-              tool: { name: 'mm_click', input: {} },
+              tool: { name: 'click', input: {} },
             }),
           ),
         )
@@ -1380,7 +1430,7 @@ describe('similarity', () => {
           JSON.stringify(
             createStepRecord({
               sessionId: 'mm-session-2',
-              tool: { name: 'mm_click', input: {} },
+              tool: { name: 'click', input: {} },
             }),
           ),
         );
@@ -1393,7 +1443,9 @@ describe('similarity', () => {
 
   describe('generatePriorKnowledge similarity scoring', () => {
     it('scores steps with same screen higher', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1433,7 +1485,9 @@ describe('similarity', () => {
     });
 
     it('scores steps with URL path overlap', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1474,7 +1528,9 @@ describe('similarity', () => {
     });
 
     it('scores steps with testId overlap', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1518,7 +1574,9 @@ describe('similarity', () => {
     });
 
     it('scores steps with a11y node overlap', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1563,7 +1621,9 @@ describe('similarity', () => {
     });
 
     it('scores actionable tools higher than discovery tools', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1573,7 +1633,7 @@ describe('similarity', () => {
         launch: { stateMode: 'default' },
       };
       const clickStep = createStepRecord({
-        tool: { name: 'mm_click', input: { testId: 'send-btn' } },
+        tool: { name: 'click', input: { testId: 'send-btn' } },
         observation: {
           state: { currentScreen: 'home' },
           testIds: [{ testId: 'send-btn', tag: 'button', visible: true }],
@@ -1601,12 +1661,14 @@ describe('similarity', () => {
 
       expect(result).toBeDefined();
       if (result?.similarSteps.length) {
-        expect(result.similarSteps[0].tool).toBe('mm_click');
+        expect(result.similarSteps[0].tool).toBe('click');
       }
     });
 
     it('excludes discovery tools from similarity scoring', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1616,7 +1678,7 @@ describe('similarity', () => {
         launch: { stateMode: 'default' },
       };
       const discoveryStep = createStepRecord({
-        tool: { name: 'mm_describe_screen', input: {} },
+        tool: { name: 'describe_screen', input: {} },
         observation: {
           state: { currentScreen: 'home' },
           testIds: [{ testId: 'send-btn', tag: 'button', visible: true }],
@@ -1644,14 +1706,16 @@ describe('similarity', () => {
 
       if (result?.similarSteps.length) {
         const hasDiscoveryTool = result.similarSteps.some(
-          (s) => s.tool === 'mm_describe_screen',
+          (s) => s.tool === 'describe_screen',
         );
         expect(hasDiscoveryTool).toBe(false);
       }
     });
 
     it('returns undefined when no candidate sessions exist', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([] as any);
 
@@ -1668,7 +1732,9 @@ describe('similarity', () => {
     });
 
     it('excludes current session from candidate sessions', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-current',
@@ -1696,7 +1762,9 @@ describe('similarity', () => {
     });
 
     it('caps testId overlap scoring at 3 items', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1745,7 +1813,9 @@ describe('similarity', () => {
     });
 
     it('caps a11y overlap scoring at 2 items', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1794,7 +1864,9 @@ describe('similarity', () => {
     });
 
     it('computes confidence as ratio of score to max score', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1804,7 +1876,7 @@ describe('similarity', () => {
         launch: { stateMode: 'default' },
       };
       const stepRecord = createStepRecord({
-        tool: { name: 'mm_click', input: { testId: 'send-btn' } },
+        tool: { name: 'click', input: { testId: 'send-btn' } },
         observation: {
           state: { currentScreen: 'send' },
           testIds: [{ testId: 'send-btn', tag: 'button', visible: true }],
@@ -1842,7 +1914,9 @@ describe('similarity', () => {
     });
 
     it('filters steps using flowTag from context', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1875,7 +1949,9 @@ describe('similarity', () => {
     });
 
     it('does not award sameScreen bonus for unknown screens', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1915,7 +1991,9 @@ describe('similarity', () => {
     });
 
     it('builds avoid list only for targets meeting failure threshold', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -1931,7 +2009,7 @@ describe('similarity', () => {
       }) => ({
         ...createStepRecord({
           tool: {
-            name: 'mm_click',
+            name: 'click',
             input: { testId: target.testId ?? 'unknown-btn' },
             target,
           },
@@ -1955,7 +2033,7 @@ describe('similarity', () => {
       const failedSelector = makeFailedStep({ selector: '.unstable-target' });
       const successfulStep = createStepRecord({
         tool: {
-          name: 'mm_click',
+          name: 'click',
           input: { testId: 'confirm-btn' },
           target: { testId: 'confirm-btn' },
         },
@@ -2007,7 +2085,9 @@ describe('similarity', () => {
     });
 
     it('skips suggested action when tool is not in action map', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -2018,7 +2098,7 @@ describe('similarity', () => {
       };
       const unknownToolStep = createStepRecord({
         tool: {
-          name: 'mm_unknown_tool',
+          name: 'unknown_tool',
           input: { testId: 'send-btn' },
           target: { testId: 'send-btn' },
         },
@@ -2056,7 +2136,9 @@ describe('similarity', () => {
     });
 
     it('includes a11y fallback target when testId text matches visible a11y name', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const metadata = {
         schemaVersion: 1,
         sessionId: 'mm-session-1',
@@ -2067,7 +2149,7 @@ describe('similarity', () => {
       };
       const actionableStep = createStepRecord({
         tool: {
-          name: 'mm_click',
+          name: 'click',
           input: { testId: 'send-button' },
           target: { testId: 'send-button' },
         },
@@ -2151,7 +2233,7 @@ describe('session', () => {
       schemaVersion: 1,
       sessionId,
       timestamp,
-      tool: { name: 'mm_click', input: { testId: 'test-btn' } },
+      tool: { name: 'click', input: { testId: 'test-btn' } },
       observation: {
         state: {
           isLoaded: true,
@@ -2176,7 +2258,9 @@ describe('session', () => {
 
   describe('getAllSessionIds', () => {
     it('returns session IDs from directories starting with mm-', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-1'),
@@ -2208,7 +2292,9 @@ describe('session', () => {
     });
 
     it('returns empty array when directory read fails', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockRejectedValueOnce(new Error('ENOENT'));
 
@@ -2218,7 +2304,9 @@ describe('session', () => {
     });
 
     it('returns empty array for empty directory', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([] as any);
 
@@ -2230,7 +2318,9 @@ describe('session', () => {
 
   describe('session scanning limits', () => {
     it('limits sessions scanned to maxSessionsToScan (20)', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       // Create 25 sessions
       const sessions = Array.from({ length: 25 }, (_, i) =>
@@ -2267,7 +2357,9 @@ describe('session', () => {
     });
 
     it('limits steps per session to maxStepsPerSession (500)', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       // Create 600 steps for one session
       const stepFiles = Array.from({ length: 600 }, (_, i) => `step-${i}.json`);
@@ -2300,7 +2392,9 @@ describe('session', () => {
     });
 
     it('stops scanning when maxTotalSteps (2000) is reached', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       // Create 5 sessions with 500 steps each = 2500 total
       const sessions = Array.from({ length: 5 }, (_, i) =>
@@ -2353,7 +2447,9 @@ describe('session', () => {
 
   describe('filter parameters', () => {
     it('filters sessions by flowTag', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-send'),
@@ -2385,7 +2481,9 @@ describe('session', () => {
     });
 
     it('filters sessions by tag', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-e2e'),
@@ -2417,7 +2515,9 @@ describe('session', () => {
     });
 
     it('filters sessions by sinceHours', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const now = new Date();
       const recentDate = new Date(now.getTime() - 6 * 60 * 60 * 1000);
       const oldDate = new Date(now.getTime() - 48 * 60 * 60 * 1000);
@@ -2452,7 +2552,9 @@ describe('session', () => {
     });
 
     it('combines multiple filters', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       const recentDate = new Date(Date.now() - 6 * 60 * 60 * 1000);
       const oldDate = new Date(Date.now() - 48 * 60 * 60 * 1000);
 
@@ -2503,7 +2605,9 @@ describe('session', () => {
 
   describe('corrupted session file handling', () => {
     it('skips corrupted session metadata files', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-valid'),
@@ -2527,7 +2631,9 @@ describe('session', () => {
     });
 
     it('skips corrupted step files during search', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir)
         .mockResolvedValueOnce([createDirent('mm-session-1')] as any)
@@ -2559,7 +2665,9 @@ describe('session', () => {
     });
 
     it('handles missing step files gracefully', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir)
         .mockResolvedValueOnce([createDirent('mm-session-1')] as any)
@@ -2582,7 +2690,9 @@ describe('session', () => {
     });
 
     it('handles steps directory not existing', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir)
         .mockResolvedValueOnce([createDirent('mm-session-1')] as any)
@@ -2605,7 +2715,9 @@ describe('session', () => {
 
   describe('empty session directory', () => {
     it('returns empty results for empty knowledge root', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([] as any);
 
@@ -2615,7 +2727,9 @@ describe('session', () => {
     });
 
     it('returns empty search results for empty knowledge root', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([] as any);
 
@@ -2625,7 +2739,9 @@ describe('session', () => {
     });
 
     it('returns empty getLastSteps for session with no steps', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir)
         .mockResolvedValueOnce([createDirent('mm-session-1')] as any)
@@ -2641,7 +2757,9 @@ describe('session', () => {
     });
 
     it('returns empty summarizeSession for session with no steps', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([] as any);
 
@@ -2654,7 +2772,9 @@ describe('session', () => {
 
   describe('resolveSessionIds with filters', () => {
     it('includes sessions without metadata when filtering', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       vi.mocked(fs.readdir).mockResolvedValueOnce([
         createDirent('mm-session-with-metadata'),
@@ -2682,7 +2802,9 @@ describe('session', () => {
     });
 
     it('returns empty array for scope current without sessionId', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       const result = await store.resolveSessionIds('current', undefined);
 
@@ -2690,7 +2812,9 @@ describe('session', () => {
     });
 
     it('returns specific sessionId for scope object', async () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
 
       const result = await store.resolveSessionIds(
         { sessionId: 'specific-session' },
@@ -2715,7 +2839,9 @@ describe('session', () => {
     });
 
     it('returns true when knowledge store is initialized', () => {
-      const store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      const store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       setKnowledgeStore(store);
 
       const result = hasKnowledgeStore();
@@ -2728,7 +2854,9 @@ describe('session', () => {
     let store: KnowledgeStore;
 
     beforeEach(() => {
-      store = new KnowledgeStore({ rootDir: '/test/knowledge' });
+      store = new KnowledgeStore({
+        rootDir: '/test/knowledge',
+      });
       setKnowledgeStore(store);
     });
 
@@ -2739,7 +2867,7 @@ describe('session', () => {
     it('recordStep delegates to underlying KnowledgeStore instance', async () => {
       const params = {
         sessionId: 'test-session',
-        toolName: 'mm_click',
+        toolName: 'click',
         observation: createObservation(),
         outcome: { ok: true } as StepRecordOutcome,
       };
@@ -2757,7 +2885,7 @@ describe('session', () => {
 
       const params = {
         sessionId: 'test-session',
-        toolName: 'mm_click',
+        toolName: 'click',
         observation: createObservation(),
         outcome: { ok: true } as StepRecordOutcome,
       };
@@ -2771,7 +2899,7 @@ describe('session', () => {
       const mockSteps = [
         {
           timestamp: '2024-01-15T10:30:00.000Z',
-          tool: 'mm_click',
+          tool: 'click',
           screen: 'home' as const,
           snippet: 'Clicked button',
         },
@@ -2809,7 +2937,7 @@ describe('session', () => {
       const mockResults = [
         {
           timestamp: '2024-01-15T10:30:00.000Z',
-          tool: 'mm_click',
+          tool: 'click',
           screen: 'home' as const,
           snippet: 'Clicked send button',
         },
@@ -2845,7 +2973,7 @@ describe('session', () => {
       const mockSummary = {
         sessionId: 'test-session',
         stepCount: 5,
-        recipe: [{ stepNumber: 1, tool: 'mm_click', notes: 'Clicked send' }],
+        recipe: [{ stepNumber: 1, tool: 'click', notes: 'Clicked send' }],
       };
 
       vi.spyOn(store, 'summarizeSession').mockResolvedValueOnce(mockSummary);
diff --git a/src/mcp-server/knowledge-store.ts b/src/knowledge-store/knowledge-store.ts
similarity index 97%
rename from src/mcp-server/knowledge-store.ts
rename to src/knowledge-store/knowledge-store.ts
index fbf1ec5..fecbd8c 100644
--- a/src/mcp-server/knowledge-store.ts
+++ b/src/knowledge-store/knowledge-store.ts
@@ -8,6 +8,7 @@ import {
   tokenize,
   tokenizeIdentifier,
 } from './tokenization.js';
+import type { ExtensionState } from '../capabilities/types.js';
 import type {
   StepRecord,
   StepRecordTool,
@@ -28,14 +29,12 @@ import type {
   PriorKnowledgeAvoid,
   PriorKnowledgeRelatedSession,
   PriorKnowledgeTarget,
-} from './types';
+} from '../tools/types';
+import { generateFilesafeTimestamp, debugWarn } from '../utils';
 import {
-  generateFilesafeTimestamp,
   isSensitiveField,
   SENSITIVE_FIELD_PATTERNS,
-  debugWarn,
-} from './utils';
-import type { ExtensionState } from '../capabilities/types.js';
+} from './utils/redaction.js';
 
 const KNOWLEDGE_ROOT = 'test-artifacts/llm-knowledge';
 const SCHEMA_VERSION = 1;
@@ -96,10 +95,6 @@ export type KnowledgeStoreConfig = {
    * Prefix for session IDs (default: 'mm-')
    */
   sessionIdPrefix?: string;
-  /**
-   * Prefix for tool names (default: 'mm')
-   */
-  toolPrefix?: string;
 };
 
 /**
@@ -128,8 +123,6 @@ export class KnowledgeStore {
 
   readonly #sessionIdPrefix: string;
 
-  readonly #toolPrefix: string;
-
   readonly #sessionMetadataCache: Map<string, SessionMetadata | null> =
     new Map();
 
@@ -151,30 +144,28 @@ export class KnowledgeStore {
     this.#knowledgeRoot =
       config.rootDir ?? path.join(process.cwd(), KNOWLEDGE_ROOT);
     this.#sessionIdPrefix = config.sessionIdPrefix ?? 'mm-';
-    this.#toolPrefix = config.toolPrefix ?? 'mm';
 
-    const prefix = this.#toolPrefix;
     this.#actionableTools = [
-      `${prefix}_click`,
-      `${prefix}_type`,
-      `${prefix}_wait_for`,
-      `${prefix}_navigate`,
-      `${prefix}_wait_for_notification`,
+      'click',
+      'type',
+      'wait_for',
+      'navigate',
+      'wait_for_notification',
     ];
 
     this.#toolActionMap = {
-      [`${prefix}_click`]: 'click',
-      [`${prefix}_type`]: 'type',
-      [`${prefix}_wait_for`]: 'wait_for',
-      [`${prefix}_navigate`]: 'navigate',
-      [`${prefix}_wait_for_notification`]: 'wait_for_notification',
+      click: 'click',
+      type: 'type',
+      wait_for: 'wait_for',
+      navigate: 'navigate',
+      wait_for_notification: 'wait_for_notification',
     };
 
     this.#discoveryTools = [
-      `${prefix}_describe_screen`,
-      `${prefix}_list_testids`,
-      `${prefix}_accessibility_snapshot`,
-      `${prefix}_get_state`,
+      'describe_screen',
+      'list_testids',
+      'accessibility_snapshot',
+      'get_state',
     ];
   }
 
@@ -448,15 +439,8 @@ export class KnowledgeStore {
   ): string[] {
     const labels: string[] = [];
 
-    const navigationTools = [
-      `${this.#toolPrefix}_navigate`,
-      `${this.#toolPrefix}_wait_for_notification`,
-    ];
-    const interactionTools = [
-      `${this.#toolPrefix}_click`,
-      `${this.#toolPrefix}_type`,
-      `${this.#toolPrefix}_wait_for`,
-    ];
+    const navigationTools = ['navigate', 'wait_for_notification'];
+    const interactionTools = ['click', 'type', 'wait_for'];
 
     if (this.#discoveryTools.includes(toolName)) {
       labels.push('discovery');
@@ -825,7 +809,7 @@ export class KnowledgeStore {
     let textRedacted = false;
     let textLength: number | undefined;
 
-    const typeToolName = `${this.#toolPrefix}_type`;
+    const typeToolName = 'type';
 
     for (const [key, value] of Object.entries(input)) {
       if (toolName === typeToolName && key === 'text') {
diff --git a/src/mcp-server/tokenization.test.ts b/src/knowledge-store/tokenization.test.ts
similarity index 99%
rename from src/mcp-server/tokenization.test.ts
rename to src/knowledge-store/tokenization.test.ts
index 64c33d5..939ac1c 100644
--- a/src/mcp-server/tokenization.test.ts
+++ b/src/knowledge-store/tokenization.test.ts
@@ -56,7 +56,7 @@ describe('tokenization', () => {
       expect(buttonCount).toBe(1);
     });
 
-    it('handles special MCP/extension stopwords', () => {
+    it('handles special tool/extension stopwords', () => {
       const tokens = tokenize('mm mcp lw test flow');
       expect(tokens).not.toContain('mm');
       expect(tokens).not.toContain('mcp');
diff --git a/src/mcp-server/tokenization.ts b/src/knowledge-store/tokenization.ts
similarity index 100%
rename from src/mcp-server/tokenization.ts
rename to src/knowledge-store/tokenization.ts
diff --git a/src/mcp-server/utils/redaction.test.ts b/src/knowledge-store/utils/redaction.test.ts
similarity index 100%
rename from src/mcp-server/utils/redaction.test.ts
rename to src/knowledge-store/utils/redaction.test.ts
diff --git a/src/mcp-server/utils/redaction.ts b/src/knowledge-store/utils/redaction.ts
similarity index 100%
rename from src/mcp-server/utils/redaction.ts
rename to src/knowledge-store/utils/redaction.ts
diff --git a/src/launcher/console-error-buffer.test.ts b/src/launcher/console-error-buffer.test.ts
index c34747b..b824862 100644
--- a/src/launcher/console-error-buffer.test.ts
+++ b/src/launcher/console-error-buffer.test.ts
@@ -1,7 +1,7 @@
 import { describe, it, expect } from 'vitest';
 
-import { ConsoleErrorBuffer } from './console-error-buffer';
-import type { ConsoleErrorEntry } from './console-error-buffer';
+import { ConsoleErrorBuffer } from './console-error-buffer.js';
+import type { ConsoleErrorEntry } from './console-error-buffer.js';
 
 describe('ConsoleErrorBuffer', () => {
   describe('constructor', () => {
diff --git a/src/launcher/retry.test.ts b/src/launcher/retry.test.ts
index 06f7ef6..4cf4dbf 100644
--- a/src/launcher/retry.test.ts
+++ b/src/launcher/retry.test.ts
@@ -1,6 +1,6 @@
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 
-import { delay, retryUntil } from './retry';
+import { delay, retryUntil } from './retry.js';
 
 describe('retry', () => {
   beforeEach(() => {
diff --git a/src/mcp-server/server.test.ts b/src/mcp-server/server.test.ts
deleted file mode 100644
index f6ff8fa..0000000
--- a/src/mcp-server/server.test.ts
+++ /dev/null
@@ -1,677 +0,0 @@
-/* eslint-disable @typescript-eslint/naming-convention */
-import { Server } from '@modelcontextprotocol/sdk/server/index.js';
-import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-import type { MockInstance } from 'vitest';
-
-import { createMcpServer } from './server.js';
-import type { McpServerConfig } from './server.js';
-import * as sessionManagerModule from './session-manager.js';
-import { flushPromises } from './test-utils';
-import * as batchModule from './tools/batch.js';
-import * as definitionsModule from './tools/definitions.js';
-import { ErrorCodes } from './types';
-
-vi.mock('@modelcontextprotocol/sdk/server/index.js');
-vi.mock('@modelcontextprotocol/sdk/server/stdio.js');
-vi.mock('./session-manager.js');
-vi.mock('./tools/definitions.js');
-vi.mock('./tools/batch.js');
-
-describe('createMcpServer', () => {
-  let processExitSpy: MockInstance;
-  let processOnSpy: MockInstance;
-  let consoleErrorSpy: MockInstance;
-  let signalHandlers: Map<string, () => void>;
-  let mockSetRequestHandler: ReturnType<typeof vi.fn>;
-  let mockConnect: ReturnType<typeof vi.fn>;
-  let mockClose: ReturnType<typeof vi.fn>;
-
-  const mockToolDefinitions = [
-    { name: 'mm_click', description: 'Click element', inputSchema: {} },
-    { name: 'mm_type', description: 'Type text', inputSchema: {} },
-  ];
-
-  const mockToolHandlers = {
-    mm_click: vi
-      .fn()
-      .mockResolvedValue({ ok: true, result: { clicked: true } }),
-    mm_type: vi.fn().mockResolvedValue({ ok: true, result: { typed: true } }),
-  };
-
-  beforeEach(() => {
-    vi.clearAllMocks();
-
-    mockSetRequestHandler = vi.fn();
-    mockConnect = vi.fn().mockResolvedValue(undefined);
-    mockClose = vi.fn().mockResolvedValue(undefined);
-
-    vi.mocked(Server).mockImplementation(
-      () =>
-        ({
-          setRequestHandler: mockSetRequestHandler,
-          connect: mockConnect,
-          close: mockClose,
-        }) as unknown as InstanceType<typeof Server>,
-    );
-
-    vi.mocked(StdioServerTransport).mockImplementation(
-      () =>
-        ({
-          type: 'stdio',
-        }) as unknown as InstanceType<typeof StdioServerTransport>,
-    );
-
-    vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({
-      getSessionId: vi.fn().mockReturnValue('test-session-123'),
-      cleanup: vi.fn().mockResolvedValue(true),
-    } as unknown as ReturnType<typeof sessionManagerModule.getSessionManager>);
-    vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true);
-
-    vi.mocked(definitionsModule.getToolDefinitions).mockReturnValue(
-      mockToolDefinitions,
-    );
-    vi.mocked(definitionsModule.buildToolHandlersRecord).mockReturnValue(
-      mockToolHandlers,
-    );
-    vi.mocked(definitionsModule.getToolHandler).mockReturnValue(
-      vi.fn().mockResolvedValue({ ok: true, result: {} }),
-    );
-    vi.mocked(definitionsModule.safeValidateToolInput).mockReturnValue({
-      success: true,
-      data: {},
-    });
-    (definitionsModule as { TOOL_PREFIX: string }).TOOL_PREFIX = 'mm';
-
-    vi.mocked(batchModule.setToolRegistry).mockImplementation(() => {});
-
-    signalHandlers = new Map();
-    processOnSpy = vi
-      .spyOn(process, 'on')
-      .mockImplementation(
-        (event: string | symbol, handler: (...args: unknown[]) => void) => {
-          signalHandlers.set(String(event), handler as () => void);
-          return process;
-        },
-      );
-
-    processExitSpy = vi
-      .spyOn(process, 'exit')
-      .mockImplementation(
-        (_code?: string | number | null | undefined): never => {
-          throw new Error(`process.exit(${_code})`);
-        },
-      );
-
-    consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('factory function', () => {
-    it('creates server with required config', () => {
-      const config: McpServerConfig = {
-        name: 'test-server',
-        version: '1.0.0',
-      };
-
-      const server = createMcpServer(config);
-
-      expect(server).toBeDefined();
-      expect(server.start).toBeInstanceOf(Function);
-      expect(server.stop).toBeInstanceOf(Function);
-      expect(server.getServer).toBeInstanceOf(Function);
-      expect(server.getToolDefinitions).toBeInstanceOf(Function);
-      expect(server.getToolPrefix).toBeInstanceOf(Function);
-    });
-
-    it('creates Server with name and version', () => {
-      const config: McpServerConfig = {
-        name: 'my-extension',
-        version: '2.0.0',
-      };
-
-      createMcpServer(config);
-
-      expect(Server).toHaveBeenCalledWith(
-        { name: 'my-extension', version: '2.0.0' },
-        { capabilities: { tools: {} } },
-      );
-    });
-
-    it('registers ListTools and CallTool request handlers', () => {
-      createMcpServer({
-        name: 'test-server',
-        version: '1.0.0',
-      });
-
-      expect(mockSetRequestHandler).toHaveBeenCalledTimes(2);
-    });
-
-    it('registers signal handlers for SIGINT and SIGTERM', () => {
-      createMcpServer({
-        name: 'test-server',
-        version: '1.0.0',
-      });
-
-      expect(processOnSpy).toHaveBeenCalledWith('SIGINT', expect.any(Function));
-      expect(processOnSpy).toHaveBeenCalledWith(
-        'SIGTERM',
-        expect.any(Function),
-      );
-    });
-  });
-
-  describe('getServer()', () => {
-    it('returns the underlying MCP Server instance', () => {
-      const server = createMcpServer({ name: 'test', version: '1.0.0' });
-
-      const mcpServer = server.getServer();
-
-      expect(mcpServer).toBeDefined();
-      expect(mcpServer.setRequestHandler).toBeInstanceOf(Function);
-      expect(mcpServer.connect).toBeInstanceOf(Function);
-      expect(mcpServer.close).toBeInstanceOf(Function);
-    });
-  });
-
-  describe('getToolDefinitions()', () => {
-    it('returns all tool definitions', () => {
-      const server = createMcpServer({ name: 'test', version: '1.0.0' });
-
-      const toolDefs = server.getToolDefinitions();
-
-      expect(toolDefs).toStrictEqual(mockToolDefinitions);
-    });
-  });
-
-  describe('getToolPrefix()', () => {
-    it('returns the tool prefix', () => {
-      const server = createMcpServer({ name: 'test', version: '1.0.0' });
-
-      const prefix = server.getToolPrefix();
-
-      expect(prefix).toBe('mm');
-    });
-  });
-
-  describe('start()', () => {
-    it('creates StdioServerTransport and connects', async () => {
-      const server = createMcpServer({ name: 'test', version: '1.0.0' });
-
-      await server.start();
-
-      expect(StdioServerTransport).toHaveBeenCalled();
-      expect(mockConnect).toHaveBeenCalled();
-    });
-
-    it('logs server startup message', async () => {
-      const customLogger = vi.fn();
-      const server = createMcpServer({
-        name: 'my-server',
-        version: '2.0.0',
-        logger: customLogger,
-      });
-
-      await server.start();
-
-      expect(customLogger).toHaveBeenCalledWith(
-        'my-server MCP Server v2.0.0 running on stdio',
-      );
-    });
-
-    it('uses console.error as default logger', async () => {
-      const server = createMcpServer({
-        name: 'test-server',
-        version: '1.0.0',
-      });
-
-      await server.start();
-
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
-        'test-server MCP Server v1.0.0 running on stdio',
-      );
-    });
-  });
-
-  describe('stop()', () => {
-    it('closes server when transport exists', async () => {
-      const server = createMcpServer({ name: 'test', version: '1.0.0' });
-      await server.start();
-
-      await server.stop();
-
-      expect(mockClose).toHaveBeenCalled();
-    });
-
-    it('does nothing when transport does not exist', async () => {
-      const server = createMcpServer({ name: 'test', version: '1.0.0' });
-
-      await server.stop();
-
-      expect(mockClose).not.toHaveBeenCalled();
-    });
-  });
-
-  describe('ListToolsRequestSchema handler', () => {
-    it('returns tool definitions', async () => {
-      createMcpServer({ name: 'test', version: '1.0.0' });
-
-      const listToolsHandler = mockSetRequestHandler.mock.calls[0][1];
-
-      const result = await listToolsHandler();
-
-      expect(result).toStrictEqual({
-        tools: mockToolDefinitions,
-      });
-    });
-  });
-
-  describe('CallToolRequestSchema handler', () => {
-    let callToolHandler: (
-      request: {
-        params: { name: string; arguments?: Record<string, unknown> };
-      },
-      extra?: { signal?: AbortSignal },
-    ) => Promise<unknown>;
-
-    beforeEach(() => {
-      createMcpServer({ name: 'test', version: '1.0.0' });
-      callToolHandler = mockSetRequestHandler.mock.calls[1][1];
-    });
-
-    it('returns error for unknown tool', async () => {
-      const result = await callToolHandler({
-        params: { name: 'mm_unknown', arguments: {} },
-      });
-
-      expect(result).toMatchObject({
-        content: [{ type: 'text' }],
-        isError: true,
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-      expect(responseText.error.message).toContain('Unknown tool: mm_unknown');
-    });
-
-    it('returns error for invalid input', async () => {
-      vi.mocked(definitionsModule.safeValidateToolInput).mockReturnValueOnce({
-        success: false,
-        error: 'name: Required',
-      });
-
-      const result = await callToolHandler({
-        params: { name: 'mm_click', arguments: {} },
-      });
-
-      expect(result).toMatchObject({
-        content: [{ type: 'text' }],
-        isError: true,
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-      expect(responseText.error.message).toContain(
-        'Invalid input: name: Required',
-      );
-    });
-
-    it('returns error when no handler registered', async () => {
-      vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce(
-        undefined,
-      );
-
-      const result = await callToolHandler({
-        params: { name: 'mm_click', arguments: {} },
-      });
-
-      expect(result).toMatchObject({
-        content: [{ type: 'text' }],
-        isError: true,
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-      expect(responseText.error.message).toContain(
-        'No handler registered for tool: mm_click',
-      );
-    });
-
-    it('executes handler and returns success response', async () => {
-      const mockHandler = vi
-        .fn()
-        .mockResolvedValue({ ok: true, result: { clicked: true } });
-      vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce(
-        mockHandler,
-      );
-
-      const result = await callToolHandler({
-        params: { name: 'mm_click', arguments: { testId: 'btn' } },
-      });
-
-      expect(result).toMatchObject({
-        content: [{ type: 'text' }],
-        isError: false,
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.ok).toBe(true);
-      expect(responseText.result.clicked).toBe(true);
-    });
-
-    it('passes signal to handler', async () => {
-      const mockHandler = vi.fn().mockResolvedValue({ ok: true, result: {} });
-      vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce(
-        mockHandler,
-      );
-      const mockSignal = new AbortController().signal;
-
-      await callToolHandler(
-        { params: { name: 'mm_click', arguments: {} } },
-        { signal: mockSignal },
-      );
-
-      expect(mockHandler).toHaveBeenCalledWith(
-        expect.any(Object),
-        expect.objectContaining({ signal: mockSignal }),
-      );
-    });
-
-    it('returns isError: true when handler returns ok: false', async () => {
-      const mockHandler = vi.fn().mockResolvedValue({
-        ok: false,
-        error: { code: 'MM_CLICK_FAILED', message: 'Click failed' },
-      });
-      vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce(
-        mockHandler,
-      );
-
-      const result = await callToolHandler({
-        params: { name: 'mm_click', arguments: {} },
-      });
-
-      expect(result).toMatchObject({
-        isError: true,
-      });
-    });
-
-    it('includes sessionId in error response when session manager available', async () => {
-      vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true);
-      vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({
-        getSessionId: vi.fn().mockReturnValue('session-abc'),
-        cleanup: vi.fn(),
-      } as unknown as ReturnType<
-        typeof sessionManagerModule.getSessionManager
-      >);
-
-      const result = await callToolHandler({
-        params: { name: 'mm_unknown', arguments: {} },
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.meta.sessionId).toBe('session-abc');
-    });
-
-    it('does not include sessionId when no session manager', async () => {
-      vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(false);
-
-      const result = await callToolHandler({
-        params: { name: 'mm_unknown', arguments: {} },
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.meta.sessionId).toBeUndefined();
-    });
-  });
-
-  describe('signal handlers', () => {
-    it('calls cleanup on SIGINT', async () => {
-      const onCleanup = vi.fn().mockResolvedValue(undefined);
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-        onCleanup,
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-      expect(sigintHandler).toBeDefined();
-
-      try {
-        sigintHandler?.();
-        await flushPromises();
-      } catch (e) {
-        expect((e as Error).message).toBe('process.exit(0)');
-      }
-
-      expect(onCleanup).toHaveBeenCalled();
-    });
-
-    it('calls cleanup on SIGTERM', async () => {
-      const onCleanup = vi.fn().mockResolvedValue(undefined);
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-        onCleanup,
-      });
-
-      const sigtermHandler = signalHandlers.get('SIGTERM');
-      expect(sigtermHandler).toBeDefined();
-
-      try {
-        sigtermHandler?.();
-        await flushPromises();
-      } catch (e) {
-        expect((e as Error).message).toBe('process.exit(0)');
-      }
-
-      expect(onCleanup).toHaveBeenCalled();
-    });
-
-    it('cleans up session manager if available', async () => {
-      const mockCleanup = vi.fn().mockResolvedValue(true);
-      vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true);
-      vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({
-        getSessionId: vi.fn().mockReturnValue('session-abc'),
-        cleanup: mockCleanup,
-      } as unknown as ReturnType<
-        typeof sessionManagerModule.getSessionManager
-      >);
-
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-
-      sigintHandler?.();
-      await flushPromises();
-
-      expect(mockCleanup).toHaveBeenCalled();
-    });
-
-    it('does not call session cleanup when no session manager', async () => {
-      const mockCleanup = vi.fn();
-      vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(false);
-
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-
-      sigintHandler?.();
-      await flushPromises();
-
-      expect(mockCleanup).not.toHaveBeenCalled();
-    });
-
-    it('prevents duplicate cleanup calls', async () => {
-      const onCleanup = vi.fn().mockResolvedValue(undefined);
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-        onCleanup,
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-
-      sigintHandler?.();
-      sigintHandler?.();
-      await flushPromises();
-
-      expect(onCleanup).toHaveBeenCalledTimes(1);
-    });
-
-    it('logs cleanup message', async () => {
-      const customLogger = vi.fn();
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-        logger: customLogger,
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-
-      sigintHandler?.();
-      await flushPromises();
-
-      expect(customLogger).toHaveBeenCalledWith(
-        'Received SIGINT, cleaning up...',
-      );
-    });
-
-    it('logs cleanup errors', async () => {
-      const customLogger = vi.fn();
-      const onCleanup = vi.fn().mockRejectedValue(new Error('Cleanup failed'));
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-        onCleanup,
-        logger: customLogger,
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-
-      sigintHandler?.();
-      await flushPromises();
-
-      expect(customLogger).toHaveBeenCalledWith(
-        expect.stringContaining('Cleanup error:'),
-      );
-    });
-
-    it('exits with code 0 after cleanup', async () => {
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-
-      try {
-        sigintHandler?.();
-        await flushPromises();
-      } catch (e) {
-        expect((e as Error).message).toBe('process.exit(0)');
-      }
-
-      expect(processExitSpy).toHaveBeenCalledWith(0);
-    });
-
-    it('handles signal error gracefully', async () => {
-      const customLogger = vi.fn();
-      const onCleanup = vi.fn().mockImplementation(() => {
-        throw new Error('Sync error');
-      });
-      createMcpServer({
-        name: 'test',
-        version: '1.0.0',
-        onCleanup,
-        logger: customLogger,
-      });
-
-      const sigintHandler = signalHandlers.get('SIGINT');
-
-      sigintHandler?.();
-      await flushPromises();
-
-      expect(customLogger).toHaveBeenCalledWith(
-        expect.stringContaining('Cleanup error:'),
-      );
-    });
-  });
-
-  describe('tool registry', () => {
-    it('sets tool registry with handlers', () => {
-      createMcpServer({ name: 'test', version: '1.0.0' });
-
-      expect(batchModule.setToolRegistry).toHaveBeenCalledWith(
-        mockToolHandlers,
-      );
-    });
-  });
-
-  describe('createToolErrorResponse helper', () => {
-    it('formats error with sessionId from session manager', async () => {
-      vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true);
-      vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({
-        getSessionId: vi.fn().mockReturnValue('my-session'),
-        cleanup: vi.fn(),
-      } as unknown as ReturnType<
-        typeof sessionManagerModule.getSessionManager
-      >);
-
-      createMcpServer({ name: 'test', version: '1.0.0' });
-      const callToolHandler = mockSetRequestHandler.mock.calls[1][1];
-
-      const result = await callToolHandler({
-        params: { name: 'mm_invalid', arguments: {} },
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.meta.sessionId).toBe('my-session');
-      expect(responseText.meta.timestamp).toBeDefined();
-      expect(responseText.meta.durationMs).toBeGreaterThanOrEqual(0);
-    });
-
-    it('includes error details when provided', async () => {
-      vi.mocked(definitionsModule.safeValidateToolInput).mockReturnValueOnce({
-        success: false,
-        error: 'validation error',
-      });
-
-      createMcpServer({ name: 'test', version: '1.0.0' });
-      const callToolHandler = mockSetRequestHandler.mock.calls[1][1];
-
-      const result = await callToolHandler({
-        params: { name: 'mm_click', arguments: { invalid: 'arg' } },
-      });
-
-      const responseText = JSON.parse(
-        (result as { content: [{ text: string }] }).content[0].text,
-      );
-      expect(responseText.error.details).toStrictEqual({
-        providedArgs: { invalid: 'arg' },
-      });
-    });
-  });
-});
diff --git a/src/mcp-server/server.ts b/src/mcp-server/server.ts
deleted file mode 100644
index 1c3411c..0000000
--- a/src/mcp-server/server.ts
+++ /dev/null
@@ -1,237 +0,0 @@
-#!/usr/bin/env node
-/* eslint-disable @typescript-eslint/explicit-function-return-type */
-import { Server } from '@modelcontextprotocol/sdk/server/index.js';
-import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
-import {
-  CallToolRequestSchema,
-  ListToolsRequestSchema,
-} from '@modelcontextprotocol/sdk/types.js';
-
-import { getSessionManager, hasSessionManager } from './session-manager.js';
-import { setToolRegistry } from './tools/batch.js';
-import {
-  getToolDefinitions,
-  getToolHandler,
-  safeValidateToolInput,
-  buildToolHandlersRecord,
-  TOOL_PREFIX,
-} from './tools/definitions.js';
-import type { ToolDefinition } from './tools/definitions.js';
-import { ErrorCodes } from './types';
-import { createErrorResponse } from './utils';
-
-export type McpServerConfig = {
-  name: string;
-  version: string;
-  onCleanup?: () => Promise<void>;
-  logger?: (message: string) => void;
-};
-
-/**
- * Create a standardized error response for tool execution failures.
- *
- * @param code The error code from ErrorCodes enum
- * @param message Human-readable error message
- * @param details Optional error details object
- * @param startTime Timestamp when the operation started
- * @returns MCP-formatted error response object
- */
-function createToolErrorResponse(
-  code: (typeof ErrorCodes)[keyof typeof ErrorCodes],
-  message: string,
-  details: Record<string, unknown> | undefined,
-  startTime: number,
-) {
-  const sessionId = hasSessionManager()
-    ? getSessionManager().getSessionId()
-    : undefined;
-
-  const response = createErrorResponse(
-    code,
-    message,
-    details,
-    sessionId,
-    startTime,
-  );
-
-  return {
-    content: [
-      {
-        type: 'text' as const,
-        text: JSON.stringify(response),
-      },
-    ],
-    isError: true,
-  };
-}
-
-export type McpServer = {
-  start(): Promise<void>;
-  stop(): Promise<void>;
-  getServer(): Server;
-  getToolDefinitions(): ToolDefinition[];
-  getToolPrefix(): string;
-};
-
-/**
- * Create and configure an MCP server instance.
- *
- * @param config Server configuration including name, version, and optional cleanup handler
- * @returns McpServer instance with start/stop methods and tool definitions
- */
-export function createMcpServer(config: McpServerConfig): McpServer {
-  const { name, version, onCleanup, logger = console.error } = config;
-
-  const toolDefinitions = getToolDefinitions();
-  const toolHandlers = buildToolHandlersRecord();
-
-  setToolRegistry(toolHandlers);
-
-  const validToolNames = new Set(toolDefinitions.map((tool) => tool.name));
-
-  const server = new Server({ name, version }, { capabilities: { tools: {} } });
-
-  let isCleaningUp = false;
-
-  server.setRequestHandler(ListToolsRequestSchema, async () => ({
-    tools: toolDefinitions,
-  }));
-
-  server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
-    const { name: toolName, arguments: args } = request.params;
-    const startTime = Date.now();
-    const signal = extra?.signal;
-
-    if (!validToolNames.has(toolName)) {
-      return createToolErrorResponse(
-        ErrorCodes.MM_INVALID_INPUT,
-        `Unknown tool: ${toolName}`,
-        undefined,
-        startTime,
-      );
-    }
-
-    const validation = safeValidateToolInput(toolName, args);
-    if (!validation.success) {
-      return createToolErrorResponse(
-        ErrorCodes.MM_INVALID_INPUT,
-        `Invalid input: ${validation.error}`,
-        { providedArgs: args },
-        startTime,
-      );
-    }
-
-    const handler = getToolHandler(toolName);
-
-    if (!handler) {
-      return createToolErrorResponse(
-        ErrorCodes.MM_INVALID_INPUT,
-        `No handler registered for tool: ${toolName}`,
-        undefined,
-        startTime,
-      );
-    }
-
-    const response = await handler(validation.data as Record<string, unknown>, {
-      signal,
-    });
-
-    return {
-      content: [
-        {
-          type: 'text' as const,
-          text: JSON.stringify(response),
-        },
-      ],
-      isError: !response.ok,
-    };
-  });
-
-  /**
-   * Handle process signals (SIGINT, SIGTERM) and perform cleanup.
-   *
-   * @param signal The signal name received (e.g., 'SIGINT', 'SIGTERM')
-   */
-  const handleSignal = async (signal: string) => {
-    if (isCleaningUp) {
-      return;
-    }
-    isCleaningUp = true;
-
-    logger(`Received ${signal}, cleaning up...`);
-
-    try {
-      if (onCleanup) {
-        await onCleanup();
-      }
-
-      if (hasSessionManager()) {
-        await getSessionManager().cleanup();
-      }
-    } catch (error) {
-      logger(`Cleanup error: ${JSON.stringify(error)}`);
-    }
-
-    process.exit(0);
-  };
-
-  process.on('SIGINT', () => {
-    handleSignal('SIGINT').catch((error) => logger(`SIGINT error: ${error}`));
-  });
-  process.on('SIGTERM', () => {
-    handleSignal('SIGTERM').catch((error) => logger(`SIGTERM error: ${error}`));
-  });
-
-  let transport: StdioServerTransport | undefined;
-
-  return {
-    /**
-     * Start the MCP server and connect to stdio transport.
-     *
-     * @returns Promise that resolves when server is running
-     */
-    async start() {
-      transport = new StdioServerTransport();
-      await server.connect(transport);
-      logger(`${name} MCP Server v${version} running on stdio`);
-    },
-
-    /**
-     * Stop the MCP server and close the transport.
-     *
-     * @returns Promise that resolves when server is stopped
-     */
-    async stop() {
-      if (transport) {
-        await server.close();
-      }
-    },
-
-    /**
-     * Get the underlying MCP Server instance.
-     *
-     * @returns The MCP Server instance
-     */
-    getServer() {
-      return server;
-    },
-
-    /**
-     * Get all available tool definitions.
-     *
-     * @returns Array of tool definitions
-     */
-    getToolDefinitions() {
-      return toolDefinitions;
-    },
-
-    /**
-     * Get the tool name prefix (e.g., 'mm_').
-     *
-     * @returns The tool prefix string
-     */
-    getToolPrefix() {
-      return TOOL_PREFIX;
-    },
-  };
-}
diff --git a/src/mcp-server/session-manager.test.ts b/src/mcp-server/session-manager.test.ts
deleted file mode 100644
index b41b7ca..0000000
--- a/src/mcp-server/session-manager.test.ts
+++ /dev/null
@@ -1,105 +0,0 @@
-import { describe, it, expect, beforeEach } from 'vitest';
-
-import {
-  setSessionManager,
-  getSessionManager,
-  hasSessionManager,
-} from './session-manager.js';
-import type { ISessionManager } from './session-manager.js';
-import { createMockSessionManager } from './test-utils/mock-factories.js';
-
-describe('session-manager', () => {
-  beforeEach(() => {
-    setSessionManager(undefined as unknown as ISessionManager);
-  });
-
-  describe('setSessionManager', () => {
-    it('sets the session manager instance', () => {
-      const mockManager = createMockSessionManager();
-      setSessionManager(mockManager);
-
-      expect(hasSessionManager()).toBe(true);
-    });
-
-    it('replaces the existing session manager', () => {
-      const mockManager1 = createMockSessionManager();
-      const mockManager2 = createMockSessionManager();
-
-      setSessionManager(mockManager1);
-      setSessionManager(mockManager2);
-
-      expect(getSessionManager()).toBe(mockManager2);
-    });
-  });
-
-  describe('getSessionManager', () => {
-    it('returns the session manager when set', () => {
-      const mockManager = createMockSessionManager();
-      setSessionManager(mockManager);
-
-      expect(getSessionManager()).toBe(mockManager);
-    });
-
-    it('throws error when session manager is not set', () => {
-      expect(() => getSessionManager()).toThrowError(
-        'Session manager not initialized. Call setSessionManager() first.',
-      );
-    });
-  });
-
-  describe('hasSessionManager', () => {
-    it('returns false when no session manager is set', () => {
-      expect(hasSessionManager()).toBe(false);
-    });
-
-    it('returns true when session manager is set', () => {
-      const mockManager = createMockSessionManager();
-      setSessionManager(mockManager);
-
-      expect(hasSessionManager()).toBe(true);
-    });
-  });
-
-  describe('ISessionManager interface compliance', () => {
-    let manager: ISessionManager;
-
-    beforeEach(() => {
-      manager = createMockSessionManager();
-      setSessionManager(manager);
-    });
-
-    it('can call hasActiveSession', () => {
-      const result = getSessionManager().hasActiveSession();
-      expect(typeof result).toBe('boolean');
-    });
-
-    it('can call getSessionId', () => {
-      const result = getSessionManager().getSessionId();
-      expect(result).toBeUndefined();
-    });
-
-    it('can call launch', async () => {
-      const result = await getSessionManager().launch({});
-      expect(result.sessionId).toBe('test-session-123');
-    });
-
-    it('can call cleanup', async () => {
-      const result = await getSessionManager().cleanup();
-      expect(result).toBe(true);
-    });
-
-    it('can call screenshot', async () => {
-      const result = await getSessionManager().screenshot({ name: 'test' });
-      expect(result.path).toBeDefined();
-    });
-
-    it('can access capability methods', () => {
-      expect(getSessionManager().getBuildCapability()).toBeUndefined();
-      expect(getSessionManager().getFixtureCapability()).toBeUndefined();
-      expect(getSessionManager().getChainCapability()).toBeUndefined();
-      expect(
-        getSessionManager().getContractSeedingCapability(),
-      ).toBeUndefined();
-    });
-  });
-});
diff --git a/src/mcp-server/test-utils/flush-promises.ts b/src/mcp-server/test-utils/flush-promises.ts
deleted file mode 100644
index eb3403f..0000000
--- a/src/mcp-server/test-utils/flush-promises.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-const scheduler =
-  typeof setImmediate === 'function' ? setImmediate : setTimeout;
-
-export async function flushPromises() {
-  return new Promise((resolve) => {
-    scheduler(resolve, 0);
-  });
-}
diff --git a/src/mcp-server/tools/batch.test.ts b/src/mcp-server/tools/batch.test.ts
deleted file mode 100644
index 2a84d2b..0000000
--- a/src/mcp-server/tools/batch.test.ts
+++ /dev/null
@@ -1,428 +0,0 @@
-import { describe, it, expect, beforeEach, vi } from 'vitest';
-
-import {
-  setToolRegistry,
-  getToolRegistry,
-  hasToolRegistry,
-  setToolValidator,
-  getToolValidator,
-  handleRunSteps,
-} from './batch.js';
-import type { ToolRegistry, ToolHandler, ToolValidator } from './batch.js';
-import { setSessionManager } from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils/mock-factories.js';
-
-/**
- * Clears the tool validator by resetting it to undefined.
- */
-function clearToolValidator(): void {
-  setToolValidator((() => ({ success: true })) as ToolValidator);
-  setToolValidator(undefined as unknown as ToolValidator);
-}
-
-describe('batch', () => {
-  beforeEach(() => {
-    setToolRegistry({});
-    clearToolValidator();
-  });
-
-  describe('setToolRegistry / getToolRegistry', () => {
-    it('sets and gets tool registry', () => {
-      const mockHandler: ToolHandler = vi.fn().mockResolvedValue({ ok: true });
-      const registry: ToolRegistry = {
-        mm_click: mockHandler,
-      };
-
-      setToolRegistry(registry);
-
-      expect(getToolRegistry()).toBe(registry);
-      expect(getToolRegistry().mm_click).toBe(mockHandler);
-    });
-
-    it('replaces existing registry', () => {
-      const registry1: ToolRegistry = { tool1: vi.fn() };
-      const registry2: ToolRegistry = { tool2: vi.fn() };
-
-      setToolRegistry(registry1);
-      setToolRegistry(registry2);
-
-      expect(getToolRegistry()).toBe(registry2);
-      expect(getToolRegistry().tool1).toBeUndefined();
-      expect(getToolRegistry().tool2).toBeDefined();
-    });
-  });
-
-  describe('hasToolRegistry', () => {
-    it('returns false for empty registry', () => {
-      setToolRegistry({});
-      expect(hasToolRegistry()).toBe(false);
-    });
-
-    it('returns true when registry has handlers', () => {
-      setToolRegistry({ mm_click: vi.fn() });
-      expect(hasToolRegistry()).toBe(true);
-    });
-  });
-
-  describe('setToolValidator / getToolValidator', () => {
-    it('sets and gets tool validator', () => {
-      const validator: ToolValidator = vi
-        .fn()
-        .mockReturnValue({ success: true });
-      setToolValidator(validator);
-
-      expect(getToolValidator()).toBe(validator);
-    });
-
-    it('returns undefined when not set', () => {
-      expect(getToolValidator()).toBeUndefined();
-    });
-  });
-
-  describe('handleRunSteps', () => {
-    beforeEach(() => {
-      setSessionManager(createMockSessionManager({ hasActive: true }));
-    });
-
-    it('returns error when no active session', async () => {
-      setSessionManager(createMockSessionManager({ hasActive: false }));
-
-      const result = await handleRunSteps({
-        steps: [{ tool: 'mm_click', args: { testId: 'button' } }],
-      });
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error?.code).toBe('MM_NO_ACTIVE_SESSION');
-      }
-    });
-
-    it('executes steps in sequence', async () => {
-      const executionOrder: string[] = [];
-      const clickHandler = vi.fn().mockImplementation(async () => {
-        executionOrder.push('click');
-        return { ok: true, result: 'clicked' };
-      });
-      const typeHandler = vi.fn().mockImplementation(async () => {
-        executionOrder.push('type');
-        return { ok: true, result: 'typed' };
-      });
-
-      setToolRegistry({
-        mm_click: clickHandler,
-        mm_type: typeHandler,
-      });
-
-      const result = await handleRunSteps({
-        steps: [
-          { tool: 'mm_click', args: { testId: 'button' } },
-          { tool: 'mm_type', args: { testId: 'input', text: 'hello' } },
-        ],
-      });
-
-      expect(result.ok).toBe(true);
-      expect(executionOrder).toStrictEqual(['click', 'type']);
-      if (result.ok) {
-        expect(result.result?.summary.total).toBe(2);
-        expect(result.result?.summary.succeeded).toBe(2);
-        expect(result.result?.summary.failed).toBe(0);
-      }
-    });
-
-    it('returns error for unknown tool', async () => {
-      setToolRegistry({});
-
-      const result = await handleRunSteps({
-        steps: [{ tool: 'unknown_tool', args: {} }],
-      });
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result?.steps[0].ok).toBe(false);
-        expect(result.result?.steps[0].error?.code).toBe('MM_UNKNOWN_TOOL');
-        expect(result.result?.summary.failed).toBe(1);
-      }
-    });
-
-    it('stops on error when stopOnError is true', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({
-        ok: false,
-        error: { code: 'ERR', message: 'fail' },
-      });
-      const typeHandler = vi.fn().mockResolvedValue({ ok: true });
-
-      setToolRegistry({
-        mm_click: clickHandler,
-        mm_type: typeHandler,
-      });
-
-      const result = await handleRunSteps({
-        steps: [
-          { tool: 'mm_click', args: {} },
-          { tool: 'mm_type', args: { text: 'hello' } },
-        ],
-        stopOnError: true,
-      });
-
-      expect(clickHandler).toHaveBeenCalledTimes(1);
-      expect(typeHandler).not.toHaveBeenCalled();
-      if (result.ok) {
-        expect(result.result?.steps.length).toBe(1);
-      }
-    });
-
-    it('continues on error when stopOnError is false', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({
-        ok: false,
-        error: { code: 'ERR', message: 'fail' },
-      });
-      const typeHandler = vi
-        .fn()
-        .mockResolvedValue({ ok: true, result: 'typed' });
-
-      setToolRegistry({
-        mm_click: clickHandler,
-        mm_type: typeHandler,
-      });
-
-      const result = await handleRunSteps({
-        steps: [
-          { tool: 'mm_click', args: {} },
-          { tool: 'mm_type', args: { text: 'hello' } },
-        ],
-        stopOnError: false,
-      });
-
-      expect(clickHandler).toHaveBeenCalledTimes(1);
-      expect(typeHandler).toHaveBeenCalledTimes(1);
-      if (result.ok) {
-        expect(result.result?.steps.length).toBe(2);
-        expect(result.result?.summary.failed).toBe(1);
-        expect(result.result?.summary.succeeded).toBe(1);
-      }
-    });
-
-    it('uses tool validator when set', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({ mm_click: clickHandler });
-
-      const validator: ToolValidator = vi.fn().mockReturnValue({
-        success: false,
-        error: { message: 'Invalid testId' },
-      });
-      setToolValidator(validator);
-
-      const result = await handleRunSteps({
-        steps: [{ tool: 'mm_click', args: { testId: '' } }],
-      });
-
-      expect(validator).toHaveBeenCalledWith('mm_click', { testId: '' });
-      expect(clickHandler).not.toHaveBeenCalled();
-      if (result.ok) {
-        expect(result.result?.steps[0].ok).toBe(false);
-        expect(result.result?.steps[0].error?.code).toBe('MM_INVALID_INPUT');
-      }
-    });
-
-    it('passes validation when validator returns success', async () => {
-      const clickHandler = vi
-        .fn()
-        .mockResolvedValue({ ok: true, result: 'clicked' });
-      setToolRegistry({ mm_click: clickHandler });
-
-      const validator: ToolValidator = vi
-        .fn()
-        .mockReturnValue({ success: true });
-      setToolValidator(validator);
-
-      const result = await handleRunSteps({
-        steps: [{ tool: 'mm_click', args: { testId: 'btn' } }],
-      });
-
-      expect(clickHandler).toHaveBeenCalled();
-      if (result.ok) {
-        expect(result.result?.steps[0].ok).toBe(true);
-      }
-    });
-
-    it('handles exceptions from tool handlers', async () => {
-      const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout'));
-      setToolRegistry({ mm_click: clickHandler });
-
-      const result = await handleRunSteps({
-        steps: [{ tool: 'mm_click', args: {} }],
-      });
-
-      if (result.ok) {
-        expect(result.result?.steps[0].ok).toBe(false);
-        expect(result.result?.steps[0].error?.code).toBe('MM_INTERNAL_ERROR');
-        expect(result.result?.steps[0].error?.message).toContain('Timeout');
-      }
-    });
-
-    it('includes duration in step results', async () => {
-      vi.useFakeTimers();
-      const clickHandler = vi.fn().mockImplementation(async () => {
-        await new Promise((resolve) => setTimeout(resolve, 100));
-        return { ok: true };
-      });
-      setToolRegistry({ mm_click: clickHandler });
-
-      const resultPromise = handleRunSteps({
-        steps: [{ tool: 'mm_click', args: {} }],
-      });
-
-      await vi.advanceTimersByTimeAsync(100);
-      const result = await resultPromise;
-
-      if (result.ok) {
-        expect(result.result?.steps[0].meta?.durationMs).toBe(100);
-      }
-
-      vi.useRealTimers();
-    });
-
-    it('includes total duration in summary', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({ mm_click: clickHandler });
-
-      const result = await handleRunSteps({
-        steps: [
-          { tool: 'mm_click', args: {} },
-          { tool: 'mm_click', args: {} },
-        ],
-      });
-
-      if (result.ok) {
-        expect(result.result?.summary.durationMs).toBeGreaterThanOrEqual(0);
-      }
-    });
-
-    it('defaults args to empty object when not provided', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({ mm_click: clickHandler });
-
-      await handleRunSteps({
-        steps: [{ tool: 'mm_click' }],
-      });
-
-      expect(clickHandler).toHaveBeenCalledWith({}, expect.any(Object));
-    });
-
-    it('maps includeObservations "none" to observation policy', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({ mm_click: clickHandler });
-
-      const result = await handleRunSteps({
-        steps: [{ tool: 'mm_click', args: {} }],
-        includeObservations: 'none',
-      });
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result?.steps[0].ok).toBe(true);
-      }
-      expect(clickHandler).toHaveBeenCalledWith(
-        {},
-        expect.objectContaining({ observationPolicy: 'none' }),
-      );
-    });
-
-    it('maps includeObservations "failures" to observation policy', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({ mm_click: clickHandler });
-
-      const result = await handleRunSteps({
-        steps: [{ tool: 'mm_click', args: {} }],
-        includeObservations: 'failures',
-      });
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result?.steps[0].ok).toBe(true);
-      }
-      expect(clickHandler).toHaveBeenCalledWith(
-        {},
-        expect.objectContaining({ observationPolicy: 'failures' }),
-      );
-    });
-
-    it('stops execution when stopOnError=true and handler not found', async () => {
-      const typeHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({ mm_type: typeHandler });
-
-      const result = await handleRunSteps({
-        steps: [
-          { tool: 'unknown_tool', args: {} },
-          { tool: 'mm_type', args: { text: 'hello' } },
-        ],
-        stopOnError: true,
-      });
-
-      expect(typeHandler).not.toHaveBeenCalled();
-      if (result.ok) {
-        expect(result.result?.steps.length).toBe(1);
-        expect(result.result?.steps[0].ok).toBe(false);
-        expect(result.result?.steps[0].error?.code).toBe('MM_UNKNOWN_TOOL');
-      }
-    });
-
-    it('stops execution when stopOnError=true and validation fails', async () => {
-      const clickHandler = vi.fn().mockResolvedValue({ ok: true });
-      const typeHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({
-        mm_click: clickHandler,
-        mm_type: typeHandler,
-      });
-
-      const validator: ToolValidator = vi.fn().mockImplementation((tool) => {
-        if (tool === 'mm_click') {
-          return { success: false, error: { message: 'Invalid testId' } };
-        }
-        return { success: true };
-      });
-      setToolValidator(validator);
-
-      const result = await handleRunSteps({
-        steps: [
-          { tool: 'mm_click', args: { testId: '' } },
-          { tool: 'mm_type', args: { text: 'hello' } },
-        ],
-        stopOnError: true,
-      });
-
-      expect(clickHandler).not.toHaveBeenCalled();
-      expect(typeHandler).not.toHaveBeenCalled();
-      if (result.ok) {
-        expect(result.result?.steps.length).toBe(1);
-        expect(result.result?.steps[0].ok).toBe(false);
-        expect(result.result?.steps[0].error?.code).toBe('MM_INVALID_INPUT');
-      }
-    });
-
-    it('stops execution when stopOnError=true and handler throws error', async () => {
-      const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout'));
-      const typeHandler = vi.fn().mockResolvedValue({ ok: true });
-      setToolRegistry({
-        mm_click: clickHandler,
-        mm_type: typeHandler,
-      });
-
-      const result = await handleRunSteps({
-        steps: [
-          { tool: 'mm_click', args: {} },
-          { tool: 'mm_type', args: { text: 'hello' } },
-        ],
-        stopOnError: true,
-      });
-
-      expect(clickHandler).toHaveBeenCalledTimes(1);
-      expect(typeHandler).not.toHaveBeenCalled();
-      if (result.ok) {
-        expect(result.result?.steps.length).toBe(1);
-        expect(result.result?.steps[0].ok).toBe(false);
-        expect(result.result?.steps[0].error?.code).toBe('MM_INTERNAL_ERROR');
-      }
-    });
-  });
-});
diff --git a/src/mcp-server/tools/batch.ts b/src/mcp-server/tools/batch.ts
deleted file mode 100644
index b723a69..0000000
--- a/src/mcp-server/tools/batch.ts
+++ /dev/null
@@ -1,286 +0,0 @@
-import { getSessionManager } from '../session-manager.js';
-import type {
-  McpResponse,
-  HandlerOptions,
-  RunStepsInput,
-  RunStepsResult,
-  StepResult,
-  ObservationPolicyOverride,
-} from '../types';
-import { ErrorCodes } from '../types';
-import {
-  createSuccessResponse,
-  createErrorResponse,
-  extractErrorMessage,
-} from '../utils';
-
-/**
- * Maps includeObservations string to observation policy override.
- *
- * @param value The observation policy string ('none', 'failures', 'all', or undefined)
- * @returns The mapped observation policy override
- */
-function mapIncludeObservationsToPolicy(
-  value: 'none' | 'failures' | 'all' | undefined,
-): ObservationPolicyOverride {
-  switch (value) {
-    case 'none':
-      return 'none';
-    case 'failures':
-      return 'failures';
-    case 'all':
-    default:
-      return 'default';
-  }
-}
-
-/**
- * Handler function type for executing MCP tools.
- *
- * @param input Tool arguments as key-value pairs
- * @param options Optional handler configuration
- * @returns Promise resolving to MCP response with tool result
- */
-export type ToolHandler = (
-  input: Record<string, unknown>,
-  options?: HandlerOptions,
-) => Promise<McpResponse<unknown>>;
-
-/**
- * Registry mapping tool names to their handler functions.
- *
- * @returns Record of tool name to handler function mappings
- */
-export type ToolRegistry = Record<string, ToolHandler>;
-
-/**
- * Validator function type for validating tool arguments before execution.
- *
- * @param tool Tool name being validated
- * @param args Tool arguments to validate
- * @returns Validation result with success status and optional error details
- */
-export type ToolValidator = (
-  tool: string,
-  args: Record<string, unknown>,
-) =>
-  | {
-      /**
-       * Validation succeeded
-       */
-      success: true;
-    }
-  | {
-      /**
-       * Validation failed
-       */
-      success: false;
-      /**
-       * Error details when validation fails
-       */
-      error: {
-        /**
-         * Error message describing validation failure
-         */
-        message: string;
-      };
-    };
-
-let _toolRegistry: ToolRegistry = {};
-let _toolValidator: ToolValidator | undefined;
-
-/**
- * Sets the global tool registry for batch execution.
- *
- * @param registry Tool registry mapping names to handlers
- */
-export function setToolRegistry(registry: ToolRegistry): void {
-  _toolRegistry = registry;
-}
-
-/**
- * Gets the current global tool registry.
- *
- * @returns The current tool registry
- */
-export function getToolRegistry(): ToolRegistry {
-  return _toolRegistry;
-}
-
-/**
- * Checks if the tool registry has any registered handlers.
- *
- * @returns True if registry contains handlers, false otherwise
- */
-export function hasToolRegistry(): boolean {
-  return Object.keys(_toolRegistry).length > 0;
-}
-
-/**
- * Sets the global tool validator for batch execution.
- *
- * @param validator Validator function to validate tool arguments
- */
-export function setToolValidator(validator: ToolValidator): void {
-  _toolValidator = validator;
-}
-
-/**
- * Gets the current global tool validator.
- *
- * @returns The current tool validator or undefined if not set
- */
-export function getToolValidator(): ToolValidator | undefined {
-  return _toolValidator;
-}
-
-/**
- * Executes multiple tool steps in sequence with optional validation and error handling.
- *
- * @param input Steps to execute with optional stop-on-error and observation policy
- * @param options Optional handler configuration and observation policy override
- * @returns Promise resolving to MCP response with step results and summary
- */
-export async function handleRunSteps(
-  input: RunStepsInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<RunStepsResult>> {
-  const batchStartTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-
-  if (!sessionManager.hasActiveSession()) {
-    return createErrorResponse(
-      ErrorCodes.MM_NO_ACTIVE_SESSION,
-      'No active session. Call launch first.',
-      { input },
-      undefined,
-      batchStartTime,
-    );
-  }
-
-  const { steps: stepInputs, stopOnError = false, includeObservations } = input;
-  const observationPolicy = mapIncludeObservationsToPolicy(includeObservations);
-  const stepResults: StepResult[] = [];
-  let succeeded = 0;
-  let failed = 0;
-
-  const toolHandlers = getToolRegistry();
-  const toolValidator = getToolValidator();
-
-  for (const stepInput of stepInputs) {
-    const stepStartTime = Date.now();
-    const { tool, args = {} } = stepInput;
-
-    const handler = toolHandlers[tool];
-    if (!handler) {
-      const result: StepResult = {
-        tool,
-        ok: false,
-        error: {
-          code: ErrorCodes.MM_UNKNOWN_TOOL,
-          message: `Unknown tool: ${tool}`,
-        },
-        meta: {
-          durationMs: Date.now() - stepStartTime,
-          timestamp: new Date().toISOString(),
-        },
-      };
-      stepResults.push(result);
-      failed += 1;
-
-      if (stopOnError) {
-        break;
-      }
-      continue;
-    }
-
-    if (toolValidator) {
-      const validation = toolValidator(tool, args);
-      if (!validation.success) {
-        const result: StepResult = {
-          tool,
-          ok: false,
-          error: {
-            code: ErrorCodes.MM_INVALID_INPUT,
-            message: `Invalid input: ${validation.error.message}`,
-          },
-          meta: {
-            durationMs: Date.now() - stepStartTime,
-            timestamp: new Date().toISOString(),
-          },
-        };
-        stepResults.push(result);
-        failed += 1;
-
-        if (stopOnError) {
-          break;
-        }
-        continue;
-      }
-    }
-
-    try {
-      const stepOptions: HandlerOptions = {
-        ...options,
-        observationPolicy,
-      };
-      const response = await handler(args, stepOptions);
-
-      const result: StepResult = {
-        tool,
-        ok: response.ok,
-        result: response.ok ? response.result : undefined,
-        error: response.ok ? undefined : response.error,
-        meta: {
-          durationMs: Date.now() - stepStartTime,
-          timestamp: new Date().toISOString(),
-        },
-      };
-
-      stepResults.push(result);
-
-      if (response.ok) {
-        succeeded += 1;
-      } else {
-        failed += 1;
-        if (stopOnError) {
-          break;
-        }
-      }
-    } catch (error) {
-      const message = extractErrorMessage(error);
-      const result: StepResult = {
-        tool,
-        ok: false,
-        error: {
-          code: ErrorCodes.MM_INTERNAL_ERROR,
-          message: `Unexpected error: ${message}`,
-        },
-        meta: {
-          durationMs: Date.now() - stepStartTime,
-          timestamp: new Date().toISOString(),
-        },
-      };
-      stepResults.push(result);
-      failed += 1;
-
-      if (stopOnError) {
-        break;
-      }
-    }
-  }
-
-  const batchResult: RunStepsResult = {
-    steps: stepResults,
-    summary: {
-      ok: failed === 0,
-      total: stepResults.length,
-      succeeded,
-      failed,
-      durationMs: Date.now() - batchStartTime,
-    },
-  };
-
-  return createSuccessResponse(batchResult, sessionId, batchStartTime);
-}
diff --git a/src/mcp-server/tools/build.test.ts b/src/mcp-server/tools/build.test.ts
deleted file mode 100644
index 4e3721c..0000000
--- a/src/mcp-server/tools/build.test.ts
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Unit tests for build tool handler.
- *
- * Tests the build handler with BuildCapability and legacy build paths,
- * including success/failure scenarios and build options handling.
- */
-
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import { handleBuild } from './build.js';
-import type { BuildCapability } from '../../capabilities/types.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils';
-import { ErrorCodes } from '../types/errors.js';
-
-describe('build', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-  let mockBuildCapability: BuildCapability;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-      sessionMetadata: {
-        schemaVersion: 1,
-        sessionId: 'test-session-123',
-        createdAt: new Date().toISOString(),
-        flowTags: [],
-        tags: [],
-        launch: { stateMode: 'default' },
-      },
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-    // Mock knowledge store to prevent "not initialized" errors
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-
-    mockBuildCapability = {
-      build: vi.fn(),
-      getExtensionPath: vi.fn(),
-      isBuilt: vi.fn(),
-    };
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('handleBuild with capability', () => {
-    it('builds extension successfully with default buildType', async () => {
-      // Arrange
-      const mockedBuild = vi
-        .spyOn(mockBuildCapability, 'build')
-        .mockResolvedValue({
-          success: true,
-          extensionPath: '/path/to/dist/chrome',
-          durationMs: 5000,
-        });
-
-      // Act
-      const result = await handleBuild(
-        {},
-        { buildCapability: mockBuildCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.buildType).toBe('build:test');
-        expect(result.result.extensionPathResolved).toBe(
-          '/path/to/dist/chrome',
-        );
-      }
-      expect(mockedBuild).toHaveBeenCalledWith({
-        buildType: undefined,
-        force: undefined,
-      });
-    });
-
-    it('builds extension with explicit buildType', async () => {
-      // Arrange
-      const mockedBuild = vi
-        .spyOn(mockBuildCapability, 'build')
-        .mockResolvedValue({
-          success: true,
-          extensionPath: '/path/to/dist/chrome',
-          durationMs: 5000,
-        });
-
-      // Act
-      const result = await handleBuild(
-        { buildType: 'build:test' },
-        { buildCapability: mockBuildCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.buildType).toBe('build:test');
-        expect(result.result.extensionPathResolved).toBe(
-          '/path/to/dist/chrome',
-        );
-      }
-      expect(mockedBuild).toHaveBeenCalledWith({
-        buildType: 'build:test',
-        force: undefined,
-      });
-    });
-
-    it('builds extension with force flag', async () => {
-      // Arrange
-      const mockedBuild = vi
-        .spyOn(mockBuildCapability, 'build')
-        .mockResolvedValue({
-          success: true,
-          extensionPath: '/path/to/dist/chrome',
-          durationMs: 5000,
-        });
-
-      // Act
-      const result = await handleBuild(
-        { force: true },
-        { buildCapability: mockBuildCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      expect(mockedBuild).toHaveBeenCalledWith({
-        buildType: undefined,
-        force: true,
-      });
-    });
-
-    it('returns error when build fails with error message', async () => {
-      // Arrange
-      vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({
-        success: false,
-        extensionPath: '',
-        durationMs: 1000,
-        error: 'Compilation error',
-      });
-
-      // Act
-      const result = await handleBuild(
-        {},
-        { buildCapability: mockBuildCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED);
-        expect(result.error.message).toContain('Compilation error');
-      }
-    });
-
-    it('returns error when build fails without error message', async () => {
-      // Arrange
-      vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({
-        success: false,
-        extensionPath: '',
-        durationMs: 1000,
-      });
-
-      // Act
-      const result = await handleBuild(
-        {},
-        { buildCapability: mockBuildCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED);
-        expect(result.error.message).toContain('Unknown error');
-      }
-    });
-
-    it('returns error when build throws exception', async () => {
-      // Arrange
-      vi.spyOn(mockBuildCapability, 'build').mockRejectedValue(
-        new Error('Build process crashed'),
-      );
-
-      // Act
-      const result = await handleBuild(
-        {},
-        { buildCapability: mockBuildCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED);
-        expect(result.error.message).toContain('Build process crashed');
-      }
-    });
-  });
-});
diff --git a/src/mcp-server/tools/build.ts b/src/mcp-server/tools/build.ts
deleted file mode 100644
index 7d422cf..0000000
--- a/src/mcp-server/tools/build.ts
+++ /dev/null
@@ -1,100 +0,0 @@
-import type { BuildCapability } from '../../capabilities/types.js';
-import type {
-  BuildInput,
-  BuildToolResult,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-import { ErrorCodes } from '../types';
-import {
-  createSuccessResponse,
-  createErrorResponse,
-  extractErrorMessage,
-} from '../utils';
-
-/**
- * Options for the build tool handler.
- *
- * @returns Handler options with optional build capability
- */
-export type BuildToolOptions = HandlerOptions & {
-  /**
-   * Optional build capability for extension building
-   */
-  buildCapability?: BuildCapability;
-};
-
-/**
- * Handles the build tool request to build the extension.
- *
- * @param input Build configuration with optional buildType and force flag
- * @param options Optional handler options with build capability
- * @returns Promise resolving to MCP response with build result
- */
-export async function handleBuild(
-  input: BuildInput,
-  options?: BuildToolOptions,
-): Promise<McpResponse<BuildToolResult>> {
-  const startTime = Date.now();
-
-  if (options?.buildCapability) {
-    return handleBuildWithCapability(input, options.buildCapability, startTime);
-  }
-
-  return createErrorResponse(
-    ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE,
-    'BuildCapability not available. The mm_build tool requires either: (1) running in e2e mode with the MetaMask extension wrapper, or (2) running directly in the metamask-extension repository with dependencies installed.',
-    { capability: 'BuildCapability' },
-    undefined,
-    startTime,
-  );
-}
-
-/**
- * Handles build using the provided build capability.
- *
- * @param input Build configuration with optional buildType and force flag
- * @param buildCapability Build capability instance for executing the build
- * @param startTime Timestamp when the operation started
- * @returns Promise resolving to MCP response with build result
- */
-async function handleBuildWithCapability(
-  input: BuildInput,
-  buildCapability: BuildCapability,
-  startTime: number,
-): Promise<McpResponse<BuildToolResult>> {
-  try {
-    const result = await buildCapability.build({
-      buildType: input.buildType,
-      force: input.force,
-    });
-
-    if (!result.success) {
-      return createErrorResponse(
-        ErrorCodes.MM_BUILD_FAILED,
-        `Build failed: ${result.error ?? 'Unknown error'}`,
-        { buildType: input.buildType ?? 'build:test' },
-        undefined,
-        startTime,
-      );
-    }
-
-    return createSuccessResponse<BuildToolResult>(
-      {
-        buildType: input.buildType ?? 'build:test',
-        extensionPathResolved: result.extensionPath,
-      },
-      undefined,
-      startTime,
-    );
-  } catch (error) {
-    const message = extractErrorMessage(error);
-    return createErrorResponse(
-      ErrorCodes.MM_BUILD_FAILED,
-      `Build failed: ${message}`,
-      { buildType: input.buildType ?? 'build:test' },
-      undefined,
-      startTime,
-    );
-  }
-}
diff --git a/src/mcp-server/tools/cleanup.test.ts b/src/mcp-server/tools/cleanup.test.ts
deleted file mode 100644
index 7a8193f..0000000
--- a/src/mcp-server/tools/cleanup.test.ts
+++ /dev/null
@@ -1,161 +0,0 @@
-/**
- * Unit tests for cleanup tool handler.
- *
- * Tests session cleanup with various session states.
- */
-
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-
-import { handleCleanup } from './cleanup.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils/mock-factories.js';
-
-describe('handleCleanup', () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  it('cleans up active session successfully', async () => {
-    const mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-    });
-    vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleCleanup({});
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.result.cleanedUp).toBe(true);
-      expect(result.meta.sessionId).toBe('test-session-123');
-    }
-    expect(mockSessionManager.cleanup).toHaveBeenCalled();
-  });
-
-  it('returns false when no session to clean up', async () => {
-    const mockSessionManager = createMockSessionManager({ hasActive: false });
-    vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(false);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleCleanup({});
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.result.cleanedUp).toBe(false);
-    }
-  });
-
-  it('uses provided sessionId in input', async () => {
-    const mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'current-session',
-    });
-    vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleCleanup({ sessionId: 'custom-session-456' });
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.meta.sessionId).toBe('custom-session-456');
-    }
-  });
-
-  it('falls back to current sessionId when input sessionId is undefined', async () => {
-    const mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-789',
-    });
-    vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleCleanup({});
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.meta.sessionId).toBe('test-session-789');
-    }
-  });
-
-  it('handles cleanup when sessionId is undefined', async () => {
-    const mockSessionManager = createMockSessionManager({ hasActive: false });
-    vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(false);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleCleanup({});
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.result.cleanedUp).toBe(false);
-    }
-  });
-
-  it('includes timestamp in response', async () => {
-    const mockSessionManager = createMockSessionManager({ hasActive: true });
-    vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleCleanup({});
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.meta.timestamp).toBeDefined();
-      expect(typeof result.meta.timestamp).toBe('string');
-      expect(new Date(result.meta.timestamp).getTime()).toBeGreaterThan(0);
-    }
-  });
-
-  it('includes durationMs in response', async () => {
-    const mockSessionManager = createMockSessionManager({ hasActive: true });
-    vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleCleanup({});
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.meta.durationMs).toBeGreaterThanOrEqual(0);
-      expect(typeof result.meta.durationMs).toBe('number');
-    }
-  });
-
-  it('cleans up multiple times without error', async () => {
-    const mockSessionManager = createMockSessionManager({ hasActive: true });
-    vi.spyOn(mockSessionManager, 'cleanup')
-      .mockResolvedValueOnce(true)
-      .mockResolvedValueOnce(false);
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result1 = await handleCleanup({});
-    const result2 = await handleCleanup({});
-
-    expect(result1.ok).toBe(true);
-    if (result1.ok) {
-      expect(result1.result.cleanedUp).toBe(true);
-    }
-
-    expect(result2.ok).toBe(true);
-    if (result2.ok) {
-      expect(result2.result.cleanedUp).toBe(false);
-    }
-
-    expect(mockSessionManager.cleanup).toHaveBeenCalledTimes(2);
-  });
-});
diff --git a/src/mcp-server/tools/cleanup.ts b/src/mcp-server/tools/cleanup.ts
deleted file mode 100644
index 9b6f266..0000000
--- a/src/mcp-server/tools/cleanup.ts
+++ /dev/null
@@ -1,32 +0,0 @@
-import { getSessionManager } from '../session-manager.js';
-import type {
-  CleanupInput,
-  CleanupResult,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-import { createSuccessResponse } from '../utils';
-
-/**
- * Handles the cleanup tool request to stop browser and services.
- *
- * @param input - The cleanup input parameters.
- * @param _options - Handler options (unused).
- * @returns Response indicating if cleanup was performed.
- */
-export async function handleCleanup(
-  input: CleanupInput,
-  _options?: HandlerOptions,
-): Promise<McpResponse<CleanupResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = input.sessionId ?? sessionManager.getSessionId();
-
-  const cleanedUp = await sessionManager.cleanup();
-
-  return createSuccessResponse<CleanupResult>(
-    { cleanedUp },
-    sessionId,
-    startTime,
-  );
-}
diff --git a/src/mcp-server/tools/clipboard.test.ts b/src/mcp-server/tools/clipboard.test.ts
deleted file mode 100644
index d52c3f6..0000000
--- a/src/mcp-server/tools/clipboard.test.ts
+++ /dev/null
@@ -1,325 +0,0 @@
-/**
- * Unit tests for clipboard tool handler.
- *
- * Tests CDP-based clipboard operations (read/write) with proper mocking.
- */
-
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-
-import { handleClipboard } from './clipboard.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils/mock-factories.js';
-import { ErrorCodes } from '../types/errors.js';
-
-describe('handleClipboard', () => {
-  const mockSessionManager = createMockSessionManager({
-    hasActive: true,
-    sessionId: 'test-session-123',
-    sessionMetadata: {
-      schemaVersion: 1,
-      sessionId: 'test-session-123',
-      createdAt: new Date().toISOString(),
-      flowTags: [],
-      tags: [],
-      launch: { stateMode: 'default' },
-    },
-  });
-
-  beforeEach(() => {
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-  });
-
-  describe('write action', () => {
-    it('writes text to clipboard via CDP', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockResolvedValue(undefined),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({
-        action: 'write',
-        text: 'test content',
-      });
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.action).toBe('write');
-        expect(result.result.success).toBe(true);
-        expect(result.result.text).toBe('test content');
-      }
-      expect(mockCdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', {
-        expression: 'navigator.clipboard.writeText("test content")',
-        awaitPromise: true,
-        userGesture: true,
-      });
-      expect(mockCdpSession.detach).toHaveBeenCalled();
-    });
-
-    it('detaches CDP session even if write fails', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockRejectedValue(new Error('Write failed')),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({ action: 'write', text: 'test' });
-
-      expect(result.ok).toBe(false);
-      expect(mockCdpSession.detach).toHaveBeenCalled();
-    });
-  });
-
-  describe('read action', () => {
-    it('reads text from clipboard via CDP', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockResolvedValue({
-          result: { value: 'clipboard content' },
-        }),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({ action: 'read' });
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.action).toBe('read');
-        expect(result.result.success).toBe(true);
-        expect(result.result.text).toBe('clipboard content');
-      }
-      expect(mockCdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', {
-        expression: 'navigator.clipboard.readText()',
-        awaitPromise: true,
-        userGesture: true,
-      });
-    });
-
-    it('uses description when value is missing', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockResolvedValue({
-          result: { description: 'fallback content' },
-        }),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({ action: 'read' });
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.text).toBe('fallback content');
-      }
-    });
-
-    it('returns empty string when result is missing', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockResolvedValue({ result: {} }),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({ action: 'read' });
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.text).toBe('');
-      }
-    });
-  });
-
-  describe('error classification', () => {
-    it('classifies permission denied errors', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockRejectedValue(new Error('permissions denied')),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({ action: 'read' });
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe('MM_CLIPBOARD_PERMISSION_DENIED');
-        expect(result.error.message).toContain('Clipboard permission denied');
-      }
-    });
-
-    it('classifies LavaMoat blocked errors', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockRejectedValue(new Error('LavaMoat policy violation')),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({ action: 'write', text: 'test' });
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe('MM_CLIPBOARD_LAVAMOAT_BLOCKED');
-        expect(result.error.message).toContain(
-          'Clipboard blocked by LavaMoat policy',
-        );
-      }
-    });
-
-    it('classifies generic clipboard errors', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockRejectedValue(new Error('Unknown error')),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-
-      const result = await handleClipboard({ action: 'read' });
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe('MM_CLIPBOARD_FAILED');
-        expect(result.error.message).toContain('Clipboard operation failed');
-      }
-    });
-  });
-
-  describe('input sanitization', () => {
-    it('sanitizes write input for recording', async () => {
-      const mockCdpSession = {
-        send: vi.fn().mockResolvedValue(undefined),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-      const recordStepSpy = vi.fn().mockResolvedValue(undefined);
-      vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-        recordStep: recordStepSpy,
-        getLastSteps: vi.fn().mockResolvedValue([]),
-        searchSteps: vi.fn().mockResolvedValue([]),
-        summarizeSession: vi
-          .fn()
-          .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-        listSessions: vi.fn().mockResolvedValue([]),
-        generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-        writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-      } as any);
-
-      await handleClipboard({ action: 'write', text: 'sensitive password' });
-
-      expect(recordStepSpy).toHaveBeenCalled();
-      const recordedInput = recordStepSpy.mock.calls[0][0].input;
-      expect(recordedInput).toStrictEqual({
-        action: 'write',
-        textLength: 18,
-      });
-      expect(recordedInput).not.toHaveProperty('text');
-    });
-
-    it('sanitizes read input for recording', async () => {
-      const mockCdpSession = {
-        send: vi
-          .fn()
-          .mockResolvedValue({ result: { value: 'clipboard content' } }),
-        detach: vi.fn().mockResolvedValue(undefined),
-      };
-      const mockPage = {
-        context: vi.fn().mockReturnValue({
-          newCDPSession: vi.fn().mockResolvedValue(mockCdpSession),
-        }),
-      };
-      vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-      const recordStepSpy = vi.fn().mockResolvedValue(undefined);
-      vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-        recordStep: recordStepSpy,
-        getLastSteps: vi.fn().mockResolvedValue([]),
-        searchSteps: vi.fn().mockResolvedValue([]),
-        summarizeSession: vi
-          .fn()
-          .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-        listSessions: vi.fn().mockResolvedValue([]),
-        generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-        writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-      } as any);
-
-      await handleClipboard({ action: 'read' });
-
-      expect(recordStepSpy).toHaveBeenCalled();
-      const recordedInput = recordStepSpy.mock.calls[0][0].input;
-      expect(recordedInput).toStrictEqual({
-        action: 'read',
-        textLength: 0,
-      });
-    });
-  });
-
-  describe('session validation', () => {
-    it('returns error when no active session', async () => {
-      const noSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        noSessionManager,
-      );
-
-      const result = await handleClipboard({ action: 'read' });
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-      }
-    });
-  });
-});
diff --git a/src/mcp-server/tools/clipboard.ts b/src/mcp-server/tools/clipboard.ts
deleted file mode 100644
index a9e4fd6..0000000
--- a/src/mcp-server/tools/clipboard.ts
+++ /dev/null
@@ -1,117 +0,0 @@
-import { runTool } from './run-tool.js';
-import type {
-  ClipboardInput,
-  ClipboardResult,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-
-/**
- * Clipboard handler using CDP (Chrome DevTools Protocol) to bypass LavaMoat restrictions.
- *
- * Why CDP instead of page.evaluate()?
- * - page.evaluate() runs JavaScript inside the page context, which is wrapped by LavaMoat
- * - LavaMoat restricts access to navigator.clipboard in the page context
- * - CDP's Runtime.evaluate runs at the browser/DevTools level, bypassing LavaMoat
- * - userGesture: true simulates a user gesture to satisfy clipboard security requirements
- *
- * @param input Clipboard action ('read' or 'write') with optional text content
- * @param options Optional handler configuration
- * @returns Promise resolving to MCP response with clipboard operation result
- */
-export async function handleClipboard(
-  input: ClipboardInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<ClipboardResult>> {
-  return runTool<ClipboardInput, ClipboardResult>({
-    toolName: 'mm_clipboard',
-    input,
-    options,
-
-    /**
-     * Executes the clipboard operation using CDP.
-     *
-     * @param context Tool execution context with page and session info
-     * @returns Promise resolving to clipboard operation result
-     */
-    execute: async (context) => {
-      const { page } = context;
-      const cdpSession = await page.context().newCDPSession(page);
-
-      try {
-        if (input.action === 'write') {
-          await cdpSession.send('Runtime.evaluate', {
-            expression: `navigator.clipboard.writeText(${JSON.stringify(input.text)})`,
-            awaitPromise: true,
-            userGesture: true,
-          });
-
-          return {
-            action: 'write',
-            success: true,
-            text: input.text,
-          };
-        }
-
-        const result = await cdpSession.send('Runtime.evaluate', {
-          expression: `navigator.clipboard.readText()`,
-          awaitPromise: true,
-          userGesture: true,
-        });
-
-        const clipboardText =
-          result.result?.value ?? result.result?.description ?? '';
-
-        return {
-          action: 'read',
-          success: true,
-          text: clipboardText as string,
-        };
-      } finally {
-        // eslint-disable-next-line no-empty-function
-        await cdpSession.detach().catch(() => {});
-      }
-    },
-
-    /**
-     * Classifies clipboard errors into specific error codes.
-     *
-     * @param error The error to classify
-     * @returns Error classification with code and message
-     */
-    classifyError: (error) => {
-      const message = error instanceof Error ? error.message : String(error);
-
-      if (message.includes('permissions') || message.includes('denied')) {
-        return {
-          code: 'MM_CLIPBOARD_PERMISSION_DENIED',
-          message: `Clipboard permission denied: ${message}`,
-        };
-      }
-
-      if (message.includes('LavaMoat') || message.includes('policy')) {
-        return {
-          code: 'MM_CLIPBOARD_LAVAMOAT_BLOCKED',
-          message: `Clipboard blocked by LavaMoat policy: ${message}`,
-        };
-      }
-
-      return {
-        code: 'MM_CLIPBOARD_FAILED',
-        message: `Clipboard operation failed: ${message}`,
-      };
-    },
-
-    /**
-     * Sanitizes clipboard input for recording (removes sensitive text).
-     *
-     * @param inp The clipboard input to sanitize
-     * @returns Sanitized input with text length instead of actual text
-     */
-    sanitizeInputForRecording: (inp) => ({
-      action: inp.action,
-      // Don't record the actual text content for privacy (could be SRP, passwords, etc.)
-      textLength: inp.text?.length ?? 0,
-    }),
-  });
-}
diff --git a/src/mcp-server/tools/context.test.ts b/src/mcp-server/tools/context.test.ts
deleted file mode 100644
index d4c384c..0000000
--- a/src/mcp-server/tools/context.test.ts
+++ /dev/null
@@ -1,221 +0,0 @@
-/**
- * Unit tests for context tool handlers.
- *
- * Tests context switching (e2e/prod) and context info retrieval.
- */
-
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-
-import { handleSetContext, handleGetContext } from './context.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils/mock-factories.js';
-import { ErrorCodes } from '../types/errors.js';
-
-describe('handleSetContext', () => {
-  beforeEach(() => {
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-  });
-
-  it('switches context from e2e to prod', async () => {
-    const mockSessionManager = createMockSessionManager({
-      environmentMode: 'e2e',
-    });
-    vi.spyOn(mockSessionManager, 'setContext');
-    // eslint-disable-next-line vitest/prefer-spy-on
-    mockSessionManager.getContextInfo = vi.fn().mockReturnValue({
-      currentContext: 'prod',
-      hasActiveSession: false,
-      sessionId: null,
-      capabilities: { available: ['build', 'fixture'] },
-      canSwitchContext: true,
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleSetContext({ context: 'prod' });
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.result.previousContext).toBe('e2e');
-      expect(result.result.newContext).toBe('prod');
-      expect(result.result.availableCapabilities).toStrictEqual([
-        'build',
-        'fixture',
-      ]);
-    }
-    expect(mockSessionManager.setContext).toHaveBeenCalledWith(
-      'prod',
-      undefined,
-    );
-  });
-
-  it('forwards context options to session manager', async () => {
-    const mockSessionManager = createMockSessionManager({
-      environmentMode: 'e2e',
-    });
-    vi.spyOn(mockSessionManager, 'setContext');
-    // eslint-disable-next-line vitest/prefer-spy-on
-    mockSessionManager.getContextInfo = vi.fn().mockReturnValue({
-      currentContext: 'e2e',
-      hasActiveSession: false,
-      sessionId: null,
-      capabilities: { available: ['build', 'fixture', 'chain'] },
-      canSwitchContext: true,
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const contextOptions = {
-      mockServer: {
-        enabled: true,
-        port: 18000,
-      },
-    };
-
-    const result = await handleSetContext({
-      context: 'e2e',
-      options: contextOptions,
-    });
-
-    expect(result.ok).toBe(true);
-    expect(mockSessionManager.setContext).toHaveBeenCalledWith(
-      'e2e',
-      contextOptions,
-    );
-  });
-
-  it('switches context from prod to e2e', async () => {
-    const mockSessionManager = createMockSessionManager({
-      environmentMode: 'prod',
-    });
-    vi.spyOn(mockSessionManager, 'setContext');
-    // eslint-disable-next-line vitest/prefer-spy-on
-    mockSessionManager.getContextInfo = vi.fn().mockReturnValue({
-      currentContext: 'e2e',
-      hasActiveSession: false,
-      sessionId: null,
-      capabilities: { available: ['build', 'fixture', 'chain', 'seeding'] },
-      canSwitchContext: true,
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleSetContext({ context: 'e2e' });
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.result.previousContext).toBe('prod');
-      expect(result.result.newContext).toBe('e2e');
-      expect(result.result.availableCapabilities).toStrictEqual([
-        'build',
-        'fixture',
-        'chain',
-        'seeding',
-      ]);
-    }
-  });
-
-  it('classifies context switch blocked errors', async () => {
-    const mockSessionManager = createMockSessionManager({
-      environmentMode: 'e2e',
-    });
-    vi.spyOn(mockSessionManager, 'setContext').mockImplementation(() => {
-      throw new Error(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED);
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleSetContext({ context: 'prod' });
-
-    expect(result.ok).toBe(false);
-    if (!result.ok) {
-      expect(result.error.code).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED);
-      expect(result.error.message).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED);
-    }
-  });
-
-  it('classifies generic context errors', async () => {
-    const mockSessionManager = createMockSessionManager({
-      environmentMode: 'e2e',
-    });
-    vi.spyOn(mockSessionManager, 'setContext').mockImplementation(() => {
-      throw new Error('Unknown error');
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleSetContext({ context: 'prod' });
-
-    expect(result.ok).toBe(false);
-    if (!result.ok) {
-      expect(result.error.code).toBe(ErrorCodes.MM_SET_CONTEXT_FAILED);
-      expect(result.error.message).toContain('Context switch failed');
-    }
-  });
-});
-
-describe('handleGetContext', () => {
-  beforeEach(() => {
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-  });
-
-  it('returns context info when getContextInfo is available', async () => {
-    const mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-      environmentMode: 'e2e',
-    });
-    // eslint-disable-next-line vitest/prefer-spy-on
-    mockSessionManager.getContextInfo = vi.fn().mockReturnValue({
-      currentContext: 'e2e',
-      hasActiveSession: true,
-      sessionId: 'test-session-123',
-      capabilities: { available: ['build', 'fixture', 'chain'] },
-      canSwitchContext: false,
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    const result = await handleGetContext({});
-
-    expect(result.ok).toBe(true);
-    if (result.ok) {
-      expect(result.result.currentContext).toBe('e2e');
-      expect(result.result.hasActiveSession).toBe(true);
-      expect(result.result.sessionId).toBe('test-session-123');
-      expect(result.result.capabilities.available).toStrictEqual([
-        'build',
-        'fixture',
-        'chain',
-      ]);
-      expect(result.result.canSwitchContext).toBe(false);
-    }
-  });
-});
diff --git a/src/mcp-server/tools/context.ts b/src/mcp-server/tools/context.ts
deleted file mode 100644
index cbf2b52..0000000
--- a/src/mcp-server/tools/context.ts
+++ /dev/null
@@ -1,94 +0,0 @@
-import { runTool } from './run-tool.js';
-import { getSessionManager } from '../session-manager.js';
-import { classifyContextError } from './error-classification.js';
-import type { McpResponse, HandlerOptions } from '../types';
-
-export type SetContextInput = {
-  context: 'e2e' | 'prod';
-  options?: Record<string, unknown>;
-};
-export type SetContextResult = {
-  previousContext: 'e2e' | 'prod';
-  newContext: 'e2e' | 'prod';
-  availableCapabilities: string[];
-};
-
-/**
- * Handle setting the workflow context (e2e or prod).
- *
- * @param input The context input containing the desired context mode
- * @param options Optional handler options for the operation
- * @returns Promise resolving to the context change result with previous and new context
- */
-export async function handleSetContext(
-  input: SetContextInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<SetContextResult>> {
-  return runTool<SetContextInput, SetContextResult>({
-    toolName: 'mm_set_context',
-    input,
-    options,
-    requiresSession: false,
-    observationPolicy: 'none',
-
-    /**
-     * Execute the context switch operation.
-     *
-     * @returns The result containing previous context, new context, and available capabilities
-     */
-    execute: async () => {
-      const sessionManager = getSessionManager();
-      const previousContext = sessionManager.getEnvironmentMode();
-      sessionManager.setContext(input.context, input.options);
-      const info = sessionManager.getContextInfo();
-
-      return {
-        previousContext,
-        newContext: input.context,
-        availableCapabilities: info.capabilities.available,
-      };
-    },
-
-    classifyError: classifyContextError,
-  });
-}
-
-export type GetContextResult = {
-  currentContext: 'e2e' | 'prod';
-  hasActiveSession: boolean;
-  sessionId: string | null;
-  capabilities: {
-    available: string[];
-  };
-  canSwitchContext: boolean;
-};
-
-/**
- * Handle getting the current workflow context and capabilities.
- *
- * @param input Empty input object for this operation
- * @param options Optional handler options for the operation
- * @returns Promise resolving to the current context, session state, and available capabilities
- */
-export async function handleGetContext(
-  input: Record<string, never>,
-  options?: HandlerOptions,
-): Promise<McpResponse<GetContextResult>> {
-  return runTool<Record<string, never>, GetContextResult>({
-    toolName: 'mm_get_context',
-    input,
-    options,
-    requiresSession: false,
-    observationPolicy: 'none',
-
-    /**
-     * Execute the get context operation.
-     *
-     * @returns The result containing current context, session state, and capabilities
-     */
-    execute: async () => {
-      const sessionManager = getSessionManager();
-      return sessionManager.getContextInfo();
-    },
-  });
-}
diff --git a/src/mcp-server/tools/definitions.test.ts b/src/mcp-server/tools/definitions.test.ts
deleted file mode 100644
index 2d1634f..0000000
--- a/src/mcp-server/tools/definitions.test.ts
+++ /dev/null
@@ -1,759 +0,0 @@
-/* eslint-disable vitest/require-to-throw-message */
-import { describe, it, expect, beforeAll } from 'vitest';
-
-import {
-  getToolDefinitions,
-  TOOL_PREFIX,
-  extractBaseName,
-  validateToolInput,
-  safeValidateToolInput,
-  getToolNames,
-  getPrefixedToolNames,
-  buildToolHandlersRecord,
-  getToolHandler,
-  hasToolHandler,
-} from './definitions.js';
-import type { ToolDefinition } from './definitions.js';
-
-describe('tool-definitions', () => {
-  describe('getToolDefinitions', () => {
-    it('creates tool definitions with mm_ prefix', () => {
-      const definitions = getToolDefinitions();
-
-      for (const def of definitions) {
-        expect(def.name.startsWith(`${TOOL_PREFIX}_`)).toBe(true);
-      }
-    });
-
-    it('creates 27 tool definitions', () => {
-      const definitions = getToolDefinitions();
-      expect(definitions).toHaveLength(27);
-    });
-
-    it('includes all expected tools', () => {
-      const definitions = getToolDefinitions();
-      const toolNames = definitions.map((d) => d.name);
-
-      const expectedTools = [
-        'mm_build',
-        'mm_launch',
-        'mm_cleanup',
-        'mm_get_state',
-        'mm_navigate',
-        'mm_wait_for_notification',
-        'mm_switch_to_tab',
-        'mm_close_tab',
-        'mm_list_testids',
-        'mm_accessibility_snapshot',
-        'mm_describe_screen',
-        'mm_screenshot',
-        'mm_click',
-        'mm_type',
-        'mm_wait_for',
-        'mm_knowledge_last',
-        'mm_knowledge_search',
-        'mm_knowledge_summarize',
-        'mm_knowledge_sessions',
-        'mm_seed_contract',
-        'mm_seed_contracts',
-        'mm_get_contract_address',
-        'mm_list_contracts',
-        'mm_run_steps',
-        'mm_set_context',
-        'mm_get_context',
-      ];
-
-      for (const expected of expectedTools) {
-        expect(toolNames).toContain(expected);
-      }
-    });
-
-    it('all tools have valid input schema', () => {
-      const definitions = getToolDefinitions();
-
-      for (const def of definitions) {
-        expect(def.inputSchema).toBeDefined();
-        const hasObjectType = def.inputSchema.type === 'object';
-        const hasAllOf = Array.isArray(def.inputSchema.allOf);
-        expect(hasObjectType || hasAllOf).toBe(true);
-      }
-    });
-
-    it('all tools have descriptions', () => {
-      const definitions = getToolDefinitions();
-
-      for (const def of definitions) {
-        expect(def.description).toBeDefined();
-        expect(typeof def.description).toBe('string');
-        expect(def.description.length).toBeGreaterThan(10);
-      }
-    });
-
-    describe('specific tool schemas', () => {
-      let definitions: ToolDefinition[];
-
-      beforeAll(() => {
-        definitions = getToolDefinitions();
-      });
-
-      /**
-       * Find a tool definition by its name.
-       *
-       * @param name The tool name to search for
-       * @returns The matching tool definition or undefined if not found
-       */
-      const findTool = (name: string): ToolDefinition | undefined =>
-        definitions.find((d) => d.name === name);
-
-      /**
-       * Schema object structure for testing.
-       */
-      type SchemaObj = {
-        /**
-         * Object properties mapping
-         */
-        properties?: Record<string, unknown>;
-        /**
-         * Required property names
-         */
-        required?: string[];
-        /**
-         * Array of schemas to combine
-         */
-        allOf?: SchemaObj[];
-      };
-
-      /**
-       * Get all properties from a schema, including those in allOf.
-       *
-       * @param schema The schema object to extract properties from
-       * @returns Combined properties from schema and allOf items
-       */
-      const getAllProperties = (schema: SchemaObj): Record<string, unknown> => {
-        if (schema.properties) {
-          return schema.properties;
-        }
-        if (schema.allOf) {
-          return schema.allOf.reduce(
-            (acc, item) => ({ ...acc, ...getAllProperties(item) }),
-            {},
-          );
-        }
-        return {};
-      };
-
-      /**
-       * Get all required properties from a schema, including those in allOf.
-       *
-       * @param schema The schema object to extract required properties from
-       * @returns Combined required property names from schema and allOf items
-       */
-      const getAllRequired = (schema: SchemaObj): string[] => {
-        const required: string[] = [];
-        if (schema.required) {
-          required.push(...schema.required);
-        }
-        if (schema.allOf) {
-          for (const item of schema.allOf) {
-            required.push(...getAllRequired(item));
-          }
-        }
-        return required;
-      };
-
-      it('mm_click has correct schema', () => {
-        const tool = findTool('mm_click');
-        expect(tool).toBeDefined();
-
-        const props = getAllProperties(tool?.inputSchema as SchemaObj);
-        expect(props.a11yRef).toBeDefined();
-        expect(props.testId).toBeDefined();
-        expect(props.selector).toBeDefined();
-        expect(props.timeoutMs).toBeDefined();
-      });
-
-      it('mm_type has required text property', () => {
-        const tool = findTool('mm_type');
-        expect(tool).toBeDefined();
-
-        const required = getAllRequired(tool?.inputSchema as SchemaObj);
-        expect(required).toContain('text');
-      });
-
-      it('mm_navigate has required screen property', () => {
-        const tool = findTool('mm_navigate');
-        expect(tool).toBeDefined();
-
-        const required = getAllRequired(tool?.inputSchema as SchemaObj);
-        expect(required).toContain('screen');
-
-        const props = getAllProperties(
-          tool?.inputSchema as SchemaObj,
-        ) as Record<
-          string,
-          {
-            /**
-             *
-             */
-            enum?: string[];
-          }
-        >;
-        expect(props.screen?.enum).toStrictEqual([
-          'home',
-          'settings',
-          'notification',
-          'url',
-        ]);
-      });
-
-      it('mm_screenshot has required name property', () => {
-        const tool = findTool('mm_screenshot');
-        expect(tool).toBeDefined();
-
-        const required = getAllRequired(tool?.inputSchema as SchemaObj);
-        expect(required).toContain('name');
-      });
-
-      it('mm_run_steps has required steps property', () => {
-        const tool = findTool('mm_run_steps');
-        expect(tool).toBeDefined();
-
-        const required = getAllRequired(tool?.inputSchema as SchemaObj);
-        expect(required).toContain('steps');
-
-        const props = getAllProperties(
-          tool?.inputSchema as SchemaObj,
-        ) as Record<
-          string,
-          {
-            /**
-             * The JSON schema type
-             */
-            type?: string;
-            /**
-             * Array item schema definition
-             */
-            items?: {
-              /**
-               * The item type
-               */
-              type: string;
-            };
-          }
-        >;
-        expect(props.steps?.type).toBe('array');
-      });
-
-      it('mm_seed_contract has required contractName property', () => {
-        const tool = findTool('mm_seed_contract');
-        expect(tool).toBeDefined();
-
-        const required = getAllRequired(tool?.inputSchema as SchemaObj);
-        expect(required).toContain('contractName');
-
-        const props = getAllProperties(
-          tool?.inputSchema as SchemaObj,
-        ) as Record<
-          string,
-          {
-            /**
-             *
-             */
-            enum?: string[];
-          }
-        >;
-        expect(props.contractName?.enum).toContain('hst');
-        expect(props.contractName?.enum).toContain('nfts');
-      });
-
-      it('mm_launch has stateMode enum', () => {
-        const tool = findTool('mm_launch');
-        expect(tool).toBeDefined();
-
-        const props = getAllProperties(
-          tool?.inputSchema as SchemaObj,
-        ) as Record<
-          string,
-          {
-            /**
-             *
-             */
-            enum?: string[];
-          }
-        >;
-        expect(props.stateMode?.enum).toStrictEqual([
-          'default',
-          'onboarding',
-          'custom',
-        ]);
-      });
-
-      it('mm_switch_to_tab has role enum', () => {
-        const tool = findTool('mm_switch_to_tab');
-        expect(tool).toBeDefined();
-
-        const props = getAllProperties(
-          tool?.inputSchema as SchemaObj,
-        ) as Record<
-          string,
-          {
-            /**
-             *
-             */
-            enum?: string[];
-          }
-        >;
-        expect(props.role?.enum).toStrictEqual([
-          'extension',
-          'notification',
-          'dapp',
-          'other',
-        ]);
-      });
-
-      it('mm_knowledge_search has required query property', () => {
-        const tool = findTool('mm_knowledge_search');
-        expect(tool).toBeDefined();
-
-        const required = getAllRequired(tool?.inputSchema as SchemaObj);
-        expect(required).toContain('query');
-      });
-    });
-
-    it('uses mm_ prefix in descriptions', () => {
-      const definitions = getToolDefinitions();
-
-      const a11yTool = definitions.find(
-        (d) => d.name === 'mm_accessibility_snapshot',
-      );
-      expect(a11yTool?.description).toContain('mm_click');
-      expect(a11yTool?.description).toContain('mm_type');
-    });
-
-    it('all schemas have additionalProperties set to false', () => {
-      const definitions = getToolDefinitions();
-
-      for (const def of definitions) {
-        const schema = def.inputSchema;
-        if (schema.type === 'object') {
-          expect(schema.additionalProperties).toBe(false);
-        }
-      }
-    });
-
-    it('all schemas have properties defined', () => {
-      const definitions = getToolDefinitions();
-
-      for (const def of definitions) {
-        const schema = def.inputSchema;
-        expect(
-          schema.properties ?? schema.allOf ?? schema.anyOf ?? schema.oneOf,
-        ).toBeDefined();
-      }
-    });
-
-    it('all required properties are defined in properties', () => {
-      const definitions = getToolDefinitions();
-
-      for (const def of definitions) {
-        const schema = def.inputSchema;
-        if (Array.isArray(schema.required) && schema.properties) {
-          const props = schema.properties as Record<string, unknown>;
-          for (const req of schema.required) {
-            expect(props[req as string]).toBeDefined();
-          }
-        }
-      }
-    });
-
-    it('processes anyOf arrays in nested properties', () => {
-      const definitions = getToolDefinitions();
-
-      // Find tools with anyOf in properties (e.g., knowledge tools with scope)
-      // This exercises the anyOf handling in removeDefaultsFromRequired (lines 397-400)
-      let foundAnyOf = false;
-      for (const def of definitions) {
-        const schema = def.inputSchema;
-        if (schema.properties && typeof schema.properties === 'object') {
-          const props = schema.properties as Record<string, unknown>;
-          for (const [, prop] of Object.entries(props)) {
-            if (prop && typeof prop === 'object') {
-              const propObj = prop as Record<string, unknown>;
-              if ('anyOf' in propObj) {
-                foundAnyOf = true;
-                expect(Array.isArray(propObj.anyOf)).toBe(true);
-                // Verify anyOf items are properly processed
-                const anyOfArray = propObj.anyOf as unknown[];
-                for (const item of anyOfArray) {
-                  expect(item).toBeDefined();
-                }
-              }
-            }
-          }
-        }
-      }
-      // Verify we found at least one tool with anyOf (knowledge tools)
-      expect(foundAnyOf).toBe(true);
-    });
-
-    it('processes nested object properties recursively', () => {
-      const definitions = getToolDefinitions();
-
-      // Verify that nested object properties are processed correctly
-      // This exercises the recursive property handling in removeDefaultsFromRequired (lines 418-421)
-      for (const def of definitions) {
-        const schema = def.inputSchema;
-        if (schema.properties && typeof schema.properties === 'object') {
-          const props = schema.properties as Record<string, unknown>;
-          for (const [, value] of Object.entries(props)) {
-            if (value && typeof value === 'object') {
-              const propObj = value as Record<string, unknown>;
-              // Nested objects should have proper structure
-              expect(propObj).toBeDefined();
-              // If it has properties, they should be objects
-              if ('properties' in propObj && propObj.properties) {
-                expect(typeof propObj.properties).toBe('object');
-              }
-            }
-          }
-        }
-      }
-    });
-
-    it('sets additionalProperties false on top-level object schemas', () => {
-      const definitions = getToolDefinitions();
-
-      // Verify that additionalProperties is set to false on top-level schemas
-      // This exercises the additionalProperties assignment in zodSchemaToJsonSchema (line 503)
-      for (const def of definitions) {
-        const schema = def.inputSchema;
-        // All tool schemas should be objects with additionalProperties: false
-        if (schema.type === 'object') {
-          expect(schema.additionalProperties).toBe(false);
-        }
-      }
-    });
-  });
-
-  describe('extractBaseName', () => {
-    it('removes mm_ prefix from tool name', () => {
-      const result = extractBaseName('mm_click');
-
-      expect(result).toBe('click');
-    });
-
-    it('returns original name when no prefix', () => {
-      const result = extractBaseName('click');
-
-      expect(result).toBe('click');
-    });
-
-    it('handles multiple underscores correctly', () => {
-      const result = extractBaseName('mm_wait_for_notification');
-
-      expect(result).toBe('wait_for_notification');
-    });
-
-    it('handles empty string', () => {
-      const result = extractBaseName('');
-
-      expect(result).toBe('');
-    });
-
-    it('handles string with only prefix', () => {
-      const result = extractBaseName('mm_');
-
-      expect(result).toBe('');
-    });
-
-    it('handles all tool names from getToolNames', () => {
-      const baseNames = getToolNames();
-
-      for (const baseName of baseNames) {
-        const prefixed = `${TOOL_PREFIX}_${baseName}`;
-        const extracted = extractBaseName(prefixed);
-        expect(extracted).toBe(baseName);
-      }
-    });
-  });
-
-  describe('validateToolInput', () => {
-    it('parses valid input for known tool', () => {
-      const result = validateToolInput('mm_click', { testId: 'button' });
-
-      expect(result).toBeDefined();
-      expect(result).toHaveProperty('testId', 'button');
-    });
-
-    it('throws error for unknown tool', () => {
-      expect(() => {
-        validateToolInput('mm_unknown_tool', {});
-      }).toThrowError('Unknown tool: mm_unknown_tool');
-    });
-
-    it('throws error for invalid input schema', () => {
-      expect(() => {
-        validateToolInput('mm_type', { text: 123 });
-      }).toThrowError();
-    });
-
-    it('accepts input without prefix', () => {
-      const result = validateToolInput('click', { testId: 'button' });
-
-      expect(result).toBeDefined();
-      expect(result).toHaveProperty('testId', 'button');
-    });
-
-    it('parses input with multiple valid properties', () => {
-      const result = validateToolInput('mm_click', {
-        testId: 'button',
-        timeoutMs: 5000,
-      });
-
-      expect(result).toBeDefined();
-      expect(result).toHaveProperty('testId', 'button');
-      expect(result).toHaveProperty('timeoutMs', 5000);
-    });
-  });
-
-  describe('safeValidateToolInput', () => {
-    it('returns success with data for valid input', () => {
-      const result = safeValidateToolInput('mm_click', { testId: 'button' });
-
-      expect(result.success).toBe(true);
-      expect(result).toHaveProperty('data');
-      if (result.success) {
-        expect(result.data).toHaveProperty('testId', 'button');
-      }
-    });
-
-    it('returns failure for unknown tool', () => {
-      const result = safeValidateToolInput('mm_unknown_tool', {});
-
-      expect(result.success).toBe(false);
-      expect(result).toHaveProperty('error');
-      if (!result.success) {
-        expect(result.error).toContain('Unknown tool');
-      }
-    });
-
-    it('returns failure for invalid input', () => {
-      const result = safeValidateToolInput('mm_type', { text: 123 });
-
-      expect(result.success).toBe(false);
-      expect(result).toHaveProperty('error');
-    });
-
-    it('accepts input without prefix', () => {
-      const result = safeValidateToolInput('click', { testId: 'button' });
-
-      expect(result.success).toBe(true);
-      if (result.success) {
-        expect(result.data).toHaveProperty('testId', 'button');
-      }
-    });
-
-    it('returns success with multiple valid properties', () => {
-      const result = safeValidateToolInput('mm_click', {
-        testId: 'button',
-        timeoutMs: 5000,
-      });
-
-      expect(result.success).toBe(true);
-      if (result.success) {
-        expect(result.data).toHaveProperty('testId', 'button');
-        expect(result.data).toHaveProperty('timeoutMs', 5000);
-      }
-    });
-
-    it('includes error message with path for validation errors', () => {
-      const result = safeValidateToolInput('mm_type', { text: 123 });
-
-      expect(result.success).toBe(false);
-      if (!result.success) {
-        expect(result.error).toMatch(/text/u);
-      }
-    });
-  });
-
-  describe('getToolNames', () => {
-    it('returns array of tool base names', () => {
-      const names = getToolNames();
-
-      expect(Array.isArray(names)).toBe(true);
-      expect(names.length).toBeGreaterThan(0);
-    });
-
-    it('includes expected tool names without prefix', () => {
-      const names = getToolNames();
-
-      expect(names).toContain('click');
-      expect(names).toContain('type');
-      expect(names).toContain('launch');
-      expect(names).toContain('cleanup');
-    });
-
-    it('does not include mm_ prefix in names', () => {
-      const names = getToolNames();
-
-      for (const name of names) {
-        expect(name).not.toMatch(/^mm_/u);
-      }
-    });
-
-    it('returns 27 tool names', () => {
-      const names = getToolNames();
-
-      expect(names).toHaveLength(27);
-    });
-
-    it('all names are strings', () => {
-      const names = getToolNames();
-
-      for (const name of names) {
-        expect(typeof name).toBe('string');
-        expect(name.length).toBeGreaterThan(0);
-      }
-    });
-  });
-
-  describe('getPrefixedToolNames', () => {
-    it('returns array of prefixed tool names', () => {
-      const names = getPrefixedToolNames();
-
-      expect(Array.isArray(names)).toBe(true);
-      expect(names.length).toBeGreaterThan(0);
-    });
-
-    it('includes mm_ prefix in all names', () => {
-      const names = getPrefixedToolNames();
-
-      for (const name of names) {
-        expect(name).toMatch(/^mm_/u);
-      }
-    });
-
-    it('includes expected prefixed tool names', () => {
-      const names = getPrefixedToolNames();
-
-      expect(names).toContain('mm_click');
-      expect(names).toContain('mm_type');
-      expect(names).toContain('mm_launch');
-      expect(names).toContain('mm_cleanup');
-    });
-
-    it('has same count as getToolNames', () => {
-      const baseNames = getToolNames();
-      const prefixedNames = getPrefixedToolNames();
-
-      expect(prefixedNames).toHaveLength(baseNames.length);
-    });
-  });
-
-  describe('buildToolHandlersRecord', () => {
-    it('returns record mapping prefixed names to handlers', () => {
-      const handlers = buildToolHandlersRecord();
-
-      expect(typeof handlers).toBe('object');
-      expect(handlers).not.toBeNull();
-    });
-
-    it('includes all prefixed tool names as keys', () => {
-      const handlers = buildToolHandlersRecord();
-      const prefixedNames = getPrefixedToolNames();
-
-      for (const name of prefixedNames) {
-        expect(handlers).toHaveProperty(name);
-      }
-    });
-
-    it('all values are functions', () => {
-      const handlers = buildToolHandlersRecord();
-
-      for (const [, handler] of Object.entries(handlers)) {
-        expect(typeof handler).toBe('function');
-      }
-    });
-
-    it('has same count as getPrefixedToolNames', () => {
-      const handlers = buildToolHandlersRecord();
-      const prefixedNames = getPrefixedToolNames();
-
-      expect(Object.keys(handlers)).toHaveLength(prefixedNames.length);
-    });
-
-    it('does not include base names without prefix', () => {
-      const handlers = buildToolHandlersRecord();
-      const baseNames = getToolNames();
-
-      for (const baseName of baseNames) {
-        expect(handlers).not.toHaveProperty(baseName);
-      }
-    });
-  });
-
-  describe('getToolHandler', () => {
-    it('returns handler for prefixed tool name', () => {
-      const handler = getToolHandler('mm_click');
-
-      expect(handler).toBeDefined();
-      expect(typeof handler).toBe('function');
-    });
-
-    it('returns handler for base tool name', () => {
-      const handler = getToolHandler('click');
-
-      expect(handler).toBeDefined();
-      expect(typeof handler).toBe('function');
-    });
-
-    it('returns undefined for unknown tool', () => {
-      const handler = getToolHandler('mm_unknown_tool');
-
-      expect(handler).toBeUndefined();
-    });
-
-    it('returns same handler for prefixed and base names', () => {
-      const prefixedHandler = getToolHandler('mm_click');
-      const baseHandler = getToolHandler('click');
-
-      expect(prefixedHandler).toBe(baseHandler);
-    });
-  });
-
-  describe('hasToolHandler', () => {
-    it('returns true for existing prefixed tool', () => {
-      const exists = hasToolHandler('mm_click');
-
-      expect(exists).toBe(true);
-    });
-
-    it('returns true for existing base tool name', () => {
-      const exists = hasToolHandler('click');
-
-      expect(exists).toBe(true);
-    });
-
-    it('returns false for unknown tool', () => {
-      const exists = hasToolHandler('mm_unknown_tool');
-
-      expect(exists).toBe(false);
-    });
-
-    it('returns true for all prefixed tool names', () => {
-      const prefixedNames = getPrefixedToolNames();
-
-      for (const name of prefixedNames) {
-        expect(hasToolHandler(name)).toBe(true);
-      }
-    });
-
-    it('returns true for all base tool names', () => {
-      const baseNames = getToolNames();
-
-      for (const name of baseNames) {
-        expect(hasToolHandler(name)).toBe(true);
-      }
-    });
-  });
-});
diff --git a/src/mcp-server/tools/definitions.ts b/src/mcp-server/tools/definitions.ts
deleted file mode 100644
index f6e7fdc..0000000
--- a/src/mcp-server/tools/definitions.ts
+++ /dev/null
@@ -1,638 +0,0 @@
-import type { ZodType } from 'zod';
-
-import {
-  buildInputSchema,
-  launchInputSchema,
-  cleanupInputSchema,
-  getStateInputSchema,
-  navigateInputSchema,
-  waitForNotificationInputSchema,
-  switchToTabInputSchema,
-  closeTabInputSchema,
-  listTestIdsInputSchema,
-  accessibilitySnapshotInputSchema,
-  describeScreenInputSchema,
-  screenshotInputSchema,
-  clickInputSchema,
-  typeInputSchema,
-  waitForInputSchema,
-  knowledgeLastInputSchema,
-  knowledgeSearchInputSchema,
-  knowledgeSummarizeInputSchema,
-  knowledgeSessionsInputSchema,
-  seedContractInputSchema,
-  seedContractsInputSchema,
-  getContractAddressInputSchema,
-  listDeployedContractsInputSchema,
-  runStepsInputSchema,
-  setContextInputSchema,
-  getContextInputSchema,
-  clipboardInputSchema,
-} from '../schemas.js';
-import { getSessionManager } from '../session-manager.js';
-import { handleRunSteps } from './batch.js';
-import type { ToolHandler } from './batch.js';
-import type { BuildToolOptions } from './build.js';
-import { handleBuild } from './build.js';
-import { handleCleanup } from './cleanup.js';
-import { handleClipboard } from './clipboard.js';
-import { handleSetContext, handleGetContext } from './context.js';
-import {
-  handleListTestIds,
-  handleAccessibilitySnapshot,
-  handleDescribeScreen,
-} from './discovery-tools.js';
-import { handleClick, handleType, handleWaitFor } from './interaction.js';
-import {
-  handleKnowledgeLast,
-  handleKnowledgeSearch,
-  handleKnowledgeSummarize,
-  handleKnowledgeSessions,
-} from './knowledge.js';
-import { handleLaunch } from './launch.js';
-import {
-  handleNavigate,
-  handleWaitForNotification,
-  handleSwitchToTab,
-  handleCloseTab,
-} from './navigation.js';
-import { handleScreenshot } from './screenshot.js';
-import {
-  handleSeedContract,
-  handleSeedContracts,
-  handleGetContractAddress,
-  handleListDeployedContracts,
-} from './seeding.js';
-import type { SeedingToolOptions } from './seeding.js';
-import { handleGetState } from './state.js';
-import type { StateToolOptions } from './state.js';
-import type {
-  SeedContractInput,
-  SeedContractsInput,
-  GetContractAddressInput,
-  ListDeployedContractsInput,
-} from '../types';
-
-export const TOOL_PREFIX = 'mm';
-
-export type ToolDefinition = {
-  name: string;
-  description: string;
-  inputSchema: Record<string, unknown>;
-};
-
-type ZodSchema = ZodType<unknown> & { toJSONSchema(): Record<string, unknown> };
-
-type ToolEntry = {
-  schema: ZodSchema;
-  description: string;
-  handler: ToolHandler;
-};
-
-/**
- * Create a handler for the build tool that injects build capability.
- *
- * @returns A tool handler function for building the extension
- */
-function createBuildHandler(): ToolHandler {
-  return async (input, options) => {
-    const sessionManager = getSessionManager();
-    const buildOptions: BuildToolOptions = {
-      ...options,
-      buildCapability: sessionManager.getBuildCapability?.(),
-    };
-    return handleBuild(input, buildOptions);
-  };
-}
-
-/**
- * Create a handler for the state tool that injects state snapshot capability.
- *
- * @returns A tool handler function for getting extension state
- */
-function createStateHandler(): ToolHandler {
-  return async (_, options) => {
-    const sessionManager = getSessionManager();
-    const stateOptions: StateToolOptions = {
-      ...options,
-      stateSnapshotCapability: sessionManager.getStateSnapshotCapability?.(),
-    };
-    return handleGetState(stateOptions);
-  };
-}
-
-/**
- * Create a handler for the seed contract tool that injects seeding capability.
- *
- * @returns A tool handler function for deploying a single contract
- */
-function createSeedContractHandler(): ToolHandler {
-  return async (input, options) => {
-    const sessionManager = getSessionManager();
-    const seedingOptions: SeedingToolOptions = {
-      ...options,
-      seedingCapability: sessionManager.getContractSeedingCapability?.(),
-    };
-    return handleSeedContract(input as SeedContractInput, seedingOptions);
-  };
-}
-
-/**
- * Create a handler for the seed contracts tool that injects seeding capability.
- *
- * @returns A tool handler function for deploying multiple contracts
- */
-function createSeedContractsHandler(): ToolHandler {
-  return async (input, options) => {
-    const sessionManager = getSessionManager();
-    const seedingOptions: SeedingToolOptions = {
-      ...options,
-      seedingCapability: sessionManager.getContractSeedingCapability?.(),
-    };
-    return handleSeedContracts(input as SeedContractsInput, seedingOptions);
-  };
-}
-
-/**
- * Create a handler for the get contract address tool that injects seeding capability.
- *
- * @returns A tool handler function for retrieving a deployed contract address
- */
-function createGetContractAddressHandler(): ToolHandler {
-  return async (input, options) => {
-    const sessionManager = getSessionManager();
-    const seedingOptions: SeedingToolOptions = {
-      ...options,
-      seedingCapability: sessionManager.getContractSeedingCapability?.(),
-    };
-    return handleGetContractAddress(
-      input as GetContractAddressInput,
-      seedingOptions,
-    );
-  };
-}
-
-/**
- * Create a handler for the list contracts tool that injects seeding capability.
- *
- * @returns A tool handler function for listing deployed contracts
- */
-function createListDeployedContractsHandler(): ToolHandler {
-  return async (input, options) => {
-    const sessionManager = getSessionManager();
-    const seedingOptions: SeedingToolOptions = {
-      ...options,
-      seedingCapability: sessionManager.getContractSeedingCapability?.(),
-    };
-    return handleListDeployedContracts(
-      input as ListDeployedContractsInput,
-      seedingOptions,
-    );
-  };
-}
-
-const tools: Record<string, ToolEntry> = {
-  build: {
-    schema: buildInputSchema,
-    description: `Build the extension using yarn build:test. Call before ${TOOL_PREFIX}_launch if extension is not built.`,
-    handler: createBuildHandler(),
-  },
-  launch: {
-    schema: launchInputSchema,
-    description:
-      'Launch extension in a headed Chrome browser with Playwright. Returns session info and initial state.',
-    handler: handleLaunch as ToolHandler,
-  },
-  cleanup: {
-    schema: cleanupInputSchema,
-    description:
-      'Stop the browser, Anvil, and all services. Always call when done.',
-    handler: handleCleanup as ToolHandler,
-  },
-  get_state: {
-    schema: getStateInputSchema,
-    description:
-      'Get current extension state including screen, URL, balance, network, and account address.',
-    handler: createStateHandler(),
-  },
-  navigate: {
-    schema: navigateInputSchema,
-    description: 'Navigate to a specific screen in the extension.',
-    handler: handleNavigate as ToolHandler,
-  },
-  wait_for_notification: {
-    schema: waitForNotificationInputSchema,
-    description:
-      'Wait for notification popup to appear (e.g., after dapp interaction). Sets the notification page as the active page for subsequent interactions.',
-    handler: handleWaitForNotification as ToolHandler,
-  },
-  switch_to_tab: {
-    schema: switchToTabInputSchema,
-    description: `Switch the active page to a different tracked tab. Use this to direct ${TOOL_PREFIX}_click, ${TOOL_PREFIX}_type, and other interaction tools to a specific page.`,
-    handler: handleSwitchToTab as ToolHandler,
-  },
-  close_tab: {
-    schema: closeTabInputSchema,
-    description:
-      'Close a specific tab by role or URL. Cannot close the extension home page. If closing the active tab, automatically switches to extension home.',
-    handler: handleCloseTab as ToolHandler,
-  },
-  list_testids: {
-    schema: listTestIdsInputSchema,
-    description:
-      'List all visible data-testid attributes on the current page. Use to discover available interaction targets.',
-    handler: handleListTestIds as ToolHandler,
-  },
-  accessibility_snapshot: {
-    schema: accessibilitySnapshotInputSchema,
-    description: `Get trimmed accessibility tree with deterministic refs (e1, e2, ...). Use refs with ${TOOL_PREFIX}_click/${TOOL_PREFIX}_type.`,
-    handler: handleAccessibilitySnapshot as ToolHandler,
-  },
-  describe_screen: {
-    schema: describeScreenInputSchema,
-    description:
-      'Get comprehensive screen state: extension state + testIds + accessibility snapshot. Optional screenshot.',
-    handler: handleDescribeScreen as ToolHandler,
-  },
-  screenshot: {
-    schema: screenshotInputSchema,
-    description: 'Take a screenshot and save to test-artifacts/screenshots/',
-    handler: handleScreenshot as ToolHandler,
-  },
-  click: {
-    schema: clickInputSchema,
-    description:
-      'Click an element. Specify exactly one of: a11yRef, testId, or selector.',
-    handler: handleClick as ToolHandler,
-  },
-  type: {
-    schema: typeInputSchema,
-    description:
-      'Type text into an element. Specify exactly one of: a11yRef, testId, or selector.',
-    handler: handleType as ToolHandler,
-  },
-  wait_for: {
-    schema: waitForInputSchema,
-    description:
-      'Wait for an element to become visible. Specify exactly one of: a11yRef, testId, or selector.',
-    handler: handleWaitFor as ToolHandler,
-  },
-  knowledge_last: {
-    schema: knowledgeLastInputSchema,
-    description:
-      'Get the last N step records from the knowledge store for the current session.',
-    handler: handleKnowledgeLast as ToolHandler,
-  },
-  knowledge_search: {
-    schema: knowledgeSearchInputSchema,
-    description:
-      'Search step records by tool name, screen, testId, or accessibility names. Default searches all sessions.',
-    handler: handleKnowledgeSearch as ToolHandler,
-  },
-  knowledge_summarize: {
-    schema: knowledgeSummarizeInputSchema,
-    description: 'Generate a recipe-like summary of steps taken in a session.',
-    handler: handleKnowledgeSummarize as ToolHandler,
-  },
-  knowledge_sessions: {
-    schema: knowledgeSessionsInputSchema,
-    description:
-      'List recent sessions with metadata for cross-session knowledge retrieval.',
-    handler: handleKnowledgeSessions as ToolHandler,
-  },
-  seed_contract: {
-    schema: seedContractInputSchema,
-    description:
-      'Deploy a smart contract to the local Anvil node. Available: hst (ERC20 TST token), nfts (ERC721), erc1155, piggybank, failing (reverts), multisig, entrypoint (ERC-4337), simpleAccountFactory, verifyingPaymaster.',
-    handler: createSeedContractHandler(),
-  },
-  seed_contracts: {
-    schema: seedContractsInputSchema,
-    description: 'Deploy multiple smart contracts in sequence.',
-    handler: createSeedContractsHandler(),
-  },
-  get_contract_address: {
-    schema: getContractAddressInputSchema,
-    description: 'Get the deployed address of a smart contract.',
-    handler: createGetContractAddressHandler(),
-  },
-  list_contracts: {
-    schema: listDeployedContractsInputSchema,
-    description: 'List all smart contracts deployed in this session.',
-    handler: createListDeployedContractsHandler(),
-  },
-  run_steps: {
-    schema: runStepsInputSchema,
-    description:
-      'Execute multiple tools in sequence. Reduces round trips for multi-step flows.',
-    handler: handleRunSteps as ToolHandler,
-  },
-  set_context: {
-    schema: setContextInputSchema,
-    description:
-      'Switch workflow context (e2e or prod). Cannot switch during active session.',
-    handler: handleSetContext as ToolHandler,
-  },
-  get_context: {
-    schema: getContextInputSchema,
-    description:
-      'Get current context, available capabilities, and whether context can be switched.',
-    handler: handleGetContext as ToolHandler,
-  },
-  clipboard: {
-    schema: clipboardInputSchema,
-    description:
-      "Write text to or read text from the browser clipboard. Use action='write' with text parameter to write, or action='read' to read current clipboard content. Useful for pasting SRP or other data into components that have paste handlers.",
-    handler: handleClipboard as ToolHandler,
-  },
-};
-
-/**
- * Zod v4's toJSONSchema() marks properties with defaults as required.
- * This is incorrect for MCP tool input schemas where LLM clients shouldn't
- * be required to provide values that have defaults. This function recursively
- * removes those properties from the required array.
- *
- * @param schema The JSON schema to process
- * @returns The modified schema with defaults removed from required array
- */
-function removeDefaultsFromRequired(
-  schema: Record<string, unknown>,
-): Record<string, unknown> {
-  const result = { ...schema };
-
-  if (Array.isArray(result.allOf)) {
-    result.allOf = result.allOf.map((item: Record<string, unknown>) =>
-      removeDefaultsFromRequired(item),
-    );
-  }
-
-  if (Array.isArray(result.anyOf)) {
-    result.anyOf = result.anyOf.map((item: Record<string, unknown>) =>
-      removeDefaultsFromRequired(item),
-    );
-  }
-
-  if (Array.isArray(result.oneOf)) {
-    result.oneOf = result.oneOf.map((item: Record<string, unknown>) =>
-      removeDefaultsFromRequired(item),
-    );
-  }
-
-  if (
-    result.properties &&
-    typeof result.properties === 'object' &&
-    result.properties !== null
-  ) {
-    const newProperties: Record<string, unknown> = {};
-    for (const [key, value] of Object.entries(
-      result.properties as Record<string, unknown>,
-    )) {
-      if (value && typeof value === 'object') {
-        newProperties[key] = removeDefaultsFromRequired(
-          value as Record<string, unknown>,
-        );
-      } else {
-        newProperties[key] = value;
-      }
-    }
-    result.properties = newProperties;
-  }
-
-  if (
-    Array.isArray(result.required) &&
-    result.properties &&
-    typeof result.properties === 'object'
-  ) {
-    const properties = result.properties as Record<
-      string,
-      Record<string, unknown>
-    >;
-    result.required = result.required.filter((propName: string) => {
-      const prop = properties[propName];
-      return prop && !('default' in prop);
-    });
-
-    if ((result.required as string[]).length === 0) {
-      delete result.required;
-    }
-  }
-
-  return result;
-}
-
-/**
- * MCP protocol doesn't support allOf/oneOf/anyOf at the top level of input schemas.
- * This flattens allOf into a single merged object schema.
- *
- * @param schema The JSON schema to flatten
- * @returns The flattened schema with allOf merged into properties
- */
-function flattenTopLevelAllOf(
-  schema: Record<string, unknown>,
-): Record<string, unknown> {
-  if (!Array.isArray(schema.allOf)) {
-    return schema;
-  }
-
-  const mergedProperties: Record<string, unknown> = {};
-  const mergedRequired: string[] = [];
-
-  for (const subSchema of schema.allOf as Record<string, unknown>[]) {
-    if (subSchema.properties && typeof subSchema.properties === 'object') {
-      Object.assign(mergedProperties, subSchema.properties);
-    }
-    if (Array.isArray(subSchema.required)) {
-      mergedRequired.push(...subSchema.required);
-    }
-  }
-
-  const result: Record<string, unknown> = {
-    type: 'object',
-    properties: mergedProperties,
-    additionalProperties: false,
-  };
-
-  if (mergedRequired.length > 0) {
-    result.required = [...new Set(mergedRequired)];
-  }
-
-  return result;
-}
-
-/**
- * Convert a Zod schema to a JSON schema suitable for MCP tool definitions.
- *
- * @param schema The Zod schema to convert
- * @returns The converted JSON schema with defaults removed and allOf flattened
- */
-function zodSchemaToJsonSchema(schema: ZodSchema): Record<string, unknown> {
-  const jsonSchema = schema.toJSONSchema();
-  const { $schema: _, ...rest } = jsonSchema;
-
-  const flattened = flattenTopLevelAllOf(rest);
-
-  if (flattened.type === 'object' && !('additionalProperties' in flattened)) {
-    flattened.additionalProperties = false;
-  }
-
-  return removeDefaultsFromRequired(flattened);
-}
-
-/**
- * Get all tool definitions with their schemas and descriptions.
- *
- * @returns Array of tool definitions for all available MCP tools
- */
-export function getToolDefinitions(): ToolDefinition[] {
-  return Object.entries(tools).map(([baseName, tool]) => ({
-    name: `${TOOL_PREFIX}_${baseName}`,
-    description: tool.description,
-    inputSchema: zodSchemaToJsonSchema(tool.schema),
-  }));
-}
-
-/**
- * Get the handler function for a specific tool by name.
- *
- * @param name The tool name (with or without mm_ prefix)
- * @returns The tool handler function or undefined if tool not found
- */
-export function getToolHandler(name: string): ToolHandler | undefined {
-  const prefixedMatch = Object.entries(tools).find(
-    ([baseName]) => `${TOOL_PREFIX}_${baseName}` === name,
-  );
-  if (prefixedMatch) {
-    return prefixedMatch[1].handler;
-  }
-
-  const tool = tools[name];
-  return tool?.handler;
-}
-
-/**
- * Check if a tool handler exists for the given tool name.
- *
- * @param name The tool name to check
- * @returns True if a handler exists for the tool, false otherwise
- */
-export function hasToolHandler(name: string): boolean {
-  return getToolHandler(name) !== undefined;
-}
-
-/**
- * Extract the base name from a tool name by removing the mm_ prefix.
- *
- * @param toolName The tool name (with or without mm_ prefix)
- * @returns The base name without the prefix
- */
-export function extractBaseName(toolName: string): string {
-  const prefixWithUnderscore = `${TOOL_PREFIX}_`;
-  if (toolName.startsWith(prefixWithUnderscore)) {
-    return toolName.slice(prefixWithUnderscore.length);
-  }
-  return toolName;
-}
-
-/**
- * Validate tool input against the tool's schema and return parsed data.
- *
- * @param toolName The tool name to validate input for
- * @param input The input data to validate
- * @returns The validated and parsed input data
- */
-export function validateToolInput<Type = unknown>(
-  toolName: string,
-  input: unknown,
-): Type {
-  const baseName = extractBaseName(toolName);
-  const tool = tools[baseName];
-
-  if (!tool) {
-    throw new Error(`Unknown tool: ${toolName}`);
-  }
-
-  return tool.schema.parse(input ?? {}) as Type;
-}
-
-/**
- * Safely validate tool input without throwing errors.
- *
- * @param toolName The tool name to validate input for
- * @param input The input data to validate
- * @returns Object with success flag and either parsed data or error message
- */
-export function safeValidateToolInput(
-  toolName: string,
-  input: unknown,
-):
-  | {
-      /**
-       * Indicates validation succeeded
-       */
-      success: true;
-      /**
-       * The validated and parsed input data
-       */
-      data: unknown;
-    }
-  | {
-      /**
-       * Indicates validation failed
-       */
-      success: false;
-      /**
-       * Error message describing validation failure
-       */
-      error: string;
-    } {
-  const baseName = extractBaseName(toolName);
-  const tool = tools[baseName];
-
-  if (!tool) {
-    return { success: false, error: `Unknown tool: ${toolName}` };
-  }
-
-  const result = tool.schema.safeParse(input ?? {});
-  if (!result.success) {
-    const errorMessage = result.error.issues
-      .map((issue) => `${issue.path.join('.')}: ${issue.message}`)
-      .join('; ');
-    return { success: false, error: errorMessage };
-  }
-
-  return { success: true, data: result.data };
-}
-
-/**
- * Get all available tool base names (without mm_ prefix).
- *
- * @returns Array of tool base names
- */
-export function getToolNames(): string[] {
-  return Object.keys(tools);
-}
-
-/**
- * Get all available tool names with mm_ prefix.
- *
- * @returns Array of prefixed tool names
- */
-export function getPrefixedToolNames(): string[] {
-  return Object.keys(tools).map((name) => `${TOOL_PREFIX}_${name}`);
-}
-
-/**
- * Build a record mapping prefixed tool names to their handler functions.
- *
- * @returns Record of tool name to handler function mappings
- */
-export function buildToolHandlersRecord(): Record<string, ToolHandler> {
-  const handlers: Record<string, ToolHandler> = {};
-  for (const [baseName, tool] of Object.entries(tools)) {
-    handlers[`${TOOL_PREFIX}_${baseName}`] = tool.handler;
-  }
-  return handlers;
-}
-
-export type { ToolEntry };
diff --git a/src/mcp-server/tools/discovery-tools.ts b/src/mcp-server/tools/discovery-tools.ts
deleted file mode 100644
index be5dae6..0000000
--- a/src/mcp-server/tools/discovery-tools.ts
+++ /dev/null
@@ -1,223 +0,0 @@
-import {
-  DEFAULT_TESTID_LIMIT,
-  OBSERVATION_TESTID_LIMIT,
-} from '../constants.js';
-import { collectTestIds, collectTrimmedA11ySnapshot } from '../discovery.js';
-import {
-  knowledgeStore,
-  createDefaultObservation,
-} from '../knowledge-store.js';
-import { getSessionManager } from '../session-manager.js';
-import { classifyDiscoveryError } from './error-classification.js';
-import { runTool } from './run-tool.js';
-import type {
-  ListTestIdsInput,
-  ListTestIdsResult,
-  AccessibilitySnapshotInput,
-  AccessibilitySnapshotResult,
-  DescribeScreenInput,
-  DescribeScreenResult,
-  McpResponse,
-  PriorKnowledgeContext,
-  HandlerOptions,
-} from '../types';
-
-/**
- * Handle listing all visible data-testid attributes on the current page.
- *
- * @param input The input containing optional limit for number of items
- * @param options Optional handler options for the operation
- * @returns Promise resolving to list of visible test IDs with metadata
- */
-export async function handleListTestIds(
-  input: ListTestIdsInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<ListTestIdsResult>> {
-  const limit = input.limit ?? DEFAULT_TESTID_LIMIT;
-
-  return runTool<ListTestIdsInput, ListTestIdsResult>({
-    toolName: 'mm_list_testids',
-    input,
-    options,
-    observationPolicy: 'custom',
-
-    /**
-     * Execute the list test IDs operation.
-     *
-     * @param context The workflow context containing the page
-     * @returns The result with test ID items and observation data
-     */
-    execute: async (context) => {
-      const items = await collectTestIds(context.page, limit);
-      const state = await getSessionManager().getExtensionState();
-      const { nodes, refMap } = await collectTrimmedA11ySnapshot(context.page);
-
-      getSessionManager().setRefMap(refMap);
-
-      return {
-        result: { items },
-        observation: createDefaultObservation(state, items, nodes),
-      };
-    },
-
-    classifyError: classifyDiscoveryError,
-
-    /**
-     * Sanitizes input for recording by extracting only the limit parameter.
-     *
-     * @returns Sanitized input with limit value
-     */
-    sanitizeInputForRecording: () => ({ limit }),
-  });
-}
-
-/**
- * Handle getting a trimmed accessibility tree with deterministic refs.
- *
- * @param input The input containing optional root selector for scoping
- * @param options Optional handler options for the operation
- * @returns Promise resolving to accessibility nodes with deterministic refs
- */
-export async function handleAccessibilitySnapshot(
-  input: AccessibilitySnapshotInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<AccessibilitySnapshotResult>> {
-  return runTool<AccessibilitySnapshotInput, AccessibilitySnapshotResult>({
-    toolName: 'mm_accessibility_snapshot',
-    input,
-    options,
-    observationPolicy: 'custom',
-
-    /**
-     * Execute the accessibility snapshot operation.
-     *
-     * @param context The workflow context containing the page
-     * @returns The result with accessibility nodes and observation data
-     */
-    execute: async (context) => {
-      const { nodes, refMap } = await collectTrimmedA11ySnapshot(
-        context.page,
-        input.rootSelector,
-      );
-
-      getSessionManager().setRefMap(refMap);
-
-      const state = await getSessionManager().getExtensionState();
-      const testIds = await collectTestIds(
-        context.page,
-        OBSERVATION_TESTID_LIMIT,
-      );
-
-      return {
-        result: { nodes },
-        observation: createDefaultObservation(state, testIds, nodes),
-      };
-    },
-
-    classifyError: classifyDiscoveryError,
-
-    /**
-     * Sanitizes input for recording by extracting only the root selector.
-     *
-     * @returns Sanitized input with rootSelector value
-     */
-    sanitizeInputForRecording: () => ({ rootSelector: input.rootSelector }),
-  });
-}
-
-/**
- * Handle getting comprehensive screen state with state, testIds, a11y, and optional screenshot.
- *
- * @param input The input containing screenshot options and selector
- * @param options Optional handler options for the operation
- * @returns Promise resolving to comprehensive screen description with prior knowledge
- */
-export async function handleDescribeScreen(
-  input: DescribeScreenInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<DescribeScreenResult>> {
-  return runTool<DescribeScreenInput, DescribeScreenResult>({
-    toolName: 'mm_describe_screen',
-    input,
-    options,
-    observationPolicy: 'custom',
-
-    /**
-     * Execute the describe screen operation.
-     *
-     * @param context The workflow context containing the page
-     * @returns The result with state, testIds, a11y, screenshot, and prior knowledge
-     */
-    execute: async (context) => {
-      const sessionManager = getSessionManager();
-      const { page } = context;
-
-      const state = await sessionManager.getExtensionState();
-      const testIds = await collectTestIds(page, DEFAULT_TESTID_LIMIT);
-      const { nodes, refMap } = await collectTrimmedA11ySnapshot(page);
-
-      sessionManager.setRefMap(refMap);
-
-      let screenshot: DescribeScreenResult['screenshot'] = null;
-
-      if (input.includeScreenshot) {
-        const screenshotName = input.screenshotName ?? 'describe-screen';
-        const result = await sessionManager.screenshot({
-          name: screenshotName,
-          fullPage: true,
-        });
-
-        screenshot = {
-          path: result.path,
-          width: result.width,
-          height: result.height,
-          base64: input.includeScreenshotBase64 ? result.base64 : null,
-        };
-      }
-
-      const sessionMetadata = sessionManager.getSessionMetadata();
-      const priorKnowledgeContext: PriorKnowledgeContext = {
-        currentScreen: state.currentScreen,
-        currentUrl: state.currentUrl,
-        visibleTestIds: testIds,
-        a11yNodes: nodes,
-        currentSessionFlowTags: sessionMetadata?.flowTags,
-      };
-
-      const priorKnowledge = await knowledgeStore.generatePriorKnowledge(
-        priorKnowledgeContext,
-        context.sessionId,
-      );
-
-      const observation = createDefaultObservation(
-        state,
-        testIds,
-        nodes,
-        priorKnowledge,
-      );
-
-      return {
-        result: {
-          state,
-          testIds: { items: testIds },
-          a11y: { nodes },
-          screenshot,
-          priorKnowledge,
-        },
-        observation,
-      };
-    },
-
-    classifyError: classifyDiscoveryError,
-
-    /**
-     * Sanitizes input for recording by extracting screenshot-related parameters.
-     *
-     * @returns Sanitized input with screenshot options
-     */
-    sanitizeInputForRecording: () => ({
-      includeScreenshot: input.includeScreenshot,
-      screenshotName: input.screenshotName,
-    }),
-  });
-}
diff --git a/src/mcp-server/tools/helpers.test.ts b/src/mcp-server/tools/helpers.test.ts
deleted file mode 100644
index 64e463d..0000000
--- a/src/mcp-server/tools/helpers.test.ts
+++ /dev/null
@@ -1,745 +0,0 @@
-/**
- * Unit tests for tool helper functions.
- *
- * Tests session validation, observation collection, error handling, and step recording.
- */
-
-import type { Page } from '@playwright/test';
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import {
-  requireActiveSession,
-  collectObservation,
-  withActiveSession,
-  recordToolStep,
-  collectObservationAndRecord,
-  handleToolError,
-} from './helpers';
-import type { ObservationLevel, RecordStepParams } from './helpers';
-import * as discoveryModule from '../discovery.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils';
-import { ErrorCodes } from '../types';
-
-describe('helpers', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager();
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('requireActiveSession', () => {
-    describe('when no active session exists', () => {
-      it('returns error response with NO_ACTIVE_SESSION code', () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-        const startTime = Date.now();
-
-        // Act
-        const result = requireActiveSession(startTime);
-
-        // Assert
-        expect(result).toBeDefined();
-        expect(result?.ok).toBe(false);
-        if (result && !result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-          expect(result.error.message).toBe(
-            'No active session. Call launch first.',
-          );
-        }
-      });
-
-      it('includes timestamp in error response', () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-        const startTime = Date.now();
-
-        // Act
-        const result = requireActiveSession(startTime);
-
-        // Assert
-        if (result && !result.ok) {
-          expect(result.meta.timestamp).toBeDefined();
-        }
-      });
-    });
-
-    describe('when active session exists', () => {
-      it('returns undefined', () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true);
-        const startTime = Date.now();
-
-        // Act
-        const result = requireActiveSession(startTime);
-
-        // Assert
-        expect(result).toBeUndefined();
-      });
-    });
-  });
-
-  describe('collectObservation', () => {
-    describe('when level is "none"', () => {
-      it('returns default observation with empty arrays', async () => {
-        // Arrange
-        const mockPage = {} as Page;
-        const level: ObservationLevel = 'none';
-        vi.spyOn(
-          knowledgeStoreModule,
-          'createDefaultObservation',
-        ).mockReturnValue({
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-
-        // Act
-        const result = await collectObservation(mockPage, level);
-
-        // Assert
-        expect(result.testIds).toStrictEqual([]);
-        expect(result.a11y.nodes).toStrictEqual([]);
-      });
-
-      it('does not query extension state', async () => {
-        // Arrange
-        const mockPage = {} as Page;
-        const level: ObservationLevel = 'none';
-        vi.spyOn(
-          knowledgeStoreModule,
-          'createDefaultObservation',
-        ).mockReturnValue({
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-
-        // Act
-        await collectObservation(mockPage, level);
-
-        // Assert
-        expect(mockSessionManager.getExtensionState).not.toHaveBeenCalled();
-      });
-    });
-
-    describe('when level is "minimal"', () => {
-      it('returns observation with state only', async () => {
-        // Arrange
-        const mockPage = {} as Page;
-        const level: ObservationLevel = 'minimal';
-        const mockState = {
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-123/home.html',
-          extensionId: 'ext-123',
-          isUnlocked: true,
-          currentScreen: 'home' as const,
-          accountAddress: '0x123',
-          networkName: 'Ethereum Mainnet',
-          chainId: 1,
-          balance: '1.5 ETH',
-        };
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue(
-          mockState,
-        );
-        vi.spyOn(
-          knowledgeStoreModule,
-          'createDefaultObservation',
-        ).mockReturnValue({
-          state: mockState,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-
-        // Act
-        const result = await collectObservation(mockPage, level);
-
-        // Assert
-        expect(result.state).toStrictEqual(mockState);
-        expect(result.testIds).toStrictEqual([]);
-        expect(result.a11y.nodes).toStrictEqual([]);
-      });
-
-      it('uses preset state when provided', async () => {
-        // Arrange
-        const mockPage = {} as Page;
-        const level: ObservationLevel = 'minimal';
-        const presetState = {
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-456/home.html',
-          extensionId: 'ext-456',
-          isUnlocked: false,
-          currentScreen: 'unlock' as const,
-          accountAddress: null,
-          networkName: null,
-          chainId: null,
-          balance: null,
-        };
-        vi.spyOn(
-          knowledgeStoreModule,
-          'createDefaultObservation',
-        ).mockReturnValue({
-          state: presetState,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-
-        // Act
-        const result = await collectObservation(mockPage, level, presetState);
-
-        // Assert
-        expect(mockSessionManager.getExtensionState).not.toHaveBeenCalled();
-        expect(result.state).toStrictEqual(presetState);
-      });
-    });
-
-    describe('when level is "full"', () => {
-      it('collects state, testIds, and a11y tree', async () => {
-        // Arrange
-        const mockPage = { locator: vi.fn() } as unknown as Page;
-        const level: ObservationLevel = 'full';
-        const mockState = {
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-123/home.html',
-          extensionId: 'ext-123',
-          isUnlocked: true,
-          currentScreen: 'home' as const,
-          accountAddress: '0x123',
-          networkName: 'Ethereum Mainnet',
-          chainId: 1,
-          balance: '1.5 ETH',
-        };
-        const mockTestIds = [
-          { testId: 'send-button', tag: 'button', text: 'Send', visible: true },
-        ];
-        const mockA11yNodes = [
-          { ref: 'e1', role: 'button', name: 'Send', path: [] },
-        ];
-        const mockRefMap = new Map([['e1', '[data-testid="send-button"]']]);
-
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue(
-          mockState,
-        );
-        vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue(
-          mockTestIds,
-        );
-        vi.spyOn(
-          discoveryModule,
-          'collectTrimmedA11ySnapshot',
-        ).mockResolvedValue({
-          nodes: mockA11yNodes,
-          refMap: mockRefMap,
-        });
-        vi.spyOn(
-          knowledgeStoreModule,
-          'createDefaultObservation',
-        ).mockReturnValue({
-          state: mockState,
-          testIds: mockTestIds,
-          a11y: { nodes: mockA11yNodes },
-        });
-
-        // Act
-        const result = await collectObservation(mockPage, level);
-
-        // Assert
-        expect(result.state).toStrictEqual(mockState);
-        expect(result.testIds).toStrictEqual(mockTestIds);
-        expect(result.a11y.nodes).toStrictEqual(mockA11yNodes);
-        expect(mockSessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap);
-      });
-
-      it('returns default observation when page is undefined', async () => {
-        // Arrange
-        const level: ObservationLevel = 'full';
-        const mockState = {
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-123/home.html',
-          extensionId: 'ext-123',
-          isUnlocked: true,
-          currentScreen: 'home' as const,
-          accountAddress: null,
-          networkName: null,
-          chainId: null,
-          balance: null,
-        };
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue(
-          mockState,
-        );
-        vi.spyOn(
-          knowledgeStoreModule,
-          'createDefaultObservation',
-        ).mockReturnValue({
-          state: mockState,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-
-        // Act
-        const result = await collectObservation(undefined, level);
-
-        // Assert
-        expect(result.testIds).toStrictEqual([]);
-        expect(result.a11y.nodes).toStrictEqual([]);
-      });
-
-      it('returns default observation when discovery throws error', async () => {
-        // Arrange
-        const mockPage = { locator: vi.fn() } as unknown as Page;
-        const level: ObservationLevel = 'full';
-        const mockState = {
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-123/home.html',
-          extensionId: 'ext-123',
-          isUnlocked: true,
-          currentScreen: 'home' as const,
-          accountAddress: null,
-          networkName: null,
-          chainId: null,
-          balance: null,
-        };
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue(
-          mockState,
-        );
-        vi.spyOn(discoveryModule, 'collectTestIds').mockRejectedValue(
-          new Error('Page closed'),
-        );
-        vi.spyOn(
-          knowledgeStoreModule,
-          'createDefaultObservation',
-        ).mockReturnValue({
-          state: mockState,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-
-        // Act
-        const result = await collectObservation(mockPage, level);
-
-        // Assert
-        expect(result.testIds).toStrictEqual([]);
-        expect(result.a11y.nodes).toStrictEqual([]);
-      });
-    });
-  });
-
-  describe('withActiveSession', () => {
-    describe('when no active session exists', () => {
-      it('returns error response without calling handler', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-        const handler = vi.fn();
-        const wrappedHandler = withActiveSession(handler);
-
-        // Act
-        const result = await wrappedHandler({ test: 'input' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-        expect(handler).not.toHaveBeenCalled();
-      });
-    });
-
-    describe('when session ID is missing', () => {
-      it('returns error response', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true);
-        vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined);
-        const handler = vi.fn();
-        const wrappedHandler = withActiveSession(handler);
-
-        // Act
-        const result = await wrappedHandler({ test: 'input' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-          expect(result.error.message).toBe('Session ID not found');
-        }
-        expect(handler).not.toHaveBeenCalled();
-      });
-    });
-
-    describe('when active session exists', () => {
-      it('calls handler with input, context, and startTime', async () => {
-        // Arrange
-        const mockPage = { url: () => 'test-url' } as unknown as Page;
-        const mockRefMap = new Map([['e1', '[data-testid="test"]']]);
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true);
-        vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(
-          'session-123',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(mockRefMap);
-
-        const handler = vi.fn().mockResolvedValue({
-          ok: true,
-          ts: Date.now(),
-          durationMs: 100,
-          result: { success: true },
-        });
-        const wrappedHandler = withActiveSession(handler);
-        const input = { test: 'input' };
-
-        // Act
-        const result = await wrappedHandler(input);
-
-        // Assert
-        expect(handler).toHaveBeenCalledWith(
-          input,
-          {
-            sessionId: 'session-123',
-            page: mockPage,
-            refMap: mockRefMap,
-          },
-          expect.any(Number),
-        );
-        expect(result.ok).toBe(true);
-      });
-
-      it('passes through handler result', async () => {
-        // Arrange
-        const mockPage = { url: () => 'test-url' } as unknown as Page;
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true);
-        vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(
-          'session-123',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        const expectedResult = {
-          ok: true,
-          ts: Date.now(),
-          durationMs: 100,
-          result: { data: 'test-data' },
-        };
-        const handler = vi.fn().mockResolvedValue(expectedResult);
-        const wrappedHandler = withActiveSession(handler);
-
-        // Act
-        const result = await wrappedHandler({ test: 'input' });
-
-        // Assert
-        expect(result).toStrictEqual(expectedResult);
-      });
-    });
-  });
-
-  describe('recordToolStep', () => {
-    it('records step with all parameters', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(
-        'session-123',
-      );
-      const mockRecordStep = vi.fn().mockResolvedValue(undefined);
-      vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-        recordStep: mockRecordStep,
-      } as any);
-
-      const params: RecordStepParams = {
-        toolName: 'mm_click',
-        input: { testId: 'send-button' },
-        startTime: Date.now() - 100,
-        observation: {
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        },
-        target: { testId: 'send-button' },
-        screenshotPath: '/path/to/screenshot.png',
-        screenshotDimensions: { width: 1280, height: 720 },
-      };
-
-      // Act
-      await recordToolStep(params);
-
-      // Assert
-      expect(mockRecordStep).toHaveBeenCalledWith({
-        sessionId: 'session-123',
-        toolName: 'mm_click',
-        input: { testId: 'send-button' },
-        target: { testId: 'send-button' },
-        outcome: { ok: true },
-        observation: params.observation,
-        durationMs: expect.any(Number),
-        screenshotPath: '/path/to/screenshot.png',
-        screenshotDimensions: { width: 1280, height: 720 },
-      });
-    });
-
-    it('uses empty string when session ID is undefined', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined);
-      const mockRecordStep = vi.fn().mockResolvedValue(undefined);
-      vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-        recordStep: mockRecordStep,
-      } as any);
-
-      const params: RecordStepParams = {
-        toolName: 'mm_click',
-        input: { testId: 'send-button' },
-        startTime: Date.now(),
-        observation: {
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        },
-      };
-
-      // Act
-      await recordToolStep(params);
-
-      // Assert
-      expect(mockRecordStep).toHaveBeenCalledWith(
-        expect.objectContaining({
-          sessionId: '',
-        }),
-      );
-    });
-  });
-
-  describe('collectObservationAndRecord', () => {
-    it('collects observation and records step', async () => {
-      // Arrange
-      const mockPage = { locator: vi.fn() } as unknown as Page;
-      const mockObservation = {
-        state: {} as any,
-        testIds: [
-          { testId: 'send-button', tag: 'button', text: 'Send', visible: true },
-        ],
-        a11y: {
-          nodes: [{ ref: 'e1', role: 'button', name: 'Send', path: [] }],
-        },
-      };
-      const mockRecordStep = vi.fn().mockResolvedValue(undefined);
-
-      vi.spyOn(
-        knowledgeStoreModule,
-        'createDefaultObservation',
-      ).mockReturnValue(mockObservation);
-      vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue(
-        mockObservation.testIds,
-      );
-      vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
-        {
-          nodes: mockObservation.a11y.nodes,
-          refMap: new Map(),
-        },
-      );
-      vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-        recordStep: mockRecordStep,
-      } as any);
-      vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(
-        'session-123',
-      );
-
-      // Act
-      const result = await collectObservationAndRecord(
-        mockPage,
-        'mm_click',
-        { testId: 'send-button' },
-        Date.now(),
-        {
-          target: { testId: 'send-button' },
-          screenshotPath: '/path/to/screenshot.png',
-          screenshotDimensions: { width: 1280, height: 720 },
-        },
-      );
-
-      // Assert
-      expect(result).toStrictEqual(mockObservation);
-      expect(mockRecordStep).toHaveBeenCalledWith(
-        expect.objectContaining({
-          toolName: 'mm_click',
-          input: { testId: 'send-button' },
-          observation: mockObservation,
-          target: { testId: 'send-button' },
-          screenshotPath: '/path/to/screenshot.png',
-          screenshotDimensions: { width: 1280, height: 720 },
-        }),
-      );
-    });
-
-    it('works without optional parameters', async () => {
-      // Arrange
-      const mockPage = { locator: vi.fn() } as unknown as Page;
-      const mockObservation = {
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      };
-      const mockRecordStep = vi.fn().mockResolvedValue(undefined);
-
-      vi.spyOn(
-        knowledgeStoreModule,
-        'createDefaultObservation',
-      ).mockReturnValue(mockObservation);
-      vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
-      vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
-        {
-          nodes: [],
-          refMap: new Map(),
-        },
-      );
-      vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-        recordStep: mockRecordStep,
-      } as any);
-      vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(
-        'session-123',
-      );
-
-      // Act
-      const result = await collectObservationAndRecord(
-        mockPage,
-        'mm_get_state',
-        {},
-        Date.now(),
-      );
-
-      // Assert
-      expect(result).toStrictEqual(mockObservation);
-      expect(mockRecordStep).toHaveBeenCalledWith(
-        expect.objectContaining({
-          toolName: 'mm_get_state',
-          input: {},
-          observation: mockObservation,
-          target: undefined,
-          screenshotPath: undefined,
-          screenshotDimensions: undefined,
-        }),
-      );
-    });
-  });
-
-  describe('handleToolError', () => {
-    describe('when error contains "Unknown a11yRef"', () => {
-      it('returns TARGET_NOT_FOUND error code', () => {
-        // Arrange
-        const error = new Error('Unknown a11yRef: e99');
-        const startTime = Date.now();
-
-        // Act
-        const result = handleToolError(
-          error,
-          ErrorCodes.MM_CLICK_FAILED,
-          'Click failed',
-          { a11yRef: 'e99' },
-          'session-123',
-          startTime,
-        );
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_TARGET_NOT_FOUND);
-          expect(result.error.message).toContain('Unknown a11yRef: e99');
-        }
-      });
-    });
-
-    describe('when error contains "not found"', () => {
-      it('returns TARGET_NOT_FOUND error code', () => {
-        // Arrange
-        const error = new Error('Element not found');
-        const startTime = Date.now();
-
-        // Act
-        const result = handleToolError(
-          error,
-          ErrorCodes.MM_TYPE_FAILED,
-          'Type failed',
-          { testId: 'missing-input' },
-          'session-123',
-          startTime,
-        );
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_TARGET_NOT_FOUND);
-          expect(result.error.message).toContain('not found');
-        }
-      });
-    });
-
-    describe('when error does not match special patterns', () => {
-      it('returns default error code with combined message', () => {
-        // Arrange
-        const error = new Error('Timeout exceeded');
-        const startTime = Date.now();
-
-        // Act
-        const result = handleToolError(
-          error,
-          ErrorCodes.MM_CLICK_FAILED,
-          'Click failed',
-          { testId: 'slow-button' },
-          'session-123',
-          startTime,
-        );
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_CLICK_FAILED);
-          expect(result.error.message).toBe('Click failed: Timeout exceeded');
-        }
-      });
-
-      it('includes input in error details', () => {
-        // Arrange
-        const error = new Error('Generic error');
-        const input = { testId: 'test-button', timeoutMs: 5000 };
-        const startTime = Date.now();
-
-        // Act
-        const result = handleToolError(
-          error,
-          ErrorCodes.MM_CLICK_FAILED,
-          'Click failed',
-          input,
-          'session-123',
-          startTime,
-        );
-
-        // Assert
-        if (!result.ok) {
-          expect(result.error.details).toStrictEqual({ input });
-        }
-      });
-
-      it('includes session ID in response', () => {
-        // Arrange
-        const error = new Error('Generic error');
-        const startTime = Date.now();
-
-        // Act
-        const result = handleToolError(
-          error,
-          ErrorCodes.MM_CLICK_FAILED,
-          'Click failed',
-          {},
-          'session-456',
-          startTime,
-        );
-
-        // Assert
-        if (!result.ok) {
-          expect(result.meta.sessionId).toBe('session-456');
-        }
-      });
-    });
-  });
-});
diff --git a/src/mcp-server/tools/helpers.ts b/src/mcp-server/tools/helpers.ts
deleted file mode 100644
index cf94f48..0000000
--- a/src/mcp-server/tools/helpers.ts
+++ /dev/null
@@ -1,313 +0,0 @@
-import type { Page } from '@playwright/test';
-
-import type { ExtensionState } from '../../capabilities/types.js';
-import { OBSERVATION_TESTID_LIMIT } from '../constants.js';
-import { collectTestIds, collectTrimmedA11ySnapshot } from '../discovery.js';
-import {
-  knowledgeStore,
-  createDefaultObservation,
-} from '../knowledge-store.js';
-import { getSessionManager } from '../session-manager.js';
-import type {
-  McpResponse,
-  ErrorCode,
-  TestIdItem,
-  StepRecordObservation,
-} from '../types';
-import { ErrorCodes } from '../types';
-import { createErrorResponse, extractErrorMessage, debugWarn } from '../utils';
-
-/**
- * Level of detail to collect for observation data.
- * - "full": Collect state, testIds, and a11y tree
- * - "minimal": Collect state only (no testIds or a11y)
- * - "none": Return empty observation
- */
-export type ObservationLevel = 'full' | 'minimal' | 'none';
-
-/**
- * Parameters for recording a tool step in the knowledge store.
- */
-export type RecordStepParams = {
-  /**
-   * Name of the tool that was executed
-   */
-  toolName: string;
-  /**
-   * Input parameters passed to the tool
-   */
-  input: Record<string, unknown>;
-  /**
-   * Timestamp when the tool execution started
-   */
-  startTime: number;
-  /**
-   * Observation data collected after tool execution
-   */
-  observation: StepRecordObservation;
-  /**
-   * Target element information (selector, testId, etc.)
-   */
-  target?: Record<string, string>;
-  /**
-   * Path to screenshot file if captured
-   */
-  screenshotPath?: string;
-  /**
-   * Screenshot dimensions if captured
-   */
-  screenshotDimensions?: {
-    /**
-     * Screenshot width in pixels
-     */
-    width: number;
-    /**
-     * Screenshot height in pixels
-     */
-    height: number;
-  };
-};
-
-/**
- * Context information for an active session.
- */
-export type ActiveSessionContext = {
-  /**
-   * Unique session identifier
-   */
-  sessionId: string;
-  /**
-   * Current active page instance
-   */
-  page: Page;
-  /**
-   * Map of accessibility references to selectors
-   */
-  refMap: Map<string, string>;
-};
-
-/**
- * Check if an active session exists and return error if not.
- *
- * @param startTime - Timestamp when the operation started
- * @returns Error response if no active session, undefined otherwise
- */
-export function requireActiveSession<Result>(
-  startTime: number,
-): McpResponse<Result> | undefined {
-  const sessionManager = getSessionManager();
-  if (!sessionManager.hasActiveSession()) {
-    return createErrorResponse(
-      ErrorCodes.MM_NO_ACTIVE_SESSION,
-      'No active session. Call launch first.',
-      undefined,
-      undefined,
-      startTime,
-    ) as McpResponse<Result>;
-  }
-  return undefined;
-}
-
-/**
- * Collect observation data from the current page state.
- *
- * @param page - The page to collect observation from
- * @param level - Level of detail to collect (full, minimal, or none)
- * @param presetState - Optional pre-fetched extension state to use instead of querying
- * @returns Observation data with state, testIds, and accessibility tree
- */
-export async function collectObservation(
-  page: Page | undefined,
-  level: ObservationLevel,
-  presetState?: ExtensionState,
-): Promise<StepRecordObservation> {
-  const sessionManager = getSessionManager();
-
-  if (level === 'none') {
-    return createDefaultObservation({} as ExtensionState, [], []);
-  }
-
-  const state = presetState ?? (await sessionManager.getExtensionState());
-
-  if (level === 'minimal') {
-    return createDefaultObservation(state, [], []);
-  }
-
-  if (!page) {
-    debugWarn('collectObservation', 'Page not provided for full observation');
-    return createDefaultObservation(state, [], []);
-  }
-
-  try {
-    const testIds: TestIdItem[] = await collectTestIds(
-      page,
-      OBSERVATION_TESTID_LIMIT,
-    );
-    const { nodes, refMap } = await collectTrimmedA11ySnapshot(page);
-    sessionManager.setRefMap(refMap);
-    return createDefaultObservation(state, testIds, nodes);
-  } catch (error) {
-    debugWarn('collectObservation', error);
-    return createDefaultObservation(state, [], []);
-  }
-}
-
-/**
- * Wrapper that ensures an active session exists before executing a handler.
- *
- * @param handler - Function to execute with active session context
- * @returns Wrapped function that validates session before calling handler
- */
-export function withActiveSession<TInput, TResult>(
-  handler: (
-    input: TInput,
-    ctx: ActiveSessionContext,
-    startTime: number,
-  ) => Promise<McpResponse<TResult>>,
-): (input: TInput) => Promise<McpResponse<TResult>> {
-  return async (input: TInput): Promise<McpResponse<TResult>> => {
-    const startTime = Date.now();
-    const sessionManager = getSessionManager();
-
-    const sessionError = requireActiveSession<TResult>(startTime);
-    if (sessionError) {
-      return sessionError;
-    }
-
-    const sessionId = sessionManager.getSessionId();
-    if (!sessionId) {
-      return createErrorResponse(
-        ErrorCodes.MM_NO_ACTIVE_SESSION,
-        'Session ID not found',
-        undefined,
-        undefined,
-        startTime,
-      ) as McpResponse<TResult>;
-    }
-    const page = sessionManager.getPage();
-    const refMap = sessionManager.getRefMap();
-
-    return handler(input, { sessionId, page, refMap }, startTime);
-  };
-}
-
-/**
- * Record a tool execution step in the knowledge store.
- *
- * @param params - Parameters containing tool name, input, observation, and metadata
- */
-export async function recordToolStep(params: RecordStepParams): Promise<void> {
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId() ?? '';
-
-  await knowledgeStore.recordStep({
-    sessionId,
-    toolName: params.toolName,
-    input: params.input,
-    target: params.target,
-    outcome: { ok: true },
-    observation: params.observation,
-    durationMs: Date.now() - params.startTime,
-    screenshotPath: params.screenshotPath,
-    screenshotDimensions: params.screenshotDimensions,
-  });
-}
-
-/**
- * Collect observation data and record the tool step in the knowledge store.
- *
- * @param page - The page to collect observation from
- * @param toolName - Name of the tool that was executed
- * @param input - Input parameters passed to the tool
- * @param startTime - Timestamp when the tool execution started
- * @param options - Optional metadata for the step record
- * @param options.target - Target element information
- * @param options.screenshotPath - Path to screenshot file if captured
- * @param options.screenshotDimensions - Screenshot dimensions
- * @param options.screenshotDimensions.width - Screenshot width in pixels
- * @param options.screenshotDimensions.height - Screenshot height in pixels
- * @returns Observation data collected after tool execution
- */
-export async function collectObservationAndRecord(
-  page: Page,
-  toolName: string,
-  input: Record<string, unknown>,
-  startTime: number,
-  options: {
-    /**
-     * Target element information (selector, testId, etc.)
-     */
-    target?: Record<string, string>;
-    /**
-     * Path to screenshot file if captured
-     */
-    screenshotPath?: string;
-    /**
-     * Screenshot dimensions if captured
-     */
-    screenshotDimensions?: {
-      /**
-       * Screenshot width in pixels
-       */
-      width: number;
-      /**
-       * Screenshot height in pixels
-       */
-      height: number;
-    };
-  } = {},
-): Promise<StepRecordObservation> {
-  const observation = await collectObservation(page, 'full');
-
-  await recordToolStep({
-    toolName,
-    input,
-    startTime,
-    observation,
-    target: options.target,
-    screenshotPath: options.screenshotPath,
-    screenshotDimensions: options.screenshotDimensions,
-  });
-
-  return observation;
-}
-
-/**
- * Handle tool execution errors and return appropriate error response.
- *
- * @param error - The error that occurred during tool execution
- * @param defaultCode - Default error code to use if no specific match found
- * @param defaultMessage - Default error message to use
- * @param input - Input parameters that were passed to the tool
- * @param sessionId - Current session ID for error context
- * @param startTime - Timestamp when the tool execution started
- * @returns Error response with appropriate code and message
- */
-export function handleToolError<Result>(
-  error: unknown,
-  defaultCode: ErrorCode,
-  defaultMessage: string,
-  input: unknown,
-  sessionId: string | undefined,
-  startTime: number,
-): McpResponse<Result> {
-  const message = extractErrorMessage(error);
-
-  if (message.includes('Unknown a11yRef') || message.includes('not found')) {
-    return createErrorResponse(
-      ErrorCodes.MM_TARGET_NOT_FOUND,
-      message,
-      { input },
-      sessionId,
-      startTime,
-    ) as McpResponse<Result>;
-  }
-
-  return createErrorResponse(
-    defaultCode,
-    `${defaultMessage}: ${message}`,
-    { input },
-    sessionId,
-    startTime,
-  ) as McpResponse<Result>;
-}
diff --git a/src/mcp-server/tools/index.ts b/src/mcp-server/tools/index.ts
deleted file mode 100644
index 2621238..0000000
--- a/src/mcp-server/tools/index.ts
+++ /dev/null
@@ -1,10 +0,0 @@
-export * from './error-classification.js';
-export * from './run-tool.js';
-export * from './helpers.js';
-export * from './interaction.js';
-export * from './navigation.js';
-export * from './discovery-tools.js';
-export * from './screenshot.js';
-export * from './knowledge.js';
-export * from './batch.js';
-export * from './clipboard.js';
diff --git a/src/mcp-server/tools/interaction.test.ts b/src/mcp-server/tools/interaction.test.ts
deleted file mode 100644
index cd4fea7..0000000
--- a/src/mcp-server/tools/interaction.test.ts
+++ /dev/null
@@ -1,822 +0,0 @@
-/**
- * Unit tests for interaction tool handlers.
- *
- * Tests handleClick, handleType, and handleWaitFor with various target types,
- * error scenarios, and page closure detection.
- */
-
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import { handleClick, handleType, handleWaitFor } from './interaction';
-import * as discoveryModule from '../discovery.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import {
-  createMockSessionManager,
-  createMockPage,
-  createMockLocator,
-} from '../test-utils';
-import { ErrorCodes } from '../types';
-import * as utilsModule from '../utils';
-
-describe('interaction', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-    // Mock knowledge store to prevent "not initialized" errors
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('handleClick', () => {
-    describe('with testId target', () => {
-      it('clicks element by testId', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockPage, 'locator').mockReturnValue(mockLocator);
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleClick({ testId: 'my-button' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.clicked).toBe(true);
-          expect(result.result.target).toBe('testId:my-button');
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'testId',
-          'my-button',
-          expect.any(Map),
-          15000,
-        );
-        expect(mockLocator.click).toHaveBeenCalled();
-      });
-
-      it('uses custom timeout when provided', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        await handleClick({ testId: 'my-button', timeoutMs: 5000 });
-
-        // Assert
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'testId',
-          'my-button',
-          expect.any(Map),
-          5000,
-        );
-      });
-    });
-
-    describe('with selector target', () => {
-      it('clicks element by CSS selector', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleClick({ selector: 'button.primary' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.clicked).toBe(true);
-          expect(result.result.target).toBe('selector:button.primary');
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'selector',
-          'button.primary',
-          expect.any(Map),
-          15000,
-        );
-      });
-    });
-
-    describe('with a11yRef target', () => {
-      it('clicks element by accessibility reference', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        const refMap = new Map([['e5', 'button[aria-label="Submit"]']]);
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(refMap);
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleClick({ a11yRef: 'e5' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.clicked).toBe(true);
-          expect(result.result.target).toBe('a11yRef:e5');
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'a11yRef',
-          'e5',
-          refMap,
-          15000,
-        );
-      });
-    });
-
-    describe('with invalid target selection', () => {
-      it('returns error when no target specified', async () => {
-        // Act
-        const result = await handleClick({} as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('Exactly one');
-        }
-      });
-
-      it('returns error when multiple targets specified', async () => {
-        // Act
-        const result = await handleClick({
-          testId: 'button',
-          selector: '.button',
-        } as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('Exactly one');
-        }
-      });
-
-      it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => {
-        // Arrange
-        vi.spyOn(utilsModule, 'validateTargetSelection').mockReturnValue({
-          valid: true,
-          // Missing type and value properties - will fail isValidTargetSelection
-        } as any);
-
-        // Act
-        const result = await handleClick({ testId: 'button' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toBe('Invalid target selection');
-        }
-      });
-    });
-
-    describe('with page closure after click', () => {
-      it('handles page closure gracefully', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockLocator, 'click').mockRejectedValue(
-          new Error('Target page, context or browser has been closed'),
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleClick({ testId: 'close-btn' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.clicked).toBe(true);
-          expect(result.result.pageClosedAfterClick).toBe(true);
-          expect(result.result.target).toBe('testId:close-btn');
-        }
-      });
-
-      it('handles browser closed error gracefully', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockLocator, 'click').mockRejectedValue(
-          new Error('browser has been closed'),
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleClick({ testId: 'close-btn' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.pageClosedAfterClick).toBe(true);
-        }
-      });
-    });
-
-    describe('with click errors', () => {
-      it('returns error when click fails with non-closure error', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockLocator, 'click').mockRejectedValue(
-          new Error('Element is not clickable'),
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleClick({ testId: 'my-button' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_CLICK_FAILED);
-        }
-      });
-
-      it('returns error when element not found', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
-          new Error('Timeout waiting for element'),
-        );
-
-        // Act
-        const result = await handleClick({ testId: 'nonexistent' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
-        }
-      });
-    });
-
-    describe('without active session', () => {
-      it('returns error when no session active', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleClick({ testId: 'my-button' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-    });
-  });
-
-  describe('handleType', () => {
-    describe('with testId target', () => {
-      it('types text into element by testId', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockPage, 'locator').mockReturnValue(mockLocator);
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleType({
-          testId: 'amount-input',
-          text: '0.5',
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.typed).toBe(true);
-          expect(result.result.target).toBe('testId:amount-input');
-          expect(result.result.textLength).toBe(3);
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'testId',
-          'amount-input',
-          expect.any(Map),
-          15000,
-        );
-        expect(mockLocator.fill).toHaveBeenCalledWith('0.5');
-      });
-
-      it('uses custom timeout when provided', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        await handleType({ testId: 'input', text: 'test', timeoutMs: 3000 });
-
-        // Assert
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'testId',
-          'input',
-          expect.any(Map),
-          3000,
-        );
-      });
-    });
-
-    describe('with selector target', () => {
-      it('types text into element by CSS selector', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleType({
-          selector: 'input[name="email"]',
-          text: 'test@example.com',
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.typed).toBe(true);
-          expect(result.result.target).toBe('selector:input[name="email"]');
-          expect(result.result.textLength).toBe(16);
-        }
-        expect(mockLocator.fill).toHaveBeenCalledWith('test@example.com');
-      });
-    });
-
-    describe('with a11yRef target', () => {
-      it('types text into element by accessibility reference', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        const refMap = new Map([['e3', 'input[aria-label="Amount"]']]);
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(refMap);
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleType({ a11yRef: 'e3', text: '100' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.typed).toBe(true);
-          expect(result.result.target).toBe('a11yRef:e3');
-          expect(result.result.textLength).toBe(3);
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'a11yRef',
-          'e3',
-          refMap,
-          15000,
-        );
-      });
-    });
-
-    describe('with empty text', () => {
-      it('types empty string and reports zero length', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleType({ testId: 'input', text: '' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.typed).toBe(true);
-          expect(result.result.textLength).toBe(0);
-        }
-        expect(mockLocator.fill).toHaveBeenCalledWith('');
-      });
-    });
-
-    describe('with invalid target selection', () => {
-      it('returns error when no target specified', async () => {
-        // Act
-        const result = await handleType({ text: 'test' } as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('Exactly one');
-        }
-      });
-
-      it('returns error when multiple targets specified', async () => {
-        // Act
-        const result = await handleType({
-          testId: 'input',
-          selector: 'input',
-          text: 'test',
-        } as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('Exactly one');
-        }
-      });
-
-      it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => {
-        // Arrange
-        vi.spyOn(utilsModule, 'validateTargetSelection').mockReturnValue({
-          valid: true,
-          // Missing type and value properties - will fail isValidTargetSelection
-        } as any);
-
-        // Act
-        const result = await handleType({ testId: 'input', text: 'test' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toBe('Invalid target selection');
-        }
-      });
-    });
-
-    describe('with type errors', () => {
-      it('returns error when fill fails', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockLocator, 'fill').mockRejectedValue(
-          new Error('Element is not editable'),
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleType({ testId: 'input', text: 'test' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_TYPE_FAILED);
-        }
-      });
-
-      it('returns error when element not found', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
-          new Error('Timeout waiting for element'),
-        );
-
-        // Act
-        const result = await handleType({
-          testId: 'nonexistent',
-          text: 'test',
-        });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
-        }
-      });
-    });
-
-    describe('without active session', () => {
-      it('returns error when no session active', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleType({ testId: 'input', text: 'test' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-    });
-  });
-
-  describe('handleWaitFor', () => {
-    describe('with testId target', () => {
-      it('waits for element by testId', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockPage, 'locator').mockReturnValue(mockLocator);
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleWaitFor({ testId: 'loading-spinner' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.found).toBe(true);
-          expect(result.result.target).toBe('testId:loading-spinner');
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'testId',
-          'loading-spinner',
-          expect.any(Map),
-          15000,
-        );
-      });
-
-      it('uses custom timeout when provided', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        await handleWaitFor({ testId: 'element', timeoutMs: 30000 });
-
-        // Assert
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'testId',
-          'element',
-          expect.any(Map),
-          30000,
-        );
-      });
-    });
-
-    describe('with selector target', () => {
-      it('waits for element by CSS selector', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleWaitFor({ selector: '.success-message' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.found).toBe(true);
-          expect(result.result.target).toBe('selector:.success-message');
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'selector',
-          '.success-message',
-          expect.any(Map),
-          15000,
-        );
-      });
-    });
-
-    describe('with a11yRef target', () => {
-      it('waits for element by accessibility reference', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        const mockLocator = createMockLocator();
-        const refMap = new Map([['e10', 'button[aria-label="Confirm"]']]);
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(refMap);
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
-          mockLocator as any,
-        );
-
-        // Act
-        const result = await handleWaitFor({ a11yRef: 'e10' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.found).toBe(true);
-          expect(result.result.target).toBe('a11yRef:e10');
-        }
-        expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
-          mockPage,
-          'a11yRef',
-          'e10',
-          refMap,
-          15000,
-        );
-      });
-    });
-
-    describe('with invalid target selection', () => {
-      it('returns error when no target specified', async () => {
-        // Act
-        const result = await handleWaitFor({} as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('Exactly one');
-        }
-      });
-
-      it('returns error when multiple targets specified', async () => {
-        // Act
-        const result = await handleWaitFor({
-          testId: 'element',
-          selector: '.element',
-        } as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('Exactly one');
-        }
-      });
-
-      it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => {
-        // Arrange
-        vi.spyOn(utilsModule, 'validateTargetSelection').mockReturnValue({
-          valid: true,
-          // Missing type and value properties - will fail isValidTargetSelection
-        } as any);
-
-        // Act
-        const result = await handleWaitFor({ testId: 'element' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toBe('Invalid target selection');
-        }
-      });
-    });
-
-    describe('with timeout errors', () => {
-      it('returns error when element not found within timeout', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
-          new Error('Timeout 15000ms exceeded'),
-        );
-
-        // Act
-        const result = await handleWaitFor({ testId: 'nonexistent' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
-        }
-      });
-
-      it('returns error when page closed during wait', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-        vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
-          new Error('Target page has been closed'),
-        );
-
-        // Act
-        const result = await handleWaitFor({ testId: 'element' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
-        }
-      });
-    });
-
-    describe('without active session', () => {
-      it('returns error when no session active', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleWaitFor({ testId: 'element' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-    });
-  });
-});
diff --git a/src/mcp-server/tools/interaction.ts b/src/mcp-server/tools/interaction.ts
deleted file mode 100644
index 80c02e1..0000000
--- a/src/mcp-server/tools/interaction.ts
+++ /dev/null
@@ -1,296 +0,0 @@
-import { DEFAULT_INTERACTION_TIMEOUT_MS } from '../constants.js';
-import { waitForTarget } from '../discovery.js';
-import { getSessionManager } from '../session-manager.js';
-import {
-  classifyClickError,
-  classifyTypeError,
-  classifyWaitError,
-  isPageClosedError,
-} from './error-classification.js';
-import { runTool } from './run-tool.js';
-import type {
-  ClickInput,
-  ClickResult,
-  TypeInput,
-  TypeResult,
-  WaitForInput,
-  WaitForResult,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-import { ErrorCodes } from '../types';
-import {
-  createErrorResponse,
-  validateTargetSelection,
-  isValidTargetSelection,
-  isInvalidTargetSelection,
-} from '../utils';
-
-/**
- * Handles clicking on an element specified by testId, selector, or accessibility reference.
- *
- * @param input The click input containing target selection and timeout options
- * @param options Optional handler configuration
- * @returns Promise resolving to click result with target information
- */
-export async function handleClick(
-  input: ClickInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<ClickResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
-
-  const validation = validateTargetSelection(input);
-  if (isInvalidTargetSelection(validation)) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      validation.error,
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  if (!isValidTargetSelection(validation)) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      'Invalid target selection',
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  const { type: targetType, value: targetValue } = validation;
-
-  return runTool<ClickInput, ClickResult>({
-    toolName: 'mm_click',
-    input,
-    options,
-
-    /**
-     * Executes the click action on the target element.
-     *
-     * @param context The tool execution context containing page and reference map
-     * @returns Promise resolving to click result with success status and target info
-     */
-    execute: async (context) => {
-      const locator = await waitForTarget(
-        context.page,
-        targetType,
-        targetValue,
-        context.refMap,
-        timeoutMs,
-      );
-
-      try {
-        await locator.click();
-        return {
-          clicked: true,
-          target: `${targetType}:${targetValue}`,
-        };
-      } catch (clickError) {
-        if (isPageClosedError(clickError)) {
-          return {
-            clicked: true,
-            target: `${targetType}:${targetValue}`,
-            pageClosedAfterClick: true,
-          };
-        }
-        throw clickError;
-      }
-    },
-
-    /**
-     * Returns the target element information for recording.
-     *
-     * @returns Object containing the target type and value
-     */
-    getTarget: () => ({ [targetType]: targetValue }),
-
-    classifyError: classifyClickError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object with timeout information
-     */
-    sanitizeInputForRecording: () => ({ timeoutMs }),
-  });
-}
-
-/**
- * Handles typing text into an element specified by testId, selector, or accessibility reference.
- *
- * @param input The type input containing target selection, text, and timeout options
- * @param options Optional handler configuration
- * @returns Promise resolving to type result with target and text length information
- */
-export async function handleType(
-  input: TypeInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<TypeResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
-
-  const validation = validateTargetSelection(input);
-  if (isInvalidTargetSelection(validation)) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      validation.error,
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  if (!isValidTargetSelection(validation)) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      'Invalid target selection',
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  const { type: targetType, value: targetValue } = validation;
-
-  return runTool<TypeInput, TypeResult>({
-    toolName: 'mm_type',
-    input,
-    options,
-
-    /**
-     * Executes the type action on the target element.
-     *
-     * @param context The tool execution context containing page and reference map
-     * @returns Promise resolving to type result with success status and text length
-     */
-    execute: async (context) => {
-      const locator = await waitForTarget(
-        context.page,
-        targetType,
-        targetValue,
-        context.refMap,
-        timeoutMs,
-      );
-      await locator.fill(input.text);
-
-      return {
-        typed: true,
-        target: `${targetType}:${targetValue}`,
-        textLength: input.text.length,
-      };
-    },
-
-    /**
-     * Returns the target element information for recording.
-     *
-     * @returns Object containing the target type and value
-     */
-    getTarget: () => ({ [targetType]: targetValue }),
-
-    classifyError: classifyTypeError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object with timeout and text information
-     */
-    sanitizeInputForRecording: () => ({
-      timeoutMs,
-      text: input.text,
-      testId: input.testId,
-      selector: input.selector,
-      a11yRef: input.a11yRef,
-    }),
-  });
-}
-
-/**
- * Handles waiting for an element to become visible.
- *
- * @param input The wait input containing target selection and timeout options
- * @param options Optional handler configuration
- * @returns Promise resolving to wait result with target information
- */
-export async function handleWaitFor(
-  input: WaitForInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<WaitForResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
-
-  const validation = validateTargetSelection(input);
-  if (isInvalidTargetSelection(validation)) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      validation.error,
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  if (!isValidTargetSelection(validation)) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      'Invalid target selection',
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  const { type: targetType, value: targetValue } = validation;
-
-  return runTool<WaitForInput, WaitForResult>({
-    toolName: 'mm_wait_for',
-    input,
-    options,
-
-    /**
-     * Executes the wait action for the target element.
-     *
-     * @param context The tool execution context containing page and reference map
-     * @returns Promise resolving to wait result with success status and target info
-     */
-    execute: async (context) => {
-      await waitForTarget(
-        context.page,
-        targetType,
-        targetValue,
-        context.refMap,
-        timeoutMs,
-      );
-
-      return {
-        found: true,
-        target: `${targetType}:${targetValue}`,
-      };
-    },
-
-    /**
-     * Returns the target element information for recording.
-     *
-     * @returns Object containing the target type and value
-     */
-    getTarget: () => ({ [targetType]: targetValue }),
-
-    classifyError: classifyWaitError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object with timeout information
-     */
-    sanitizeInputForRecording: () => ({ timeoutMs }),
-  });
-}
diff --git a/src/mcp-server/tools/knowledge.ts b/src/mcp-server/tools/knowledge.ts
deleted file mode 100644
index eddbfff..0000000
--- a/src/mcp-server/tools/knowledge.ts
+++ /dev/null
@@ -1,212 +0,0 @@
-import { knowledgeStore } from '../knowledge-store.js';
-import { getSessionManager } from '../session-manager.js';
-import type {
-  KnowledgeLastInput,
-  KnowledgeLastResult,
-  KnowledgeSearchInput,
-  KnowledgeSearchResult,
-  KnowledgeSummarizeInput,
-  KnowledgeSummarizeResult,
-  KnowledgeSessionsInput,
-  KnowledgeSessionsResult,
-  KnowledgeScope,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-import { ErrorCodes } from '../types';
-import {
-  createSuccessResponse,
-  createErrorResponse,
-  extractErrorMessage,
-} from '../utils';
-
-/**
- * Handles retrieving the last N steps from knowledge store.
- *
- * @param input - Input with number of steps and scope.
- * @param _options - Handler options (unused).
- * @returns Response with step records.
- */
-export async function handleKnowledgeLast(
-  input: KnowledgeLastInput,
-  _options?: HandlerOptions,
-): Promise<McpResponse<KnowledgeLastResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-  const nSteps = input.n ?? 20;
-  const scope: KnowledgeScope = input.scope ?? 'current';
-
-  try {
-    const steps = await knowledgeStore.getLastSteps(
-      nSteps,
-      scope,
-      sessionId,
-      input.filters,
-    );
-
-    return createSuccessResponse<KnowledgeLastResult>(
-      { steps },
-      sessionId,
-      startTime,
-    );
-  } catch (error) {
-    const message = extractErrorMessage(error);
-    return createErrorResponse(
-      ErrorCodes.MM_KNOWLEDGE_ERROR,
-      `Failed to retrieve steps: ${message}`,
-      { nSteps, scope },
-      sessionId,
-      startTime,
-    );
-  }
-}
-
-/**
- * Handles searching step records in knowledge store.
- *
- * @param input - Input with search query and filters.
- * @param _options - Handler options (unused).
- * @returns Response with matching steps.
- */
-export async function handleKnowledgeSearch(
-  input: KnowledgeSearchInput,
-  _options?: HandlerOptions,
-): Promise<McpResponse<KnowledgeSearchResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-  const limit = input.limit ?? 20;
-  const scope: KnowledgeScope = input.scope ?? 'all';
-
-  try {
-    const matches = await knowledgeStore.searchSteps(
-      input.query,
-      limit,
-      scope,
-      sessionId,
-      input.filters,
-    );
-
-    return createSuccessResponse<KnowledgeSearchResult>(
-      {
-        matches,
-        query: input.query,
-      },
-      sessionId,
-      startTime,
-    );
-  } catch (error) {
-    const message = extractErrorMessage(error);
-    return createErrorResponse(
-      ErrorCodes.MM_KNOWLEDGE_ERROR,
-      `Search failed: ${message}`,
-      { query: input.query, limit, scope },
-      sessionId,
-      startTime,
-    );
-  }
-}
-
-/**
- * Handles summarizing a session's steps as a recipe.
- *
- * @param input - Input with session scope or ID.
- * @param _options - Handler options (unused).
- * @returns Response with session summary.
- */
-export async function handleKnowledgeSummarize(
-  input: KnowledgeSummarizeInput,
-  _options?: HandlerOptions,
-): Promise<McpResponse<KnowledgeSummarizeResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const currentSessionId = sessionManager.getSessionId();
-
-  let targetSessionId: string | undefined;
-
-  if (input.sessionId) {
-    targetSessionId = input.sessionId;
-  } else if (input.scope) {
-    if (input.scope === 'all') {
-      return createErrorResponse(
-        ErrorCodes.MM_INVALID_INPUT,
-        'Cannot summarize all sessions. Use scope="current" or provide a specific sessionId.',
-        { input },
-        currentSessionId,
-        startTime,
-      );
-    } else if (input.scope === 'current') {
-      targetSessionId = currentSessionId;
-    } else if (typeof input.scope === 'object' && 'sessionId' in input.scope) {
-      targetSessionId = input.scope.sessionId;
-    }
-  } else {
-    targetSessionId = currentSessionId;
-  }
-
-  if (!targetSessionId) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      'No sessionId provided and no active session',
-      { input },
-      undefined,
-      startTime,
-    );
-  }
-
-  try {
-    const summary = await knowledgeStore.summarizeSession(targetSessionId);
-
-    return createSuccessResponse<KnowledgeSummarizeResult>(
-      summary,
-      targetSessionId,
-      startTime,
-    );
-  } catch (error) {
-    const message = extractErrorMessage(error);
-    return createErrorResponse(
-      ErrorCodes.MM_KNOWLEDGE_ERROR,
-      `Summarize failed: ${message}`,
-      { sessionId: targetSessionId },
-      targetSessionId,
-      startTime,
-    );
-  }
-}
-
-/**
- * Handles listing recent sessions with metadata.
- *
- * @param input - Input with limit and filters.
- * @param _options - Handler options (unused).
- * @returns Response with session list.
- */
-export async function handleKnowledgeSessions(
-  input: KnowledgeSessionsInput,
-  _options?: HandlerOptions,
-): Promise<McpResponse<KnowledgeSessionsResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-  const limit = input.limit ?? 10;
-
-  try {
-    const sessions = await knowledgeStore.listSessions(limit, input.filters);
-
-    return createSuccessResponse<KnowledgeSessionsResult>(
-      { sessions },
-      sessionId,
-      startTime,
-    );
-  } catch (error) {
-    const message = extractErrorMessage(error);
-    return createErrorResponse(
-      ErrorCodes.MM_KNOWLEDGE_ERROR,
-      `Failed to list sessions: ${message}`,
-      { limit, filters: input.filters },
-      sessionId,
-      startTime,
-    );
-  }
-}
diff --git a/src/mcp-server/tools/launch.test.ts b/src/mcp-server/tools/launch.test.ts
deleted file mode 100644
index 81cab1b..0000000
--- a/src/mcp-server/tools/launch.test.ts
+++ /dev/null
@@ -1,384 +0,0 @@
-/**
- * Unit tests for launch tool handler.
- *
- * Tests session launch with various states and error scenarios.
- */
-
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-
-import { handleLaunch } from './launch.js';
-import type { ExtensionState } from '../../capabilities/types.js';
-import * as sessionManagerModule from '../session-manager.js';
-import type { SessionLaunchResult } from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils/mock-factories.js';
-import { ErrorCodes } from '../types';
-import type { LaunchInput } from '../types';
-
-describe('handleLaunch', () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  describe('successful launch', () => {
-    it('returns session info on successful launch', async () => {
-      const mockState: ExtensionState = {
-        isLoaded: true,
-        currentUrl: 'chrome-extension://ext-123/home.html',
-        extensionId: 'ext-123',
-        isUnlocked: false,
-        currentScreen: 'home',
-        accountAddress: null,
-        networkName: null,
-        chainId: null,
-        balance: null,
-      };
-
-      const mockLaunchResult: SessionLaunchResult = {
-        sessionId: 'test-session-123',
-        extensionId: 'ext-123',
-        state: mockState,
-      };
-
-      const mockSessionManager = createMockSessionManager({
-        hasActive: false,
-        launchResult: mockLaunchResult,
-      });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.sessionId).toBe('test-session-123');
-        expect(result.result.extensionId).toBe('ext-123');
-        expect(result.result.state).toStrictEqual(mockState);
-        expect(result.meta.sessionId).toBe('test-session-123');
-      }
-      expect(mockSessionManager.launch).toHaveBeenCalledWith(input);
-    });
-
-    it('includes prerequisites in prod mode', async () => {
-      const mockState: ExtensionState = {
-        isLoaded: true,
-        currentUrl: 'chrome-extension://ext-456/home.html',
-        extensionId: 'ext-456',
-        isUnlocked: true,
-        currentScreen: 'home',
-        accountAddress: '0x1234',
-        networkName: 'Ethereum Mainnet',
-        chainId: 1,
-        balance: '10 ETH',
-      };
-
-      const mockLaunchResult: SessionLaunchResult = {
-        sessionId: 'prod-session-456',
-        extensionId: 'ext-456',
-        state: mockState,
-      };
-
-      const mockSessionManager = createMockSessionManager({
-        hasActive: false,
-        launchResult: mockLaunchResult,
-        environmentMode: 'prod',
-      });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.prerequisites).toBeDefined();
-        expect(result.result.prerequisites).toHaveLength(3);
-        expect(result.result.prerequisites?.[0].step).toBe('Unlock Wallet');
-        expect(result.result.prerequisites?.[1].step).toBe('Configure Network');
-        expect(result.result.prerequisites?.[2].step).toBe('Set Up Accounts');
-      }
-    });
-
-    it('does not include prerequisites in e2e mode', async () => {
-      const mockState: ExtensionState = {
-        isLoaded: true,
-        currentUrl: 'chrome-extension://ext-123/home.html',
-        extensionId: 'ext-123',
-        isUnlocked: false,
-        currentScreen: 'home',
-        accountAddress: null,
-        networkName: null,
-        chainId: null,
-        balance: null,
-      };
-
-      const mockLaunchResult: SessionLaunchResult = {
-        sessionId: 'e2e-session-789',
-        extensionId: 'ext-123',
-        state: mockState,
-      };
-
-      const mockSessionManager = createMockSessionManager({
-        hasActive: false,
-        launchResult: mockLaunchResult,
-        environmentMode: 'e2e',
-      });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.prerequisites).toBeUndefined();
-      }
-    });
-
-    it('passes through all launch input parameters', async () => {
-      const mockState: ExtensionState = {
-        isLoaded: true,
-        currentUrl: 'chrome-extension://ext-123/home.html',
-        extensionId: 'ext-123',
-        isUnlocked: false,
-        currentScreen: 'home',
-        accountAddress: null,
-        networkName: null,
-        chainId: null,
-        balance: null,
-      };
-
-      const mockLaunchResult: SessionLaunchResult = {
-        sessionId: 'custom-session',
-        extensionId: 'ext-123',
-        state: mockState,
-      };
-
-      const mockSessionManager = createMockSessionManager({
-        hasActive: false,
-        launchResult: mockLaunchResult,
-      });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = {
-        stateMode: 'custom',
-        fixturePreset: 'test-preset',
-        autoBuild: false,
-        slowMo: 100,
-        goal: 'Test send flow',
-        flowTags: ['send', 'transaction'],
-        tags: ['smoke-test'],
-        seedContracts: ['hst', 'nfts'],
-        ports: {
-          anvil: 8546,
-          fixtureServer: 12346,
-        },
-      };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(true);
-      expect(mockSessionManager.launch).toHaveBeenCalledWith(input);
-    });
-  });
-
-  describe('session already running', () => {
-    it('returns error when session already active', async () => {
-      const mockSessionManager = createMockSessionManager({
-        hasActive: true,
-        sessionId: 'existing-session-999',
-      });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_SESSION_ALREADY_RUNNING);
-        expect(result.error.message).toBe(
-          'A session is already running. Call mm_cleanup first.',
-        );
-        expect(result.error.details).toStrictEqual({
-          currentSessionId: 'existing-session-999',
-        });
-        expect(result.meta.sessionId).toBe('existing-session-999');
-      }
-      expect(mockSessionManager.launch).not.toHaveBeenCalled();
-    });
-  });
-
-  describe('launch failures', () => {
-    it('returns port conflict error for EADDRINUSE', async () => {
-      const mockSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(mockSessionManager, 'launch').mockRejectedValue(
-        new Error('listen EADDRINUSE: address already in use :::8545'),
-      );
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE);
-        expect(result.error.message).toContain('Port conflict');
-        expect(result.error.message).toContain('EADDRINUSE');
-        expect(result.error.details).toStrictEqual({ input });
-      }
-    });
-
-    it('returns port conflict error for port keyword in message', async () => {
-      const mockSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(mockSessionManager, 'launch').mockRejectedValue(
-        new Error('port 8545 is already in use'),
-      );
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE);
-        expect(result.error.message).toContain('Port conflict');
-      }
-    });
-
-    it('returns generic launch failed error for other errors', async () => {
-      const mockSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(mockSessionManager, 'launch').mockRejectedValue(
-        new Error('Browser failed to start'),
-      );
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED);
-        expect(result.error.message).toContain('Launch failed');
-        expect(result.error.message).toContain('Browser failed to start');
-        expect(result.error.details).toStrictEqual({ input });
-      }
-    });
-
-    it('handles non-Error exceptions', async () => {
-      const mockSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(mockSessionManager, 'launch').mockRejectedValue('string error');
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED);
-        expect(result.error.message).toContain('Launch failed');
-      }
-    });
-  });
-
-  describe('response metadata', () => {
-    it('includes timestamp in response', async () => {
-      const mockState: ExtensionState = {
-        isLoaded: true,
-        currentUrl: 'chrome-extension://ext-123/home.html',
-        extensionId: 'ext-123',
-        isUnlocked: false,
-        currentScreen: 'home',
-        accountAddress: null,
-        networkName: null,
-        chainId: null,
-        balance: null,
-      };
-
-      const mockLaunchResult: SessionLaunchResult = {
-        sessionId: 'test-session-123',
-        extensionId: 'ext-123',
-        state: mockState,
-      };
-
-      const mockSessionManager = createMockSessionManager({
-        hasActive: false,
-        launchResult: mockLaunchResult,
-      });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.meta.timestamp).toBeDefined();
-        expect(typeof result.meta.timestamp).toBe('string');
-        expect(new Date(result.meta.timestamp).getTime()).toBeGreaterThan(0);
-      }
-    });
-
-    it('includes durationMs in response', async () => {
-      const mockState: ExtensionState = {
-        isLoaded: true,
-        currentUrl: 'chrome-extension://ext-123/home.html',
-        extensionId: 'ext-123',
-        isUnlocked: false,
-        currentScreen: 'home',
-        accountAddress: null,
-        networkName: null,
-        chainId: null,
-        balance: null,
-      };
-
-      const mockLaunchResult: SessionLaunchResult = {
-        sessionId: 'test-session-123',
-        extensionId: 'ext-123',
-        state: mockState,
-      };
-
-      const mockSessionManager = createMockSessionManager({
-        hasActive: false,
-        launchResult: mockLaunchResult,
-      });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
-
-      const input: LaunchInput = { stateMode: 'default' };
-
-      const result = await handleLaunch(input);
-
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.meta.durationMs).toBeGreaterThanOrEqual(0);
-        expect(typeof result.meta.durationMs).toBe('number');
-      }
-    });
-  });
-});
diff --git a/src/mcp-server/tools/launch.ts b/src/mcp-server/tools/launch.ts
deleted file mode 100644
index 11b5d3c..0000000
--- a/src/mcp-server/tools/launch.ts
+++ /dev/null
@@ -1,93 +0,0 @@
-import { getSessionManager } from '../session-manager.js';
-import type {
-  LaunchInput,
-  LaunchResult,
-  LaunchPrerequisite,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-import { ErrorCodes } from '../types';
-import {
-  createSuccessResponse,
-  createErrorResponse,
-  extractErrorMessage,
-} from '../utils';
-
-const PROD_MODE_PREREQUISITES: LaunchPrerequisite[] = [
-  {
-    step: 'Unlock Wallet',
-    description:
-      'The wallet must be unlocked before interacting with it. Use the extension UI to enter your password.',
-  },
-  {
-    step: 'Configure Network',
-    description:
-      'Ensure the correct network is selected (e.g., Ethereum Mainnet, Sepolia, or custom network).',
-  },
-  {
-    step: 'Set Up Accounts',
-    description:
-      'Import or create accounts as needed. Ensure the active account has sufficient funds for transactions.',
-  },
-];
-
-/**
- * Handles the launch tool request to start a browser session.
- *
- * @param input - The launch configuration parameters.
- * @param _options - Handler options (unused).
- * @returns Response with session info or error.
- */
-export async function handleLaunch(
-  input: LaunchInput,
-  _options?: HandlerOptions,
-): Promise<McpResponse<LaunchResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-
-  try {
-    if (sessionManager.hasActiveSession()) {
-      return createErrorResponse(
-        ErrorCodes.MM_SESSION_ALREADY_RUNNING,
-        'A session is already running. Call mm_cleanup first.',
-        { currentSessionId: sessionManager.getSessionId() },
-        sessionManager.getSessionId(),
-        startTime,
-      );
-    }
-
-    const result = await sessionManager.launch(input);
-
-    const isProdMode = sessionManager.getEnvironmentMode() === 'prod';
-    const launchResult: LaunchResult = {
-      ...result,
-      ...(isProdMode && { prerequisites: PROD_MODE_PREREQUISITES }),
-    };
-
-    return createSuccessResponse<LaunchResult>(
-      launchResult,
-      result.sessionId,
-      startTime,
-    );
-  } catch (error) {
-    const message = extractErrorMessage(error);
-
-    if (message.includes('EADDRINUSE') || message.includes('port')) {
-      return createErrorResponse(
-        ErrorCodes.MM_PORT_IN_USE,
-        `Port conflict: ${message}`,
-        { input },
-        undefined,
-        startTime,
-      );
-    }
-
-    return createErrorResponse(
-      ErrorCodes.MM_LAUNCH_FAILED,
-      `Launch failed: ${message}`,
-      { input },
-      undefined,
-      startTime,
-    );
-  }
-}
diff --git a/src/mcp-server/tools/navigation.test.ts b/src/mcp-server/tools/navigation.test.ts
deleted file mode 100644
index 84cbdca..0000000
--- a/src/mcp-server/tools/navigation.test.ts
+++ /dev/null
@@ -1,787 +0,0 @@
-/**
- * Unit tests for navigation tool handlers.
- *
- * Tests handleNavigate, handleWaitForNotification, handleSwitchToTab, and handleCloseTab
- * with various navigation targets, tab operations, and error scenarios.
- */
-
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import {
-  handleNavigate,
-  handleWaitForNotification,
-  handleSwitchToTab,
-  handleCloseTab,
-} from './navigation';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager, createMockPage } from '../test-utils';
-import { ErrorCodes } from '../types';
-
-describe('navigation', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-    // Mock knowledge store to prevent "not initialized" errors
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('handleNavigate', () => {
-    describe('with home screen', () => {
-      it('navigates to home screen', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'navigateToHome').mockResolvedValue(
-          undefined,
-        );
-
-        // Act
-        const result = await handleNavigate({ screen: 'home' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.navigated).toBe(true);
-          expect(result.result.currentUrl).toBe(
-            'chrome-extension://ext-123/home.html',
-          );
-        }
-        expect(mockSessionManager.navigateToHome).toHaveBeenCalled();
-      });
-    });
-
-    describe('with settings screen', () => {
-      it('navigates to settings screen', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/settings.html',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'navigateToSettings').mockResolvedValue(
-          undefined,
-        );
-
-        // Act
-        const result = await handleNavigate({ screen: 'settings' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.navigated).toBe(true);
-          expect(result.result.currentUrl).toBe(
-            'chrome-extension://ext-123/settings.html',
-          );
-        }
-        expect(mockSessionManager.navigateToSettings).toHaveBeenCalled();
-      });
-    });
-
-    describe('with notification screen', () => {
-      it('navigates to notification screen', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/notification.html',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(
-          mockSessionManager,
-          'navigateToNotification',
-        ).mockResolvedValue(undefined);
-
-        // Act
-        const result = await handleNavigate({ screen: 'notification' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.navigated).toBe(true);
-          expect(result.result.currentUrl).toBe(
-            'chrome-extension://ext-123/notification.html',
-          );
-        }
-        expect(mockSessionManager.navigateToNotification).toHaveBeenCalled();
-      });
-    });
-
-    describe('with URL screen', () => {
-      it('navigates to custom URL', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue('https://app.uniswap.org');
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'navigateToUrl').mockResolvedValue(
-          mockPage,
-        );
-
-        // Act
-        const result = await handleNavigate({
-          screen: 'url',
-          url: 'https://app.uniswap.org',
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.navigated).toBe(true);
-          expect(result.result.currentUrl).toBe('https://app.uniswap.org');
-        }
-        expect(mockSessionManager.navigateToUrl).toHaveBeenCalledWith(
-          'https://app.uniswap.org',
-        );
-      });
-
-      it('returns error when URL is missing', async () => {
-        // Act
-        const result = await handleNavigate({ screen: 'url' } as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('url is required');
-        }
-      });
-    });
-
-    describe('with invalid screen', () => {
-      it('returns error for unknown screen', async () => {
-        // Act
-        const result = await handleNavigate({ screen: 'invalid' } as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain('Unknown screen');
-        }
-      });
-    });
-
-    describe('with navigation errors', () => {
-      it('returns error when navigation fails', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'navigateToHome').mockRejectedValue(
-          new Error('Navigation failed'),
-        );
-
-        // Act
-        const result = await handleNavigate({ screen: 'home' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NAVIGATION_FAILED);
-        }
-      });
-
-      it('returns error when page closed during navigation', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'navigateToSettings').mockRejectedValue(
-          new Error('Target page, context or browser has been closed'),
-        );
-
-        // Act
-        const result = await handleNavigate({ screen: 'settings' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NAVIGATION_FAILED);
-          expect(result.error.message).toContain(
-            'Page closed during navigation',
-          );
-        }
-      });
-    });
-
-    describe('without active session', () => {
-      it('returns error when no session active', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleNavigate({ screen: 'home' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-    });
-  });
-
-  describe('handleWaitForNotification', () => {
-    describe('with default timeout', () => {
-      it('waits for notification popup', async () => {
-        // Arrange
-        const mockNotificationPage = createMockPage();
-        vi.spyOn(mockNotificationPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/notification.html',
-        );
-        vi.spyOn(
-          mockSessionManager,
-          'waitForNotificationPage',
-        ).mockResolvedValue(mockNotificationPage);
-
-        // Act
-        const result = await handleWaitForNotification({});
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.found).toBe(true);
-          expect(result.result.pageUrl).toBe(
-            'chrome-extension://ext-123/notification.html',
-          );
-        }
-        expect(mockSessionManager.waitForNotificationPage).toHaveBeenCalledWith(
-          15000,
-        );
-      });
-    });
-
-    describe('with custom timeout', () => {
-      it('uses custom timeout value', async () => {
-        // Arrange
-        const mockNotificationPage = createMockPage();
-        vi.spyOn(mockNotificationPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/notification.html',
-        );
-        vi.spyOn(
-          mockSessionManager,
-          'waitForNotificationPage',
-        ).mockResolvedValue(mockNotificationPage);
-
-        // Act
-        const result = await handleWaitForNotification({ timeoutMs: 30000 });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.found).toBe(true);
-        }
-        expect(mockSessionManager.waitForNotificationPage).toHaveBeenCalledWith(
-          30000,
-        );
-      });
-    });
-
-    describe('with timeout errors', () => {
-      it('returns error when notification not found within timeout', async () => {
-        // Arrange
-        vi.spyOn(
-          mockSessionManager,
-          'waitForNotificationPage',
-        ).mockRejectedValue(new Error('Timeout 15000ms exceeded'));
-
-        // Act
-        const result = await handleWaitForNotification({});
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NOTIFICATION_TIMEOUT);
-        }
-      });
-
-      it('returns error when browser closed during wait', async () => {
-        // Arrange
-        vi.spyOn(
-          mockSessionManager,
-          'waitForNotificationPage',
-        ).mockRejectedValue(new Error('browser has been closed'));
-
-        // Act
-        const result = await handleWaitForNotification({});
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NOTIFICATION_TIMEOUT);
-          expect(result.error.message).toContain(
-            'Browser closed while waiting for notification',
-          );
-        }
-      });
-    });
-
-    describe('without active session', () => {
-      it('returns error when no session active', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleWaitForNotification({});
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-    });
-  });
-
-  describe('handleSwitchToTab', () => {
-    describe('with role matching', () => {
-      it('switches to tab by role', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockExtensionPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue(
-          undefined,
-        );
-
-        const mockDappPage = createMockPage();
-        vi.spyOn(mockDappPage, 'url').mockReturnValue(
-          'https://app.uniswap.org',
-        );
-        vi.spyOn(mockDappPage, 'bringToFront').mockResolvedValue(undefined);
-
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockDappPage);
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-          {
-            page: mockDappPage,
-            role: 'dapp',
-            url: 'https://app.uniswap.org',
-          },
-        ]);
-        vi.spyOn(mockSessionManager, 'setActivePage');
-
-        // Act
-        const result = await handleSwitchToTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.switched).toBe(true);
-          expect(result.result.activeTab.role).toBe('dapp');
-          expect(result.result.activeTab.url).toBe('https://app.uniswap.org');
-        }
-        expect(mockDappPage.bringToFront).toHaveBeenCalled();
-        expect(mockSessionManager.setActivePage).toHaveBeenCalledWith(
-          mockDappPage,
-        );
-      });
-    });
-
-    describe('with URL matching', () => {
-      it('switches to tab by URL prefix', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockExtensionPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue(
-          undefined,
-        );
-
-        const mockDappPage = createMockPage();
-        vi.spyOn(mockDappPage, 'url').mockReturnValue(
-          'https://app.uniswap.org/swap',
-        );
-        vi.spyOn(mockDappPage, 'bringToFront').mockResolvedValue(undefined);
-
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockDappPage);
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-          {
-            page: mockDappPage,
-            role: 'dapp',
-            url: 'https://app.uniswap.org/swap',
-          },
-        ]);
-        vi.spyOn(mockSessionManager, 'setActivePage');
-
-        // Act
-        const result = await handleSwitchToTab({
-          url: 'https://app.uniswap.org',
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.switched).toBe(true);
-          expect(result.result.activeTab.url).toBe(
-            'https://app.uniswap.org/swap',
-          );
-        }
-        expect(mockDappPage.bringToFront).toHaveBeenCalled();
-      });
-    });
-
-    describe('with invalid input', () => {
-      it('returns error when neither role nor url provided', async () => {
-        // Act
-        const result = await handleSwitchToTab({} as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain(
-            'Either role or url must be provided',
-          );
-        }
-      });
-    });
-
-    describe('with tab not found', () => {
-      it('returns error when no matching tab found by role', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-        ]);
-
-        // Act
-        const result = await handleSwitchToTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND);
-          expect(result.error.message).toContain('No tab found matching: dapp');
-        }
-      });
-
-      it('returns error when no matching tab found by URL', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-        ]);
-
-        // Act
-        const result = await handleSwitchToTab({
-          url: 'https://app.uniswap.org',
-        });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND);
-        }
-      });
-    });
-
-    describe('without active session', () => {
-      it('returns error when no session active', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleSwitchToTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-    });
-  });
-
-  describe('handleCloseTab', () => {
-    describe('with role matching', () => {
-      it('closes tab by role', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockExtensionPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-
-        const mockDappPage = createMockPage();
-        vi.spyOn(mockDappPage, 'url').mockReturnValue(
-          'https://app.uniswap.org',
-        );
-        vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined);
-
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(
-          mockExtensionPage,
-        );
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-          {
-            page: mockDappPage,
-            role: 'dapp',
-            url: 'https://app.uniswap.org',
-          },
-        ]);
-
-        // Act
-        const result = await handleCloseTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.closed).toBe(true);
-          expect(result.result.closedUrl).toBe('https://app.uniswap.org');
-        }
-        expect(mockDappPage.close).toHaveBeenCalled();
-      });
-    });
-
-    describe('with URL matching', () => {
-      it('closes tab by URL prefix', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockExtensionPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-
-        const mockDappPage = createMockPage();
-        vi.spyOn(mockDappPage, 'url').mockReturnValue(
-          'https://app.uniswap.org/swap',
-        );
-        vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined);
-
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(
-          mockExtensionPage,
-        );
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-          {
-            page: mockDappPage,
-            role: 'dapp',
-            url: 'https://app.uniswap.org/swap',
-          },
-        ]);
-
-        // Act
-        const result = await handleCloseTab({ url: 'https://app.uniswap.org' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.closed).toBe(true);
-          expect(result.result.closedUrl).toBe('https://app.uniswap.org/swap');
-        }
-        expect(mockDappPage.close).toHaveBeenCalled();
-      });
-    });
-
-    describe('with active tab closure', () => {
-      it('switches to extension tab when closing active tab', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockExtensionPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue(
-          undefined,
-        );
-
-        const mockDappPage = createMockPage();
-        vi.spyOn(mockDappPage, 'url').mockReturnValue(
-          'https://app.uniswap.org',
-        );
-        vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined);
-
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockDappPage);
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-          {
-            page: mockDappPage,
-            role: 'dapp',
-            url: 'https://app.uniswap.org',
-          },
-        ]);
-        vi.spyOn(mockSessionManager, 'setActivePage');
-
-        // Act
-        const result = await handleCloseTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.closed).toBe(true);
-        }
-        expect(mockExtensionPage.bringToFront).toHaveBeenCalled();
-        expect(mockSessionManager.setActivePage).toHaveBeenCalledWith(
-          mockExtensionPage,
-        );
-        expect(mockDappPage.close).toHaveBeenCalled();
-      });
-
-      it('does not switch when closing non-active tab', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockExtensionPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue(
-          undefined,
-        );
-
-        const mockDappPage = createMockPage();
-        vi.spyOn(mockDappPage, 'url').mockReturnValue(
-          'https://app.uniswap.org',
-        );
-        vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined);
-
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(
-          mockExtensionPage,
-        );
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-          {
-            page: mockDappPage,
-            role: 'dapp',
-            url: 'https://app.uniswap.org',
-          },
-        ]);
-        vi.spyOn(mockSessionManager, 'setActivePage');
-
-        // Act
-        const result = await handleCloseTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        expect(mockExtensionPage.bringToFront).not.toHaveBeenCalled();
-        expect(mockSessionManager.setActivePage).not.toHaveBeenCalled();
-        expect(mockDappPage.close).toHaveBeenCalled();
-      });
-    });
-
-    describe('with invalid input', () => {
-      it('returns error when neither role nor url provided', async () => {
-        // Act
-        const result = await handleCloseTab({} as any);
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
-          expect(result.error.message).toContain(
-            'Either role or url must be provided',
-          );
-        }
-      });
-    });
-
-    describe('with tab not found', () => {
-      it('returns error when no matching tab found by role', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-        ]);
-
-        // Act
-        const result = await handleCloseTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND);
-          expect(result.error.message).toContain('No tab found matching: dapp');
-        }
-      });
-
-      it('returns error when no matching tab found by URL', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-        ]);
-
-        // Act
-        const result = await handleCloseTab({ url: 'https://app.uniswap.org' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND);
-        }
-      });
-    });
-
-    describe('without active session', () => {
-      it('returns error when no session active', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleCloseTab({ role: 'dapp' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-    });
-  });
-});
diff --git a/src/mcp-server/tools/navigation.ts b/src/mcp-server/tools/navigation.ts
deleted file mode 100644
index 83a59a2..0000000
--- a/src/mcp-server/tools/navigation.ts
+++ /dev/null
@@ -1,329 +0,0 @@
-import { DEFAULT_INTERACTION_TIMEOUT_MS } from '../constants.js';
-import { getSessionManager } from '../session-manager.js';
-import {
-  classifyNavigationError,
-  classifyTabError,
-  classifyNotificationError,
-} from './error-classification.js';
-import { runTool } from './run-tool.js';
-import type {
-  NavigateInput,
-  NavigateResult,
-  WaitForNotificationInput,
-  WaitForNotificationResult,
-  SwitchToTabInput,
-  SwitchToTabResult,
-  CloseTabInput,
-  CloseTabResult,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-import { ErrorCodes } from '../types';
-import { createErrorResponse } from '../utils';
-
-/**
- * Handles navigation to a specific screen or URL.
- *
- * @param input The navigate input containing target screen and optional URL
- * @param options Optional handler configuration
- * @returns Promise resolving to navigate result with current URL information
- */
-export async function handleNavigate(
-  input: NavigateInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<NavigateResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-
-  if (input.screen === 'url' && !input.url) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      'url is required when screen is "url"',
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  const validScreens = ['home', 'settings', 'url', 'notification'];
-  if (!validScreens.includes(input.screen)) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      `Unknown screen: ${String(input.screen)}`,
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  return runTool<NavigateInput, NavigateResult>({
-    toolName: 'mm_navigate',
-    input,
-    options,
-
-    /**
-     * Executes the navigation action to the target screen.
-     *
-     * @param context The tool execution context containing page and reference map
-     * @returns Promise resolving to navigate result with success status and URL
-     */
-    execute: async (context) => {
-      switch (input.screen) {
-        case 'home':
-          await sessionManager.navigateToHome();
-          break;
-        case 'settings':
-          await sessionManager.navigateToSettings();
-          break;
-        case 'url':
-          await sessionManager.navigateToUrl(input.url as string);
-          break;
-        case 'notification':
-          await sessionManager.navigateToNotification();
-          break;
-        default:
-          throw new Error(`Unsupported screen: ${String(input.screen)}`);
-      }
-
-      return {
-        navigated: true,
-        currentUrl: context.page.url(),
-      };
-    },
-
-    classifyError: classifyNavigationError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object with screen and URL information
-     */
-    sanitizeInputForRecording: () => ({
-      screen: input.screen,
-      url: input.url,
-    }),
-  });
-}
-
-/**
- * Handles waiting for a notification popup to appear.
- *
- * @param input The wait input containing timeout options
- * @param options Optional handler configuration
- * @returns Promise resolving to wait result with notification page URL
- */
-export async function handleWaitForNotification(
-  input: WaitForNotificationInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<WaitForNotificationResult>> {
-  const sessionManager = getSessionManager();
-  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
-
-  return runTool<WaitForNotificationInput, WaitForNotificationResult>({
-    toolName: 'mm_wait_for_notification',
-    input,
-    options,
-
-    /**
-     * Executes the wait action for notification popup.
-     *
-     * @returns Promise resolving to wait result with notification page URL
-     */
-    execute: async () => {
-      const notificationPage =
-        await sessionManager.waitForNotificationPage(timeoutMs);
-      const pageUrl = notificationPage.url();
-
-      return {
-        found: true,
-        pageUrl,
-      };
-    },
-
-    classifyError: classifyNotificationError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object with timeout information
-     */
-    sanitizeInputForRecording: () => ({ timeoutMs }),
-  });
-}
-
-/**
- * Handles switching to a different tab by role or URL.
- *
- * @param input The switch input containing tab role or URL to match
- * @param options Optional handler configuration
- * @returns Promise resolving to switch result with active tab information
- */
-export async function handleSwitchToTab(
-  input: SwitchToTabInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<SwitchToTabResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-
-  if (!input.role && !input.url) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      'Either role or url must be provided',
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  return runTool<SwitchToTabInput, SwitchToTabResult>({
-    toolName: 'mm_switch_to_tab',
-    input,
-    options,
-
-    /**
-     * Executes the tab switch action.
-     *
-     * @param _context The tool execution context containing page and reference map
-     * @returns Promise resolving to switch result with active tab information
-     */
-    execute: async (_context) => {
-      const trackedPages = sessionManager.getTrackedPages();
-      const targetPage = trackedPages.find((trackedPage) => {
-        if (input.role) {
-          return trackedPage.role === input.role;
-        }
-        if (input.url) {
-          return trackedPage.url.startsWith(input.url);
-        }
-        return false;
-      });
-
-      if (!targetPage) {
-        const availableTabs = trackedPages.map((trackedPage) => ({
-          role: trackedPage.role,
-          url: trackedPage.url,
-        }));
-        throw new Error(
-          `No tab found matching: ${input.role ?? input.url}. Available tabs: ${JSON.stringify(availableTabs)}`,
-        );
-      }
-
-      await targetPage.page.bringToFront();
-      sessionManager.setActivePage(targetPage.page);
-
-      const updatedTrackedPages = sessionManager.getTrackedPages();
-      const activeTabInfo = updatedTrackedPages.find(
-        (trackedPage) => trackedPage.page === targetPage.page,
-      );
-
-      return {
-        switched: true,
-        activeTab: {
-          role: activeTabInfo?.role ?? 'other',
-          url: targetPage.page.url(),
-        },
-      };
-    },
-
-    classifyError: classifyTabError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object with role and URL information
-     */
-    sanitizeInputForRecording: () => ({
-      role: input.role,
-      url: input.url,
-    }),
-  });
-}
-
-/**
- * Handles closing a tab by role or URL.
- *
- * @param input The close input containing tab role or URL to match
- * @param options Optional handler configuration
- * @returns Promise resolving to close result with closed tab URL
- */
-export async function handleCloseTab(
-  input: CloseTabInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<CloseTabResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-
-  if (!input.role && !input.url) {
-    return createErrorResponse(
-      ErrorCodes.MM_INVALID_INPUT,
-      'Either role or url must be provided',
-      { input },
-      sessionId,
-      startTime,
-    );
-  }
-
-  return runTool<CloseTabInput, CloseTabResult>({
-    toolName: 'mm_close_tab',
-    input,
-    options,
-
-    /**
-     * Executes the tab close action.
-     *
-     * @param context The tool execution context containing page and reference map
-     * @returns Promise resolving to close result with closed tab URL
-     */
-    execute: async (context) => {
-      const trackedPages = sessionManager.getTrackedPages();
-      const targetPage = trackedPages.find((trackedPage) => {
-        if (input.role) {
-          return trackedPage.role === input.role;
-        }
-        if (input.url) {
-          return trackedPage.url.startsWith(input.url);
-        }
-        return false;
-      });
-
-      if (!targetPage) {
-        throw new Error(`No tab found matching: ${input.role ?? input.url}`);
-      }
-
-      const closedUrl = targetPage.url;
-
-      const currentActivePage = context.page;
-      if (targetPage.page === currentActivePage) {
-        const extensionPage = trackedPages.find(
-          (trackedPage) => trackedPage.role === 'extension',
-        );
-        if (extensionPage) {
-          await extensionPage.page.bringToFront();
-          sessionManager.setActivePage(extensionPage.page);
-        }
-      }
-
-      await targetPage.page.close();
-
-      return {
-        closed: true,
-        closedUrl,
-      };
-    },
-
-    classifyError: classifyTabError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object with role and URL information
-     */
-    sanitizeInputForRecording: () => ({
-      role: input.role,
-      url: input.url,
-    }),
-  });
-}
diff --git a/src/mcp-server/tools/registry.test.ts b/src/mcp-server/tools/registry.test.ts
deleted file mode 100644
index 084c489..0000000
--- a/src/mcp-server/tools/registry.test.ts
+++ /dev/null
@@ -1,156 +0,0 @@
-import { describe, it, expect } from 'vitest';
-
-import {
-  getToolHandler,
-  hasToolHandler,
-  buildToolHandlersRecord,
-  toolHandlers,
-} from './registry.js';
-
-describe('tool registry', () => {
-  describe('getToolHandler', () => {
-    it('returns handler for prefixed tool name', () => {
-      const handler = getToolHandler('mm_launch');
-
-      expect(handler).toBeDefined();
-      expect(typeof handler).toBe('function');
-    });
-
-    it('returns handler for base tool name', () => {
-      const handler = getToolHandler('launch');
-
-      expect(handler).toBeDefined();
-      expect(typeof handler).toBe('function');
-    });
-
-    it('returns undefined for unknown tool', () => {
-      const handler = getToolHandler('mm_unknown_tool');
-
-      expect(handler).toBeUndefined();
-    });
-
-    it('returns undefined for empty string', () => {
-      const handler = getToolHandler('');
-
-      expect(handler).toBeUndefined();
-    });
-
-    it('returns different handlers for different tools', () => {
-      const launchHandler = getToolHandler('mm_launch');
-      const cleanupHandler = getToolHandler('mm_cleanup');
-
-      expect(launchHandler).not.toBe(cleanupHandler);
-    });
-  });
-
-  describe('hasToolHandler', () => {
-    it('returns true for existing prefixed tool', () => {
-      const result = hasToolHandler('mm_click');
-
-      expect(result).toBe(true);
-    });
-
-    it('returns true for existing base tool', () => {
-      const result = hasToolHandler('click');
-
-      expect(result).toBe(true);
-    });
-
-    it('returns false for non-existent tool', () => {
-      const result = hasToolHandler('mm_nonexistent');
-
-      expect(result).toBe(false);
-    });
-
-    it('returns false for empty string', () => {
-      const result = hasToolHandler('');
-
-      expect(result).toBe(false);
-    });
-  });
-
-  describe('buildToolHandlersRecord', () => {
-    it('returns record with prefixed tool names', () => {
-      const handlers = buildToolHandlersRecord();
-
-      expect(handlers.mm_launch).toBeDefined();
-      expect(handlers.mm_cleanup).toBeDefined();
-      expect(handlers.mm_click).toBeDefined();
-      expect(handlers.mm_type).toBeDefined();
-    });
-
-    it('returns fresh record on each call', () => {
-      const handlers1 = buildToolHandlersRecord();
-      const handlers2 = buildToolHandlersRecord();
-
-      expect(handlers1).not.toBe(handlers2);
-      expect(handlers1).toStrictEqual(handlers2);
-    });
-
-    it('includes all 27 tools', () => {
-      const handlers = buildToolHandlersRecord();
-
-      expect(Object.keys(handlers)).toHaveLength(27);
-    });
-
-    it('all handlers are functions', () => {
-      const handlers = buildToolHandlersRecord();
-
-      for (const handler of Object.values(handlers)) {
-        expect(typeof handler).toBe('function');
-      }
-    });
-  });
-
-  describe('toolHandlers export', () => {
-    it('exports pre-built handlers record', () => {
-      expect(toolHandlers).toBeDefined();
-      expect(typeof toolHandlers).toBe('object');
-    });
-
-    it('contains all expected tools', () => {
-      const expectedTools = [
-        'mm_build',
-        'mm_launch',
-        'mm_cleanup',
-        'mm_get_state',
-        'mm_navigate',
-        'mm_wait_for_notification',
-        'mm_switch_to_tab',
-        'mm_close_tab',
-        'mm_list_testids',
-        'mm_accessibility_snapshot',
-        'mm_describe_screen',
-        'mm_screenshot',
-        'mm_click',
-        'mm_type',
-        'mm_wait_for',
-        'mm_knowledge_last',
-        'mm_knowledge_search',
-        'mm_knowledge_summarize',
-        'mm_knowledge_sessions',
-        'mm_seed_contract',
-        'mm_seed_contracts',
-        'mm_get_contract_address',
-        'mm_list_contracts',
-        'mm_run_steps',
-        'mm_set_context',
-        'mm_get_context',
-        'mm_clipboard',
-      ];
-
-      for (const tool of expectedTools) {
-        expect(toolHandlers[tool]).toBeDefined();
-        expect(typeof toolHandlers[tool]).toBe('function');
-      }
-    });
-
-    it('matches buildToolHandlersRecord output', () => {
-      const freshHandlers = buildToolHandlersRecord();
-
-      expect(Object.keys(toolHandlers)).toStrictEqual(
-        Object.keys(freshHandlers),
-      );
-    });
-  });
-});
diff --git a/src/mcp-server/tools/registry.ts b/src/mcp-server/tools/registry.ts
deleted file mode 100644
index 3b67886..0000000
--- a/src/mcp-server/tools/registry.ts
+++ /dev/null
@@ -1,10 +0,0 @@
-import { buildToolHandlersRecord } from './definitions.js';
-
-export {
-  getToolHandler,
-  hasToolHandler,
-  buildToolHandlersRecord,
-} from './definitions.js';
-export type { ToolHandler } from './batch.js';
-
-export const toolHandlers = buildToolHandlersRecord();
diff --git a/src/mcp-server/tools/run-tool.test.ts b/src/mcp-server/tools/run-tool.test.ts
deleted file mode 100644
index 3592062..0000000
--- a/src/mcp-server/tools/run-tool.test.ts
+++ /dev/null
@@ -1,958 +0,0 @@
-/**
- * Unit tests for the generic tool execution wrapper (runTool).
- *
- * Tests execution flow, observation collection policies, knowledge store recording,
- * error classification, timeout handling, and page closure detection.
- */
-
-import type { Page } from '@playwright/test';
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import { runTool } from './run-tool';
-import type { ToolExecutionConfig } from './run-tool';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils';
-import { ErrorCodes } from '../types';
-import * as helpersModule from './helpers.js';
-
-describe('runTool', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-  let mockKnowledgeStore: {
-    recordStep: ReturnType<typeof vi.fn>;
-    getLastSteps: ReturnType<typeof vi.fn>;
-    searchSteps: ReturnType<typeof vi.fn>;
-    summarizeSession: ReturnType<typeof vi.fn>;
-    listSessions: ReturnType<typeof vi.fn>;
-    generatePriorKnowledge: ReturnType<typeof vi.fn>;
-    writeSessionMetadata: ReturnType<typeof vi.fn>;
-  };
-  let mockPage: Page;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-      environmentMode: 'e2e',
-    });
-    mockPage = {
-      url: () => 'chrome-extension://test/home.html',
-      isClosed: () => false,
-    } as unknown as Page;
-    vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-    vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map());
-
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    mockKnowledgeStore = {
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    };
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue(
-      mockKnowledgeStore as any,
-    );
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('basic execution', () => {
-    it('executes tool and returns success response', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<{ value: string }, string> = {
-        toolName: 'mm_test_tool',
-        input: { value: 'test-input' },
-        execute: vi.fn().mockResolvedValue('success'),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result).toBe('success');
-        expect(result.meta.sessionId).toBe('test-session-123');
-        expect(result.meta.durationMs).toBeGreaterThanOrEqual(0);
-      }
-    });
-
-    it('passes context to execute function', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const executeFn = vi.fn().mockResolvedValue({ result: 'ok' });
-      const config: ToolExecutionConfig<{ value: string }, { result: string }> =
-        {
-          toolName: 'mm_test_tool',
-          input: { value: 'test' },
-          execute: executeFn,
-        };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(executeFn).toHaveBeenCalledWith({
-        sessionId: 'test-session-123',
-        page: mockPage,
-        refMap: expect.any(Map),
-        startTime: expect.any(Number),
-      });
-    });
-
-    it('handles ToolExecuteResult with custom observation', async () => {
-      // Arrange
-      const customObservation = {
-        state: { isLoaded: true } as any,
-        testIds: [{ testId: 'custom', tag: 'div', text: '', visible: true }],
-        a11y: { nodes: [] },
-      };
-      const config: ToolExecutionConfig<object, { data: string }> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        observationPolicy: 'custom',
-        execute: vi.fn().mockResolvedValue({
-          result: { data: 'test' },
-          observation: customObservation,
-        }),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result).toStrictEqual({ data: 'test' });
-      }
-      expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith(
-        expect.objectContaining({
-          observation: customObservation,
-        }),
-      );
-    });
-  });
-
-  describe('session validation', () => {
-    it('returns error when no active session and requiresSession is true', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        requiresSession: true,
-        execute: vi.fn(),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        expect(result.error.message).toBe(
-          'No active session. Call launch first.',
-        );
-      }
-      expect(config.execute).not.toHaveBeenCalled();
-    });
-
-    it('executes tool when no active session but requiresSession is false', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-      const executeFn = vi.fn().mockResolvedValue({ done: true });
-      const config: ToolExecutionConfig<object, { done: boolean }> = {
-        toolName: 'mm_build',
-        input: {},
-        requiresSession: false,
-        execute: executeFn,
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(true);
-      expect(executeFn).toHaveBeenCalled();
-    });
-
-    it('defaults requiresSession to true when not specified', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_click',
-        input: {},
-        execute: vi.fn(),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-      }
-    });
-  });
-
-  describe('observation policies', () => {
-    describe('policy: none', () => {
-      it('collects minimal observation on success', async () => {
-        // Arrange
-        const collectObservationSpy = vi
-          .spyOn(helpersModule, 'collectObservation')
-          .mockResolvedValue({
-            state: {} as any,
-            testIds: [],
-            a11y: { nodes: [] },
-          });
-        const config: ToolExecutionConfig<object, object> = {
-          toolName: 'mm_test_tool',
-          input: {},
-          observationPolicy: 'none',
-          execute: vi.fn().mockResolvedValue({}),
-        };
-
-        // Act
-        await runTool(config);
-
-        // Assert
-        expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'minimal');
-      });
-    });
-
-    describe('policy: default', () => {
-      it('collects full observation on success', async () => {
-        // Arrange
-        const collectObservationSpy = vi
-          .spyOn(helpersModule, 'collectObservation')
-          .mockResolvedValue({
-            state: {} as any,
-            testIds: [],
-            a11y: { nodes: [] },
-          });
-        const config: ToolExecutionConfig<object, object> = {
-          toolName: 'mm_test_tool',
-          input: {},
-          observationPolicy: 'default',
-          execute: vi.fn().mockResolvedValue({}),
-        };
-
-        // Act
-        await runTool(config);
-
-        // Assert
-        expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'full');
-      });
-    });
-
-    describe('policy: failures', () => {
-      it('collects minimal observation on success', async () => {
-        // Arrange
-        const collectObservationSpy = vi
-          .spyOn(helpersModule, 'collectObservation')
-          .mockResolvedValue({
-            state: {} as any,
-            testIds: [],
-            a11y: { nodes: [] },
-          });
-        const config: ToolExecutionConfig<object, object> = {
-          toolName: 'mm_test_tool',
-          input: {},
-          observationPolicy: 'failures',
-          execute: vi.fn().mockResolvedValue({}),
-        };
-
-        // Act
-        await runTool(config);
-
-        // Assert
-        expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'minimal');
-      });
-
-      it('collects full observation on failure', async () => {
-        // Arrange
-        const collectObservationSpy = vi
-          .spyOn(helpersModule, 'collectObservation')
-          .mockResolvedValue({
-            state: {} as any,
-            testIds: [],
-            a11y: { nodes: [] },
-          });
-        const config: ToolExecutionConfig<object, object> = {
-          toolName: 'mm_test_tool',
-          input: {},
-          observationPolicy: 'failures',
-          execute: vi.fn().mockRejectedValue(new Error('Test failure')),
-        };
-
-        // Act
-        await runTool(config);
-
-        // Assert
-        expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'full');
-      });
-    });
-
-    describe('policy: custom', () => {
-      it('uses observation from execute result', async () => {
-        // Arrange
-        const customObservation = {
-          state: { isLoaded: true } as any,
-          testIds: [],
-          a11y: {
-            nodes: [{ ref: 'e1', role: 'button', name: 'Test', path: [] }],
-          },
-        };
-        const collectObservationSpy = vi.spyOn(
-          helpersModule,
-          'collectObservation',
-        );
-        const config: ToolExecutionConfig<object, { data: string }> = {
-          toolName: 'mm_test_tool',
-          input: {},
-          observationPolicy: 'custom',
-          execute: vi.fn().mockResolvedValue({
-            result: { data: 'test' },
-            observation: customObservation,
-          }),
-        };
-
-        // Act
-        await runTool(config);
-
-        // Assert
-        expect(collectObservationSpy).not.toHaveBeenCalled();
-        expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith(
-          expect.objectContaining({
-            observation: customObservation,
-          }),
-        );
-      });
-    });
-
-    it('uses options.observationPolicy over config.observationPolicy', async () => {
-      // Arrange
-      const collectObservationSpy = vi
-        .spyOn(helpersModule, 'collectObservation')
-        .mockResolvedValue({
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        observationPolicy: 'default',
-        options: { observationPolicy: 'none' },
-        execute: vi.fn().mockResolvedValue({}),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'minimal');
-    });
-
-    it('skips observation collection when requiresSession is false', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-      const collectObservationSpy = vi.spyOn(
-        helpersModule,
-        'collectObservation',
-      );
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_build',
-        input: {},
-        requiresSession: false,
-        observationPolicy: 'default',
-        execute: vi.fn().mockResolvedValue({}),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(collectObservationSpy).not.toHaveBeenCalled();
-    });
-  });
-
-  describe('knowledge store recording', () => {
-    it('records successful step with all parameters', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<
-        { testId: string },
-        { clicked: boolean }
-      > = {
-        toolName: 'mm_click',
-        input: { testId: 'send-button' },
-        execute: vi.fn().mockResolvedValue({ clicked: true }),
-        getTarget: (input) => ({ testId: input.testId }),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith({
-        sessionId: 'test-session-123',
-        toolName: 'mm_click',
-        input: { testId: 'send-button' },
-        target: { testId: 'send-button' },
-        outcome: { ok: true },
-        observation: expect.any(Object),
-        durationMs: expect.any(Number),
-        context: 'e2e',
-      });
-    });
-
-    it('records failed step with error details', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<{ testId: string }, object> = {
-        toolName: 'mm_click',
-        input: { testId: 'missing-button' },
-        execute: vi.fn().mockRejectedValue(new Error('Element not found')),
-        getTarget: (input) => ({ testId: input.testId }),
-        classifyError: () => ({
-          code: 'MM_TARGET_NOT_FOUND',
-          message: 'Element not found',
-        }),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith({
-        sessionId: 'test-session-123',
-        toolName: 'mm_click',
-        input: { testId: 'missing-button' },
-        target: { testId: 'missing-button' },
-        outcome: {
-          ok: false,
-          error: { code: 'MM_TARGET_NOT_FOUND', message: 'Element not found' },
-        },
-        observation: expect.any(Object),
-        durationMs: expect.any(Number),
-        context: 'e2e',
-      });
-    });
-
-    it('uses sanitizeInputForRecording when provided', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<
-        { action: string; text: string },
-        { success: boolean }
-      > = {
-        toolName: 'mm_clipboard',
-        input: { action: 'write', text: 'secret-srp-phrase' },
-        execute: vi.fn().mockResolvedValue({ success: true }),
-        sanitizeInputForRecording: (input) => ({
-          action: input.action,
-          textLength: input.text.length,
-        }),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith(
-        expect.objectContaining({
-          input: { action: 'write', textLength: 17 },
-        }),
-      );
-    });
-
-    it('skips recording when sessionId is undefined', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined);
-      vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true);
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        execute: vi.fn().mockResolvedValue({}),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(mockKnowledgeStore.recordStep).not.toHaveBeenCalled();
-    });
-  });
-
-  describe('error classification', () => {
-    it('uses classifyError when provided', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_click',
-        input: {},
-        execute: vi
-          .fn()
-          .mockRejectedValue(new Error('Timeout waiting for selector')),
-        classifyError: () => ({
-          code: 'MM_WAIT_TIMEOUT',
-          message: 'Element wait timeout',
-        }),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe('MM_WAIT_TIMEOUT');
-        expect(result.error.message).toBe('Element wait timeout');
-      }
-    });
-
-    it('generates default error code when classifyError not provided', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_my_tool',
-        input: {},
-        execute: vi.fn().mockRejectedValue(new Error('Something went wrong')),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe('MM_MY_TOOL_FAILED');
-        expect(result.error.message).toBe('Something went wrong');
-      }
-    });
-
-    it('removes MM_ prefix when generating default error code', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_click',
-        input: {},
-        execute: vi.fn().mockRejectedValue(new Error('Click failed')),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe('MM_CLICK_FAILED');
-      }
-    });
-  });
-
-  describe('error handling', () => {
-    it('returns error response when execute throws', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<{ testId: string }, object> = {
-        toolName: 'mm_click',
-        input: { testId: 'test-button' },
-        execute: vi.fn().mockRejectedValue(new Error('Execution failed')),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.message).toBe('Execution failed');
-        expect(result.error.details).toStrictEqual({
-          input: { testId: 'test-button' },
-        });
-      }
-    });
-
-    it('collects full observation on failure with default policy', async () => {
-      // Arrange
-      const collectObservationSpy = vi
-        .spyOn(helpersModule, 'collectObservation')
-        .mockResolvedValue({
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_click',
-        input: {},
-        observationPolicy: 'default',
-        execute: vi.fn().mockRejectedValue(new Error('Failed')),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'full');
-    });
-
-    it('collects minimal observation on failure with none policy', async () => {
-      // Arrange
-      const collectObservationSpy = vi
-        .spyOn(helpersModule, 'collectObservation')
-        .mockResolvedValue({
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_click',
-        input: {},
-        observationPolicy: 'none',
-        execute: vi.fn().mockRejectedValue(new Error('Failed')),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(collectObservationSpy).toHaveBeenCalledWith(undefined, 'minimal');
-    });
-
-    it('handles observation collection failure gracefully', async () => {
-      // Arrange
-      const collectObservationSpy = vi
-        .spyOn(helpersModule, 'collectObservation')
-        .mockRejectedValueOnce(new Error('Page closed'))
-        .mockResolvedValue({
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_click',
-        input: {},
-        observationPolicy: 'failures',
-        execute: vi.fn().mockRejectedValue(new Error('Execution failed')),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.message).toBe('Execution failed');
-      }
-      expect(collectObservationSpy).toHaveBeenCalled();
-    });
-  });
-
-  describe('page closure detection', () => {
-    it('creates empty observation when page is closed during failure handling', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true);
-      const collectObservationSpy = vi
-        .spyOn(helpersModule, 'collectObservation')
-        .mockRejectedValueOnce(
-          new Error('Target page, context or browser has been closed'),
-        )
-        .mockResolvedValue({
-          state: {} as any,
-          testIds: [],
-          a11y: { nodes: [] },
-        });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_click',
-        input: {},
-        observationPolicy: 'default',
-        execute: vi.fn().mockRejectedValue(new Error('Click failed')),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(collectObservationSpy).toHaveBeenCalledTimes(2);
-      expect(collectObservationSpy).toHaveBeenLastCalledWith(
-        undefined,
-        'minimal',
-      );
-    });
-  });
-
-  describe('timeout handling', () => {
-    it('includes duration in response even on timeout error', async () => {
-      // Arrange
-      vi.useFakeTimers();
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_wait_for',
-        input: {},
-        execute: vi.fn().mockImplementation(async () => {
-          await new Promise((resolve) => setTimeout(resolve, 100));
-          throw new Error('Timeout waiting for element');
-        }),
-        classifyError: () => ({
-          code: 'MM_WAIT_TIMEOUT',
-          message: 'Wait timeout',
-        }),
-      };
-
-      // Act
-      const resultPromise = runTool(config);
-      await vi.advanceTimersByTimeAsync(100);
-      const result = await resultPromise;
-
-      // Assert
-      expect(result.ok).toBe(false);
-      expect(result.meta.durationMs).toBe(100);
-
-      // Cleanup
-      vi.useRealTimers();
-    });
-  });
-
-  describe('getTarget function', () => {
-    it('extracts target from input when getTarget provided', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<
-        { testId?: string; selector?: string; a11yRef?: string },
-        object
-      > = {
-        toolName: 'mm_click',
-        input: { testId: 'send-button', selector: '.btn' },
-        execute: vi.fn().mockResolvedValue({}),
-        getTarget: (input) => ({
-          testId: input.testId,
-          selector: input.selector,
-          a11yRef: input.a11yRef,
-        }),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith(
-        expect.objectContaining({
-          target: {
-            testId: 'send-button',
-            selector: '.btn',
-            a11yRef: undefined,
-          },
-        }),
-      );
-    });
-
-    it('records undefined target when getTarget not provided', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<{ testId: string }, object> = {
-        toolName: 'mm_click',
-        input: { testId: 'send-button' },
-        execute: vi.fn().mockResolvedValue({}),
-      };
-
-      // Act
-      await runTool(config);
-
-      // Assert
-      expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith(
-        expect.objectContaining({
-          target: undefined,
-        }),
-      );
-    });
-  });
-
-  describe('isToolExecuteResult type guard', () => {
-    it('handles plain result (not ToolExecuteResult)', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, { simple: string }> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        execute: vi.fn().mockResolvedValue({ simple: 'value' }),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result).toStrictEqual({ simple: 'value' });
-      }
-    });
-
-    it('handles ToolExecuteResult wrapper', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, { wrapped: string }> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        execute: vi.fn().mockResolvedValue({
-          result: { wrapped: 'value' },
-        }),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result).toStrictEqual({ wrapped: 'value' });
-      }
-    });
-
-    it('handles null result', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, null> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        execute: vi.fn().mockResolvedValue(null),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result).toBeNull();
-      }
-    });
-
-    it('handles primitive result', async () => {
-      // Arrange
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, string> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        execute: vi.fn().mockResolvedValue('string-result'),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result).toBe('string-result');
-      }
-    });
-  });
-
-  describe('createEmptyObservation', () => {
-    it('creates empty observation when session has no ID on failure', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined);
-      vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({
-        state: {} as any,
-        testIds: [],
-        a11y: { nodes: [] },
-      });
-      const config: ToolExecutionConfig<object, object> = {
-        toolName: 'mm_test_tool',
-        input: {},
-        execute: vi.fn().mockRejectedValue(new Error('Failed')),
-      };
-
-      // Act
-      const result = await runTool(config);
-
-      // Assert
-      expect(result.ok).toBe(false);
-      expect(mockKnowledgeStore.recordStep).not.toHaveBeenCalled();
-    });
-  });
-});
diff --git a/src/mcp-server/tools/run-tool.ts b/src/mcp-server/tools/run-tool.ts
deleted file mode 100644
index d74c206..0000000
--- a/src/mcp-server/tools/run-tool.ts
+++ /dev/null
@@ -1,220 +0,0 @@
-import type { Page } from '@playwright/test';
-
-import type { ExtensionState } from '../../capabilities/types.js';
-import { knowledgeStore } from '../knowledge-store.js';
-import { getSessionManager } from '../session-manager.js';
-import { collectObservation } from './helpers.js';
-import type {
-  McpResponse,
-  HandlerOptions,
-  StepRecordObservation,
-  ErrorCode,
-} from '../types';
-import { ErrorCodes } from '../types';
-import {
-  createSuccessResponse,
-  createErrorResponse,
-  extractErrorMessage,
-  debugWarn,
-} from '../utils';
-
-/**
- * Creates an empty observation object for step recording.
- *
- * @returns Empty observation with default state, testIds, and a11y nodes
- */
-function createEmptyObservation(): StepRecordObservation {
-  return {
-    state: {} as ExtensionState,
-    testIds: [],
-    a11y: { nodes: [] },
-  };
-}
-
-export type ObservationPolicy = 'none' | 'default' | 'custom' | 'failures';
-
-export type ToolExecutionContext = {
-  sessionId: string | undefined;
-  page: Page;
-  refMap: Map<string, string>;
-  startTime: number;
-};
-
-export type ToolExecuteResult<TResult> = {
-  result: TResult;
-  observation?: StepRecordObservation;
-};
-
-export type ToolExecutionConfig<TInput, TResult> = {
-  toolName: string;
-  input: TInput;
-  options?: HandlerOptions;
-  requiresSession?: boolean;
-  observationPolicy?: ObservationPolicy;
-  execute: (
-    context: ToolExecutionContext,
-  ) => Promise<TResult | ToolExecuteResult<TResult>>;
-  classifyError?: (error: unknown) => {
-    code: string;
-    message: string;
-  };
-  getTarget?: (input: TInput) =>
-    | {
-        testId?: string;
-        selector?: string;
-        a11yRef?: string;
-      }
-    | undefined;
-  sanitizeInputForRecording?: (input: TInput) => Record<string, unknown>;
-};
-
-/**
- * Type guard to check if result is a ToolExecuteResult with observation.
- *
- * @param result The result to check
- * @returns True if result is a ToolExecuteResult with observation property
- */
-function isToolExecuteResult<TResult>(
-  result: TResult | ToolExecuteResult<TResult>,
-): result is ToolExecuteResult<TResult> {
-  return (
-    typeof result === 'object' &&
-    result !== null &&
-    'result' in result &&
-    Object.prototype.hasOwnProperty.call(result, 'result')
-  );
-}
-
-/**
- * Executes a tool with error handling, observation collection, and knowledge store recording.
- *
- * @param config The tool execution configuration with input, execute function, and error handling
- * @returns Promise resolving to MCP response with tool result or error information
- */
-export async function runTool<TInput, TResult>(
-  config: ToolExecutionConfig<TInput, TResult>,
-): Promise<McpResponse<TResult>> {
-  const startTime = Date.now();
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-  const requiresSession = config.requiresSession ?? true;
-
-  const effectivePolicy =
-    config.options?.observationPolicy ?? config.observationPolicy ?? 'default';
-
-  try {
-    if (requiresSession && !sessionManager.hasActiveSession()) {
-      return createErrorResponse(
-        ErrorCodes.MM_NO_ACTIVE_SESSION,
-        'No active session. Call launch first.',
-        { input: config.input },
-        undefined,
-        startTime,
-      );
-    }
-
-    const context: ToolExecutionContext = {
-      sessionId,
-      page: requiresSession ? sessionManager.getPage() : (undefined as never),
-      refMap: requiresSession ? sessionManager.getRefMap() : new Map(),
-      startTime,
-    };
-
-    const executeResult = await config.execute(context);
-
-    let result: TResult;
-    let customObservation: StepRecordObservation | undefined;
-
-    if (isToolExecuteResult<TResult>(executeResult)) {
-      result = executeResult.result;
-      customObservation = executeResult.observation;
-    } else {
-      result = executeResult;
-    }
-
-    let observation: StepRecordObservation | undefined;
-
-    if (effectivePolicy === 'custom' && customObservation) {
-      observation = customObservation;
-    } else if (effectivePolicy === 'default' && requiresSession) {
-      observation = await collectObservation(context.page, 'full');
-    } else if (
-      (effectivePolicy === 'none' || effectivePolicy === 'failures') &&
-      requiresSession
-    ) {
-      observation = await collectObservation(context.page, 'minimal');
-    }
-
-    if (sessionId) {
-      const recordInput = config.sanitizeInputForRecording
-        ? config.sanitizeInputForRecording(config.input)
-        : (config.input as Record<string, unknown>);
-
-      await knowledgeStore.recordStep({
-        sessionId,
-        toolName: config.toolName,
-        input: recordInput,
-        target: config.getTarget?.(config.input),
-        outcome: { ok: true },
-        observation: observation ?? createEmptyObservation(),
-        durationMs: Date.now() - startTime,
-        context: sessionManager.getEnvironmentMode(),
-      });
-    }
-
-    return createSuccessResponse<TResult>(result, sessionId, startTime);
-  } catch (error) {
-    const errorInfo = config.classifyError?.(error) ?? {
-      code: `MM_${config.toolName.toUpperCase().replace(/^MM_/u, '')}_FAILED`,
-      message: extractErrorMessage(error),
-    };
-
-    let failureObservation: StepRecordObservation = createEmptyObservation();
-
-    if (requiresSession && sessionManager.hasActiveSession()) {
-      if (effectivePolicy === 'failures' || effectivePolicy === 'default') {
-        try {
-          const page = sessionManager.getPage();
-          failureObservation = await collectObservation(page, 'full');
-        } catch (collectError) {
-          debugWarn('run-tool.collectObservation', collectError);
-          failureObservation = await collectObservation(undefined, 'minimal');
-        }
-      } else if (effectivePolicy === 'none') {
-        try {
-          failureObservation = await collectObservation(undefined, 'minimal');
-        } catch (collectError) {
-          debugWarn('run-tool.collectObservation', collectError);
-        }
-      }
-    }
-
-    if (sessionId) {
-      const recordInput = config.sanitizeInputForRecording
-        ? config.sanitizeInputForRecording(config.input)
-        : (config.input as Record<string, unknown>);
-
-      await knowledgeStore.recordStep({
-        sessionId,
-        toolName: config.toolName,
-        input: recordInput,
-        target: config.getTarget?.(config.input),
-        outcome: {
-          ok: false,
-          error: { code: errorInfo.code, message: errorInfo.message },
-        },
-        observation: failureObservation,
-        durationMs: Date.now() - startTime,
-        context: sessionManager.getEnvironmentMode(),
-      });
-    }
-
-    return createErrorResponse(
-      errorInfo.code as ErrorCode,
-      errorInfo.message,
-      { input: config.input },
-      sessionId,
-      startTime,
-    );
-  }
-}
diff --git a/src/mcp-server/tools/screenshot.test.ts b/src/mcp-server/tools/screenshot.test.ts
deleted file mode 100644
index b21fda8..0000000
--- a/src/mcp-server/tools/screenshot.test.ts
+++ /dev/null
@@ -1,307 +0,0 @@
-/**
- * Unit tests for screenshot tool handler.
- *
- * Tests handleScreenshot with various options including base64 encoding,
- * selector scoping, and error handling.
- */
-
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import { handleScreenshot } from './screenshot.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils';
-import { ErrorCodes } from '../types/errors.js';
-
-describe('screenshot', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-      sessionMetadata: {
-        schemaVersion: 1,
-        sessionId: 'test-session-123',
-        createdAt: new Date().toISOString(),
-        flowTags: [],
-        tags: [],
-        launch: { stateMode: 'default' },
-      },
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-    // Mock knowledge store to prevent "not initialized" errors
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('handleScreenshot', () => {
-    describe('basic screenshot', () => {
-      it('captures full page screenshot by default', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({
-          path: '/path/to/screenshot.png',
-          width: 1280,
-          height: 720,
-          base64: undefined,
-        });
-
-        // Act
-        const result = await handleScreenshot({ name: 'test-screenshot' });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.path).toBe('/path/to/screenshot.png');
-          expect(result.result.width).toBe(1280);
-          expect(result.result.height).toBe(720);
-          expect(result.result.base64).toBeUndefined();
-        }
-        expect(mockSessionManager.screenshot).toHaveBeenCalledWith({
-          name: 'test-screenshot',
-          fullPage: true,
-          selector: undefined,
-        });
-      });
-
-      it('captures viewport-only screenshot when fullPage is false', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({
-          path: '/path/to/screenshot.png',
-          width: 1280,
-          height: 720,
-          base64: undefined,
-        });
-
-        // Act
-        const result = await handleScreenshot({
-          name: 'viewport-screenshot',
-          fullPage: false,
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        expect(mockSessionManager.screenshot).toHaveBeenCalledWith({
-          name: 'viewport-screenshot',
-          fullPage: false,
-          selector: undefined,
-        });
-      });
-    });
-
-    describe('with base64 encoding', () => {
-      it('includes base64 when includeBase64 is true', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({
-          path: '/path/to/screenshot.png',
-          width: 1280,
-          height: 720,
-          base64:
-            'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
-        });
-
-        // Act
-        const result = await handleScreenshot({
-          name: 'base64-screenshot',
-          includeBase64: true,
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.base64).toBe(
-            'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
-          );
-        }
-      });
-
-      it('excludes base64 when includeBase64 is false', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({
-          path: '/path/to/screenshot.png',
-          width: 1280,
-          height: 720,
-          base64:
-            'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
-        });
-
-        // Act
-        const result = await handleScreenshot({
-          name: 'no-base64-screenshot',
-          includeBase64: false,
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.base64).toBeUndefined();
-        }
-      });
-    });
-
-    describe('with selector scoping', () => {
-      it('captures screenshot of specific element', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({
-          path: '/path/to/element-screenshot.png',
-          width: 400,
-          height: 200,
-          base64: undefined,
-        });
-
-        // Act
-        const result = await handleScreenshot({
-          name: 'element-screenshot',
-          selector: '[data-testid="account-menu"]',
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.width).toBe(400);
-          expect(result.result.height).toBe(200);
-        }
-        expect(mockSessionManager.screenshot).toHaveBeenCalledWith({
-          name: 'element-screenshot',
-          fullPage: true,
-          selector: '[data-testid="account-menu"]',
-        });
-      });
-
-      it('combines selector with fullPage false', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({
-          path: '/path/to/element-screenshot.png',
-          width: 400,
-          height: 200,
-          base64: undefined,
-        });
-
-        // Act
-        const result = await handleScreenshot({
-          name: 'element-viewport-screenshot',
-          selector: '.modal-content',
-          fullPage: false,
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        expect(mockSessionManager.screenshot).toHaveBeenCalledWith({
-          name: 'element-viewport-screenshot',
-          fullPage: false,
-          selector: '.modal-content',
-        });
-      });
-    });
-
-    describe('error handling', () => {
-      it('returns error when no active session', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleScreenshot({ name: 'test-screenshot' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-
-      it('returns error when screenshot fails', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockRejectedValue(
-          new Error('Screenshot failed'),
-        );
-
-        // Act
-        const result = await handleScreenshot({ name: 'test-screenshot' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_SCREENSHOT_FAILED);
-          expect(result.error.message).toContain('Screenshot failed');
-        }
-      });
-
-      it('returns error when page is closed', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockRejectedValue(
-          new Error('Target page, context or browser has been closed'),
-        );
-
-        // Act
-        const result = await handleScreenshot({ name: 'test-screenshot' });
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED);
-        }
-      });
-    });
-
-    describe('input sanitization', () => {
-      it('sanitizes input for knowledge store recording', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({
-          path: '/path/to/screenshot.png',
-          width: 1280,
-          height: 720,
-          base64: 'very-long-base64-string-that-should-not-be-recorded',
-        });
-
-        const recordStepMock = vi.fn().mockResolvedValue(undefined);
-        vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue(
-          {
-            recordStep: recordStepMock,
-            getLastSteps: vi.fn().mockResolvedValue([]),
-            searchSteps: vi.fn().mockResolvedValue([]),
-            summarizeSession: vi.fn().mockResolvedValue({
-              sessionId: 'test',
-              stepCount: 0,
-              recipe: [],
-            }),
-            listSessions: vi.fn().mockResolvedValue([]),
-            generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-            writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-          } as any,
-        );
-
-        // Act
-        await handleScreenshot({
-          name: 'test-screenshot',
-          includeBase64: true,
-          selector: '[data-testid="test"]',
-        });
-
-        // Assert
-        expect(recordStepMock).toHaveBeenCalled();
-        const recordedInput = recordStepMock.mock.calls[0][0].input;
-        expect(recordedInput).toStrictEqual({
-          name: 'test-screenshot',
-          fullPage: undefined,
-          selector: '[data-testid="test"]',
-        });
-        expect(recordedInput.includeBase64).toBeUndefined();
-      });
-    });
-  });
-});
diff --git a/src/mcp-server/tools/screenshot.ts b/src/mcp-server/tools/screenshot.ts
deleted file mode 100644
index d6696ad..0000000
--- a/src/mcp-server/tools/screenshot.ts
+++ /dev/null
@@ -1,67 +0,0 @@
-import { getSessionManager } from '../session-manager.js';
-import { classifyScreenshotError } from './error-classification.js';
-import { runTool } from './run-tool.js';
-import type {
-  ScreenshotInput,
-  ScreenshotToolResult,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-
-/**
- * Handles the screenshot tool request.
- *
- * @param input - The screenshot input parameters.
- * @param options - Handler options including abort signal.
- * @returns Response with screenshot path and dimensions.
- */
-export async function handleScreenshot(
-  input: ScreenshotInput,
-  options?: HandlerOptions,
-): Promise<McpResponse<ScreenshotToolResult>> {
-  return runTool<ScreenshotInput, ScreenshotToolResult>({
-    toolName: 'mm_screenshot',
-    input,
-    options,
-    observationPolicy: 'none',
-
-    /**
-     * Executes the screenshot capture.
-     *
-     * @returns The screenshot result.
-     */
-    execute: async () => {
-      const sessionManager = getSessionManager();
-      const result = await sessionManager.screenshot({
-        name: input.name,
-        fullPage: input.fullPage ?? true,
-        selector: input.selector,
-      });
-
-      const response: ScreenshotToolResult = {
-        path: result.path,
-        width: result.width,
-        height: result.height,
-      };
-
-      if (input.includeBase64) {
-        response.base64 = result.base64;
-      }
-
-      return response;
-    },
-
-    classifyError: classifyScreenshotError,
-
-    /**
-     * Sanitizes input for knowledge store recording.
-     *
-     * @returns Sanitized input object.
-     */
-    sanitizeInputForRecording: () => ({
-      name: input.name,
-      fullPage: input.fullPage,
-      selector: input.selector,
-    }),
-  });
-}
diff --git a/src/mcp-server/tools/seeding.test.ts b/src/mcp-server/tools/seeding.test.ts
deleted file mode 100644
index e77efbb..0000000
--- a/src/mcp-server/tools/seeding.test.ts
+++ /dev/null
@@ -1,552 +0,0 @@
-/**
- * Unit tests for seeding tool handlers.
- *
- * Tests contract deployment handlers including single/multiple contract deployment,
- * address lookup, and contract listing with ContractSeedingCapability.
- */
-
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import {
-  handleSeedContract,
-  handleSeedContracts,
-  handleGetContractAddress,
-  handleListDeployedContracts,
-} from './seeding.js';
-import type { ContractSeedingCapability } from '../../capabilities/types.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils';
-import { ErrorCodes } from '../types/errors.js';
-
-describe('seeding', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-  let mockSeedingCapability: ContractSeedingCapability;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-      sessionMetadata: {
-        schemaVersion: 1,
-        sessionId: 'test-session-123',
-        createdAt: new Date().toISOString(),
-        flowTags: [],
-        tags: [],
-        launch: { stateMode: 'default' },
-      },
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-    // Mock knowledge store to prevent "not initialized" errors
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-
-    // Create fresh mock seeding capability
-    mockSeedingCapability = {
-      deployContract: vi.fn(),
-      deployContracts: vi.fn(),
-      getContractAddress: vi.fn(),
-      listDeployedContracts: vi.fn(),
-      getAvailableContracts: vi.fn(),
-      clearRegistry: vi.fn(),
-      initialize: vi.fn(),
-    };
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('handleSeedContract', () => {
-    it('deploys a single contract successfully', async () => {
-      // Arrange
-      const deployedAt = new Date().toISOString();
-      const mockedDeployContract = vi
-        .spyOn(mockSeedingCapability, 'deployContract')
-        .mockResolvedValue({
-          name: 'hst',
-          address: '0x1234567890123456789012345678901234567890',
-          deployedAt,
-        });
-
-      // Act
-      const result = await handleSeedContract(
-        { contractName: 'hst' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.contractName).toBe('hst');
-        expect(result.result.contractAddress).toBe(
-          '0x1234567890123456789012345678901234567890',
-        );
-        expect(result.result.deployedAt).toBe(deployedAt);
-      }
-      expect(mockedDeployContract).toHaveBeenCalledWith('hst', {
-        hardfork: undefined,
-        deployerOptions: undefined,
-      });
-    });
-
-    it('deploys contract with custom hardfork', async () => {
-      // Arrange
-      const deployedAt = new Date().toISOString();
-      const mockedDeployContract = vi
-        .spyOn(mockSeedingCapability, 'deployContract')
-        .mockResolvedValue({
-          name: 'nfts',
-          address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd',
-          deployedAt,
-        });
-
-      // Act
-      const result = await handleSeedContract(
-        { contractName: 'nfts', hardfork: 'shanghai' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      expect(mockedDeployContract).toHaveBeenCalledWith('nfts', {
-        hardfork: 'shanghai',
-        deployerOptions: undefined,
-      });
-    });
-
-    it('deploys contract with deployer options', async () => {
-      // Arrange
-      const deployedAt = new Date().toISOString();
-      const mockedDeployContract = vi
-        .spyOn(mockSeedingCapability, 'deployContract')
-        .mockResolvedValue({
-          name: 'piggybank',
-          address: '0x9876543210987654321098765432109876543210',
-          deployedAt,
-        });
-
-      // Act
-      const result = await handleSeedContract(
-        {
-          contractName: 'piggybank',
-          deployerOptions: {
-            fromAddress: '0x1111111111111111111111111111111111111111',
-          },
-        },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      expect(mockedDeployContract).toHaveBeenCalledWith('piggybank', {
-        hardfork: undefined,
-        deployerOptions: {
-          fromAddress: '0x1111111111111111111111111111111111111111',
-        },
-      });
-    });
-
-    it('returns error when seeding capability not available', async () => {
-      // Act
-      const result = await handleSeedContract(
-        { contractName: 'hst' },
-        { seedingCapability: undefined },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
-        expect(result.error.message).toContain(
-          'ContractSeedingCapability not available',
-        );
-      }
-    });
-
-    it('returns error when deployment fails', async () => {
-      // Arrange
-      vi.spyOn(mockSeedingCapability, 'deployContract').mockRejectedValue(
-        new Error('Contract not found: unknown'),
-      );
-
-      // Act
-      const result = await handleSeedContract(
-        { contractName: 'hst' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_CONTRACT_NOT_FOUND);
-        expect(result.error.message).toContain('Contract not found');
-      }
-    });
-
-    it('returns error when deployment fails with generic error', async () => {
-      // Arrange
-      vi.spyOn(mockSeedingCapability, 'deployContract').mockRejectedValue(
-        new Error('Deployment failed'),
-      );
-
-      // Act
-      const result = await handleSeedContract(
-        { contractName: 'hst' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED);
-        expect(result.error.message).toContain('Deployment failed');
-      }
-    });
-  });
-
-  describe('handleSeedContracts', () => {
-    it('deploys multiple contracts successfully', async () => {
-      // Arrange
-      const deployedAt1 = new Date().toISOString();
-      const deployedAt2 = new Date(Date.now() + 1000).toISOString();
-      const mockedDeployContracts = vi
-        .spyOn(mockSeedingCapability, 'deployContracts')
-        .mockResolvedValue({
-          deployed: [
-            {
-              name: 'hst',
-              address: '0x1234567890123456789012345678901234567890',
-              deployedAt: deployedAt1,
-            },
-            {
-              name: 'nfts',
-              address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd',
-              deployedAt: deployedAt2,
-            },
-          ],
-          failed: [],
-        });
-
-      // Act
-      const result = await handleSeedContracts(
-        { contracts: ['hst', 'nfts'] },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.deployed).toHaveLength(2);
-        expect(result.result.deployed[0].contractName).toBe('hst');
-        expect(result.result.deployed[1].contractName).toBe('nfts');
-        expect(result.result.failed).toHaveLength(0);
-      }
-      expect(mockedDeployContracts).toHaveBeenCalledWith(['hst', 'nfts'], {
-        hardfork: undefined,
-      });
-    });
-
-    it('deploys contracts with custom hardfork', async () => {
-      // Arrange
-      const deployedAt = new Date().toISOString();
-      const mockedDeployContracts = vi
-        .spyOn(mockSeedingCapability, 'deployContracts')
-        .mockResolvedValue({
-          deployed: [
-            {
-              name: 'hst',
-              address: '0x1234567890123456789012345678901234567890',
-              deployedAt,
-            },
-          ],
-          failed: [],
-        });
-
-      // Act
-      const result = await handleSeedContracts(
-        { contracts: ['hst'], hardfork: 'shanghai' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      expect(mockedDeployContracts).toHaveBeenCalledWith(['hst'], {
-        hardfork: 'shanghai',
-      });
-    });
-
-    it('handles partial deployment failures', async () => {
-      // Arrange
-      const deployedAt = new Date().toISOString();
-      vi.spyOn(mockSeedingCapability, 'deployContracts').mockResolvedValue({
-        deployed: [
-          {
-            name: 'hst',
-            address: '0x1234567890123456789012345678901234567890',
-            deployedAt,
-          },
-        ],
-        failed: [
-          {
-            name: 'nfts',
-            error: 'Contract deployment failed',
-          },
-        ],
-      });
-
-      // Act
-      const result = await handleSeedContracts(
-        { contracts: ['hst', 'nfts'] },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.deployed).toHaveLength(1);
-        expect(result.result.failed).toHaveLength(1);
-        expect(result.result.failed[0].contractName).toBe('nfts');
-        expect(result.result.failed[0].error).toBe(
-          'Contract deployment failed',
-        );
-      }
-    });
-
-    it('returns error when seeding capability not available', async () => {
-      // Act
-      const result = await handleSeedContracts(
-        { contracts: ['hst'] },
-        { seedingCapability: undefined },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
-        expect(result.error.message).toContain(
-          'ContractSeedingCapability not available',
-        );
-      }
-    });
-
-    it('returns error when deployment fails completely', async () => {
-      // Arrange
-      vi.spyOn(mockSeedingCapability, 'deployContracts').mockRejectedValue(
-        new Error('Anvil not running'),
-      );
-
-      // Act
-      const result = await handleSeedContracts(
-        { contracts: ['hst'] },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED);
-        expect(result.error.message).toContain('Anvil not running');
-      }
-    });
-  });
-
-  describe('handleGetContractAddress', () => {
-    it('returns contract address when found', async () => {
-      // Arrange
-      const mockedGetContractAddress = vi
-        .spyOn(mockSeedingCapability, 'getContractAddress')
-        .mockReturnValue('0x1234567890123456789012345678901234567890');
-
-      // Act
-      const result = await handleGetContractAddress(
-        { contractName: 'hst' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.contractName).toBe('hst');
-        expect(result.result.contractAddress).toBe(
-          '0x1234567890123456789012345678901234567890',
-        );
-      }
-      expect(mockedGetContractAddress).toHaveBeenCalledWith('hst');
-    });
-
-    it('returns null when contract not found', async () => {
-      // Arrange
-      vi.spyOn(mockSeedingCapability, 'getContractAddress').mockReturnValue(
-        null,
-      );
-
-      // Act
-      const result = await handleGetContractAddress(
-        { contractName: 'nfts' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.contractName).toBe('nfts');
-        expect(result.result.contractAddress).toBeNull();
-      }
-    });
-
-    it('returns error when seeding capability not available', async () => {
-      // Act
-      const result = await handleGetContractAddress(
-        { contractName: 'hst' },
-        { seedingCapability: undefined },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
-        expect(result.error.message).toContain(
-          'ContractSeedingCapability not available',
-        );
-      }
-    });
-
-    it('returns error when lookup fails', async () => {
-      // Arrange
-      vi.spyOn(mockSeedingCapability, 'getContractAddress').mockImplementation(
-        () => {
-          throw new Error('Registry error');
-        },
-      );
-
-      // Act
-      const result = await handleGetContractAddress(
-        { contractName: 'hst' },
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED);
-        expect(result.error.message).toContain('Registry error');
-      }
-    });
-  });
-
-  describe('handleListDeployedContracts', () => {
-    it('returns list of deployed contracts', async () => {
-      // Arrange
-      const deployedAt1 = new Date().toISOString();
-      const deployedAt2 = new Date(Date.now() + 1000).toISOString();
-      const mockedListDeployedContracts = vi
-        .spyOn(mockSeedingCapability, 'listDeployedContracts')
-        .mockReturnValue([
-          {
-            name: 'hst',
-            address: '0x1234567890123456789012345678901234567890',
-            deployedAt: deployedAt1,
-          },
-          {
-            name: 'nfts',
-            address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd',
-            deployedAt: deployedAt2,
-          },
-        ]);
-
-      // Act
-      const result = await handleListDeployedContracts(
-        {},
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.contracts).toHaveLength(2);
-        expect(result.result.contracts[0].contractName).toBe('hst');
-        expect(result.result.contracts[0].contractAddress).toBe(
-          '0x1234567890123456789012345678901234567890',
-        );
-        expect(result.result.contracts[0].deployedAt).toBe(deployedAt1);
-        expect(result.result.contracts[1].contractName).toBe('nfts');
-        expect(result.result.contracts[1].contractAddress).toBe(
-          '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd',
-        );
-        expect(result.result.contracts[1].deployedAt).toBe(deployedAt2);
-      }
-      expect(mockedListDeployedContracts).toHaveBeenCalled();
-    });
-
-    it('returns empty list when no contracts deployed', async () => {
-      // Arrange
-      vi.spyOn(mockSeedingCapability, 'listDeployedContracts').mockReturnValue(
-        [],
-      );
-
-      // Act
-      const result = await handleListDeployedContracts(
-        {},
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(result.result.contracts).toHaveLength(0);
-      }
-    });
-
-    it('returns error when seeding capability not available', async () => {
-      // Act
-      const result = await handleListDeployedContracts(
-        {},
-        { seedingCapability: undefined },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
-        expect(result.error.message).toContain(
-          'ContractSeedingCapability not available',
-        );
-      }
-    });
-
-    it('returns error when listing fails', async () => {
-      // Arrange
-      vi.spyOn(
-        mockSeedingCapability,
-        'listDeployedContracts',
-      ).mockImplementation(() => {
-        throw new Error('Registry error');
-      });
-
-      // Act
-      const result = await handleListDeployedContracts(
-        {},
-        { seedingCapability: mockSeedingCapability },
-      );
-
-      // Assert
-      expect(result.ok).toBe(false);
-      if (!result.ok) {
-        expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED);
-        expect(result.error.message).toContain('Registry error');
-      }
-    });
-  });
-});
diff --git a/src/mcp-server/tools/seeding.ts b/src/mcp-server/tools/seeding.ts
deleted file mode 100644
index 0fd578c..0000000
--- a/src/mcp-server/tools/seeding.ts
+++ /dev/null
@@ -1,327 +0,0 @@
-import type { ContractSeedingCapability } from '../../capabilities/types.js';
-import { getSessionManager } from '../session-manager.js';
-import { classifySeedingError } from './error-classification.js';
-import { runTool } from './run-tool.js';
-import type {
-  SeedContractInput,
-  SeedContractsInput,
-  GetContractAddressInput,
-  ListDeployedContractsInput,
-  SeedContractResult,
-  SeedContractsResult,
-  GetContractAddressResult,
-  ListDeployedContractsResult,
-  McpResponse,
-  HandlerOptions,
-} from '../types';
-import { ErrorCodes } from '../types';
-import { createErrorResponse } from '../utils';
-
-export type SeedingToolOptions = HandlerOptions & {
-  seedingCapability?: ContractSeedingCapability;
-};
-
-/**
- * Validates that the seeding capability is available, returning either the capability or an error response.
- *
- * @param toolName The name of the tool requesting the capability
- * @param input The input provided to the tool
- * @param options Tool options containing the seeding capability
- * @param startTime Timestamp when the tool execution started
- * @returns The seeding capability if available, or an error response if not
- */
-function checkSeedingCapability<Type>(
-  toolName: string,
-  input: unknown,
-  options: SeedingToolOptions | undefined,
-  startTime: number,
-): McpResponse<Type> | ContractSeedingCapability {
-  const sessionManager = getSessionManager();
-  const sessionId = sessionManager.getSessionId();
-
-  if (!options?.seedingCapability) {
-    return createErrorResponse(
-      ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE,
-      `ContractSeedingCapability not available. The ${toolName} tool requires running in e2e mode with the MetaMask extension wrapper, which provides Anvil chain and contract deployment support.`,
-      { capability: 'ContractSeedingCapability', input },
-      sessionId,
-      startTime,
-    ) as McpResponse<Type>;
-  }
-
-  return options.seedingCapability;
-}
-
-/**
- * Type guard to check if a result is a ContractSeedingCapability.
- *
- * @param result The value to check
- * @returns True if result is a ContractSeedingCapability, false if it's an error response
- */
-function isCapability(
-  result: McpResponse<unknown> | ContractSeedingCapability,
-): result is ContractSeedingCapability {
-  return (
-    typeof result === 'object' && result !== null && 'deployContract' in result
-  );
-}
-
-/**
- * Handles the mm_seed_contract tool to deploy a single smart contract.
- *
- * @param input The contract name and deployment options
- * @param options Tool options including seeding capability
- * @returns Promise resolving to the deployment result with contract address
- */
-export async function handleSeedContract(
-  input: SeedContractInput,
-  options?: SeedingToolOptions,
-): Promise<McpResponse<SeedContractResult>> {
-  const startTime = Date.now();
-  const capabilityOrError = checkSeedingCapability<SeedContractResult>(
-    'mm_seed_contract',
-    input,
-    options,
-    startTime,
-  );
-
-  if (!isCapability(capabilityOrError)) {
-    return capabilityOrError;
-  }
-
-  const seedingCapability = capabilityOrError;
-
-  return runTool<SeedContractInput, SeedContractResult>({
-    toolName: 'mm_seed_contract',
-    input,
-    options,
-    observationPolicy: 'none',
-
-    /**
-     * Executes the contract deployment using the seeding capability.
-     *
-     * @returns The deployed contract details including name, address, and timestamp
-     */
-    execute: async () => {
-      const deployed = await seedingCapability.deployContract(
-        input.contractName,
-        {
-          hardfork: input.hardfork,
-          deployerOptions: input.deployerOptions,
-        },
-      );
-
-      return {
-        contractName: deployed.name,
-        contractAddress: deployed.address,
-        deployedAt: deployed.deployedAt,
-      };
-    },
-
-    classifyError: classifySeedingError,
-
-    /**
-     * Sanitizes the input for recording in the knowledge store.
-     *
-     * @returns The sanitized input containing contract name and hardfork
-     */
-    sanitizeInputForRecording: () => ({
-      contractName: input.contractName,
-      hardfork: input.hardfork ?? 'prague',
-    }),
-  });
-}
-
-/**
- * Handles the mm_seed_contracts tool to deploy multiple smart contracts.
- *
- * @param input The list of contract names and deployment options
- * @param options Tool options including seeding capability
- * @returns Promise resolving to deployment results with deployed and failed contracts
- */
-export async function handleSeedContracts(
-  input: SeedContractsInput,
-  options?: SeedingToolOptions,
-): Promise<McpResponse<SeedContractsResult>> {
-  const startTime = Date.now();
-  const capabilityOrError = checkSeedingCapability<SeedContractsResult>(
-    'mm_seed_contracts',
-    input,
-    options,
-    startTime,
-  );
-
-  if (!isCapability(capabilityOrError)) {
-    return capabilityOrError;
-  }
-
-  const seedingCapability = capabilityOrError;
-
-  return runTool<SeedContractsInput, SeedContractsResult>({
-    toolName: 'mm_seed_contracts',
-    input,
-    options,
-    observationPolicy: 'none',
-
-    /**
-     * Executes the multi-contract deployment using the seeding capability.
-     *
-     * @returns The deployment results with deployed and failed contract lists
-     */
-    execute: async () => {
-      const seedResult = await seedingCapability.deployContracts(
-        input.contracts,
-        { hardfork: input.hardfork },
-      );
-
-      return {
-        deployed: seedResult.deployed.map((deployedContract) => ({
-          contractName: deployedContract.name,
-          contractAddress: deployedContract.address,
-          deployedAt: deployedContract.deployedAt,
-        })),
-        failed: seedResult.failed.map((failedDeployment) => ({
-          contractName: failedDeployment.name,
-          error: failedDeployment.error,
-        })),
-      };
-    },
-
-    classifyError: classifySeedingError,
-
-    /**
-     * Sanitizes the input for recording in the knowledge store.
-     *
-     * @returns The sanitized input containing contracts list and hardfork
-     */
-    sanitizeInputForRecording: () => ({
-      contracts: input.contracts,
-      hardfork: input.hardfork ?? 'prague',
-    }),
-  });
-}
-
-/**
- * Handles the mm_get_contract_address tool to retrieve a deployed contract's address.
- *
- * @param input The contract name to look up
- * @param options Tool options including seeding capability
- * @returns Promise resolving to the contract address or null if not found
- */
-export async function handleGetContractAddress(
-  input: GetContractAddressInput,
-  options?: SeedingToolOptions,
-): Promise<McpResponse<GetContractAddressResult>> {
-  const startTime = Date.now();
-  const capabilityOrError = checkSeedingCapability<GetContractAddressResult>(
-    'mm_get_contract_address',
-    input,
-    options,
-    startTime,
-  );
-
-  if (!isCapability(capabilityOrError)) {
-    return capabilityOrError;
-  }
-
-  const seedingCapability = capabilityOrError;
-
-  return runTool<GetContractAddressInput, GetContractAddressResult>({
-    toolName: 'mm_get_contract_address',
-    input,
-    options,
-    observationPolicy: 'none',
-
-    /**
-     * Executes the contract address lookup using the seeding capability.
-     *
-     * @returns The contract name and its deployed address
-     */
-    execute: async () => {
-      const address = seedingCapability.getContractAddress(input.contractName);
-
-      return {
-        contractName: input.contractName,
-        contractAddress: address,
-      };
-    },
-
-    classifyError: classifySeedingError,
-
-    /**
-     * Sanitizes the input for recording in the knowledge store.
-     *
-     * @returns The sanitized input containing the contract name
-     */
-    sanitizeInputForRecording: () => ({
-      contractName: input.contractName,
-    }),
-  });
-}
-
-/**
- * Handles the mm_list_contracts tool to list all deployed contracts in the session.
- *
- * @param _input Unused input parameter (no input required for this tool)
- * @param options Tool options including seeding capability
- * @returns Promise resolving to a list of all deployed contracts with their addresses
- */
-export async function handleListDeployedContracts(
-  _input: ListDeployedContractsInput,
-  options?: SeedingToolOptions,
-): Promise<McpResponse<ListDeployedContractsResult>> {
-  const startTime = Date.now();
-  const capabilityOrError = checkSeedingCapability<ListDeployedContractsResult>(
-    'mm_list_contracts',
-    _input,
-    options,
-    startTime,
-  );
-
-  if (!isCapability(capabilityOrError)) {
-    return capabilityOrError;
-  }
-
-  const seedingCapability = capabilityOrError;
-
-  return runTool<ListDeployedContractsInput, ListDeployedContractsResult>({
-    toolName: 'mm_list_contracts',
-    input: _input,
-    options,
-    observationPolicy: 'none',
-
-    /**
-     * Executes the contract listing using the seeding capability.
-     *
-     * @returns The list of all deployed contracts with their details
-     */
-    execute: async () => {
-      const deployed = seedingCapability.listDeployedContracts();
-
-      return {
-        contracts: deployed.map(
-          (deployedContract: {
-            /**
-             * The contract name
-             */
-            name: string;
-            /**
-             * The contract's deployed address
-             */
-            address: string;
-            /**
-             * The deployment timestamp
-             */
-            deployedAt: string;
-          }) => ({
-            contractName: deployedContract.name,
-            contractAddress: deployedContract.address,
-            deployedAt: deployedContract.deployedAt,
-          }),
-        ),
-      };
-    },
-
-    classifyError: classifySeedingError,
-  });
-}
diff --git a/src/mcp-server/tools/state.test.ts b/src/mcp-server/tools/state.test.ts
deleted file mode 100644
index 902e230..0000000
--- a/src/mcp-server/tools/state.test.ts
+++ /dev/null
@@ -1,358 +0,0 @@
-/**
- * Unit tests for state tool handler.
- *
- * Tests handleGetState with various scenarios including state snapshot capability,
- * tab tracking, and error handling.
- */
-
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
-
-import { handleGetState } from './state.js';
-import type { StateSnapshotCapability } from '../../capabilities/types.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager, createMockPage } from '../test-utils';
-import { ErrorCodes } from '../types/errors.js';
-
-describe('state', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
-      hasActive: true,
-      sessionId: 'test-session-123',
-      sessionMetadata: {
-        schemaVersion: 1,
-        sessionId: 'test-session-123',
-        createdAt: new Date().toISOString(),
-        flowTags: [],
-        tags: [],
-        launch: { stateMode: 'default' },
-      },
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-    // Mock knowledge store to prevent "not initialized" errors
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi
-        .fn()
-        .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }),
-      listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    } as any);
-  });
-
-  afterEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  describe('handleGetState', () => {
-    describe('without state snapshot capability', () => {
-      it('returns extension state from session manager', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue({
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-123/home.html',
-          extensionId: 'ext-123',
-          isUnlocked: true,
-          currentScreen: 'home',
-          accountAddress: '0x1234567890123456789012345678901234567890',
-          networkName: 'Ethereum Mainnet',
-          chainId: 1,
-          balance: '1.5 ETH',
-        });
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-        ]);
-
-        // Act
-        const result = await handleGetState();
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.state).toStrictEqual({
-            isLoaded: true,
-            currentUrl: 'chrome-extension://ext-123/home.html',
-            extensionId: 'ext-123',
-            isUnlocked: true,
-            currentScreen: 'home',
-            accountAddress: '0x1234567890123456789012345678901234567890',
-            networkName: 'Ethereum Mainnet',
-            chainId: 1,
-            balance: '1.5 ETH',
-          });
-          expect(result.result.tabs).toStrictEqual({
-            active: {
-              role: 'extension',
-              url: 'chrome-extension://ext-123/home.html',
-            },
-            tracked: [
-              {
-                role: 'extension',
-                url: 'chrome-extension://ext-123/home.html',
-              },
-            ],
-          });
-        }
-        expect(mockSessionManager.getExtensionState).toHaveBeenCalled();
-      });
-
-      it('includes multiple tracked pages in tabs', async () => {
-        // Arrange
-        const mockExtensionPage = createMockPage();
-        vi.spyOn(mockExtensionPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        const mockDappPage = createMockPage();
-        vi.spyOn(mockDappPage, 'url').mockReturnValue(
-          'https://app.uniswap.org',
-        );
-
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(
-          mockExtensionPage,
-        );
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue({
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-123/home.html',
-          extensionId: 'ext-123',
-          isUnlocked: true,
-          currentScreen: 'home',
-          accountAddress: '0x1234567890123456789012345678901234567890',
-          networkName: 'Ethereum Mainnet',
-          chainId: 1,
-          balance: '1.5 ETH',
-        });
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockExtensionPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-          {
-            page: mockDappPage,
-            role: 'dapp',
-            url: 'https://app.uniswap.org',
-          },
-        ]);
-
-        // Act
-        const result = await handleGetState();
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.tabs).toBeDefined();
-          expect(result.result.tabs?.tracked).toHaveLength(2);
-          expect(result.result.tabs?.tracked).toStrictEqual([
-            { role: 'extension', url: 'chrome-extension://ext-123/home.html' },
-            { role: 'dapp', url: 'https://app.uniswap.org' },
-          ]);
-        }
-      });
-
-      it('handles active page without tracked page info', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue({
-          isLoaded: true,
-          currentUrl: 'chrome-extension://ext-123/home.html',
-          extensionId: 'ext-123',
-          isUnlocked: false,
-          currentScreen: 'home',
-          accountAddress: null,
-          networkName: null,
-          chainId: null,
-          balance: null,
-        });
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([]);
-
-        // Act
-        const result = await handleGetState();
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.tabs).toBeDefined();
-          expect(result.result.tabs?.active.role).toBe('other');
-          expect(result.result.tabs?.active.url).toBe(
-            'chrome-extension://ext-123/home.html',
-          );
-        }
-      });
-    });
-
-    describe('with state snapshot capability', () => {
-      it('uses state snapshot capability when provided', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getSessionState').mockReturnValue({
-          extensionId: 'ext-123',
-          ports: { anvil: 8545 },
-        });
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-        ]);
-
-        const mockStateSnapshot: StateSnapshotCapability = {
-          getState: vi.fn().mockResolvedValue({
-            isLoaded: true,
-            currentUrl: 'chrome-extension://ext-123/home.html',
-            extensionId: 'ext-123',
-            isUnlocked: true,
-            currentScreen: 'home',
-            accountAddress: '0x1234567890123456789012345678901234567890',
-            networkName: 'Localhost 8545',
-            chainId: 1337,
-            balance: '25 ETH',
-          }),
-          detectCurrentScreen: vi.fn().mockResolvedValue('home'),
-        };
-
-        // Act
-        const result = await handleGetState({
-          stateSnapshotCapability: mockStateSnapshot,
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        if (result.ok) {
-          expect(result.result.state.chainId).toBe(1337);
-          expect(result.result.state.networkName).toBe('Localhost 8545');
-          expect(result.result.state.balance).toBe('25 ETH');
-        }
-        expect(mockStateSnapshot.getState).toHaveBeenCalledWith(mockPage, {
-          extensionId: 'ext-123',
-          chainId: 1337,
-        });
-        expect(mockSessionManager.getExtensionState).not.toHaveBeenCalled();
-      });
-
-      it('uses chainId 1 when anvil port not present', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockPage, 'url').mockReturnValue(
-          'chrome-extension://ext-123/home.html',
-        );
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getSessionState').mockReturnValue({
-          extensionId: 'ext-123',
-          ports: {},
-        });
-        vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([
-          {
-            page: mockPage,
-            role: 'extension',
-            url: 'chrome-extension://ext-123/home.html',
-          },
-        ]);
-
-        const mockStateSnapshot: StateSnapshotCapability = {
-          getState: vi.fn().mockResolvedValue({
-            isLoaded: true,
-            currentUrl: 'chrome-extension://ext-123/home.html',
-            extensionId: 'ext-123',
-            isUnlocked: true,
-            currentScreen: 'home',
-            accountAddress: '0x1234567890123456789012345678901234567890',
-            networkName: 'Ethereum Mainnet',
-            chainId: 1,
-            balance: '1.5 ETH',
-          }),
-          detectCurrentScreen: vi.fn().mockResolvedValue('home'),
-        };
-
-        // Act
-        const result = await handleGetState({
-          stateSnapshotCapability: mockStateSnapshot,
-        });
-
-        // Assert
-        expect(result.ok).toBe(true);
-        expect(mockStateSnapshot.getState).toHaveBeenCalledWith(mockPage, {
-          extensionId: 'ext-123',
-          chainId: 1,
-        });
-      });
-    });
-
-    describe('error handling', () => {
-      it('returns error when no active session', async () => {
-        // Arrange
-        vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false);
-
-        // Act
-        const result = await handleGetState();
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
-        }
-      });
-
-      it('returns error when getExtensionState fails', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockRejectedValue(
-          new Error('Failed to get state'),
-        );
-
-        // Act
-        const result = await handleGetState();
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_STATE_FAILED);
-          expect(result.error.message).toContain('Failed to get state');
-        }
-      });
-
-      it('returns error when page is closed', async () => {
-        // Arrange
-        const mockPage = createMockPage();
-        vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage);
-        vi.spyOn(mockSessionManager, 'getExtensionState').mockRejectedValue(
-          new Error('Target page, context or browser has been closed'),
-        );
-
-        // Act
-        const result = await handleGetState();
-
-        // Assert
-        expect(result.ok).toBe(false);
-        if (!result.ok) {
-          expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED);
-        }
-      });
-    });
-  });
-});
diff --git a/src/mcp-server/tools/state.ts b/src/mcp-server/tools/state.ts
deleted file mode 100644
index 8d1f71c..0000000
--- a/src/mcp-server/tools/state.ts
+++ /dev/null
@@ -1,102 +0,0 @@
-import type { Page } from 'playwright';
-
-import { classifyStateError } from './error-classification.js';
-import { collectObservation } from './helpers.js';
-import { runTool } from './run-tool.js';
-import type {
-  StateSnapshotCapability,
-  ExtensionState,
-} from '../../capabilities/types.js';
-import { getSessionManager } from '../session-manager.js';
-import type { GetStateResult, McpResponse, HandlerOptions } from '../types';
-
-/**
- * Tool options for state-related operations.
- */
-export type StateToolOptions = HandlerOptions & {
-  /**
-   * Optional capability for taking state snapshots
-   */
-  stateSnapshotCapability?: StateSnapshotCapability;
-};
-
-/**
- * Retrieves the current extension state, using the snapshot capability if available.
- *
- * @param page The Playwright page object to query
- * @param sessionManager The session manager instance
- * @param stateSnapshotCapability Optional capability for detailed state snapshots
- * @returns Promise resolving to the current extension state
- */
-async function getState(
-  page: Page,
-  sessionManager: ReturnType<typeof getSessionManager>,
-  stateSnapshotCapability?: StateSnapshotCapability,
-): Promise<ExtensionState> {
-  if (stateSnapshotCapability) {
-    const extensionId = sessionManager.getSessionState()?.extensionId;
-    return stateSnapshotCapability.getState(page, {
-      extensionId,
-      chainId: sessionManager.getSessionState()?.ports?.anvil ? 1337 : 1,
-    });
-  }
-  return sessionManager.getExtensionState();
-}
-
-/**
- * Handles the mm_get_state tool to retrieve the current extension state.
- *
- * @param options Tool options including optional state snapshot capability
- * @returns Promise resolving to the current extension state and tab information
- */
-export async function handleGetState(
-  options?: StateToolOptions,
-): Promise<McpResponse<GetStateResult>> {
-  return runTool<Record<string, never>, GetStateResult>({
-    toolName: 'mm_get_state',
-    input: {},
-    options,
-    observationPolicy: 'custom',
-
-    /**
-     * Executes the state retrieval with tab and observation information.
-     *
-     * @param context The tool execution context containing the page
-     * @returns The extension state, tab information, and observation data
-     */
-    execute: async (context) => {
-      const sessionManager = getSessionManager();
-      const state = await getState(
-        context.page,
-        sessionManager,
-        options?.stateSnapshotCapability,
-      );
-
-      const trackedPages = sessionManager.getTrackedPages();
-      const activePage = sessionManager.getPage();
-      const activeTabInfo = trackedPages.find(
-        (trackedPage) => trackedPage.page === activePage,
-      );
-
-      const tabs = {
-        active: {
-          role: activeTabInfo?.role ?? 'other',
-          url: activePage.url(),
-        },
-        tracked: trackedPages.map((trackedPage) => ({
-          role: trackedPage.role,
-          url: trackedPage.url,
-        })),
-      };
-
-      const observation = await collectObservation(context.page, 'full', state);
-
-      return {
-        result: { state, tabs },
-        observation,
-      };
-    },
-
-    classifyError: classifyStateError,
-  });
-}
diff --git a/src/mcp-server/types/responses.ts b/src/mcp-server/types/responses.ts
deleted file mode 100644
index edb96c4..0000000
--- a/src/mcp-server/types/responses.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-export type ResponseMeta = {
-  timestamp: string;
-  sessionId?: string;
-  durationMs: number;
-};
-
-export type SuccessResponse<Result = unknown> = {
-  meta: ResponseMeta;
-  ok: true;
-  result: Result;
-};
-
-export type ErrorDetails = {
-  code: string;
-  message: string;
-  details?: Record<string, unknown>;
-};
-
-export type ErrorResponse = {
-  error: ErrorDetails;
-  meta: ResponseMeta;
-  ok: false;
-};
-
-export type McpResponse<Result = unknown> =
-  | SuccessResponse<Result>
-  | ErrorResponse;
diff --git a/src/mcp-server/utils/index.ts b/src/mcp-server/utils/index.ts
deleted file mode 100644
index f4af15f..0000000
--- a/src/mcp-server/utils/index.ts
+++ /dev/null
@@ -1,14 +0,0 @@
-export { SENSITIVE_FIELD_PATTERNS, isSensitiveField } from './redaction.js';
-export { generateFilesafeTimestamp, generateSessionId } from './time.js';
-export { createSuccessResponse, createErrorResponse } from './response.js';
-export {
-  validateTargetSelection,
-  type TargetValidationResult,
-} from './targets.js';
-export { extractErrorMessage } from './errors.js';
-export { debugWarn } from './logger.js';
-export {
-  isValidTargetSelection,
-  isInvalidTargetSelection,
-  type TargetType,
-} from './type-guards.js';
diff --git a/src/mcp-server/utils/response.ts b/src/mcp-server/utils/response.ts
deleted file mode 100644
index 91d968e..0000000
--- a/src/mcp-server/utils/response.ts
+++ /dev/null
@@ -1,57 +0,0 @@
-import type { SuccessResponse, ErrorResponse, ErrorCode } from '../types';
-
-/**
- * Creates a standardized success response.
- *
- * @param result - The result data to include in the response.
- * @param sessionId - Optional session identifier.
- * @param startTime - Optional start time for duration calculation.
- * @returns A success response object.
- */
-export function createSuccessResponse<Result>(
-  result: Result,
-  sessionId?: string,
-  startTime?: number,
-): SuccessResponse<Result> {
-  return {
-    meta: {
-      timestamp: new Date().toISOString(),
-      sessionId,
-      durationMs: startTime ? Date.now() - startTime : 0,
-    },
-    ok: true,
-    result,
-  };
-}
-
-/**
- * Creates a standardized error response.
- *
- * @param code - The error code identifying the error type.
- * @param message - Human-readable error message.
- * @param details - Optional additional error details.
- * @param sessionId - Optional session identifier.
- * @param startTime - Optional start time for duration calculation.
- * @returns An error response object.
- */
-export function createErrorResponse(
-  code: ErrorCode,
-  message: string,
-  details?: Record<string, unknown>,
-  sessionId?: string,
-  startTime?: number,
-): ErrorResponse {
-  return {
-    error: {
-      code,
-      message,
-      details,
-    },
-    meta: {
-      timestamp: new Date().toISOString(),
-      sessionId,
-      durationMs: startTime ? Date.now() - startTime : 0,
-    },
-    ok: false,
-  };
-}
diff --git a/src/server/create-server.test.ts b/src/server/create-server.test.ts
new file mode 100644
index 0000000..58d3884
--- /dev/null
+++ b/src/server/create-server.test.ts
@@ -0,0 +1,1730 @@
+import * as fs from 'node:fs/promises';
+import * as http from 'node:http';
+import * as os from 'node:os';
+import * as path from 'node:path';
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import type { MockInstance } from 'vitest';
+
+import type { ServerInstance } from './create-server.js';
+import {
+  createServer,
+  extractTargetFromInput,
+  extractScreenshotInfo,
+  extractToolOutcome,
+  buildResponseBody,
+  shouldCollectObservations,
+  shouldIncludeObservationsInResponse,
+} from './create-server.js';
+import { readDaemonState } from './daemon-state.js';
+import pkg from '../../package.json';
+import type { PortMap, WorkflowContext } from '../capabilities/context.js';
+import type { DaemonState, ServerConfig, ToolResponse } from '../types/http.js';
+
+const tmpDir = path.join(os.tmpdir(), `mm-create-server-test-${Date.now()}`);
+// Every execSync call returns tmpDir plus a newline (presumably how the server locates its state directory — confirm).
+vi.mock('node:child_process', () => ({
+  execSync: () => Buffer.from(`${tmpDir}\n`),
+}));
+// Discovery helpers are replaced with stubs that resolve to canned results.
+vi.mock('../tools/utils/discovery.js', () => ({
+  collectTestIds: vi.fn().mockResolvedValue([]),
+  collectTrimmedA11ySnapshot: vi.fn().mockResolvedValue({
+    nodes: [],
+    refMap: new Map(),
+  }),
+  waitForTarget: vi.fn().mockResolvedValue({
+    click: vi.fn().mockResolvedValue(undefined),
+    fill: vi.fn().mockResolvedValue(undefined),
+    textContent: vi.fn().mockResolvedValue(''),
+  }),
+}));
+// KnowledgeStore, createKnowledgeStore and knowledgeStore all resolve to one shared stub.
+vi.mock('../knowledge-store/knowledge-store.js', () => {
+  const mockStore = {
+    recordStep: vi.fn().mockResolvedValue('/mock/path'),
+    writeSessionMetadata: vi.fn().mockResolvedValue('/mock/path'),
+    getLastSteps: vi.fn().mockResolvedValue([]),
+    searchSteps: vi.fn().mockResolvedValue([]),
+    summarizeSession: vi.fn().mockResolvedValue({ stepCount: 0, recipe: [] }),
+    listSessions: vi.fn().mockResolvedValue([]),
+    generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
+    getAllSessionIds: vi.fn().mockResolvedValue([]),
+    resolveSessionIds: vi.fn().mockResolvedValue([]),
+  };
+  return {
+    KnowledgeStore: vi.fn(() => mockStore),
+    createDefaultObservation: vi.fn(
+      (state: unknown, testIds?: unknown[], nodes?: unknown[]) => ({
+        state: state ?? {},
+        testIds: testIds ?? [],
+        a11y: { nodes: nodes ?? [] },
+      }),
+    ),
+    createKnowledgeStore: vi.fn(() => mockStore),
+    setKnowledgeStore: vi.fn(),
+    hasKnowledgeStore: vi.fn(() => false),
+    knowledgeStore: mockStore,
+  };
+});
+
+// Session-manager test double: every method is a vi.fn() with a canned result.
+function createMockSessionManager() {
+  return {
+    hasActiveSession: vi.fn(() => false),
+    getSessionId: vi.fn(() => 'test-session'),
+    getSessionState: vi.fn(() => undefined),
+    getSessionMetadata: vi.fn(() => undefined),
+    launch: vi.fn(async () => {
+      const ids = { sessionId: 'test-session', extensionId: 'test-ext' };
+      return { ...ids, state: {} };
+    }),
+    cleanup: vi.fn(async () => true),
+    getPage: vi.fn(() => {
+      const settle = () => vi.fn(async () => undefined);
+      return { waitForLoadState: settle(), waitForFunction: settle() };
+    }),
+    setActivePage: vi.fn(),
+    getTrackedPages: vi.fn(() => []),
+    classifyPageRole: vi.fn(() => 'extension'),
+    getContext: vi.fn(() => ({})),
+    getExtensionId: vi.fn(() => 'test-ext'),
+    getExtensionState: vi.fn(async () => ({})),
+    takeScreenshot: vi.fn(async () => ({ path: '', base64: '' })),
+    getRefMap: vi.fn(() => new Map()),
+    setRefMap: vi.fn(),
+    setWorkflowContext: vi.fn(),
+    getEnvironmentMode: vi.fn(() => 'e2e'),
+    setContext: vi.fn(),
+    getContextInfo: vi.fn(() => ({
+      currentContext: 'e2e',
+      hasActiveSession: false,
+      sessionId: null,
+      capabilities: { available: [] },
+      canSwitchContext: true,
+    })),
+  };
+}
+
+let exitSpy: MockInstance; // process.exit spy, re-created in each beforeEach
+
+// Default ServerConfig for tests: mock session manager plus a 'prod'
+// context factory; keys present in `overrides` replace these defaults.
+function buildConfig(overrides: Partial<ServerConfig> = {}): ServerConfig {
+  const sessionManager =
+    createMockSessionManager() as unknown as ServerConfig['sessionManager'];
+  const contextFactory: ServerConfig['contextFactory'] = async () =>
+    ({
+      config: { environment: 'prod', extensionName: 'Test Extension' },
+    }) satisfies WorkflowContext;
+  return { sessionManager, contextFactory, ...overrides };
+}
+
+// Test-only HTTP client on node:http: resolves with the status code and a
+// lazy `json()` body parser once the response ends; rejects on stream errors.
+async function httpRequest(
+  url: string,
+  options: {
+    method?: string;
+    headers?: Record<string, string>;
+    body?: string;
+  } = {},
+): Promise<{ status: number; json: () => Promise<unknown> }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(
+      // A URL object (unlike a bare pathname) keeps any query string.
+      new URL(url),
+      { method: options.method ?? 'GET', headers: options.headers },
+      (res) => {
+        // Stream-decode so UTF-8 characters split across chunks survive.
+        res.setEncoding('utf8');
+        let data = '';
+        res.on('data', (chunk: string) => {
+          data += chunk;
+        });
+        res.on('error', reject);
+        res.on('end', () => {
+          resolve({
+            status: res.statusCode ?? 0,
+            json: async () => JSON.parse(data) as unknown,
+          });
+        });
+      },
+    );
+    req.on('error', reject);
+    if (options.body) {
+      req.write(options.body);
+    }
+    req.end();
+  });
+}
+
+describe('extractTargetFromInput', () => {
+  // Positive cases expect all three keys; keys not supplied are undefined.
+  const blankTarget = {
+    a11yRef: undefined,
+    testId: undefined,
+    selector: undefined,
+  };
+
+  it('returns undefined for null input', () => {
+    const target = extractTargetFromInput(null);
+    expect(target).toBeUndefined();
+  });
+
+  it('returns undefined for non-object input', () => {
+    for (const primitive of ['string', 42]) {
+      expect(extractTargetFromInput(primitive)).toBeUndefined();
+    }
+  });
+
+  it('returns undefined when no target fields present', () => {
+    const target = extractTargetFromInput({ name: 'click' });
+    expect(target).toBeUndefined();
+  });
+
+  it('extracts a11yRef', () => {
+    const target = extractTargetFromInput({ a11yRef: 'e1' });
+    expect(target).toStrictEqual({ ...blankTarget, a11yRef: 'e1' });
+  });
+
+  it('extracts testId', () => {
+    const target = extractTargetFromInput({ testId: 'btn' });
+    expect(target).toStrictEqual({ ...blankTarget, testId: 'btn' });
+  });
+
+  it('extracts selector', () => {
+    const target = extractTargetFromInput({ selector: '.my-btn' });
+    expect(target).toStrictEqual({ ...blankTarget, selector: '.my-btn' });
+  });
+
+  it('extracts multiple target fields', () => {
+    const target = extractTargetFromInput({ a11yRef: 'e1', testId: 'btn' });
+    expect(target).toStrictEqual({
+      ...blankTarget,
+      a11yRef: 'e1',
+      testId: 'btn',
+    });
+  });
+
+  it('ignores non-string target values', () => {
+    expect(extractTargetFromInput({ a11yRef: 42 })).toBeUndefined();
+  });
+});
+
+describe('extractScreenshotInfo', () => {
+  // Builds the `{ ok: true, result }` envelope that the cases below pass in
+  // as the tool result.
+  const okResponse = (result: unknown) => ({ ok: true, result });
+
+  // Inputs that carry no usable screenshot.
+  it('returns undefined for non-screenshot tools', () => {
+    const info = extractScreenshotInfo('click', {});
+    expect(info).toBeUndefined();
+  });
+
+  it('returns undefined when toolResult is not an object', () => {
+    for (const toolResult of [null, 'string']) {
+      expect(extractScreenshotInfo('screenshot', toolResult)).toBeUndefined();
+    }
+  });
+
+  it('returns undefined when result is not ok', () => {
+    const info = extractScreenshotInfo('screenshot', { ok: false });
+    expect(info).toBeUndefined();
+  });
+
+  it('returns undefined when result has no path', () => {
+    const response = okResponse({});
+    const info = extractScreenshotInfo('screenshot', response);
+    expect(info).toBeUndefined();
+  });
+
+  // Flat form: the path (and optional size) sit directly on `result`.
+  it('extracts screenshot path from result.path', () => {
+    const response = okResponse({ path: '/img.png' });
+    const info = extractScreenshotInfo('screenshot', response);
+    expect(info).toStrictEqual({ path: '/img.png' });
+  });
+
+  it('extracts screenshot path with dimensions', () => {
+    const dimensions = { width: 1280, height: 720 };
+    const response = okResponse({ path: '/img.png', ...dimensions });
+    const info = extractScreenshotInfo('screenshot', response);
+    expect(info).toStrictEqual({ path: '/img.png', dimensions });
+  });
+
+  // Nested form: the same fields live under `result.screenshot`.
+  it('extracts screenshot from nested screenshot object', () => {
+    const dimensions = { width: 800, height: 600 };
+    const screenshot = { path: '/ss.png', ...dimensions };
+    const info = extractScreenshotInfo(
+      'describe_screen',
+      okResponse({ screenshot }),
+    );
+    expect(info).toStrictEqual({ path: '/ss.png', dimensions });
+  });
+
+  it('extracts nested screenshot without dimensions', () => {
+    const screenshot = { path: '/ss.png' };
+    const info = extractScreenshotInfo(
+      'describe_screen',
+      okResponse({ screenshot }),
+    );
+    expect(info).toStrictEqual({ path: '/ss.png' });
+  });
+
+  // Ok responses whose payload is missing or incomplete.
+  it('returns undefined when result.result is null', () => {
+    const response = okResponse(null);
+    const info = extractScreenshotInfo('screenshot', response);
+    expect(info).toBeUndefined();
+  });
+
+  it('returns undefined when nested screenshot has no path', () => {
+    const screenshot = { width: 800 };
+    const info = extractScreenshotInfo(
+      'describe_screen',
+      okResponse({ screenshot }),
+    );
+    expect(info).toBeUndefined();
+  });
+
+  it('returns undefined when nested screenshot is null', () => {
+    const info = extractScreenshotInfo(
+      'describe_screen',
+      okResponse({ screenshot: null }),
+    );
+    expect(info).toBeUndefined();
+  });
+});
+
+describe('extractToolOutcome', () => {
+  // Expected outcome for every input that does not report `ok: false`.
+  const success = { ok: true };
+
+  it('returns ok:true for non-object input', () => {
+    for (const toolResult of [null, 'string']) {
+      expect(extractToolOutcome(toolResult)).toStrictEqual(success);
+    }
+  });
+
+  it('returns ok:true when ok not in result', () => {
+    const outcome = extractToolOutcome({ result: 'data' });
+    expect(outcome).toStrictEqual(success);
+  });
+
+  it('returns ok:true for successful result', () => {
+    const outcome = extractToolOutcome({ ok: true, result: 'data' });
+    expect(outcome).toStrictEqual(success);
+  });
+
+  it('returns ok:false with error for failed result', () => {
+    const failure = () => ({
+      ok: false,
+      error: { code: 'ERR', message: 'fail' },
+    });
+    expect(extractToolOutcome(failure())).toStrictEqual(failure());
+  });
+
+  it('returns ok:false without error when no error field', () => {
+    expect(extractToolOutcome({ ok: false })).toStrictEqual({ ok: false });
+  });
+});
+
+// buildResponseBody(toolResult, observations): pass-through vs. merge cases.
+describe('buildResponseBody', () => {
+  it('returns toolResult as-is for non-object', () => {
+    for (const raw of ['string', null]) {
+      expect(buildResponseBody(raw, undefined)).toBe(raw);
+    }
+  });
+
+  it('returns toolResult when no observations', () => {
+    const toolResult = { ok: true, data: 'test' };
+    const body = buildResponseBody(toolResult, undefined);
+    expect(body).toStrictEqual(toolResult);
+  });
+
+  it('merges observations into result', () => {
+    const observations = { state: {}, testIds: [], a11y: { nodes: [] } };
+    const body = buildResponseBody({ ok: true }, observations as any);
+    expect(body).toStrictEqual({ ok: true, observations });
+  });
+});
+
+// Collection policy under test: every category collects observations, and
+// only a batch with includeObservations: 'none' opts out.
+describe('shouldCollectObservations', () => {
+  it('returns true for mutating', () => {
+    expect(shouldCollectObservations('mutating')).toBe(true);
+  });
+
+  it('returns true for readonly (collected for knowledge store)', () => {
+    expect(shouldCollectObservations('readonly')).toBe(true);
+  });
+
+  it('returns true for discovery (collected for knowledge store)', () => {
+    expect(shouldCollectObservations('discovery')).toBe(true);
+  });
+
+  it('returns true for batch with default policy', () => {
+    expect(shouldCollectObservations('batch')).toBe(true);
+  });
+
+  // Batch calls may carry an explicit includeObservations policy.
+  it("returns true for batch with 'all' policy", () => {
+    const policy = { includeObservations: 'all' } as const;
+    expect(shouldCollectObservations('batch', policy)).toBe(true);
+  });
+
+  it("returns false for batch with 'none' policy", () => {
+    const policy = { includeObservations: 'none' } as const;
+    expect(shouldCollectObservations('batch', policy)).toBe(false);
+  });
+
+  it("returns true for batch with 'failures' policy", () => {
+    const policy = { includeObservations: 'failures' } as const;
+    expect(shouldCollectObservations('batch', policy)).toBe(true);
+  });
+});
+
+describe('shouldIncludeObservationsInResponse', () => {
+  // Fixtures: a plain success, a plain failure, and a success whose
+  // `summary.ok` is false.
+  const okResult: ToolResponse = { ok: true, result: {} };
+  const failResult: ToolResponse = {
+    ok: false,
+    error: { code: 'ERR', message: 'fail' },
+  };
+  const summaryFailResult: ToolResponse = {
+    ok: true,
+    result: { summary: { ok: false } },
+  };
+  // Batch option shared by every 'failures' case below.
+  const failures = { includeObservations: 'failures' } as const;
+
+  it('returns true for mutating', () => {
+    const include = shouldIncludeObservationsInResponse('mutating', okResult);
+    expect(include).toBe(true);
+  });
+
+  it('returns false for readonly', () => {
+    const include = shouldIncludeObservationsInResponse('readonly', okResult);
+    expect(include).toBe(false);
+  });
+
+  it('returns false for discovery', () => {
+    const include = shouldIncludeObservationsInResponse('discovery', okResult);
+    expect(include).toBe(false);
+  });
+
+  it("returns true for batch with 'all' (default)", () => {
+    const include = shouldIncludeObservationsInResponse('batch', okResult, {});
+    expect(include).toBe(true);
+  });
+
+  it("returns false for batch with 'none'", () => {
+    const none = { includeObservations: 'none' } as const;
+    expect(
+      shouldIncludeObservationsInResponse('batch', okResult, none),
+    ).toBe(false);
+  });
+
+  it("returns true for batch with 'failures' when tool failed", () => {
+    const include = shouldIncludeObservationsInResponse(
+      'batch',
+      failResult,
+      failures,
+    );
+    expect(include).toBe(true);
+  });
+
+  it("returns true for batch with 'failures' when summary.ok is false", () => {
+    const include = shouldIncludeObservationsInResponse(
+      'batch',
+      summaryFailResult,
+      failures,
+    );
+    expect(include).toBe(true);
+  });
+
+  it("returns false for batch with 'failures' when tool succeeded", () => {
+    const batchOk: ToolResponse = {
+      ok: true,
+      result: { summary: { ok: true } },
+    };
+    const include = shouldIncludeObservationsInResponse(
+      'batch',
+      batchOk,
+      failures,
+    );
+    expect(include).toBe(false);
+  });
+
+  it("returns false for batch with 'failures' when summary is missing", () => {
+    expect(
+      shouldIncludeObservationsInResponse('batch', okResult, failures),
+    ).toBe(false);
+  });
+});
+
+describe('createServer integration', () => {
+  let server: ServerInstance;
+  let state: DaemonState;
+  // Every test gets a freshly started default server; afterEach stops it and removes tmpDir.
+  beforeEach(async () => {
+    await fs.mkdir(tmpDir, { recursive: true });
+    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never);
+
+    server = createServer(buildConfig());
+    state = await server.start();
+  });
+
+  afterEach(async () => {
+    await server.stop();
+    exitSpy.mockRestore();
+    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
+  });
+  // NOTE(review): titles like 'gET'/'pOST' look like a lowercase-title lint autofix applied to HTTP verbs — confirm intended.
+  it('gET /health returns 200 with status and nonce', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/health`);
+    const body = (await res.json()) as { status: string; nonce: string };
+
+    expect(res.status).toBe(200);
+    expect(body.status).toBe('ok');
+    expect(body.nonce).toBe(state.nonce);
+  });
+
+  it('gET /status returns daemon info', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/status`);
+    const body = (await res.json()) as {
+      daemon: { pid: number; port: number };
+      ports: PortMap;
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.daemon.pid).toBe(process.pid);
+    expect(body.daemon.port).toBe(state.port);
+    expect(body.ports).toStrictEqual({});
+  });
+
+  it('pOST /launch delegates to session manager', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/launch`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ state: 'default' }),
+    });
+    const body = (await res.json()) as { ok: boolean };
+
+    expect(res.status).toBe(200);
+    expect(body.ok).toBe(true);
+  });
+
+  it('pOST /cleanup delegates to session manager', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+    const body = (await res.json()) as { ok: boolean };
+
+    expect(res.status).toBe(200);
+    expect(body.ok).toBe(true);
+  });
+
+  it('pOST /tool/nonexistent returns 404', async () => {
+    const res = await httpRequest(
+      `http://127.0.0.1:${state.port}/tool/nonexistent`,
+      {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      },
+    );
+    const body = (await res.json()) as {
+      ok: boolean;
+      error: { code: string };
+    };
+
+    expect(res.status).toBe(404);
+    expect(body.ok).toBe(false);
+    expect(body.error.code).toBe('TOOL_NOT_FOUND');
+  });
+
+  it('writes .mm-server on start', async () => {
+    const daemonState = await readDaemonState(tmpDir);
+    expect(daemonState).not.toBeNull();
+    expect(daemonState?.port).toBe(state.port);
+    expect(daemonState?.nonce).toBe(state.nonce);
+    expect(daemonState?.version).toBe(pkg.version);
+  });
+
+  it('passes workflow context to session manager on start', async () => {
+    await server.stop();
+
+    const workflowContext: WorkflowContext = {
+      config: { environment: 'e2e', extensionName: 'Test Extension' },
+    };
+    const mockSM = createMockSessionManager();
+    const customServer = createServer(
+      buildConfig({
+        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
+        contextFactory: vi.fn().mockResolvedValue(workflowContext),
+      }),
+    );
+
+    await customServer.start();
+    expect(mockSM.setWorkflowContext).toHaveBeenCalledWith(workflowContext);
+    await customServer.stop();
+  });
+
+  it('fails startup when contextFactory rejects', async () => {
+    await server.stop();
+
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi
+          .fn<ServerConfig['contextFactory']>()
+          .mockRejectedValue(new Error('port allocation failed')),
+      }),
+    );
+
+    await expect(customServer.start()).rejects.toThrowError(
+      'contextFactory failed during server startup: port allocation failed',
+    );
+  });
+
+  it('preserves original error as cause when contextFactory rejects', async () => {
+    await server.stop();
+
+    const cause = new Error('root cause');
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi
+          .fn<ServerConfig['contextFactory']>()
+          .mockRejectedValue(cause),
+      }),
+    );
+
+    await expect(customServer.start()).rejects.toThrowError(
+      expect.objectContaining({ cause }),
+    );
+  });
+
+  it('fails startup when contextFactory resolves with null', async () => {
+    await server.stop();
+
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi.fn().mockResolvedValue(null),
+      }),
+    );
+
+    await expect(customServer.start()).rejects.toThrowError(
+      'contextFactory must return an object with a valid config.environment field',
+    );
+  });
+
+  it('fails startup when contextFactory resolves without config', async () => {
+    await server.stop();
+
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi.fn().mockResolvedValue({}),
+      }),
+    );
+
+    await expect(customServer.start()).rejects.toThrowError(
+      'contextFactory must return an object with a valid config.environment field',
+    );
+  });
+
+  it('fails startup when allocatedPorts contains non-number values', async () => {
+    await server.stop();
+
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi.fn().mockResolvedValue({
+          config: { environment: 'prod', extensionName: 'Test' },
+          allocatedPorts: { bad: 'not-a-number' },
+        }),
+      }),
+    );
+
+    await expect(customServer.start()).rejects.toThrowError(
+      'allocatedPorts["bad"] must be a finite number',
+    );
+  });
+
+  it('does not call setWorkflowContext when contextFactory rejects', async () => {
+    await server.stop();
+
+    const mockSM = createMockSessionManager();
+    const customServer = createServer(
+      buildConfig({
+        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
+        contextFactory: vi
+          .fn<ServerConfig['contextFactory']>()
+          .mockRejectedValue(new Error('boom')),
+      }),
+    );
+
+    await customServer.start().catch(() => {});
+    expect(mockSM.setWorkflowContext).not.toHaveBeenCalled();
+  });
+
+  it('does not write .mm-server when contextFactory rejects', async () => {
+    await server.stop();
+
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi
+          .fn<ServerConfig['contextFactory']>()
+          .mockRejectedValue(new Error('boom')),
+      }),
+    );
+
+    await customServer.start().catch(() => {});
+    const daemonState = await readDaemonState(tmpDir);
+    expect(daemonState).toBeNull();
+  });
+
+  it('cleans up session when startup fails after contextFactory succeeds', async () => {
+    await server.stop();
+
+    const mockSM = createMockSessionManager();
+    const customServer = createServer(
+      buildConfig({
+        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
+        contextFactory: vi.fn().mockResolvedValue({
+          config: { environment: 'prod', extensionName: 'Test' },
+        } satisfies WorkflowContext),
+      }),
+    );
+    // tmpDir is made read-only so start() is expected to reject with EACCES. NOTE(review): permission bits are not enforced for root — confirm the CI user.
+    await fs.chmod(tmpDir, 0o444);
+    try {
+      await expect(customServer.start()).rejects.toThrowError(/EACCES/u);
+      expect(mockSM.cleanup).toHaveBeenCalled();
+    } finally {
+      await fs.chmod(tmpDir, 0o755).catch(() => {});
+    }
+  });
+
+  it('accepts a synchronous contextFactory', async () => {
+    await server.stop();
+
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: () => ({
+          config: { environment: 'prod' as const, extensionName: 'Sync' },
+        }),
+      }),
+    );
+
+    const customState = await customServer.start();
+    expect(customState.port).toBeGreaterThan(0);
+    await customServer.stop();
+  });
+
+  it('gET /status returns empty ports when allocatedPorts is undefined', async () => {
+    await server.stop();
+
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi.fn().mockResolvedValue({
+          config: { environment: 'prod', extensionName: 'Test Extension' },
+        } satisfies WorkflowContext),
+      }),
+    );
+
+    const customState = await customServer.start();
+    const res = await httpRequest(
+      `http://127.0.0.1:${customState.port}/status`,
+    );
+    const body = (await res.json()) as { ports: PortMap };
+
+    expect(res.status).toBe(200);
+    expect(body.ports).toStrictEqual({});
+
+    await customServer.stop();
+  });
+
+  it('gET /status returns custom allocated ports', async () => {
+    await server.stop();
+
+    const allocatedPorts = { serviceA: 3001, serviceB: 3002 };
+    const customServer = createServer(
+      buildConfig({
+        contextFactory: vi.fn().mockResolvedValue({
+          config: { environment: 'prod', extensionName: 'Test Extension' },
+          allocatedPorts,
+        } satisfies WorkflowContext),
+      }),
+    );
+
+    const customState = await customServer.start();
+    const res = await httpRequest(
+      `http://127.0.0.1:${customState.port}/status`,
+    );
+    const body = (await res.json()) as { ports: PortMap };
+
+    expect(res.status).toBe(200);
+    expect(body.ports).toStrictEqual(allocatedPorts);
+
+    await customServer.stop();
+  });
+
+  it('removes .mm-server on stop', async () => {
+    await server.stop();
+    const daemonState = await readDaemonState(tmpDir);
+    expect(daemonState).toBeNull();
+  });
+
+  it('serializes concurrent launch requests through the queue', async () => {
+    const [res1, res2] = await Promise.all([
+      httpRequest(`http://127.0.0.1:${state.port}/launch`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      }),
+      httpRequest(`http://127.0.0.1:${state.port}/launch`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      }),
+    ]);
+
+    expect(res1.status).toBe(200);
+    expect(res2.status).toBe(200);
+  });
+
+  it('stop() is idempotent', async () => {
+    await server.stop();
+    expect(await server.stop()).toBeUndefined();
+  });
+  // Invalid tool input is expected to produce HTTP 400 with code VALIDATION_ERROR.
+  describe('POST /tool/:name input validation', () => {
+    it('returns 400 for missing required field', async () => {
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/click`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({}),
+        },
+      );
+      const body = (await res.json()) as {
+        ok: boolean;
+        error: { code: string; message: string };
+      };
+
+      expect(res.status).toBe(400);
+      expect(body.ok).toBe(false);
+      expect(body.error.code).toBe('VALIDATION_ERROR');
+    });
+
+    it('returns 400 for invalid enum value', async () => {
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/navigate`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ screen: 'nonexistent' }),
+        },
+      );
+      const body = (await res.json()) as {
+        ok: boolean;
+        error: { code: string; message: string };
+      };
+
+      expect(res.status).toBe(400);
+      expect(body.ok).toBe(false);
+      expect(body.error.code).toBe('VALIDATION_ERROR');
+    });
+
+    it('returns 400 when cross-field refine fails', async () => {
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/clipboard`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ action: 'write' }),
+        },
+      );
+      const body = (await res.json()) as {
+        ok: boolean;
+        error: { code: string; message: string };
+      };
+
+      expect(res.status).toBe(400);
+      expect(body.ok).toBe(false);
+      expect(body.error.code).toBe('VALIDATION_ERROR');
+      expect(body.error.message).toContain(
+        "text is required when action is 'write'",
+      );
+    });
+
+    it('returns 400 for wrong field type', async () => {
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/wait_for_notification`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ timeoutMs: 'not-a-number' }),
+        },
+      );
+      const body = (await res.json()) as {
+        ok: boolean;
+        error: { code: string; message: string };
+      };
+
+      expect(res.status).toBe(400);
+      expect(body.ok).toBe(false);
+      expect(body.error.code).toBe('VALIDATION_ERROR');
+    });
+
+    it('passes validation for valid input (empty schema)', async () => {
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/get_state`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({}),
+        },
+      );
+
+      expect(res.status).not.toBe(400);
+    });
+  });
+});
+
+describe('createServer with active session', () => {
+  let server: ServerInstance;
+  let state: DaemonState;
+  let mockSM: ReturnType<typeof createMockSessionManager>;
+  // Fresh daemon per test; the session manager mock reports an active session.
+  beforeEach(async () => {
+    await fs.mkdir(tmpDir, { recursive: true });
+    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never);
+
+    mockSM = createMockSessionManager();
+    mockSM.hasActiveSession.mockReturnValue(true);
+    mockSM.getExtensionState.mockResolvedValue({
+      isLoaded: true,
+      currentUrl: 'chrome-extension://test/home.html',
+    });
+
+    server = createServer(
+      buildConfig({
+        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
+      }),
+    );
+    state = await server.start();
+  });
+
+  afterEach(async () => {
+    await server.stop();
+    exitSpy.mockRestore();
+    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
+  });
+
+  it('collects observations and records knowledge for tool execution', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+    const body = (await res.json()) as { ok: boolean; observations?: unknown };
+
+    expect(res.status).toBe(200);
+    expect(body.ok).toBe(true);
+    expect(body.observations).toBeDefined();
+  });
+
+  it('records error step when tool execution throws', async () => {
+    mockSM.cleanup.mockRejectedValueOnce(new Error('Browser crash'));
+
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+    const body = (await res.json()) as {
+      ok: boolean;
+      error: { code: string; message: string };
+    };
+
+    expect(res.status).toBe(500);
+    expect(body.ok).toBe(false);
+    expect(body.error.code).toBe('TOOL_EXECUTION_FAILED');
+    expect(body.error.message).toContain('Browser crash');
+  });
+
+  it('handles observation collection failure gracefully', async () => {
+    mockSM.getPage.mockImplementation(() => {
+      throw new Error('Page closed');
+    });
+    // With getPage throwing, the route is still expected to answer 200.
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+    const body = (await res.json()) as { ok: boolean };
+
+    expect(res.status).toBe(200);
+    expect(body.ok).toBe(true);
+  });
+
+  it('records step with environment context', async () => {
+    const res = await httpRequest(
+      `http://127.0.0.1:${state.port}/tool/get_state`,
+      {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      },
+    );
+
+    expect(res.status).toBe(200);
+  });
+
+  it('read-only tool response omits observations', async () => {
+    const res = await httpRequest(
+      `http://127.0.0.1:${state.port}/tool/get_state`,
+      {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      },
+    );
+    const body = (await res.json()) as { ok: boolean; observations?: unknown };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeUndefined();
+  });
+
+  it('mutating tool response includes observations with state, testIds, a11y', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: { state: unknown; testIds: unknown[]; a11y: unknown };
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeDefined();
+    expect(body.observations?.state).toBeDefined();
+    expect(body.observations?.testIds).toBeDefined();
+    expect(body.observations?.a11y).toBeDefined();
+  });
+
+  it('playwright helpers called for read-only tools (knowledge store)', async () => {
+    const { collectTestIds, collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    const collectTestIdsSpy = vi.mocked(collectTestIds);
+    const collectA11ySpy = vi.mocked(collectTrimmedA11ySnapshot);
+    // Forget calls made by earlier requests so only this request is counted.
+    collectTestIdsSpy.mockClear();
+    collectA11ySpy.mockClear();
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/get_state`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+
+    expect(collectTestIdsSpy).toHaveBeenCalled();
+    expect(collectA11ySpy).toHaveBeenCalled();
+  });
+
+  it('observation Playwright helpers called for mutating tools', async () => {
+    const { collectTestIds, collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    const collectTestIdsSpy = vi.mocked(collectTestIds);
+    const collectA11ySpy = vi.mocked(collectTrimmedA11ySnapshot);
+    // Forget calls made by earlier requests so only this request is counted.
+    collectTestIdsSpy.mockClear();
+    collectA11ySpy.mockClear();
+
+    await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+
+    expect(collectTestIdsSpy).toHaveBeenCalled();
+    expect(collectA11ySpy).toHaveBeenCalled();
+  });
+
+  it('recordStep is called for mutating tool routes', async () => {
+    const { KnowledgeStore } =
+      await import('../knowledge-store/knowledge-store.js');
+    const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as {
+      recordStep: ReturnType<typeof vi.fn>;
+    };
+    mockStore.recordStep.mockClear();
+
+    await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+
+    expect(mockStore.recordStep).toHaveBeenCalled();
+  });
+
+  it('recordStep is called for read-only tool routes', async () => {
+    const { KnowledgeStore } =
+      await import('../knowledge-store/knowledge-store.js');
+    const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as {
+      recordStep: ReturnType<typeof vi.fn>;
+    };
+    mockStore.recordStep.mockClear();
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/get_state`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+
+    expect(mockStore.recordStep).toHaveBeenCalled();
+  });
+
+  describe('post-mutation state recheck', () => {
+    it('resolves immediately when getExtensionState returns a known screen', async () => {
+      mockSM.getExtensionState.mockReset();
+      mockSM.getExtensionState.mockResolvedValue({
+        isLoaded: true,
+        currentScreen: 'home',
+        currentUrl: 'chrome-extension://test/home.html',
+      });
+
+      const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      });
+      const body = (await res.json()) as {
+        ok: boolean;
+        observations?: { state: { currentScreen?: string } };
+      };
+
+      expect(res.status).toBe(200);
+      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
+      expect(body.observations?.state.currentScreen).toBe('home');
+    });
+
+    it("retries when first call returns 'unknown', resolves on second call", async () => {
+      mockSM.getExtensionState.mockReset();
+      mockSM.getExtensionState
+        .mockResolvedValueOnce({
+          isLoaded: true,
+          currentScreen: 'unknown',
+          currentUrl: 'chrome-extension://test/unknown.html',
+        })
+        .mockResolvedValueOnce({
+          isLoaded: true,
+          currentScreen: 'home',
+          currentUrl: 'chrome-extension://test/home.html',
+        });
+
+      const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      });
+      const body = (await res.json()) as {
+        ok: boolean;
+        observations?: { state: { currentScreen?: string } };
+      };
+
+      expect(res.status).toBe(200);
+      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(2);
+      expect(body.observations?.state.currentScreen).toBe('home');
+    });
+
+    it("retries up to deadline and returns 'unknown' if all calls return 'unknown'", async () => {
+      mockSM.getExtensionState.mockReset();
+      mockSM.getExtensionState.mockResolvedValue({
+        isLoaded: true,
+        currentScreen: 'unknown',
+        currentUrl: 'chrome-extension://test/unknown.html',
+      });
+      vi.useFakeTimers();
+      const start = Date.now();
+      const responsePromise = httpRequest(
+        `http://127.0.0.1:${state.port}/cleanup`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({}),
+        },
+      );
+      try {
+        await vi.advanceTimersByTimeAsync(500);
+      } finally {
+        vi.useRealTimers(); // never leak fake timers into later tests
+      }
+      const res = await responsePromise;
+      const body = (await res.json()) as {
+        ok: boolean;
+        observations?: { state: { currentScreen?: string } };
+      };
+
+      expect(res.status).toBe(200);
+      expect(Date.now() - start).toBeLessThanOrEqual(600);
+      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(6);
+      expect(body.observations?.state.currentScreen).toBe('unknown');
+    });
+
+    it('does not recheck for readonly tool category', async () => {
+      mockSM.getExtensionState.mockReset();
+      mockSM.getExtensionState.mockResolvedValue({
+        isLoaded: true,
+        currentScreen: 'unknown',
+        currentUrl: 'chrome-extension://test/unknown.html',
+      });
+
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/knowledge_last`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({}),
+        },
+      );
+      // Screen stays 'unknown', yet only one state read: no retry loop ran.
+      expect(res.status).toBe(200);
+      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
+    });
+
+    it('does not recheck for discovery tool category', async () => {
+      mockSM.getExtensionState.mockReset();
+      mockSM.getExtensionState.mockResolvedValue({
+        isLoaded: true,
+        currentScreen: 'unknown',
+        currentUrl: 'chrome-extension://test/unknown.html',
+      });
+
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/list_testids`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({}),
+        },
+      );
+      // Screen stays 'unknown', yet only one state read: no retry loop ran.
+      expect(res.status).toBe(200);
+      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
+    });
+
+    it('does not recheck for batch tool category', async () => {
+      mockSM.getExtensionState.mockReset();
+      mockSM.getExtensionState.mockResolvedValue({
+        isLoaded: true,
+        currentScreen: 'unknown',
+        currentUrl: 'chrome-extension://test/unknown.html',
+      });
+
+      const res = await httpRequest(
+        `http://127.0.0.1:${state.port}/tool/run_steps`,
+        {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({
+            steps: [{ tool: 'knowledge_last', args: {} }],
+          }),
+        },
+      );
+      // Screen stays 'unknown', yet only one state read: no retry loop ran.
+      expect(res.status).toBe(200);
+      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
+    });
+  });
+});
+
+describe('createServer with logging', () => {
+  let server: ServerInstance;
+  let state: DaemonState;
+  // Fresh daemon per test, configured to log to <tmpDir>/daemon.log.
+  beforeEach(async () => {
+    await fs.mkdir(tmpDir, { recursive: true });
+    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never);
+
+    server = createServer(
+      buildConfig({ logFilePath: path.join(tmpDir, 'daemon.log') }),
+    );
+    state = await server.start();
+  });
+
+  afterEach(async () => {
+    await server.stop();
+    exitSpy.mockRestore();
+    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
+  });
+
+  it('writes request logs to file', async () => {
+    await httpRequest(`http://127.0.0.1:${state.port}/health`);
+    await new Promise((resolve) => setTimeout(resolve, 100)); // presumably lets async log writes land — TODO confirm
+    const logContent = await fs
+      .readFile(path.join(tmpDir, 'daemon.log'), 'utf-8')
+      .catch(() => '');
+    expect(logContent).toContain('/health');
+  });
+
+  it('logs fatal errors to stderr and file', async () => {
+    const stderrSpy = vi
+      .spyOn(process.stderr, 'write')
+      .mockImplementation(() => true);
+    try {
+      // Trigger a cleanup error by making sessionManager.cleanup() reject
+      const mockSM = createMockSessionManager();
+      mockSM.hasActiveSession.mockReturnValue(true);
+      mockSM.cleanup.mockRejectedValue(new Error('Cleanup failed'));
+
+      const testServer = createServer({
+        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
+        contextFactory: vi.fn().mockResolvedValue({
+          config: {
+            environment: 'e2e',
+            extensionName: 'Test',
+            defaultPassword: 'test',
+            artifactsDir: tmpDir,
+            defaultChainId: 1,
+            ports: { anvil: 8545, fixtureServer: 12345 },
+          },
+        } satisfies WorkflowContext),
+        logFilePath: path.join(tmpDir, 'error.log'),
+      });
+
+      await testServer.start();
+      await testServer.stop();
+      // Verify stderr was called with fatal error
+      expect(stderrSpy).toHaveBeenCalledWith(
+        expect.stringContaining('[ERROR] Cleanup failed'),
+      );
+    } finally {
+      stderrSpy.mockRestore(); // un-mock stderr even when an assertion throws
+    }
+  });
+
+  it('handles log file write errors gracefully', async () => {
+    const stderrSpy = vi
+      .spyOn(process.stderr, 'write')
+      .mockImplementation(() => true);
+
+    // Create a read-only directory to cause write errors
+    const readOnlyDir = path.join(tmpDir, 'readonly');
+    await fs.mkdir(readOnlyDir, { recursive: true });
+    const logPath = path.join(readOnlyDir, 'daemon.log');
+
+    // Make directory read-only (NOTE(review): likely ineffective when run as root — confirm CI user)
+    await fs.chmod(readOnlyDir, 0o444);
+
+    try {
+      const testServer = createServer(buildConfig({ logFilePath: logPath }));
+      const testState = await testServer.start();
+
+      // Make a request to trigger logging
+      await httpRequest(`http://127.0.0.1:${testState.port}/health`);
+      await new Promise((resolve) => setTimeout(resolve, 100));
+
+      await testServer.stop();
+
+      // Verify that stderr was called with the write error message
+      expect(stderrSpy).toHaveBeenCalledWith(
+        expect.stringContaining('Failed to write log'),
+      );
+    } finally {
+      stderrSpy.mockRestore();
+      // Restore write permissions for cleanup
+      await fs.chmod(readOnlyDir, 0o755).catch(() => {});
+    }
+  });
+
+  it('handles server close timeout with force close', async () => {
+    const testServer = createServer(buildConfig());
+    const testState = await testServer.start();
+
+    // Make a request to ensure server is active
+    await httpRequest(`http://127.0.0.1:${testState.port}/health`);
+
+    // Stop should complete even if server doesn't close gracefully
+    expect(await testServer.stop()).toBeUndefined();
+  });
+});
+
+describe('observation compaction in HTTP responses', () => {
+  let server: ServerInstance;
+  let state: DaemonState;
+  let mockSM: ReturnType<typeof createMockSessionManager>;
+  // Default snapshot for this suite: 1 combobox + 10 options + 1 button = 12 nodes.
+  const comboboxAndOptions = [
+    { ref: 'e1', role: 'combobox', name: 'Language', path: ['root'] },
+    ...Array.from({ length: 10 }, (_, i) => ({
+      ref: `e${i + 2}`,
+      role: 'option',
+      name: `Lang ${i + 1}`,
+      path: ['root', 'combobox'],
+    })),
+    { ref: 'e12', role: 'button', name: 'Submit', path: ['root'] },
+  ];
+  // Baseline snapshot used by the diff tests.
+  const initialButtons = [
+    { ref: 'e1', role: 'button', name: 'Continue', path: ['root'] },
+    { ref: 'e2', role: 'button', name: 'Cancel', path: ['root'] },
+  ];
+  // Same as initialButtons, but e2 (Cancel) is replaced by e3 (Confirm).
+  const changedButtons = [
+    { ref: 'e1', role: 'button', name: 'Continue', path: ['root'] },
+    { ref: 'e3', role: 'button', name: 'Confirm', path: ['root'] },
+  ];
+  // Ten buttons (refs e10..e19) sharing no ref with initialButtons.
+  const manyNewButtons = Array.from({ length: 10 }, (_, index) => ({
+    ref: `e${index + 10}`,
+    role: 'button',
+    name: `Action ${index + 1}`,
+    path: ['root'],
+  }));
+
+  beforeEach(async () => {
+    await fs.mkdir(tmpDir, { recursive: true });
+    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never);
+
+    mockSM = createMockSessionManager();
+    mockSM.hasActiveSession.mockReturnValue(true);
+    mockSM.getExtensionState.mockResolvedValue({
+      isLoaded: true,
+      currentUrl: 'chrome-extension://test/home.html',
+    });
+
+    const { collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    vi.mocked(collectTrimmedA11ySnapshot).mockResolvedValue({
+      nodes: comboboxAndOptions as never,
+      refMap: new Map(),
+    });
+
+    server = createServer(
+      buildConfig({
+        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
+      }),
+    );
+    state = await server.start();
+  });
+
+  afterEach(async () => {
+    await server.stop();
+    exitSpy.mockRestore();
+    // Reset (not just re-stub) so unconsumed mockResolvedValueOnce entries cannot leak.
+    const { collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    vi.mocked(collectTrimmedA11ySnapshot).mockReset().mockResolvedValue({
+      nodes: [],
+      refMap: new Map(),
+    });
+
+    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
+  });
+
+  it('mutating tool returns compact observations in HTTP response', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: { a11y: { nodes: unknown[] } };
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeDefined();
+    // 12 original nodes → compacted: combobox + summary + button = 3
+    expect(body.observations?.a11y.nodes).toHaveLength(3);
+  });
+
+  it('first mutation returns a full compact observation when no baseline exists', async () => {
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: {
+        a11y: {
+          nodes: unknown[];
+          diff?: unknown;
+        };
+      };
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeDefined();
+    expect(body.observations?.a11y.diff).toBeUndefined();
+    expect(body.observations?.a11y.nodes).toHaveLength(3);
+  });
+
+  it('second mutation returns a diff-based observation', async () => {
+    const { collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    vi.mocked(collectTrimmedA11ySnapshot)
+      .mockResolvedValueOnce({
+        nodes: initialButtons as never,
+        refMap: new Map(),
+      })
+      .mockResolvedValueOnce({
+        nodes: changedButtons as never,
+        refMap: new Map(),
+      });
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: {
+        a11y: {
+          nodes: unknown[];
+          diff?: { added: string[]; removed: string[]; unchanged: number };
+        };
+      };
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations?.a11y.diff).toStrictEqual({
+      added: ['e3'],
+      removed: ['e2'],
+      unchanged: 1,
+    });
+    expect(body.observations?.a11y.nodes).toHaveLength(1);
+  });
+
+  it('describe_screen resets the diff baseline', async () => {
+    const { collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    vi.mocked(collectTrimmedA11ySnapshot)
+      .mockResolvedValueOnce({
+        nodes: initialButtons as never,
+        refMap: new Map(),
+      })
+      .mockResolvedValueOnce({
+        nodes: initialButtons as never,
+        refMap: new Map(),
+      })
+      .mockResolvedValueOnce({
+        nodes: changedButtons as never,
+        refMap: new Map(),
+      });
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/describe_screen`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: {
+        a11y: {
+          nodes: unknown[];
+          diff?: unknown;
+        };
+      };
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeDefined();
+    expect(body.observations?.a11y.diff).toBeUndefined();
+    expect(body.observations?.a11y.nodes.length).toBeGreaterThan(1);
+  });
+
+  it('falls back to the full observation when the diff is not smaller', async () => {
+    const { collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    vi.mocked(collectTrimmedA11ySnapshot)
+      .mockResolvedValueOnce({
+        nodes: [initialButtons[0]] as never,
+        refMap: new Map(),
+      })
+      .mockResolvedValueOnce({
+        nodes: manyNewButtons as never,
+        refMap: new Map(),
+      });
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+
+    const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: {
+        a11y: {
+          nodes: unknown[];
+          diff?: unknown;
+        };
+      };
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeDefined();
+    expect(body.observations?.a11y.diff).toBeUndefined();
+    expect(body.observations?.a11y.nodes).toHaveLength(10);
+  });
+
+  it('knowledge store always receives the full observation instead of the diff', async () => {
+    const { collectTrimmedA11ySnapshot } =
+      await import('../tools/utils/discovery.js');
+    vi.mocked(collectTrimmedA11ySnapshot)
+      .mockResolvedValueOnce({
+        nodes: initialButtons as never,
+        refMap: new Map(),
+      })
+      .mockResolvedValueOnce({
+        nodes: changedButtons as never,
+        refMap: new Map(),
+      });
+
+    const { KnowledgeStore } =
+      await import('../knowledge-store/knowledge-store.js');
+    const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as {
+      recordStep: ReturnType<typeof vi.fn>;
+    };
+    mockStore.recordStep.mockClear();
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+
+    await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ a11yRef: 'e1' }),
+    });
+
+    expect(mockStore.recordStep).toHaveBeenCalledTimes(2);
+    const recorded = mockStore.recordStep.mock.calls[1][0] as {
+      observation: { a11y: { nodes: unknown[]; diff?: unknown } };
+    };
+
+    expect(recorded.observation.a11y.diff).toBeUndefined();
+    expect(recorded.observation.a11y.nodes).toStrictEqual(changedButtons);
+  });
+
+  it('knowledge store receives full uncompacted observations', async () => {
+    const { KnowledgeStore } =
+      await import('../knowledge-store/knowledge-store.js');
+    const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as {
+      recordStep: ReturnType<typeof vi.fn>;
+    };
+    mockStore.recordStep.mockClear();
+
+    await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({}),
+    });
+
+    expect(mockStore.recordStep).toHaveBeenCalled();
+    const recorded = mockStore.recordStep.mock.calls[0][0] as {
+      observation: { a11y: { nodes: unknown[] } };
+    };
+    expect(recorded.observation.a11y.nodes).toHaveLength(12);
+  });
+
+  it('batch with includeObservations=all returns compact observations', async () => {
+    const res = await httpRequest(
+      `http://127.0.0.1:${state.port}/tool/run_steps`,
+      {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          steps: [{ tool: 'get_state' }],
+          includeObservations: 'all',
+        }),
+      },
+    );
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: { a11y: { nodes: unknown[] } };
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeDefined();
+    expect(body.observations?.a11y.nodes).toHaveLength(3);
+  });
+
+  it('batch with includeObservations=none omits observations', async () => {
+    const res = await httpRequest(
+      `http://127.0.0.1:${state.port}/tool/run_steps`,
+      {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          steps: [{ tool: 'get_state' }],
+          includeObservations: 'none',
+        }),
+      },
+    );
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: unknown;
+    };
+
+    expect(res.status).toBe(200);
+    expect(body.observations).toBeUndefined();
+  });
+
+  it('describe_screen response omits observations', async () => {
+    const res = await httpRequest(
+      `http://127.0.0.1:${state.port}/tool/describe_screen`,
+      {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({}),
+      },
+    );
+    const body = (await res.json()) as {
+      ok: boolean;
+      observations?: unknown;
+    };
+
+    // Discovery tools never include observations in the HTTP response
+    expect(body.observations).toBeUndefined();
+  });
+});
diff --git a/src/server/create-server.ts b/src/server/create-server.ts
new file mode 100644
index 0000000..3c1e893
--- /dev/null
+++ b/src/server/create-server.ts
@@ -0,0 +1,813 @@
+import express from 'express';
+import { execSync } from 'node:child_process';
+import { randomUUID } from 'node:crypto';
+import * as fs from 'node:fs/promises';
+import * as http from 'node:http';
+
+import { writeDaemonState, removeDaemonState } from './daemon-state.js';
+import { compactObservation } from './observation-compaction.js';
+import { RequestQueue } from './request-queue.js';
+import pkg from '../../package.json';
+import type { PortMap, WorkflowContext } from '../capabilities/context.js';
+import type { ExtensionState } from '../capabilities/types.js';
+import {
+  KnowledgeStore,
+  createDefaultObservation,
+} from '../knowledge-store/knowledge-store.js';
+import { toolRegistry, getToolCategory } from '../tools/registry.js';
+import type { ToolCategory } from '../tools/registry.js';
+import type {
+  StepRecordObservation,
+  StepRecordOutcome,
+  StepRecordTool,
+} from '../tools/types/step-record.js';
+import { OBSERVATION_TESTID_LIMIT } from '../tools/utils/constants.js';
+import {
+  collectTestIds,
+  collectTrimmedA11ySnapshot,
+} from '../tools/utils/discovery.js';
+import type {
+  DaemonState,
+  ServerConfig,
+  ToolContext,
+  ToolResponse,
+} from '../types/http.js';
+import { extractErrorMessage } from '../utils/errors.js';
+import type { ToolName } from '../validation/schemas.js';
+import { toolSchemas } from '../validation/schemas.js';
+
+/**
+ * Pulls the element-targeting fields out of a tool's validated input.
+ * Interaction tools (click, type, wait_for) include a11yRef, testId, or selector.
+ *
+ * @param input - The validated tool input.
+ * @returns The string-valued target fields, or undefined when none is set.
+ */
+export function extractTargetFromInput(
+  input: unknown,
+): StepRecordTool['target'] | undefined {
+  if (input === null || typeof input !== 'object') {
+    return undefined;
+  }
+  const fields = input as Record<string, unknown>;
+  const asString = (value: unknown): string | undefined =>
+    typeof value === 'string' ? value : undefined;
+  const a11yRef = asString(fields.a11yRef);
+  const testId = asString(fields.testId);
+  const selector = asString(fields.selector);
+  const hasTarget = Boolean(a11yRef || testId || selector);
+  return hasTarget ? { a11yRef, testId, selector } : undefined;
+}
+
+/**
+ * Pulls screenshot artifact metadata (file path and, when both are present,
+ * width and height) out of a successful `screenshot`/`describe_screen` result.
+ *
+ * @param toolName - The name of the tool that produced the result.
+ * @param toolResult - The raw result from the tool execution.
+ * @returns Screenshot path and dimensions, or undefined if not applicable.
+ */
+export function extractScreenshotInfo(
+  toolName: string,
+  toolResult: unknown,
+):
+  | { path: string; dimensions?: { width: number; height: number } }
+  | undefined {
+  type ScreenshotInfo = {
+    path: string;
+    dimensions?: { width: number; height: number };
+  };
+  const isRecord = (value: unknown): value is Record<string, unknown> =>
+    typeof value === 'object' && value !== null;
+
+  // Reads `path` (required) plus `width`/`height` (optional pair) from a record.
+  const readInfo = (
+    source: Record<string, unknown>,
+  ): ScreenshotInfo | undefined => {
+    const { path, width, height } = source;
+    if (typeof path !== 'string') {
+      return undefined;
+    }
+    if (typeof width === 'number' && typeof height === 'number') {
+      return { path, dimensions: { width, height } };
+    }
+    return { path };
+  };
+
+  const producesScreenshot =
+    toolName === 'screenshot' || toolName === 'describe_screen';
+  if (!producesScreenshot || !isRecord(toolResult)) {
+    return undefined;
+  }
+  const payload = toolResult.result;
+  if (!toolResult.ok || !isRecord(payload)) {
+    return undefined;
+  }
+
+  // The path sits either on the payload itself or under its `screenshot` key.
+  const direct = readInfo(payload);
+  if (direct) {
+    return direct;
+  }
+  const nested = payload.screenshot;
+  return isRecord(nested) ? readInfo(nested) : undefined;
+}
+
+export type ServerInstance = {
+  start(): Promise<DaemonState>; // resolves with the state written via writeDaemonState
+  stop(): Promise<void>; // returns immediately if a shutdown has already begun
+};
+
+/**
+ * Normalizes a raw tool result into the outcome shape recorded with each step.
+ *
+ * @param toolResult - The raw result returned by a tool function.
+ * @returns A normalized outcome with ok status and optional error details.
+ */
+export function extractToolOutcome(toolResult: unknown): {
+  ok: boolean;
+  error?: { code: string; message: string };
+} {
+  const hasOkFlag =
+    typeof toolResult === 'object' && toolResult !== null && 'ok' in toolResult;
+  // Without an `ok` flag there is no failure signal, so report success.
+  if (!hasOkFlag) {
+    return { ok: true };
+  }
+  const { ok, error } = toolResult as {
+    ok: boolean;
+    error?: { code: string; message: string };
+  };
+  if (ok) {
+    return { ok: true };
+  }
+  if (error) {
+    return { ok: false, error };
+  }
+  return { ok: false };
+}
+
+/**
+ * Attaches observations to an object tool result; anything else passes through.
+ *
+ * @param toolResult - The raw result returned by a tool function.
+ * @param observations - Optional observation snapshot to attach.
+ * @returns The result itself, or a shallow copy carrying `observations`.
+ */
+export function buildResponseBody(
+  toolResult: unknown,
+  observations: StepRecordObservation | undefined,
+): unknown {
+  const isObjectResult = typeof toolResult === 'object' && toolResult !== null;
+  if (!isObjectResult || !observations) {
+    return toolResult;
+  }
+  const merged: Record<string, unknown> = {
+    ...(toolResult as Record<string, unknown>),
+    observations,
+  };
+  return merged;
+}
+
+/**
+ * Decides whether a post-execution observation snapshot should be gathered.
+ *
+ * Collection is independent of what the HTTP response includes: every
+ * category collects, except a batch whose `includeObservations` policy is
+ * `'none'`. A batch with no explicit policy is treated as `'all'`.
+ *
+ * @param category - The tool category to check.
+ * @param validatedInput - The validated input payload (checked for batch policy).
+ * @returns True if observations should be collected.
+ */
+export function shouldCollectObservations(
+  category: ToolCategory,
+  validatedInput?: Record<string, unknown>,
+): boolean {
+  if (category !== 'batch') {
+    return true;
+  }
+  const batchInput = validatedInput as { includeObservations?: string };
+  // Batch runs follow their own policy; a missing policy means 'all'.
+  const policy = batchInput?.includeObservations ?? 'all';
+  return policy !== 'none';
+}
+
+/**
+ * Decides whether collected observations are echoed in the HTTP response.
+ *
+ * @param category - The tool category.
+ * @param toolResult - The result returned by the tool.
+ * @param validatedInput - The validated input payload (used for batch policy).
+ * @returns True if observations should be included in the response.
+ */
+export function shouldIncludeObservationsInResponse(
+  category: ToolCategory,
+  toolResult: ToolResponse,
+  validatedInput?: Record<string, unknown>,
+): boolean {
+  if (category !== 'batch') {
+    // Mutating tools always report state; readonly and discovery never do.
+    return category === 'mutating';
+  }
+
+  const batchInput = validatedInput as { includeObservations?: string };
+  switch (batchInput?.includeObservations ?? 'all') {
+    case 'none':
+      return false;
+    case 'failures': {
+      if (!toolResult.ok) {
+        return true;
+      }
+      // A batch that ran but reports a non-ok summary also counts as a failure.
+      const payload = toolResult.result as Record<string, unknown>;
+      const summary = payload?.summary as Record<string, unknown> | undefined;
+      return summary !== undefined && !summary.ok;
+    }
+    default:
+      return true; // 'all'
+  }
+}
+
+/**
+ * Builds the HTTP daemon app for agent-driven browser testing; call `start()` to listen.
+ *
+ * @param config - The server configuration options.
+ * @returns The server instance with start and stop methods.
+ */
+export function createServer(config: ServerConfig): ServerInstance {
+  const app = express();
+  const queue = new RequestQueue(config.requestTimeoutMs);
+  const nonce = randomUUID(); // returned by /health and written to the daemon state
+  const knowledgeStore = config.knowledgeStore ?? new KnowledgeStore();
+
+  let httpServer: http.Server | null = null;
+  let worktreeRoot = '';
+  let startedAt = '';
+  let daemonPort = 0;
+  let workflowContext: WorkflowContext | null = null;
+  let subPorts: PortMap = {};
+  let shuttingDown = false;
+  let shutdownHandler: (() => void) | null = null;
+  let lastRequestTime = Date.now(); // refreshed on every request; read by the idle check
+  let idleCheckInterval: ReturnType<typeof setInterval> | null = null;
+  let lastObservation: StepRecordObservation | null = null; // prior snapshot passed to compactObservation
+
+  // eslint-disable-next-line import-x/no-named-as-default-member
+  app.use(express.json({ limit: '10mb' }));
+
+  app.use((req, res, next) => {
+    lastRequestTime = Date.now();
+    const requestStartedAt = lastRequestTime;
+    res.on('finish', () => {
+      const duration = Date.now() - requestStartedAt;
+      appendLog(
+        config.logFilePath,
+        `[INFO] ${req.method} ${req.path} ${res.statusCode} ${duration}ms`,
+      );
+    });
+    next();
+  });
+
+  app.get('/health', (_req, res) => {
+    res.json({ status: 'ok', nonce });
+  });
+
+  app.get('/status', (_req, res) => {
+    res.json({
+      daemon: {
+        pid: process.pid,
+        port: daemonPort,
+        uptime: process.uptime(),
+        startedAt,
+      },
+      ports: subPorts,
+    });
+  });
+
+  /**
+   * Builds a lazy ToolContext where `page` and `refMap` are only accessed
+   * when a tool actually reads them, avoiding throws for non-session tools.
+   *
+   * @param wfCtx - The current workflow context to embed in the tool context.
+   * @returns A ToolContext with lazy page and refMap accessors.
+   */
+  function buildToolContext(wfCtx: WorkflowContext): ToolContext {
+    return {
+      sessionManager: config.sessionManager,
+      get page(): ReturnType<typeof config.sessionManager.getPage> {
+        return config.sessionManager.getPage();
+      },
+      get refMap(): Map<string, string> {
+        return config.sessionManager.hasActiveSession()
+          ? config.sessionManager.getRefMap()
+          : new Map<string, string>();
+      },
+      workflowContext: wfCtx,
+      knowledgeStore,
+      toolRegistry,
+    };
+  }
+
+  /**
+   * Records a tool execution step to the knowledge store.
+   * Failures are silently caught — recording must never block tool responses.
+   *
+   * @param toolName - The registered tool name.
+   * @param validatedInput - The validated input payload.
+   * @param outcome - The tool execution outcome.
+   * @param observation - The post-execution observation snapshot.
+   * @param toolResult - The raw tool result (for screenshot extraction).
+   * @param startTime - The epoch timestamp when execution started.
+   */
+  async function recordToolStep(
+    toolName: string,
+    validatedInput: unknown,
+    outcome: StepRecordOutcome,
+    observation: StepRecordObservation | undefined,
+    toolResult: unknown,
+    startTime: number,
+  ): Promise<void> {
+    try {
+      const sessionId = config.sessionManager.getSessionId();
+      if (!sessionId) {
+        return;
+      }
+
+      const target = extractTargetFromInput(validatedInput);
+      const screenshotInfo = extractScreenshotInfo(toolName, toolResult);
+
+      let executionContext: 'e2e' | 'prod' | undefined;
+      try {
+        executionContext = config.sessionManager.getEnvironmentMode();
+      } catch {
+        // session manager may not support environment mode
+      }
+
+      await knowledgeStore.recordStep({
+        sessionId,
+        toolName,
+        input: validatedInput as Record<string, unknown>,
+        target,
+        outcome,
+        observation:
+          observation ?? createDefaultObservation({} as ExtensionState),
+        durationMs: Date.now() - startTime,
+        ...(screenshotInfo ? { screenshotPath: screenshotInfo.path } : {}),
+        ...(screenshotInfo?.dimensions
+          ? { screenshotDimensions: screenshotInfo.dimensions }
+          : {}),
+        context: executionContext,
+      });
+    } catch {
+      // non-fatal: recording failure must not block tool responses
+    }
+  }
+
+  /**
+   * Shared tool executor — validates input, runs through the queue,
+   * records knowledge steps, and collects observations.
+   *
+   * @param toolName - The registered tool name to execute.
+   * @param rawInput - The unvalidated input payload from the request body.
+   * @param res - The Express response object to write the result to.
+   */
+  async function executeTool(
+    toolName: string,
+    rawInput: unknown,
+    res: express.Response,
+  ): Promise<void> {
+    const tool = toolRegistry.get(toolName);
+    if (!tool) {
+      res.status(404).json({
+        ok: false,
+        error: { code: 'TOOL_NOT_FOUND', message: `Unknown tool: ${toolName}` },
+      });
+      return;
+    }
+
+    if (!workflowContext) {
+      res.status(503).json({
+        ok: false,
+        error: {
+          code: 'SERVER_NOT_STARTED',
+          message: 'Server has not been started yet.',
+        },
+      });
+      return;
+    }
+
+    const schema =
+      toolName in toolSchemas ? toolSchemas[toolName as ToolName] : undefined;
+    let validatedInput = rawInput;
+
+    if (schema) {
+      const parsed = schema.safeParse(rawInput);
+      if (!parsed.success) {
+        res.status(400).json({
+          ok: false,
+          error: {
+            code: 'VALIDATION_ERROR',
+            message: parsed.error.issues
+              .map((i) =>
+                i.path.length > 0
+                  ? `${i.path.join('.')}: ${i.message}`
+                  : i.message,
+              )
+              .join('; '),
+          },
+        });
+        return;
+      }
+      validatedInput = parsed.data;
+    }
+
+    const startTime = Date.now();
+    const currentWorkflowContext = workflowContext;
+
+    const category = getToolCategory(toolName);
+
+    try {
+      const { toolResult, observations } = await queue.enqueue(async () => {
+        const context = buildToolContext(currentWorkflowContext);
+        const result = await tool(validatedInput, context);
+
+        let obs: StepRecordObservation | undefined;
+        if (
+          shouldCollectObservations(
+            category,
+            validatedInput as Record<string, unknown>,
+          ) &&
+          config.sessionManager.hasActiveSession()
+        ) {
+          try {
+            const page = config.sessionManager.getPage();
+
+            if (category === 'mutating') {
+              await page
+                .waitForLoadState('domcontentloaded')
+                .catch(() => undefined);
+              await page
+                .waitForFunction(
+                  async () =>
+                    new Promise<boolean>((resolve) => {
+                      requestAnimationFrame(() => {
+                        const allSettled = document
+                          .getAnimations()
+                          .every((a: Animation) => a.playState !== 'running');
+                        resolve(allSettled);
+                      });
+                    }),
+                  { timeout: 3000 },
+                )
+                .catch(() => undefined);
+            }
+            let state = await config.sessionManager.getExtensionState();
+
+            // Post-mutation recheck: if currentScreen is 'unknown' after a mutation,
+            // the extension's internal router may not have updated yet. Poll briefly.
+            if (category === 'mutating' && state.currentScreen === 'unknown') {
+              const RECHECK_DEADLINE_MS = 500;
+              const RECHECK_INTERVAL_MS = 100;
+              const deadline = Date.now() + RECHECK_DEADLINE_MS;
+
+              while (Date.now() < deadline) {
+                await new Promise<void>((resolve) =>
+                  setTimeout(resolve, RECHECK_INTERVAL_MS),
+                );
+                const rechecked =
+                  await config.sessionManager.getExtensionState();
+                if (rechecked.currentScreen !== 'unknown') {
+                  state = rechecked;
+                  break;
+                }
+              }
+            }
+            const testIds = await collectTestIds(
+              page,
+              OBSERVATION_TESTID_LIMIT,
+            );
+            const { nodes, refMap: newRefMap } =
+              await collectTrimmedA11ySnapshot(page);
+            config.sessionManager.setRefMap(newRefMap);
+            obs = createDefaultObservation(state, testIds, nodes);
+          } catch {
+            // non-fatal: observation failure must not block the tool response
+          }
+        }
+
+        return { toolResult: result, observations: obs };
+      });
+
+      await recordToolStep(
+        toolName,
+        validatedInput,
+        extractToolOutcome(toolResult),
+        observations,
+        toolResult,
+        startTime,
+      );
+
+      const includeInResponse = shouldIncludeObservationsInResponse(
+        category,
+        toolResult,
+        validatedInput as Record<string, unknown>,
+      );
+      const responseObservations =
+        includeInResponse && observations
+          ? compactObservation(observations, lastObservation)
+          : undefined;
+      res.json(buildResponseBody(toolResult, responseObservations));
+
+      if (
+        toolName === 'describe_screen' ||
+        toolName === 'launch' ||
+        toolName === 'cleanup'
+      ) {
+        lastObservation = null; // the next compactObservation call gets no prior snapshot
+      } else if (observations) {
+        lastObservation = observations;
+      }
+    } catch (error) {
+      await recordToolStep(
+        toolName,
+        validatedInput,
+        {
+          ok: false,
+          error: {
+            code: 'TOOL_EXECUTION_FAILED',
+            message: extractErrorMessage(error),
+          },
+        },
+        undefined,
+        undefined,
+        startTime,
+      );
+
+      res.status(500).json({
+        ok: false,
+        error: {
+          code: 'TOOL_EXECUTION_FAILED',
+          message: extractErrorMessage(error),
+        },
+      });
+    }
+  }
+
+  app.post('/launch', async (req, res) => {
+    await executeTool('launch', req.body, res);
+  });
+
+  app.post('/cleanup', async (_req, res) => {
+    await executeTool('cleanup', {}, res);
+  });
+
+  app.post(
+    '/tool/:name',
+    async (req: express.Request<{ name: string }>, res) => {
+      await executeTool(req.params.name, req.body, res);
+    },
+  );
+
+  app.use(
+    (
+      error: Error,
+      _req: express.Request,
+      res: express.Response,
+      _next: express.NextFunction,
+    ) => {
+      appendLog(config.logFilePath, `[ERROR] ${error.message}`);
+      res.status(500).json({
+        ok: false,
+        error: {
+          code: 'INTERNAL_ERROR',
+          message: error.message,
+        },
+      });
+    },
+  );
+
+  const instance: ServerInstance = {
+    async start(): Promise<DaemonState> {
+      worktreeRoot = execSync('git rev-parse --show-toplevel', {
+        cwd: process.cwd(),
+      })
+        .toString()
+        .trim();
+
+      try {
+        workflowContext = await config.contextFactory();
+      } catch (error) {
+        throw new Error(
+          `contextFactory failed during server startup: ${error instanceof Error ? error.message : String(error)}`,
+          { cause: error },
+        );
+      }
+
+      if (
+        !workflowContext ||
+        typeof workflowContext !== 'object' ||
+        !workflowContext.config ||
+        typeof workflowContext.config.environment !== 'string'
+      ) {
+        throw new Error(
+          'contextFactory must return an object with a valid config.environment field',
+        );
+      }
+
+      const rawPorts = workflowContext.allocatedPorts;
+      if (rawPorts !== undefined) {
+        if (typeof rawPorts !== 'object' || rawPorts === null) {
+          throw new Error('allocatedPorts must be a plain object');
+        }
+        for (const [key, val] of Object.entries(rawPorts)) {
+          if (typeof val !== 'number' || !Number.isFinite(val)) {
+            throw new Error(
+              `allocatedPorts["${key}"] must be a finite number, got ${String(val)}`,
+            );
+          }
+        }
+      }
+
+      subPorts = workflowContext.allocatedPorts ?? {};
+      config.sessionManager.setWorkflowContext(workflowContext);
+      startedAt = new Date().toISOString();
+
+      // Everything after setWorkflowContext may have side-effects the
+      // consumer expects to be cleaned up.  Wrap in try/catch so a
+      // listen() or writeDaemonState() failure still runs cleanup.
+      try {
+        // Bind daemon directly to port 0 to eliminate TOCTOU race —
+        // the OS assigns the port atomically at listen time.
+        httpServer = await new Promise<http.Server>((resolve, reject) => {
+          const srv = http.createServer(app);
+          srv.listen(0, '127.0.0.1', () => {
+            const addr = srv.address();
+            if (addr && typeof addr !== 'string') {
+              daemonPort = addr.port;
+            }
+            resolve(srv);
+          });
+          srv.on('error', reject);
+        });
+
+        const state: DaemonState = {
+          port: daemonPort,
+          pid: process.pid,
+          startedAt,
+          nonce,
+          version: pkg.version,
+          subPorts,
+        };
+
+        await writeDaemonState(worktreeRoot, state);
+        appendLog(
+          config.logFilePath,
+          `[INFO] Daemon started on port ${daemonPort} (pid ${process.pid})`,
+        );
+
+        shutdownHandler = (): void => {
+          instance
+            .stop()
+            .then(() => process.exit(0))
+            .catch((error: Error) => {
+              appendLog(
+                config.logFilePath,
+                `[ERROR] Daemon failed to shut down: ${error.message}`,
+              );
+              process.exit(1);
+            });
+        };
+
+        process.on('SIGTERM', shutdownHandler);
+        process.on('SIGINT', shutdownHandler);
+
+        const { idleShutdownMs } = config;
+        if (idleShutdownMs && idleShutdownMs > 0) {
+          const checkMs = Math.min(idleShutdownMs / 10, 60_000); // a tenth of the timeout, capped at 60s
+          idleCheckInterval = setInterval(() => {
+            if (Date.now() - lastRequestTime > idleShutdownMs) {
+              appendLog(
+                config.logFilePath,
+                '[INFO] Idle timeout reached, shutting down',
+              );
+              if (idleCheckInterval) {
+                clearInterval(idleCheckInterval);
+                idleCheckInterval = null;
+              }
+              shutdownHandler?.();
+            }
+          }, checkMs);
+          idleCheckInterval.unref(); // this timer alone must not keep the process alive
+        }
+
+        return state;
+      } catch (startupError) {
+        // Best-effort rollback: close the HTTP server if it was created,
+        // then let the session manager clean up any resources the
+        // contextFactory may have started.
+        const serverToClose = httpServer;
+        if (serverToClose) {
+          await new Promise<void>((resolve) => {
+            serverToClose.close(() => {
+              httpServer = null;
+              resolve();
+            });
+          });
+        }
+        try {
+          await config.sessionManager.cleanup();
+        } catch {
+          // Swallow — we're already propagating startupError.
+        }
+        workflowContext = null; // eslint-disable-line require-atomic-updates
+        subPorts = {};
+        throw startupError;
+      }
+    },
+
+    async stop(): Promise<void> {
+      if (shuttingDown) {
+        return;
+      }
+      shuttingDown = true; // later stop() calls return immediately
+
+      appendLog(config.logFilePath, '[INFO] Daemon shutting down');
+
+      // 1. Remove signal handlers
+      if (shutdownHandler) {
+        process.removeListener('SIGTERM', shutdownHandler);
+        process.removeListener('SIGINT', shutdownHandler);
+        shutdownHandler = null;
+      }
+
+      // 2. Clear idle check interval
+      if (idleCheckInterval) {
+        clearInterval(idleCheckInterval);
+        idleCheckInterval = null;
+      }
+
+      // 3. Stop accepting new connections, wait for in-flight (max 10s)
+      await new Promise<void>((resolve) => {
+        if (!httpServer) {
+          resolve();
+          return;
+        }
+
+        const forceClose = setTimeout(() => {
+          httpServer?.closeAllConnections();
+          resolve();
+        }, 10_000);
+
+        httpServer.close(() => {
+          clearTimeout(forceClose);
+          httpServer = null;
+          resolve();
+        });
+      });
+
+      // 4. Clean up session
+      try {
+        await config.sessionManager.cleanup();
+      } catch (error) {
+        appendLog(
+          config.logFilePath,
+          `[ERROR] Cleanup failed: ${extractErrorMessage(error)}`,
+          true,
+        );
+      }
+
+      // 5. Remove .mm-server file
+      if (worktreeRoot) {
+        await removeDaemonState(worktreeRoot);
+      }
+
+      appendLog(config.logFilePath, '[INFO] Daemon stopped');
+    },
+  };
+
+  return instance;
+}
+
+/**
+ * Writes one ISO-timestamped log line to the daemon log file, if configured.
+ *
+ * @param logFilePath - Path to the log file, or undefined to skip file logging.
+ * @param message - The log message to append.
+ * @param fatal - Whether to also write to stderr.
+ */
+function appendLog(
+  logFilePath: string | undefined,
+  message: string,
+  fatal = false,
+): void {
+  const entry = `[${new Date().toISOString()}] ${message}\n`;
+  if (fatal) {
+    process.stderr.write(entry);
+  }
+  if (logFilePath) {
+    fs.appendFile(logFilePath, entry, 'utf-8').catch((writeError) => {
+      process.stderr.write(`Failed to write log: ${writeError.message}\n`);
+    });
+  }
+}
diff --git a/src/server/daemon-state.test.ts b/src/server/daemon-state.test.ts
new file mode 100644
index 0000000..f8a26be
--- /dev/null
+++ b/src/server/daemon-state.test.ts
@@ -0,0 +1,233 @@
+/* eslint-disable n/no-unsupported-features/node-builtins */
+import * as fs from 'node:fs/promises';
+import * as os from 'node:os';
+import * as path from 'node:path';
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+
+import {
+  writeDaemonState,
+  readDaemonState,
+  removeDaemonState,
+  acquireStartupLock,
+  releaseStartupLock,
+  isDaemonAlive,
+  isDaemonVersionMatch,
+  generateNonce,
+} from './daemon-state.js';
+import pkg from '../../package.json';
+import type { DaemonState } from '../types/http.js';
+
+const tmpDir = path.join(os.tmpdir(), `mm-daemon-state-test-${Date.now()}`);
+// Baseline daemon state; individual tests override single fields via spread.
+const mockState: DaemonState = {
+  port: 12345,
+  pid: process.pid,
+  startedAt: new Date().toISOString(),
+  nonce: 'test-nonce-abc',
+  version: pkg.version,
+  subPorts: { serviceA: 3001, serviceB: 3002 },
+};
+
+describe('daemon-state', () => {
+  beforeEach(async () => {
+    await fs.mkdir(tmpDir, { recursive: true });
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    await fs.rm(tmpDir, { recursive: true, force: true });
+  });
+
+  describe('writeDaemonState / readDaemonState', () => {
+    it('writes and reads state atomically', async () => {
+      await writeDaemonState(tmpDir, mockState);
+      const roundTripped = await readDaemonState(tmpDir);
+      expect(roundTripped).toStrictEqual(mockState);
+    });
+
+    it('overwrites existing state', async () => {
+      await writeDaemonState(tmpDir, mockState);
+      const replacement: DaemonState = { ...mockState, port: 99999 };
+      await writeDaemonState(tmpDir, replacement);
+      const stored = await readDaemonState(tmpDir);
+      expect(stored?.port).toBe(99999);
+    });
+  });
+
+  describe('readDaemonState', () => {
+    it('returns null when file does not exist', async () => {
+      const loaded = await readDaemonState(tmpDir);
+      expect(loaded).toBeNull();
+    });
+
+    it('returns null for invalid JSON', async () => {
+      await fs.writeFile(path.join(tmpDir, '.mm-server'), 'not-json', 'utf-8');
+      const loaded = await readDaemonState(tmpDir);
+      expect(loaded).toBeNull();
+    });
+  });
+
+  describe('removeDaemonState', () => {
+    it('removes the state file', async () => {
+      await writeDaemonState(tmpDir, mockState);
+      await removeDaemonState(tmpDir);
+      const afterRemoval = await readDaemonState(tmpDir);
+      expect(afterRemoval).toBeNull();
+    });
+
+    it('does not throw when file does not exist', async () => {
+      expect(await removeDaemonState(tmpDir)).toBeUndefined();
+    });
+  });
+
+  describe('isDaemonAlive', () => {
+    it('returns false for an unreachable port', async () => {
+      const reachable = await isDaemonAlive({ ...mockState, port: 1 });
+      expect(reachable).toBe(false);
+    });
+
+    it('returns false when response.ok is false', async () => {
+      vi.spyOn(globalThis, 'fetch').mockResolvedValue({
+        ok: false,
+      } as Response);
+
+      const reachable = await isDaemonAlive(mockState);
+
+      expect(reachable).toBe(false);
+    });
+
+    it('returns false when nonce does not match', async () => {
+      // The stubbed response is OK but carries a nonce other than the expected one.
+      vi.spyOn(globalThis, 'fetch').mockResolvedValue({
+        ok: true,
+        json: vi.fn().mockResolvedValue({ nonce: 'different-nonce' }),
+      } as unknown as Response);
+
+      const expectedState = { ...mockState, nonce: 'expected-nonce' };
+
+      const reachable = await isDaemonAlive(expectedState);
+
+      expect(reachable).toBe(false);
+    });
+  });
+
+  describe('acquireStartupLock / releaseStartupLock', () => {
+    // Shared by the cases below: the lock file path and this process's pid line.
+    const lockPath = path.join(tmpDir, '.mm-server.lock');
+    const ownPidLine = `${process.pid}\n`;
+
+    it('creates the lock file and writes the current pid', async () => {
+      const gotLock = await acquireStartupLock(tmpDir);
+
+      expect(gotLock).toBe(true);
+      expect(await fs.readFile(lockPath, 'utf-8')).toBe(ownPidLine);
+    });
+
+    it('returns false when another process holds a fresh lock', async () => {
+      // The lock was written just now and names a live pid (this process).
+      await fs.writeFile(lockPath, ownPidLine);
+
+      const gotLock = await acquireStartupLock(tmpDir);
+
+      expect(gotLock).toBe(false);
+    });
+
+    it('reclaims a stale lock by age', async () => {
+      // Backdate the file by 31s; presumably just past the lock staleness window.
+      const longAgo = new Date(Date.now() - 31_000);
+
+      await fs.writeFile(lockPath, ownPidLine);
+      await fs.utimes(lockPath, longAgo, longAgo);
+
+      const gotLock = await acquireStartupLock(tmpDir);
+
+      expect(gotLock).toBe(true);
+      expect(await fs.readFile(lockPath, 'utf-8')).toBe(ownPidLine);
+    });
+
+    it('reclaims a stale lock for a dead pid', async () => {
+      // 999999 is assumed not to belong to any running process.
+      await fs.writeFile(lockPath, '999999\n');
+
+      const gotLock = await acquireStartupLock(tmpDir);
+
+      expect(gotLock).toBe(true);
+      expect(await fs.readFile(lockPath, 'utf-8')).toBe(ownPidLine);
+    });
+
+    it('returns false when stale lock check errors', async () => {
+      // Mode 000 presumably makes reading the lock fail (not when run as root).
+      await fs.writeFile(lockPath, '12345\n');
+      await fs.chmod(lockPath, 0o000);
+
+      const gotLock = await acquireStartupLock(tmpDir);
+
+      expect(gotLock).toBe(false);
+    });
+
+    it('throws when lock creation fails with a non-EEXIST error', async () => {
+      // With the directory removed, creating the lock file itself fails.
+      await fs.rm(tmpDir, { recursive: true, force: true });
+
+      await expect(acquireStartupLock(tmpDir)).rejects.toMatchObject({
+        code: 'ENOENT',
+      });
+    });
+
+    it('removes the lock file', async () => {
+      await fs.writeFile(lockPath, ownPidLine);
+      await releaseStartupLock(tmpDir);
+
+      await expect(fs.access(lockPath)).rejects.toMatchObject({
+        code: 'ENOENT',
+      });
+    });
+
+    it('ignores ENOENT when releasing the lock', async () => {
+      expect(await releaseStartupLock(tmpDir)).toBeUndefined();
+    });
+
+    it('throws when lock release fails with a non-ENOENT error', async () => {
+      // Put a directory where the lock file would be so the release call fails.
+      await fs.mkdir(lockPath);
+
+      // Linux returns EISDIR, macOS returns EPERM for unlink on a directory
+      await expect(releaseStartupLock(tmpDir)).rejects.toMatchObject({
+        code: expect.stringMatching(/^(EPERM|EISDIR)$/u),
+      });
+    });
+  });
+
+  describe('isDaemonVersionMatch', () => {
+    it('returns true when version matches package.json version', () => {
+      // mockState.version is copied from package.json.
+      expect(isDaemonVersionMatch(mockState)).toBe(true);
+    });
+
+    it('returns false when version differs', () => {
+      // '0.0.0' stands in for a version other than the current package's.
+      const outdated: DaemonState = { ...mockState, version: '0.0.0' };
+      expect(isDaemonVersionMatch(outdated)).toBe(false);
+    });
+
+    it('returns false when version is absent (pre-version-tracking daemon)', () => {
+      // Drop `version` to mimic state written before versions were recorded.
+      const { version: _, ...legacyState } = mockState;
+      expect(isDaemonVersionMatch(legacyState as DaemonState)).toBe(false);
+    });
+  });
+
+  describe('generateNonce', () => {
+    it('returns a non-empty string', () => {
+      const generated = generateNonce();
+      expect(typeof generated).toBe('string');
+      expect(generated.length).toBeGreaterThan(0);
+    });
+
+    it('returns unique values on successive calls', () => {
+      const first = generateNonce();
+      const second = generateNonce();
+      expect(first).not.toBe(second);
+    });
+  });
+});
diff --git a/src/server/daemon-state.ts b/src/server/daemon-state.ts
new file mode 100644
index 0000000..d3cfe9a
--- /dev/null
+++ b/src/server/daemon-state.ts
@@ -0,0 +1,209 @@
+import { randomUUID } from 'node:crypto';
+import { constants } from 'node:fs';
+import * as fs from 'node:fs/promises';
+import * as path from 'node:path';
+
+import pkg from '../../package.json';
+import type { DaemonState } from '../types/http.js';
+
+const DAEMON_STATE_FILE = '.mm-server'; // persisted daemon state (JSON)
+const DAEMON_STATE_TMP_FILE = '.mm-server.tmp'; // staged copy renamed into place
+const DAEMON_LOCK_FILE = '.mm-server.lock'; // startup lock holding the owner PID
+const LOCK_STALE_MS = 30_000; // lock age (ms) beyond which it counts as stale
+
+/**
+ * Persists the daemon state as pretty-printed JSON.
+ * The payload is staged in .mm-server.tmp and then renamed over .mm-server.
+ *
+ * @param worktreeRoot - Absolute path to the git worktree root.
+ * @param state - The daemon state to persist.
+ */
+export async function writeDaemonState(
+  worktreeRoot: string,
+  state: DaemonState,
+): Promise<void> {
+  const stagedPath = path.join(worktreeRoot, DAEMON_STATE_TMP_FILE);
+  const serialized = JSON.stringify(state, null, 2);
+  await fs.writeFile(stagedPath, serialized, 'utf-8');
+  await fs.rename(stagedPath, path.join(worktreeRoot, DAEMON_STATE_FILE));
+}
+
+/**
+ * Loads the daemon state stored in the .mm-server file.
+ * Yields null whenever the file cannot be read, its JSON cannot be parsed,
+ * or port/pid are not numbers or nonce/startedAt are not strings.
+ * Fields beyond those four are passed through without validation.
+ *
+ * @param worktreeRoot - Absolute path to the git worktree root.
+ * @returns The parsed daemon state, or null if unavailable.
+ */
+export async function readDaemonState(
+  worktreeRoot: string,
+): Promise<DaemonState | null> {
+  const statePath = path.join(worktreeRoot, DAEMON_STATE_FILE);
+  try {
+    const raw = await fs.readFile(statePath, 'utf-8');
+    const candidate = JSON.parse(raw) as Record<string, unknown>;
+    // A JSON `null` payload throws on the reads below; the catch maps it to null.
+    const numericFields = [candidate.port, candidate.pid];
+    const textFields = [candidate.nonce, candidate.startedAt];
+    const wellFormed =
+      numericFields.every((value) => typeof value === 'number') &&
+      textFields.every((value) => typeof value === 'string');
+    return wellFormed ? (candidate as DaemonState) : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Deletes the .mm-server state file; a missing file is not an error.
+ *
+ * @param worktreeRoot - Absolute path to the git worktree root.
+ */
+export async function removeDaemonState(worktreeRoot: string): Promise<void> {
+  const statePath = path.join(worktreeRoot, DAEMON_STATE_FILE);
+  try {
+    await fs.unlink(statePath);
+  } catch (error) {
+    const alreadyGone = (error as NodeJS.ErrnoException).code === 'ENOENT';
+    if (!alreadyGone) {
+      throw error;
+    }
+  }
+}
+
+/**
+ * Probes GET /health on the daemon's loopback port and compares nonces.
+ * The request is aborted after 2 seconds. Every failure (refused connection,
+ * abort, non-OK status, unparsable body, nonce mismatch) yields false.
+ *
+ * @param state - The daemon state containing port and nonce to verify.
+ * @returns Whether the daemon is responding and matches the expected nonce.
+ */
+export async function isDaemonAlive(state: DaemonState): Promise<boolean> {
+  const abort = new AbortController();
+  // Abort the probe rather than hang on a port that never answers.
+  const abortTimer = setTimeout(() => abort.abort(), 2000);
+
+  try {
+    const healthUrl = `http://127.0.0.1:${state.port}/health`;
+    const reply = await fetch(healthUrl, { signal: abort.signal });
+    if (!reply.ok) {
+      return false;
+    }
+    const payload = (await reply.json()) as { nonce?: string };
+    return payload.nonce === state.nonce;
+  } catch {
+    return false;
+  } finally {
+    clearTimeout(abortTimer);
+  }
+}
+
+/**
+ * Compares the daemon's recorded package version with the current one.
+ *
+ * @param state - The daemon state to check.
+ * @returns Whether the versions match; false when the state has no version.
+ */
+export function isDaemonVersionMatch(state: DaemonState): boolean {
+  const { version: daemonVersion } = state;
+  return daemonVersion === pkg.version;
+}
+
+/**
+ * Generates a new random nonce for daemon identification.
+ *
+ * @returns A UUID string produced by `randomUUID` from `node:crypto`.
+ */
+export function generateNonce(): string {
+  return randomUUID();
+}
+
+/**
+ * Acquires an exclusive startup lock for the worktree.
+ * Uses O_CREAT | O_EXCL to atomically create the lock file and records this
+ * process's PID in it. If the file already exists and isLockStale reports it
+ * stale, the file is removed and the acquisition is retried.
+ *
+ * @param worktreeRoot - Absolute path to the git worktree root.
+ * @returns true if the lock was acquired, false if another process holds it.
+ */
+export async function acquireStartupLock(
+  worktreeRoot: string,
+): Promise<boolean> {
+  const lockPath = path.join(worktreeRoot, DAEMON_LOCK_FILE);
+  try {
+    // eslint-disable-next-line no-bitwise
+    const flags = constants.O_CREAT | constants.O_EXCL | constants.O_WRONLY;
+    const fd = await fs.open(lockPath, flags);
+    // Close the handle even if writing the PID fails, so it is never leaked.
+    await fd.write(`${process.pid}\n`).finally(async () => fd.close());
+    return true;
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code !== 'EEXIST') {
+      throw error;
+    }
+    if (!(await isLockStale(lockPath))) {
+      return false;
+    }
+    try {
+      await fs.unlink(lockPath);
+    } catch {
+      return false;
+    }
+    return acquireStartupLock(worktreeRoot);
+  }
+}
+
+/**
+ * Checks whether a lock file is stale: older than LOCK_STALE_MS, or recording
+ * a PID that no longer exists. Returns false if the lock cannot be inspected.
+ *
+ * @param lockPath - Absolute path to the lock file.
+ * @returns true if the lock holder is dead or the file is older than LOCK_STALE_MS.
+ */
+async function isLockStale(lockPath: string): Promise<boolean> {
+  try {
+    const [content, stat] = await Promise.all([
+      fs.readFile(lockPath, 'utf-8'),
+      fs.stat(lockPath),
+    ]);
+
+    if (Date.now() - stat.mtimeMs > LOCK_STALE_MS) {
+      return true;
+    }
+    // Non-positive PIDs would signal a process group, so treat them as unknown.
+    const pid = Number.parseInt(content.trim(), 10);
+    if (!Number.isInteger(pid) || pid <= 0) {
+      return false;
+    }
+    try {
+      process.kill(pid, 0);
+      return false;
+    } catch (error) {
+      // EPERM means the process exists but is owned by another user: alive.
+      return (error as NodeJS.ErrnoException).code !== 'EPERM';
+    }
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Removes the worktree's startup lock file; a missing file is not an error.
+ *
+ * @param worktreeRoot - Absolute path to the git worktree root.
+ */
+export async function releaseStartupLock(worktreeRoot: string): Promise<void> {
+  const lockPath = path.join(worktreeRoot, DAEMON_LOCK_FILE);
+  try {
+    await fs.unlink(lockPath);
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+      return;
+    }
+    throw error;
+  }
+}
diff --git a/src/server/observation-compaction.test.ts b/src/server/observation-compaction.test.ts
new file mode 100644
index 0000000..033c41f
--- /dev/null
+++ b/src/server/observation-compaction.test.ts
@@ -0,0 +1,561 @@
+import { describe, expect, it, vi } from 'vitest';
+
+import {
+  collapseOptionSubtrees,
+  compactObservation,
+  diffObservation,
+  nodeChanged,
+  observationCompactionDeps,
+} from './observation-compaction.js';
+import type { A11yNodeTrimmed } from '../tools/types/discovery.js';
+import type { StepRecordObservation } from '../tools/types/step-record.js';
+
+function createNode(
+  ref: string,
+  role: string,
+  overrides: Partial<A11yNodeTrimmed> = {},
+): A11yNodeTrimmed {
+  const defaults: A11yNodeTrimmed = {
+    ref,
+    role,
+    name: `${role}-${ref}`,
+    path: ['root', ref],
+  };
+  return { ...defaults, ...overrides };
+}
+
+function createOptionRun(count: number, start = 1): A11yNodeTrimmed[] {
+  const refNumbers = Array.from({ length: count }, (_, index) => start + index);
+  return refNumbers.map((id) =>
+    createNode(`e${id}`, 'option', {
+      name: `Option ${id}`,
+      path: ['root', 'combo', `option-${id}`],
+    }),
+  );
+}
+
+function createObservation(
+  nodes: A11yNodeTrimmed[],
+  overrides: Partial<StepRecordObservation> = {},
+): StepRecordObservation {
+  const { state, testIds, a11y, priorKnowledge } = overrides;
+  // Optional pieces are only attached when the caller supplied them, so the
+  // resulting object never carries `diff` or `priorKnowledge` as undefined.
+  const diffPart = a11y?.diff ? { diff: a11y.diff } : {};
+  const knowledgePart = priorKnowledge ? { priorKnowledge } : {};
+  return {
+    state: state ?? ({} as StepRecordObservation['state']),
+    testIds: testIds ?? [],
+    a11y: { nodes, ...diffPart },
+    ...knowledgePart,
+  } as StepRecordObservation;
+}
+
+describe('collapseOptionSubtrees', () => {
+  it('collapses 55 options after a combobox into a summary node', () => {
+    const combobox = createNode('e1', 'combobox', {
+      name: 'Select network',
+      path: ['root', 'combobox'],
+    });
+    const nodes = [combobox, ...createOptionRun(55, 2)];
+
+    const result = collapseOptionSubtrees(nodes);
+    // The container is kept by identity; the 55 options become one summary.
+    expect(result).toHaveLength(2);
+    expect(result[0]).toBe(combobox);
+    expect(result[1]).toStrictEqual({
+      ref: 'e2\u2013e56',
+      role: 'option',
+      name: '55 options (refs e2\u2013e56)',
+      path: ['root', 'combo', 'option-2'],
+    });
+  });
+
+  it('does not collapse runs below the threshold', () => {
+    const combobox = createNode('e1', 'combobox');
+    const optionOne = createNode('e2', 'option');
+    const optionTwo = createNode('e3', 'option');
+    // Two options are expected to fall short of the collapse threshold.
+    const result = collapseOptionSubtrees([combobox, optionOne, optionTwo]);
+
+    expect(result).toHaveLength(3);
+    expect(result).toStrictEqual([combobox, optionOne, optionTwo]);
+  });
+
+  it('leaves bare options unchanged when no combobox or listbox precedes them', () => {
+    const options = createOptionRun(4);
+    // Without a preceding combobox/listbox the options are never gathered.
+    const result = collapseOptionSubtrees(options);
+
+    expect(result).toStrictEqual(options);
+  });
+
+  it('handles multiple combobox and listbox groups independently', () => {
+    const firstCombobox = createNode('e1', 'combobox', {
+      path: ['root', 'first-combobox'],
+    });
+    const separator = createNode('e12', 'button', {
+      name: 'Continue',
+      path: ['root', 'separator'],
+    });
+    const secondListbox = createNode('e13', 'listbox', {
+      path: ['root', 'second-listbox'],
+    });
+    const nodes = [
+      firstCombobox,
+      ...createOptionRun(10, 2),
+      separator,
+      secondListbox,
+      ...createOptionRun(5, 14),
+    ];
+    // Each container's option run is summarized on its own (10 and 5 options).
+    const result = collapseOptionSubtrees(nodes);
+
+    expect(result).toStrictEqual([
+      firstCombobox,
+      {
+        ref: 'e2\u2013e11',
+        role: 'option',
+        name: '10 options (refs e2\u2013e11)',
+        path: ['root', 'combo', 'option-2'],
+      },
+      separator,
+      secondListbox,
+      {
+        ref: 'e14\u2013e18',
+        role: 'option',
+        name: '5 options (refs e14\u2013e18)',
+        path: ['root', 'combo', 'option-14'],
+      },
+    ]);
+  });
+
+  it('preserves already-collapsed option summaries as a single entry', () => {
+    const combobox = createNode('e1', 'combobox');
+    const collapsedSummary = createNode('e2\u2013e6', 'option', {
+      name: '5 options (refs e2\u2013e6)',
+      path: ['root', 'combo', 'option-2'],
+    });
+    // The range ref e2–e6 already counts as 5 options on its own.
+    const result = collapseOptionSubtrees([combobox, collapsedSummary]);
+
+    expect(result).toStrictEqual([combobox, collapsedSummary]);
+  });
+
+  it('does not collapse when a non-option node immediately follows the combobox', () => {
+    const combobox = createNode('e1', 'combobox');
+    const button = createNode('e2', 'button', { name: 'Apply' });
+    const options = createOptionRun(3, 3);
+    // The button right after the combobox ends the option run before it starts.
+    const result = collapseOptionSubtrees([combobox, button, ...options]);
+
+    expect(result).toStrictEqual([combobox, button, ...options]);
+  });
+
+  it('treats malformed option range refs as single options during compaction', () => {
+    const combobox = createNode('e1', 'combobox');
+    const malformedSummary = createNode(
+      `e${'9'.repeat(400)}\u2013e2`,
+      'option',
+      {
+        name: 'Malformed range',
+        path: ['root', 'combo', 'option-weird'],
+      },
+    );
+    const optionTwo = createNode('e3', 'option', {
+      name: 'Option 3',
+      path: ['root', 'combo', 'option-3'],
+    });
+    const optionThree = createNode('e4', 'option', {
+      name: 'Option 4',
+      path: ['root', 'combo', 'option-4'],
+    });
+    // The 400-digit start index parses to Infinity, so the ref counts as one option.
+    const result = collapseOptionSubtrees([
+      combobox,
+      malformedSummary,
+      optionTwo,
+      optionThree,
+    ]);
+
+    expect(result).toStrictEqual([
+      combobox,
+      {
+        ref: `${malformedSummary.ref}\u2013e4`,
+        role: 'option',
+        name: `3 options (refs ${malformedSummary.ref}\u2013e4)`,
+        path: ['root', 'combo', 'option-weird'],
+      },
+    ]);
+  });
+});
+
+describe('compactObservation', () => {
+  it('preserves non-a11y fields by reference while returning a new object', () => {
+    const state = { connected: true };
+    const testIds = [{ testId: 'submit', tag: 'button', visible: true }];
+    const priorKnowledge = { schemaVersion: 1, notes: ['cached'] };
+    const observation = {
+      state,
+      testIds,
+      a11y: {
+        nodes: [createNode('e1', 'combobox'), ...createOptionRun(4, 2)],
+      },
+      priorKnowledge,
+    } as unknown as StepRecordObservation;
+
+    const result = compactObservation(observation);
+
+    expect(result).not.toBe(observation);
+    expect(result.state).toBe(state);
+    expect(result.testIds).toBe(testIds);
+    expect(result.priorKnowledge).toBe(priorKnowledge);
+    expect(result.a11y).not.toBe(observation.a11y);
+    expect(result.a11y.nodes).toStrictEqual([
+      observation.a11y.nodes[0],
+      {
+        ref: 'e2\u2013e5',
+        role: 'option',
+        name: '4 options (refs e2\u2013e5)',
+        path: ['root', 'combo', 'option-2'],
+      },
+    ]);
+  });
+
+  it('is idempotent when called repeatedly on the same result', () => {
+    const observation = {
+      state: {},
+      testIds: [],
+      a11y: {
+        nodes: [createNode('e1', 'listbox'), ...createOptionRun(6, 2)],
+      },
+    } as unknown as StepRecordObservation;
+
+    const first = compactObservation(observation);
+    const second = compactObservation(first);
+
+    expect(second).toStrictEqual(first);
+  });
+
+  it('returns a diff when the previous observation produces a smaller payload', () => {
+    const previous = createObservation([
+      createNode('e1', 'button', { name: 'Continue' }),
+      createNode('e2', 'button', { name: 'Cancel' }),
+    ]);
+    const current = createObservation([
+      createNode('e1', 'button', { name: 'Continue' }),
+      createNode('e3', 'button', { name: 'Confirm' }),
+    ]);
+    // e1 is unchanged, e2 disappears and e3 is new, so the diff is smaller.
+    const result = compactObservation(current, previous);
+
+    expect(result.a11y.nodes).toStrictEqual([
+      createNode('e3', 'button', { name: 'Confirm' }),
+    ]);
+    expect(result.a11y.diff).toStrictEqual({
+      added: ['e3'],
+      removed: ['e2'],
+      unchanged: 1,
+    });
+  });
+
+  it('returns the full compacted observation when the diff is not smaller', () => {
+    const previous = createObservation([createNode('e1', 'button')]);
+    const current = createObservation(
+      Array.from({ length: 10 }, (_, index) =>
+        createNode(`e${index + 10}`, 'button', { name: `Action ${index + 1}` }),
+      ),
+    );
+    // No ref is shared with the previous observation, so the diff saves nothing.
+    const result = compactObservation(current, previous);
+
+    expect(result.a11y.diff).toBeUndefined();
+    expect(result.a11y.nodes).toStrictEqual(current.a11y.nodes);
+  });
+
+  it('falls back to the original observation when compaction throws', () => {
+    const observation = {
+      state: {},
+      testIds: [],
+      a11y: { nodes: [createNode('e1', 'combobox')] },
+    } as unknown as StepRecordObservation;
+    const collapseSpy = vi
+      .spyOn(observationCompactionDeps, 'collapseOptionSubtrees')
+      .mockImplementation(() => {
+        throw new Error('boom');
+      });
+    try {
+      expect(compactObservation(observation)).toBe(observation);
+    } finally {
+      collapseSpy.mockRestore(); // runs even when the assertion above fails
+    }
+  });
+
+  it('handles empty node arrays gracefully', () => {
+    const observation = {
+      state: {},
+      testIds: [],
+      a11y: { nodes: [] },
+    } as unknown as StepRecordObservation;
+
+    const result = compactObservation(observation);
+
+    expect(result).not.toBe(observation);
+    expect(result.a11y.nodes).toStrictEqual([]);
+  });
+
+  describe('activeTab passthrough', () => {
+    it('preserves state.activeTab when present', () => {
+      const state = {
+        isLoaded: true,
+        currentUrl: 'chrome-extension://extension/home.html',
+        extensionId: 'extension-id',
+        isUnlocked: true,
+        currentScreen: 'home',
+        accountAddress: '0x123',
+        networkName: 'Ethereum Mainnet',
+        chainId: 1,
+        balance: '1 ETH',
+        activeTab: {
+          role: 'dapp',
+          url: 'https://app.uniswap.org/',
+          title: 'Uniswap',
+        },
+      } satisfies StepRecordObservation['state'];
+      const observation = createObservation(
+        [createNode('e1', 'listbox'), ...createOptionRun(6, 2)],
+        { state },
+      );
+
+      const result = compactObservation(observation);
+
+      expect(result.state).toBe(state);
+      expect(result.state.activeTab).toStrictEqual(state.activeTab);
+    });
+
+    it('works when state.activeTab is undefined (backward compat)', () => {
+      const state = {
+        isLoaded: true,
+        currentUrl: 'chrome-extension://extension/home.html',
+        extensionId: 'extension-id',
+        isUnlocked: false,
+        currentScreen: 'unlock',
+        accountAddress: null,
+        networkName: null,
+        chainId: null,
+        balance: null,
+      } satisfies StepRecordObservation['state'];
+      const observation = createObservation(
+        [createNode('e1', 'listbox'), ...createOptionRun(6, 2)],
+        { state },
+      );
+
+      const result = compactObservation(observation);
+
+      expect(result.state).toBe(state);
+      expect(result.state.activeTab).toBeUndefined();
+    });
+  });
+});
+
+describe('nodeChanged', () => {
+  it('returns true when the name changes', () => {
+    const previous = createNode('e1', 'button', { name: 'Continue' });
+    const current = createNode('e1', 'button', { name: 'Confirm' });
+
+    expect(nodeChanged(current, previous)).toBe(true);
+  });
+
+  it('returns true when the role changes', () => {
+    const previous = createNode('e1', 'button');
+    const current = createNode('e1', 'link');
+
+    expect(nodeChanged(current, previous)).toBe(true);
+  });
+
+  it('returns true when the path changes', () => {
+    const previous = createNode('e1', 'button', { path: ['root', 'page'] });
+    const current = createNode('e1', 'button', {
+      path: ['root', 'dialog', 'page'],
+    });
+
+    expect(nodeChanged(current, previous)).toBe(true);
+  });
+
+  it('returns true when the disabled state changes', () => {
+    const previous = createNode('e1', 'button', { disabled: false });
+    const current = createNode('e1', 'button', { disabled: true });
+
+    expect(nodeChanged(current, previous)).toBe(true);
+  });
+
+  it('returns false for identical nodes', () => {
+    const previous = createNode('e1', 'checkbox', {
+      checked: true,
+      expanded: false,
+      testId: 'accept',
+      textContent: 'Accept terms',
+      path: ['root', 'form', 'accept'],
+    });
+    const current = createNode('e1', 'checkbox', {
+      checked: true,
+      expanded: false,
+      testId: 'accept',
+      textContent: 'Accept terms',
+      path: ['root', 'form', 'accept'],
+    });
+    // Two separately built nodes with equal field values, optional ones included.
+    expect(nodeChanged(current, previous)).toBe(false);
+  });
+
+  it('does not compare refs', () => {
+    const previous = createNode('e1', 'button', {
+      name: 'Continue',
+      path: ['root', 'actions'],
+    });
+    const current = createNode('e999', 'button', {
+      name: 'Continue',
+      path: ['root', 'actions'],
+    });
+    // Everything except the ref matches, and nodeChanged never reads the ref.
+    expect(nodeChanged(current, previous)).toBe(false);
+  });
+});
+
+describe('diffObservation', () => {
+  it('tracks added nodes and omits unchanged nodes from the diff payload', () => {
+    const stable = createNode('e1', 'button', { name: 'Continue' });
+    const added = createNode('e2', 'button', { name: 'Cancel' });
+    const previous = createObservation([stable]);
+    const current = createObservation([stable, added]);
+    // e1 is identical in both observations; only e2 is new.
+    const result = diffObservation(current, previous);
+
+    expect(result.a11y.nodes).toStrictEqual([added]);
+    expect(result.a11y.diff).toStrictEqual({
+      added: ['e2'],
+      removed: [],
+      unchanged: 1,
+    });
+  });
+
+  it('tracks removed nodes without including them in nodes', () => {
+    const stable = createNode('e1', 'button', { name: 'Continue' });
+    const removed = createNode('e2', 'button', { name: 'Cancel' });
+    const previous = createObservation([stable, removed]);
+    const current = createObservation([stable]);
+    // e2 exists only in the previous observation.
+    const result = diffObservation(current, previous);
+
+    expect(result.a11y.nodes).toStrictEqual([]);
+    expect(result.a11y.diff).toStrictEqual({
+      added: [],
+      removed: ['e2'],
+      unchanged: 1,
+    });
+  });
+
+  it('includes changed nodes without marking them as added or removed', () => {
+    const previous = createObservation([
+      createNode('e1', 'button', { disabled: false, name: 'Continue' }),
+    ]);
+    const changed = createNode('e1', 'button', {
+      disabled: true,
+      name: 'Continue',
+    });
+    const current = createObservation([changed]);
+    // Same ref with a different `disabled` value.
+    const result = diffObservation(current, previous);
+
+    expect(result.a11y.nodes).toStrictEqual([changed]);
+    expect(result.a11y.diff).toStrictEqual({
+      added: [],
+      removed: [],
+      unchanged: 0,
+    });
+  });
+
+  it('returns an empty diff payload when nothing changed', () => {
+    const previous = createObservation([
+      createNode('e1', 'button'),
+      createNode('e2', 'checkbox', { checked: true }),
+    ]);
+    const current = createObservation([
+      createNode('e1', 'button'),
+      createNode('e2', 'checkbox', { checked: true }),
+    ]);
+    // Both observations are rebuilt from scratch with equal field values.
+    const result = diffObservation(current, previous);
+
+    expect(result.a11y.nodes).toStrictEqual([]);
+    expect(result.a11y.diff).toStrictEqual({
+      added: [],
+      removed: [],
+      unchanged: 2,
+    });
+  });
+
+  it('supports mixed added, removed, changed, and unchanged nodes', () => {
+    const unchangedNodes = Array.from({ length: 5 }, (_, index) =>
+      createNode(`u${index + 1}`, 'button', { name: `Stable ${index + 1}` }),
+    );
+    const previous = createObservation([
+      ...unchangedNodes,
+      createNode('c1', 'button', { disabled: false, name: 'Changed' }),
+      createNode('r1', 'button', { name: 'Removed' }),
+    ]);
+    const changed = createNode('c1', 'button', {
+      disabled: true,
+      name: 'Changed',
+    });
+    const addedOne = createNode('a1', 'button', { name: 'Added 1' });
+    const addedTwo = createNode('a2', 'button', { name: 'Added 2' });
+    const current = createObservation([
+      ...unchangedNodes,
+      changed,
+      addedOne,
+      addedTwo,
+    ]);
+    // 5 unchanged (u1..u5), 1 changed (c1), 1 removed (r1), 2 added (a1, a2).
+    const result = diffObservation(current, previous);
+
+    expect(result.a11y.nodes).toStrictEqual([changed, addedOne, addedTwo]);
+    expect(result.a11y.diff).toStrictEqual({
+      added: ['a1', 'a2'],
+      removed: ['r1'],
+      unchanged: 5,
+    });
+  });
+
+  it('preserves the current state and testIds', () => {
+    const state = {
+      mode: 'current',
+    } as unknown as StepRecordObservation['state'];
+    const testIds = [{ testId: 'submit', tag: 'button', visible: true }];
+    const previous = createObservation([]);
+    const current = createObservation([createNode('e1', 'button')], {
+      state,
+      testIds,
+    });
+    // Both fields must come from `current`, by reference.
+    const result = diffObservation(current, previous);
+
+    expect(result.state).toBe(state);
+    expect(result.testIds).toBe(testIds);
+  });
+
+  it('preserves the current priorKnowledge', () => {
+    const priorKnowledge = {
+      schemaVersion: 1,
+      notes: ['cached'],
+    } as unknown as StepRecordObservation['priorKnowledge'];
+    const previous = createObservation([]);
+    const current = createObservation([createNode('e1', 'button')], {
+      priorKnowledge,
+    });
+
+    const result = diffObservation(current, previous);
+
+    expect(result.priorKnowledge).toBe(priorKnowledge);
+  });
+});
diff --git a/src/server/observation-compaction.ts b/src/server/observation-compaction.ts
new file mode 100644
index 0000000..e525c8d
--- /dev/null
+++ b/src/server/observation-compaction.ts
@@ -0,0 +1,261 @@
+import type { A11yNodeTrimmed } from '../tools/types/discovery.js';
+import type { StepRecordObservation } from '../tools/types/step-record.js';
+import { OPTION_COLLAPSE_MIN_COUNT } from '../tools/utils/constants.js';
+
+// A collapsed range ref: one non-digit prefix on both sides of an en dash.
+const OPTION_RANGE_PATTERN =
+  /^(?<prefix>[^\d]+)(?<start>\d+)\u2013\k<prefix>(?<end>\d+)$/u;
+
+type RefRange = {
+  firstRef: string;
+  lastRef: string;
+  count: number;
+};
+
+/**
+ * Parses a ref into its first/last ref and the number of nodes it stands for.
+ * A range like "e2–e6" spans e2..e6 (count 5). Any other ref, or a range whose
+ * indices are not safe integers, is treated as one node.
+ *
+ * @param ref - A node ref string, either a simple ref (e.g. "e3") or a range (e.g. "e2–e6").
+ * @returns The first ref, last ref, and total count of nodes the ref represents.
+ */
+function parseRefRange(ref: string): RefRange {
+  const single: RefRange = { firstRef: ref, lastRef: ref, count: 1 };
+  const groups = OPTION_RANGE_PATTERN.exec(ref)?.groups;
+  if (!groups) {
+    return single;
+  }
+  const { prefix, start, end } = groups;
+  const startIndex = Number(start);
+  const endIndex = Number(end);
+  // Beyond 2^53 - 1 the difference below would silently lose precision.
+  if (!Number.isSafeInteger(startIndex) || !Number.isSafeInteger(endIndex)) {
+    return single;
+  }
+  return {
+    firstRef: `${prefix}${start}`,
+    lastRef: `${prefix}${end}`,
+    count: Math.abs(endIndex - startIndex) + 1,
+  };
+}
+
+/**
+ * Folds a run of option nodes into one summary node. Its ref spans the first
+ * ref of the first node to the last ref of the last node, its name reports
+ * how many options the run stands for, and its path is the first node's path.
+ *
+ * @param nodes - Array of option nodes to summarize.
+ * @returns A single summary node representing the collapsed options.
+ */
+function buildOptionSummary(nodes: A11yNodeTrimmed[]): A11yNodeTrimmed {
+  // Range refs from earlier collapsing contribute their full node count.
+  const ranges = nodes.map((node) => parseRefRange(node.ref));
+  const { firstRef } = ranges[0];
+  const { lastRef } = ranges[ranges.length - 1];
+  let total = 0;
+  for (const range of ranges) {
+    total += range.count;
+  }
+  const span = `${firstRef}\u2013${lastRef}`;
+  const name = `${total} options (refs ${span})`;
+
+  return { ref: span, role: 'option', name, path: nodes[0].path };
+}
+
+/**
+ * Swappable implementation table; callers go through this object so the
+ * collapse step can be replaced (for example by a test spy).
+ */
+export const observationCompactionDeps = {
+  collapseOptionSubtrees(nodes: A11yNodeTrimmed[]): A11yNodeTrimmed[] {
+    const output: A11yNodeTrimmed[] = [];
+    let index = 0;
+
+    while (index < nodes.length) {
+      const container = nodes[index];
+      output.push(container);
+      index += 1;
+      if (container.role !== 'combobox' && container.role !== 'listbox') {
+        continue;
+      }
+
+      // Gather the uninterrupted run of options directly after the container.
+      const runStart = index;
+      while (index < nodes.length && nodes[index].role === 'option') {
+        index += 1;
+      }
+      const run = nodes.slice(runStart, index);
+      if (run.length === 0) {
+        continue;
+      }
+
+      // Already-collapsed range refs count for every option they stand for.
+      let represented = 0;
+      for (const option of run) {
+        represented += parseRefRange(option.ref).count;
+      }
+
+      if (represented >= OPTION_COLLAPSE_MIN_COUNT) {
+        output.push(buildOptionSummary(run));
+      } else {
+        output.push(...run);
+      }
+    }
+
+    return output;
+  },
+};
+
+/**
+ * Collapses consecutive option nodes immediately beneath combobox/listbox nodes.
+ *
+ * @param nodes - Flat accessibility nodes to compact.
+ * @returns The result of observationCompactionDeps.collapseOptionSubtrees(nodes).
+ */
+export function collapseOptionSubtrees(
+  nodes: A11yNodeTrimmed[],
+): A11yNodeTrimmed[] {
+  return observationCompactionDeps.collapseOptionSubtrees(nodes);
+}
+
+/**
+ * Creates a compacted copy of an observation. Option runs are collapsed in
+ * the a11y nodes; the remaining top-level fields are copied over unchanged.
+ * When a previous observation is supplied, the result is instead a diff
+ * against it, provided that diff carries fewer nodes than the compacted copy.
+ * Any exception raised along the way is swallowed and the input returned.
+ *
+ * @param observation - Observation to compact.
+ * @param previousObservation - Optional previous observation to compute diff against.
+ * @returns A new compacted observation, or the original observation on failure.
+ */
+export function compactObservation(
+  observation: StepRecordObservation,
+  previousObservation?: StepRecordObservation | null,
+): StepRecordObservation {
+  // Shallow-copies an observation, swapping in the collapsed node list.
+  const withCollapsedOptions = (
+    source: StepRecordObservation,
+  ): StepRecordObservation => {
+    const nodes = observationCompactionDeps.collapseOptionSubtrees(
+      source.a11y.nodes,
+    );
+    return { ...source, a11y: { ...source.a11y, nodes } };
+  };
+
+  try {
+    const compacted = withCollapsedOptions(observation);
+    if (!previousObservation) {
+      return compacted;
+    }
+
+    // Collapse the previous observation the same way before comparing.
+    const delta = diffObservation(
+      compacted,
+      withCollapsedOptions(previousObservation),
+    );
+
+    // The diff is only worth returning when it carries fewer nodes.
+    const diffIsSmaller =
+      delta.a11y.nodes.length < compacted.a11y.nodes.length;
+    return diffIsSmaller ? delta : compacted;
+  } catch {
+    // Compaction is best-effort: any failure hands back the untouched
+    // input instead of surfacing the error to the caller.
+    return observation;
+  }
+}
+
+/**
+ * Compares two string arrays position by position using strict equality.
+ * Arrays of different lengths are never equal.
+ *
+ * @param left - First array to compare.
+ * @param right - Second array to compare.
+ * @returns True if arrays have equal length and identical elements.
+ */
+function arraysEqual(left: string[], right: string[]): boolean {
+  const sameLength = left.length === right.length;
+  return sameLength && !left.some((item, at) => item !== right[at]);
+}
+
+/**
+ * Tells whether two accessibility nodes differ in name, role, disabled,
+ * checked, expanded, testId, textContent or path. Refs are not compared.
+ *
+ * @param a - First node to compare.
+ * @param b - Second node to compare.
+ * @returns True if any compared property differs between the nodes.
+ */
+export function nodeChanged(a: A11yNodeTrimmed, b: A11yNodeTrimmed): boolean {
+  const sameScalars =
+    a.name === b.name &&
+    a.role === b.role &&
+    a.disabled === b.disabled &&
+    a.checked === b.checked &&
+    a.expanded === b.expanded &&
+    a.testId === b.testId &&
+    a.textContent === b.textContent;
+  return !(sameScalars && arraysEqual(a.path, b.path));
+}
+
+/**
+ * Computes the diff between two observations, matching nodes by ref.
+ * Nodes whose ref is new or whose compared fields changed are returned,
+ * refs that disappeared are listed as removed, and unchanged nodes are
+ * only counted. Other fields are taken from the current observation.
+ *
+ * @param current - The current observation to compare.
+ * @param previous - The previous observation to compare against.
+ * @returns A new observation containing only changed/new nodes with diff metadata.
+ */
+export function diffObservation(
+  current: StepRecordObservation,
+  previous: StepRecordObservation,
+): StepRecordObservation {
+  // Later duplicates of a ref overwrite earlier ones, as with any Map.
+  const indexByRef = (
+    list: A11yNodeTrimmed[],
+  ): Map<A11yNodeTrimmed['ref'], A11yNodeTrimmed> =>
+    new Map(list.map((node) => [node.ref, node] as const));
+
+  const before = indexByRef(previous.a11y.nodes);
+  const after = indexByRef(current.a11y.nodes);
+
+  const changedOrNew: A11yNodeTrimmed[] = [];
+  const added: string[] = [];
+  let unchanged = 0;
+
+  after.forEach((node, ref) => {
+    const earlier = before.get(ref);
+    if (earlier && !nodeChanged(node, earlier)) {
+      unchanged += 1;
+      return;
+    }
+    // A ref missing from the previous observation marks a brand-new node;
+    // changed nodes are returned too but are not listed as added.
+    if (!earlier) {
+      added.push(ref);
+    }
+    changedOrNew.push(node);
+  });
+
+  // Refs only the previous observation knows about have been removed.
+  const removed: string[] = [];
+  for (const ref of before.keys()) {
+    if (!after.has(ref)) {
+      removed.push(ref);
+    }
+  }
+
+  return {
+    state: current.state,
+    testIds: current.testIds,
+    a11y: {
+      nodes: changedOrNew,
+      diff: { added, removed, unchanged },
+    },
+    priorKnowledge: current.priorKnowledge,
+  };
+}
diff --git a/src/server/port-allocator.test.ts b/src/server/port-allocator.test.ts
new file mode 100644
index 0000000..c2f89c5
--- /dev/null
+++ b/src/server/port-allocator.test.ts
@@ -0,0 +1,48 @@
+import { describe, it, expect } from 'vitest';
+
+import { allocatePort } from './port-allocator.js';
+
+describe('allocatePort', () => {
+  it('returns a valid port number', async () => {
+    const { port, server } = await allocatePort();
+    try {
+      expect(port).toBeGreaterThan(0);
+      expect(port).toBeLessThan(65536); // 65535 is the highest valid port
+    } finally {
+      server.close(); // always release the bound socket
+    }
+  });
+
+  it('returns different ports on concurrent calls', async () => {
+    const [a, b] = await Promise.all([allocatePort(), allocatePort()]);
+    try {
+      expect(a.port).not.toBe(b.port); // both servers are still bound here
+    } finally {
+      a.server.close();
+      b.server.close();
+    }
+  });
+
+  it('returns a server that is already listening', async () => {
+    const { server } = await allocatePort();
+    try {
+      expect(server.listening).toBe(true);
+    } finally {
+      server.close();
+    }
+  });
+
+  it('binds to 127.0.0.1', async () => {
+    const { server } = await allocatePort();
+    try {
+      const address = server.address();
+      expect(address).not.toBeNull();
+      expect(typeof address).toBe('object');
+      if (typeof address === 'object' && address !== null) { // type narrowing
+        expect(address.address).toBe('127.0.0.1');
+      }
+    } finally {
+      server.close();
+    }
+  });
+});
diff --git a/src/server/port-allocator.ts b/src/server/port-allocator.ts
new file mode 100644
index 0000000..dbc7982
--- /dev/null
+++ b/src/server/port-allocator.ts
@@ -0,0 +1,27 @@
+import * as net from 'node:net';
+
+/**
+ * Reserves a free loopback port by binding to port 0 (OS-assigned).
+ * The bound server is returned too, so the caller owns it (use or close it).
+ *
+ * @returns The allocated port and bound server.
+ */
+export async function allocatePort(): Promise<{
+  port: number;
+  server: net.Server;
+}> {
+  return new Promise((resolve, reject) => {
+    const server = net.createServer();
+    const onListening = (): void => {
+      const bound = server.address();
+      if (bound !== null && typeof bound === 'object') {
+        resolve({ port: bound.port, server });
+        return;
+      }
+      server.close();
+      reject(new Error('Failed to get server address'));
+    };
+    server.listen(0, '127.0.0.1', onListening);
+    server.on('error', reject);
+  });
+}
diff --git a/src/server/request-queue.test.ts b/src/server/request-queue.test.ts
new file mode 100644
index 0000000..7729a3f
--- /dev/null
+++ b/src/server/request-queue.test.ts
@@ -0,0 +1,92 @@
+import { describe, it, expect } from 'vitest';
+
+import { RequestQueue } from './request-queue.js';
+
+async function sleep(ms: number): Promise<void> {
+  await new Promise<void>((done) => setTimeout(done, ms));
+}
+
+describe('RequestQueue', () => {
+  it('executes enqueued functions sequentially', async () => {
+    const queue = new RequestQueue();
+    const results: number[] = [];
+
+    await Promise.all([
+      queue.enqueue(async () => {
+        await sleep(30); // slowest task is first; later ones must wait
+        results.push(1);
+      }),
+      queue.enqueue(async () => {
+        results.push(2);
+      }),
+      queue.enqueue(async () => {
+        results.push(3);
+      }),
+    ]);
+
+    expect(results).toStrictEqual([1, 2, 3]);
+  });
+
+  it('returns the value produced by the enqueued function', async () => {
+    const queue = new RequestQueue();
+    const result = await queue.enqueue(async () => 42);
+    expect(result).toBe(42);
+  });
+
+  it('returns values from concurrent enqueues in order', async () => {
+    const queue = new RequestQueue();
+    const [a, b, c] = await Promise.all([
+      queue.enqueue(async () => 'first'),
+      queue.enqueue(async () => 'second'),
+      queue.enqueue(async () => 'third'),
+    ]);
+
+    expect(a).toBe('first');
+    expect(b).toBe('second');
+    expect(c).toBe('third');
+  });
+
+  it('rejects when the function exceeds the timeout', async () => {
+    const queue = new RequestQueue(50); // 50ms limit vs. the 500ms task
+
+    await expect(
+      queue.enqueue(
+        async () => new Promise((resolve) => setTimeout(resolve, 500)),
+      ),
+    ).rejects.toThrowError('timed out');
+  });
+
+  it('remains functional after a timeout rejection', async () => {
+    const queue = new RequestQueue(50); // 50ms limit vs. the 500ms task
+
+    await queue
+      .enqueue(async () => new Promise((resolve) => setTimeout(resolve, 500)))
+      .catch(() => {}); // the timeout rejection is expected here
+
+    const result = await queue.enqueue(async () => 'recovered');
+    expect(result).toBe('recovered');
+  });
+
+  it('propagates errors thrown by the enqueued function', async () => {
+    const queue = new RequestQueue();
+
+    await expect(
+      queue.enqueue(async () => {
+        throw new Error('task failed');
+      }),
+    ).rejects.toThrowError('task failed');
+  });
+
+  it('continues processing after an error in a previous task', async () => {
+    const queue = new RequestQueue();
+
+    await queue
+      .enqueue(async () => {
+        throw new Error('fail');
+      })
+      .catch(() => {}); // the task error is expected here
+
+    const result = await queue.enqueue(async () => 'after-error');
+    expect(result).toBe('after-error');
+  });
+});
diff --git a/src/server/request-queue.ts b/src/server/request-queue.ts
new file mode 100644
index 0000000..79f52b3
--- /dev/null
+++ b/src/server/request-queue.ts
@@ -0,0 +1,64 @@
+import { debugWarn } from '../utils';
+
+/**
+ * Async mutex for serializing concurrent tool requests.
+ * Ensures only one tool executes at a time.
+ */
+export class RequestQueue {
+  #queue: Promise<void> = Promise.resolve();
+
+  readonly #timeoutMs: number;
+
+  /**
+   * @param timeoutMs - Milliseconds a started task may run before its caller is rejected.
+   */
+  constructor(timeoutMs = 30_000) {
+    this.#timeoutMs = timeoutMs;
+  }
+
+  /**
+   * Enqueues an async task for serial execution with a timeout.
+   * On timeout the caller is rejected at once, but the lock stays held until
+   * the task itself settles, so the next task can never overlap it.
+   *
+   * @param fn - The async function to execute.
+   * @returns The resolved value of the provided function.
+   */
+  async enqueue<Result>(fn: () => Promise<Result>): Promise<Result> {
+    let release!: () => void;
+    const next = new Promise<void>((resolve) => {
+      release = resolve;
+    });
+    const prev = this.#queue;
+    this.#queue = next;
+    await prev;
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    // A synchronous throw from `fn` becomes a rejection, so `release` still runs.
+    const fnPromise = (async () => fn())();
+    // Release the mutex only when the task really settles (even after a
+    // timeout), without making the caller wait for it; failures are logged.
+    void fnPromise
+      .catch((error: unknown) => debugWarn('request-queue.enqueue', error))
+      .finally(release);
+    try {
+      return await Promise.race([
+        fnPromise,
+        new Promise<never>((_resolve, reject) => {
+          timer = setTimeout(
+            () =>
+              reject(
+                new Error(
+                  `Tool execution timed out after ${this.#timeoutMs}ms`,
+                ),
+              ),
+            this.#timeoutMs,
+          );
+        }),
+      ]);
+    } finally {
+      if (timer !== undefined) {
+        clearTimeout(timer);
+      }
+    }
+  }
+}
diff --git a/src/mcp-server/session-manager.ts b/src/server/session-manager.ts
similarity index 85%
rename from src/mcp-server/session-manager.ts
rename to src/server/session-manager.ts
index 5de75bc..2ac10fe 100644
--- a/src/mcp-server/session-manager.ts
+++ b/src/server/session-manager.ts
@@ -1,5 +1,5 @@
 /**
- * Generic Session Manager Interface for Browser Extension MCP Servers.
+ * Generic Session Manager Interface for Browser Extension HTTP Servers.
  *
  * This module defines the interface that concrete session managers must implement.
  * The interface abstracts browser session management, page tracking, and extension state.
@@ -10,8 +10,10 @@
 
 import type { Page, BrowserContext } from '@playwright/test';
 
-import type { TabRole, SessionState, SessionMetadata } from './types';
-import type { EnvironmentMode } from '../capabilities/context.js';
+import type {
+  EnvironmentMode,
+  WorkflowContext,
+} from '../capabilities/context.js';
 import type {
   ExtensionState,
   BuildCapability,
@@ -21,6 +23,7 @@ import type {
   StateSnapshotCapability,
   ScreenshotResult,
 } from '../capabilities/types.js';
+import type { TabRole, SessionState, SessionMetadata } from '../tools/types';
 
 /**
  * Represents a tracked browser page with its role and URL.
@@ -265,6 +268,18 @@ export type ISessionManager = {
   // Environment Configuration
   // -----------------------------------------------------------------------------
 
+  /**
+   * Set the workflow context created by the server's context factory.
+   *
+   * Called by `createServer` during startup so that the session manager has
+   * access to the same capability objects that tools receive. Implementations
+   * should store the context and expose its capabilities through the
+   * individual capability getters.
+   *
+   * @param context - The workflow context produced by the configured `contextFactory`.
+   */
+  setWorkflowContext(context: WorkflowContext): void;
+
   /**
    * Get the current environment mode.
    *
@@ -295,47 +310,3 @@ export type ISessionManager = {
     canSwitchContext: boolean;
   };
 };
-
-/**
- * Session manager instance holder.
- *
- * In the core package, this is undefined by default.
- * Extension implementations should call setSessionManager() to inject
- * their concrete implementation.
- */
-let _sessionManager: ISessionManager | undefined;
-
-/**
- * Set the session manager instance.
- *
- * This should be called by extension-specific code during server initialization.
- *
- * @param manager The session manager implementation to inject
- */
-export function setSessionManager(manager: ISessionManager): void {
-  _sessionManager = manager;
-}
-
-/**
- * Get the session manager instance.
- *
- * @throws Error if no session manager has been set
- * @returns The session manager instance
- */
-export function getSessionManager(): ISessionManager {
-  if (!_sessionManager) {
-    throw new Error(
-      'Session manager not initialized. Call setSessionManager() first.',
-    );
-  }
-  return _sessionManager;
-}
-
-/**
- * Check if a session manager has been set.
- *
- * @returns True if a session manager is set, false otherwise
- */
-export function hasSessionManager(): boolean {
-  return _sessionManager !== undefined;
-}
diff --git a/src/tools/batch.test.ts b/src/tools/batch.test.ts
new file mode 100644
index 0000000..2cfffab
--- /dev/null
+++ b/src/tools/batch.test.ts
@@ -0,0 +1,627 @@
+import { describe, it, expect, vi } from 'vitest';
+
+import { runStepsTool } from './batch.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext, ToolFunction } from '../types/http.js';
+
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    toolRegistry?: Map<string, ToolFunction<any, any>>;
+  } = {},
+): ToolContext {
+  const { hasActive: active = true, toolRegistry: registry } = options;
+  const stub = {
+    sessionManager: createMockSessionManager({ hasActive: active }),
+    page: {} as ToolContext['page'],
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+    toolRegistry: registry,
+  };
+  return stub as unknown as ToolContext;
+}
+
+describe('runStepsTool', () => {
+  it('returns error when no active session', async () => {
+    const context = createMockContext({ hasActive: false });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'click', args: { testId: 'button' } }] },
+      context,
+    );
+
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+    }
+  });
+
+  it('returns internal error when tool registry is missing', async () => {
+    const context = createMockContext(); // no toolRegistry supplied
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'click', args: { testId: 'button' } }] },
+      context,
+    );
+
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error.code).toBe(ErrorCodes.MM_INTERNAL_ERROR);
+      expect(result.error.message).toContain('Tool registry not available');
+    }
+  });
+
+  it('executes a single step successfully', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: 'clicked',
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([['click', clickHandler]]),
+    });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'click', args: { testId: 'button' } }] },
+      context,
+    );
+
+    expect(clickHandler).toHaveBeenCalledWith(
+      { testId: 'button', timeoutMs: 15000 }, // timeoutMs is not in the step
+      context,
+    );
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(1);
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'click',
+        ok: true,
+        result: 'clicked',
+      });
+      expect(result.result.steps[0].meta.durationMs).toBeGreaterThanOrEqual(0);
+      expect(result.result.steps[0].meta.timestamp).toStrictEqual(
+        expect.any(String),
+      );
+      expect(result.result.summary).toMatchObject({
+        ok: true,
+        total: 1,
+        succeeded: 1,
+        failed: 0,
+      });
+    }
+  });
+
+  it('returns unknown tool error in the step result', async () => {
+    const context = createMockContext({ toolRegistry: new Map() });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'unknown_tool', args: {} }] },
+      context,
+    );
+
+    expect(result.ok).toBe(true); // the batch call succeeds; the step fails
+    if (result.ok) {
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'unknown_tool',
+        ok: false,
+        error: {
+          code: ErrorCodes.MM_UNKNOWN_TOOL,
+          message: 'Unknown tool: unknown_tool',
+        },
+      });
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 1,
+        succeeded: 0,
+        failed: 1,
+      });
+    }
+  });
+
+  it('records a failed step when a handler returns ok false', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: false,
+      error: { code: 'MM_CLICK_FAILED', message: 'Click failed' },
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([['click', clickHandler]]),
+    });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'click', args: { testId: 'btn' } }] },
+      context,
+    );
+
+    expect(clickHandler).toHaveBeenCalledWith(
+      { testId: 'btn', timeoutMs: 15000 },
+      context,
+    );
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'click',
+        ok: false,
+        error: { code: 'MM_CLICK_FAILED', message: 'Click failed' },
+      });
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 1,
+        succeeded: 0,
+        failed: 1,
+      });
+    }
+  });
+
+  it('stops on error when stopOnError is true', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: false,
+      error: { code: 'ERR', message: 'fail' },
+    });
+    const typeHandler = vi
+      .fn()
+      .mockResolvedValue({ ok: true, result: 'typed' });
+    const context = createMockContext({
+      toolRegistry: new Map([
+        ['click', clickHandler],
+        ['type', typeHandler],
+      ]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          { tool: 'click', args: { testId: 'btn' } },
+          { tool: 'type', args: { testId: 'input', text: 'hello' } },
+        ],
+        stopOnError: true,
+      },
+      context,
+    );
+
+    expect(clickHandler).toHaveBeenCalledTimes(1);
+    expect(typeHandler).not.toHaveBeenCalled();
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(1);
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 1,
+        succeeded: 0,
+        failed: 1,
+      });
+    }
+  });
+
+  it('collects multiple step results with mixed outcomes', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: 'clicked',
+    });
+    const typeHandler = vi.fn().mockResolvedValue({
+      ok: false,
+      error: { code: 'MM_TYPE_FAILED', message: 'Type failed' },
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([
+        ['click', clickHandler],
+        ['type', typeHandler],
+      ]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          { tool: 'click', args: { testId: 'button' } },
+          { tool: 'unknown_tool', args: {} },
+          { tool: 'type', args: { testId: 'input', text: 'hello' } },
+        ],
+      },
+      context,
+    );
+
+    expect(clickHandler).toHaveBeenCalledTimes(1);
+    expect(typeHandler).toHaveBeenCalledTimes(1);
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(3);
+      expect(result.result.steps.map((step) => step.ok)).toStrictEqual([
+        true,
+        false,
+        false,
+      ]);
+      expect(result.result.steps[1].error?.code).toBe(
+        ErrorCodes.MM_UNKNOWN_TOOL,
+      );
+      expect(result.result.steps[2].error?.code).toBe('MM_TYPE_FAILED');
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 3,
+        succeeded: 1,
+        failed: 2,
+      });
+      expect(result.result.summary.durationMs).toBeGreaterThanOrEqual(0);
+    }
+  });
+
+  it('records internal error when a handler throws', async () => {
+    const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout'));
+    const context = createMockContext({
+      toolRegistry: new Map([['click', clickHandler]]),
+    });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'click', args: { testId: 'btn' } }] },
+      context,
+    );
+
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'click',
+        ok: false,
+        error: {
+          code: ErrorCodes.MM_INTERNAL_ERROR,
+        },
+      });
+      expect(result.result.steps[0].error?.message).toContain('Timeout');
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 1,
+        succeeded: 0,
+        failed: 1,
+      });
+    }
+  });
+
+  it('stops on error for unknown tool when stopOnError is true', async () => {
+    const typeHandler = vi
+      .fn()
+      .mockResolvedValue({ ok: true, result: 'typed' });
+    const context = createMockContext({
+      toolRegistry: new Map([['type', typeHandler]]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          { tool: 'unknown_tool', args: {} },
+          { tool: 'type', args: { testId: 'input', text: 'hello' } },
+        ],
+        stopOnError: true,
+      },
+      context,
+    );
+
+    expect(typeHandler).not.toHaveBeenCalled();
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(1);
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'unknown_tool',
+        ok: false,
+        error: {
+          code: ErrorCodes.MM_UNKNOWN_TOOL,
+        },
+      });
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 1,
+        succeeded: 0,
+        failed: 1,
+      });
+    }
+  });
+
+  it('returns validation error for invalid tool args', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: 'clicked',
+    });
+    const typeHandler = vi
+      .fn()
+      .mockResolvedValue({ ok: true, result: 'typed' });
+    const context = createMockContext({
+      toolRegistry: new Map([
+        ['click', clickHandler],
+        ['type', typeHandler],
+      ]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          { tool: 'click', args: {} }, // no target given: must fail validation
+          { tool: 'type', args: { testId: 'input', text: 'hello' } },
+        ],
+      },
+      context,
+    );
+
+    expect(clickHandler).not.toHaveBeenCalled();
+    expect(typeHandler).toHaveBeenCalledTimes(1);
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(2);
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'click',
+        ok: false,
+        error: {
+          code: 'VALIDATION_ERROR',
+        },
+      });
+      expect(result.result.steps[0].error?.message).toContain('Exactly one of');
+      expect(result.result.steps[1]).toMatchObject({
+        tool: 'type',
+        ok: true,
+      });
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 2,
+        succeeded: 1,
+        failed: 1,
+      });
+    }
+  });
+
+  it('stops on validation error when stopOnError is true', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: 'clicked',
+    });
+    const typeHandler = vi
+      .fn()
+      .mockResolvedValue({ ok: true, result: 'typed' });
+    const context = createMockContext({
+      toolRegistry: new Map([
+        ['click', clickHandler],
+        ['type', typeHandler],
+      ]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          { tool: 'click', args: {} },
+          { tool: 'type', args: { testId: 'input', text: 'hello' } },
+        ],
+        stopOnError: true,
+      },
+      context,
+    );
+
+    expect(clickHandler).not.toHaveBeenCalled();
+    expect(typeHandler).not.toHaveBeenCalled();
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(1);
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'click',
+        ok: false,
+        error: {
+          code: 'VALIDATION_ERROR',
+        },
+      });
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 1,
+        succeeded: 0,
+        failed: 1,
+      });
+    }
+  });
+
+  it('stops on handler throw when stopOnError is true', async () => {
+    const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout'));
+    const typeHandler = vi
+      .fn()
+      .mockResolvedValue({ ok: true, result: 'typed' });
+    const context = createMockContext({
+      toolRegistry: new Map([
+        ['click', clickHandler],
+        ['type', typeHandler],
+      ]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          { tool: 'click', args: { testId: 'btn' } },
+          { tool: 'type', args: { testId: 'input', text: 'hello' } },
+        ],
+        stopOnError: true,
+      },
+      context,
+    );
+
+    expect(clickHandler).toHaveBeenCalledTimes(1);
+    expect(typeHandler).not.toHaveBeenCalled();
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(1);
+      expect(result.result.steps[0]).toMatchObject({
+        tool: 'click',
+        ok: false,
+        error: {
+          code: ErrorCodes.MM_INTERNAL_ERROR,
+        },
+      });
+      expect(result.result.steps[0].error?.message).toContain('Timeout');
+      expect(result.result.summary).toMatchObject({
+        ok: false,
+        total: 1,
+        succeeded: 0,
+        failed: 1,
+      });
+    }
+  });
+
+  it('excludes observations when includeObservations is "none"', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: { clicked: true },
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([['click', clickHandler]]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [{ tool: 'click', args: { testId: 'btn' } }],
+        includeObservations: 'none',
+      },
+      context,
+    );
+
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps[0]).not.toHaveProperty('observation');
+    }
+  });
+
+  it('marks remaining steps as skipped when batchTimeoutMs is exceeded', async () => {
+    const clickHandler = vi.fn().mockImplementation(
+      async () =>
+        new Promise((resolve) => {
+          setTimeout(() => resolve({ ok: true, result: 'clicked' }), 50);
+        }),
+    );
+    const typeHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: 'typed',
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([
+        ['click', clickHandler],
+        ['type', typeHandler],
+      ]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          { tool: 'click', args: { testId: 'btn' } },
+          { tool: 'type', args: { testId: 'input', text: 'hello' } },
+          { tool: 'click', args: { testId: 'submit' } },
+        ],
+        batchTimeoutMs: 1, // far shorter than the 50ms click handler above
+      },
+      context,
+    );
+
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps).toHaveLength(3);
+      // First step may succeed or be skipped depending on timing
+      // Steps after deadline should be skipped
+      const skippedSteps = result.result.steps.filter(
+        (step) => step.meta.skipped === true,
+      );
+      expect(skippedSteps.length).toBeGreaterThan(0);
+      skippedSteps.forEach((step) => {
+        expect(step.ok).toBe(false);
+        expect(step.error?.code).toBe('MM_BATCH_TIMEOUT');
+      });
+      expect(result.result.summary.skipped).toBeGreaterThan(0);
+    }
+  });
+
+  it('resolves navigate_home alias to navigate with screen: home', async () => {
+    const navigateHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: { navigated: true },
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([['navigate', navigateHandler]]),
+    });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'navigate_home' }] }, // alias name, args omitted
+      context,
+    );
+
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps[0].ok).toBe(true);
+    }
+    expect(navigateHandler).toHaveBeenCalledWith({ screen: 'home' }, context);
+  });
+
+  it('resolves navigate-home (hyphenated) alias to navigate with screen: home', async () => {
+    const navigateHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: { navigated: true },
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([['navigate', navigateHandler]]),
+    });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'navigate-home' }] },
+      context,
+    );
+
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps[0].ok).toBe(true);
+    }
+    expect(navigateHandler).toHaveBeenCalledWith({ screen: 'home' }, context);
+  });
+
+  it('resolves navigate_settings alias to navigate with screen: settings', async () => {
+    const navigateHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: { navigated: true },
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([['navigate', navigateHandler]]),
+    });
+
+    const result = await runStepsTool(
+      { steps: [{ tool: 'navigate_settings' }] },
+      context,
+    );
+
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(result.result.steps[0].ok).toBe(true);
+    }
+    expect(navigateHandler).toHaveBeenCalledWith(
+      { screen: 'settings' },
+      context,
+    );
+  });
+
+  it('normalises within.ref to within.a11yRef in step args', async () => {
+    const clickHandler = vi.fn().mockResolvedValue({
+      ok: true,
+      result: 'clicked',
+    });
+    const context = createMockContext({
+      toolRegistry: new Map([['click', clickHandler]]),
+    });
+
+    const result = await runStepsTool(
+      {
+        steps: [
+          {
+            tool: 'click',
+            args: { testId: 'btn', within: { ref: 'e1' } }, // shorthand key
+          },
+        ],
+      },
+      context,
+    );
+
+    expect(result.ok).toBe(true);
+    if (result.ok) {
+      expect(clickHandler).toHaveBeenCalledWith(
+        expect.objectContaining({
+          testId: 'btn',
+          within: { a11yRef: 'e1' },
+        }),
+        context,
+      );
+    }
+  });
+});
diff --git a/src/tools/batch.ts b/src/tools/batch.ts
new file mode 100644
index 0000000..dc88651
--- /dev/null
+++ b/src/tools/batch.ts
@@ -0,0 +1,251 @@
+import type { RunStepsInput, RunStepsResult, StepResult } from './types';
+import { ErrorCodes } from './types';
+import { createToolError, createToolSuccess } from './utils.js';
+import type { ToolContext, ToolFunction, ToolResponse } from '../types/http.js';
+import { extractErrorMessage } from '../utils';
+import type { ToolName } from '../validation/schemas.js';
+import { toolSchemas } from '../validation/schemas.js';
+
+/**
+ * Registry names of the tools whose args include a target selection
+ * (a11yRef/testId/selector).
+ */
+const TARGET_TOOLS: Set<string> = new Set(['click', 'type', 'wait_for']);
+
+/**
+ * Builds the alias target for a compound `navigate` tool name.
+ *
+ * @param screen - Screen name injected as the `screen` argument.
+ * @returns The registry tool name together with the args to inject.
+ */
+function navigateAlias(screen: string): {
+  tool: string;
+  inject: Record<string, unknown>;
+} {
+  return { tool: 'navigate', inject: { screen } };
+}
+
+/**
+ * Lookup from CLI-style compound tool names (snake_case and kebab-case
+ * spellings) to the registry tool name plus the args that name implies.
+ * Standalone commands get this conversion from the CLI, but agents using
+ * run-steps bypass CLI parsing and may send the compound names directly.
+ */
+const TOOL_ALIASES: Record<
+  string,
+  { tool: string; inject: Record<string, unknown> }
+> = {
+  navigate_home: navigateAlias('home'),
+  'navigate-home': navigateAlias('home'),
+  navigate_settings: navigateAlias('settings'),
+  'navigate-settings': navigateAlias('settings'),
+  navigate_notification: navigateAlias('notification'),
+  'navigate-notification': navigateAlias('notification'),
+};
+
+/** A batch step after alias resolution and argument-key normalisation. */
+type NormalisedStep = {
+  // Registry name of the tool to invoke (aliases already resolved).
+  tool: string;
+  // Arguments handed on to schema validation and the tool handler.
+  args: Record<string, unknown>;
+};
+
+/**
+ * Resolves tool aliases and normalises shorthand arg keys.
+ *
+ * Alias-injected args act as defaults: any key the caller supplies wins.
+ * For target tools, a top-level `ref` and a nested `within.ref` are renamed
+ * to `a11yRef`, unless an explicit `a11yRef` is already present at that
+ * level. The incoming `args` object is never mutated.
+ *
+ * @param tool - Raw tool name (may be an alias like `navigate_home`).
+ * @param args - Raw step arguments.
+ * @returns Resolved tool name and normalised arguments.
+ */
+function normaliseStep(
+  tool: string,
+  args: Record<string, unknown>,
+): NormalisedStep {
+  // Own-property lookup only: indexing the plain object directly would also
+  // resolve inherited keys such as `constructor` or `toString`, turning those
+  // tool names into a bogus alias whose target is `undefined`.
+  const alias = Object.prototype.hasOwnProperty.call(TOOL_ALIASES, tool)
+    ? TOOL_ALIASES[tool]
+    : undefined;
+  const resolvedTool = alias ? alias.tool : tool;
+  let normalised = alias ? { ...alias.inject, ...args } : args;
+
+  if (TARGET_TOOLS.has(resolvedTool)) {
+    // Top-level shorthand: `ref` -> `a11yRef`.
+    if ('ref' in normalised && !('a11yRef' in normalised)) {
+      const { ref, ...rest } = normalised;
+      normalised = { a11yRef: ref, ...rest };
+    }
+
+    // Same shorthand inside the optional `within` scope object.
+    if (typeof normalised.within === 'object' && normalised.within !== null) {
+      const withinObj = normalised.within as Record<string, unknown>;
+      if ('ref' in withinObj && !('a11yRef' in withinObj)) {
+        const { ref: withinRef, ...withinRest } = withinObj;
+        normalised = {
+          ...normalised,
+          within: { a11yRef: withinRef, ...withinRest },
+        };
+      }
+    }
+  }
+
+  return { tool: resolvedTool, args: normalised };
+}
+
+/** A single entry of the `steps` array accepted by `runStepsTool`. */
+type BatchStepInput = RunStepsInput['steps'][number];
+
+/**
+ * Builds the timing metadata attached to an executed step result.
+ *
+ * @param startedAt - Epoch milliseconds at which the step started.
+ * @returns The elapsed duration and an ISO timestamp taken at completion.
+ */
+function createStepMeta(startedAt: number): {
+  durationMs: number;
+  timestamp: string;
+} {
+  return {
+    durationMs: Date.now() - startedAt,
+    timestamp: new Date().toISOString(),
+  };
+}
+
+/**
+ * Runs one batch step: normalises it, looks up its handler, validates its
+ * args against the tool schema (when one exists) and invokes the handler.
+ * Every failure mode is reported as a failed `StepResult`; this function
+ * does not reject because of a handler error.
+ *
+ * @param stepInput - The raw step definition.
+ * @param context - The tool execution context passed to the handler.
+ * @param registry - The tool registry used to look up the handler.
+ * @returns The result of the step, successful or not.
+ */
+async function executeStep(
+  stepInput: BatchStepInput,
+  context: ToolContext,
+  registry: NonNullable<ToolContext['toolRegistry']>,
+): Promise<StepResult> {
+  const startedAt = Date.now();
+  const { tool: rawTool, args: rawArgs = {} } = stepInput;
+  const { tool, args } = normaliseStep(rawTool, rawArgs);
+  const handler = registry.get(tool) as
+    | ToolFunction<Record<string, unknown>, unknown>
+    | undefined;
+
+  if (!handler) {
+    return {
+      tool,
+      ok: false,
+      error: {
+        code: ErrorCodes.MM_UNKNOWN_TOOL,
+        message: `Unknown tool: ${tool}`,
+      },
+      meta: createStepMeta(startedAt),
+    };
+  }
+
+  // Own-property check so that inherited keys (e.g. `toString`) are never
+  // mistaken for a schema.
+  const schema = Object.prototype.hasOwnProperty.call(toolSchemas, tool)
+    ? toolSchemas[tool as ToolName]
+    : undefined;
+  let validatedArgs: Record<string, unknown> = args;
+  if (schema) {
+    const parsed = schema.safeParse(args);
+    if (!parsed.success) {
+      return {
+        tool,
+        ok: false,
+        error: {
+          code: 'VALIDATION_ERROR',
+          message: parsed.error.issues
+            .map((i) =>
+              i.path.length > 0
+                ? `${i.path.join('.')}: ${i.message}`
+                : i.message,
+            )
+            .join('; '),
+        },
+        meta: createStepMeta(startedAt),
+      };
+    }
+    // Hand the handler the parsed output rather than the raw input.
+    validatedArgs = parsed.data as Record<string, unknown>;
+  }
+
+  try {
+    const response = await handler(validatedArgs, context);
+
+    return {
+      tool,
+      ok: response.ok,
+      result: response.ok ? response.result : undefined,
+      error: response.ok ? undefined : response.error,
+      meta: createStepMeta(startedAt),
+    };
+  } catch (error) {
+    return {
+      tool,
+      ok: false,
+      error: {
+        code: ErrorCodes.MM_INTERNAL_ERROR,
+        message: `Unexpected error: ${extractErrorMessage(error)}`,
+      },
+      meta: createStepMeta(startedAt),
+    };
+  }
+}
+
+/**
+ * Executes a batch of tool steps sequentially.
+ *
+ * Steps run one at a time in input order. With `stopOnError`, the batch
+ * stops after the first failed step and the remaining steps are not
+ * reported. When `batchTimeoutMs` is set, the deadline is checked before
+ * each step starts (a running step is not interrupted); once it has passed,
+ * every remaining step is recorded as a skipped `MM_BATCH_TIMEOUT` failure
+ * and counted in both `skipped` and `failed`.
+ *
+ * @param input - The batch step definitions and options.
+ * @param context - The tool execution context.
+ * @returns The aggregated step results and summary.
+ */
+export async function runStepsTool(
+  input: RunStepsInput,
+  context: ToolContext,
+): Promise<ToolResponse<RunStepsResult>> {
+  if (!context.sessionManager.hasActiveSession()) {
+    return createToolError(
+      ErrorCodes.MM_NO_ACTIVE_SESSION,
+      'No active session. Call launch first.',
+    );
+  }
+
+  const registry = context.toolRegistry;
+  if (!registry) {
+    return createToolError(
+      ErrorCodes.MM_INTERNAL_ERROR,
+      'Tool registry not available.',
+    );
+  }
+
+  const { steps: stepInputs, stopOnError = false, batchTimeoutMs } = input;
+  const stepResults: StepResult[] = [];
+  let succeeded = 0;
+  let failed = 0;
+  let skipped = 0;
+  const batchStartTime = Date.now();
+  // A missing (or zero) timeout means the batch has no deadline.
+  const batchDeadline = batchTimeoutMs
+    ? batchStartTime + batchTimeoutMs
+    : undefined;
+
+  // Iterate with the index so the "remaining steps" slice is correct even
+  // when the same step object appears more than once in the array.
+  for (const [index, stepInput] of stepInputs.entries()) {
+    if (batchDeadline && Date.now() > batchDeadline) {
+      for (const remaining of stepInputs.slice(index)) {
+        stepResults.push({
+          tool: remaining.tool,
+          ok: false,
+          error: {
+            code: 'MM_BATCH_TIMEOUT',
+            message: `Batch deadline exceeded after ${batchTimeoutMs}ms`,
+          },
+          meta: {
+            durationMs: 0,
+            timestamp: new Date().toISOString(),
+            skipped: true,
+          },
+        });
+        skipped += 1;
+        failed += 1;
+      }
+      break;
+    }
+
+    const stepResult = await executeStep(stepInput, context, registry);
+    stepResults.push(stepResult);
+
+    if (stepResult.ok) {
+      succeeded += 1;
+      continue;
+    }
+
+    failed += 1;
+    if (stopOnError) {
+      break;
+    }
+  }
+
+  return createToolSuccess({
+    steps: stepResults,
+    summary: {
+      ok: failed === 0,
+      total: stepResults.length,
+      succeeded,
+      failed,
+      skipped,
+      durationMs: Date.now() - batchStartTime,
+    },
+  });
+}
diff --git a/src/tools/build.test.ts b/src/tools/build.test.ts
new file mode 100644
index 0000000..4429237
--- /dev/null
+++ b/src/tools/build.test.ts
@@ -0,0 +1,192 @@
+/**
+ * Unit tests for build tool handler.
+ *
+ * Tests the build handler with a BuildCapability and with no capability
+ * available, including success/failure scenarios and build options handling.
+ */
+
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { buildTool } from './build.js';
+import type { BuildCapability } from '../capabilities/types.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
+
+/**
+ * Creates a tool context whose mock session manager has an active session
+ * and returns the given build capability (or `undefined`) from
+ * `getBuildCapability`.
+ */
+function createMockContext(
+  options: { buildCapability?: BuildCapability } = {},
+) {
+  const sessionId = 'test-session-123';
+  const sessionManager = createMockSessionManager({
+    hasActive: true,
+    sessionId,
+    sessionMetadata: {
+      schemaVersion: 1,
+      sessionId,
+      createdAt: new Date().toISOString(),
+      flowTags: [],
+      tags: [],
+      launch: { stateMode: 'default' },
+    },
+  });
+  sessionManager.getBuildCapability.mockReturnValue(options.buildCapability);
+
+  // `workflowContext` carries no `build` entry, so the capability can only
+  // come from the session manager.
+  const stub = {
+    sessionManager,
+    page: {},
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  };
+
+  return stub as unknown as ToolContext;
+}
+
+describe('buildTool', () => {
+  type BuildOutcome = Awaited<ReturnType<BuildCapability['build']>>;
+
+  let mockBuildCapability: BuildCapability;
+
+  beforeEach(() => {
+    mockBuildCapability = {
+      build: vi.fn(),
+      getExtensionPath: vi.fn(),
+      isBuilt: vi.fn(),
+    };
+  });
+
+  /**
+   * Stubs the capability's `build` to resolve with `outcome` and returns a
+   * context exposing that capability.
+   */
+  function contextWithBuildOutcome(outcome: BuildOutcome) {
+    vi.spyOn(mockBuildCapability, 'build').mockResolvedValue(outcome);
+    return createMockContext({ buildCapability: mockBuildCapability });
+  }
+
+  /** Build outcome shared by the success-path tests. */
+  function successfulBuild(): BuildOutcome {
+    return {
+      success: true,
+      extensionPath: '/path/to/dist/chrome',
+      durationMs: 5000,
+    };
+  }
+
+  describe('with capability', () => {
+    it('builds extension successfully with default buildType', async () => {
+      const context = contextWithBuildOutcome(successfulBuild());
+
+      const outcome = await buildTool({}, context);
+
+      expect(outcome.ok).toBe(true);
+      if (outcome.ok) {
+        const { buildType, extensionPathResolved } = outcome.result;
+        expect(buildType).toBe('build:test');
+        expect(extensionPathResolved).toBe('/path/to/dist/chrome');
+      }
+      expect(mockBuildCapability.build).toHaveBeenCalledWith({
+        buildType: undefined,
+        force: undefined,
+      });
+    });
+
+    it('builds extension with explicit buildType', async () => {
+      const context = contextWithBuildOutcome(successfulBuild());
+
+      const outcome = await buildTool({ buildType: 'build:test' }, context);
+
+      expect(outcome.ok).toBe(true);
+      if (outcome.ok) {
+        const { buildType, extensionPathResolved } = outcome.result;
+        expect(buildType).toBe('build:test');
+        expect(extensionPathResolved).toBe('/path/to/dist/chrome');
+      }
+      expect(mockBuildCapability.build).toHaveBeenCalledWith({
+        buildType: 'build:test',
+        force: undefined,
+      });
+    });
+
+    it('builds extension with force flag', async () => {
+      const context = contextWithBuildOutcome(successfulBuild());
+
+      const outcome = await buildTool({ force: true }, context);
+
+      expect(outcome.ok).toBe(true);
+      expect(mockBuildCapability.build).toHaveBeenCalledWith({
+        buildType: undefined,
+        force: true,
+      });
+    });
+
+    it('returns error when build fails with error message', async () => {
+      const context = contextWithBuildOutcome({
+        success: false,
+        extensionPath: '',
+        durationMs: 1000,
+        error: 'Compilation error',
+      });
+
+      const outcome = await buildTool({}, context);
+
+      expect(outcome.ok).toBe(false);
+      if (!outcome.ok) {
+        expect(outcome.error.code).toBe(ErrorCodes.MM_BUILD_FAILED);
+        expect(outcome.error.message).toContain('Compilation error');
+      }
+    });
+
+    it('returns error when build fails without error message', async () => {
+      const context = contextWithBuildOutcome({
+        success: false,
+        extensionPath: '',
+        durationMs: 1000,
+      });
+
+      const outcome = await buildTool({}, context);
+
+      expect(outcome.ok).toBe(false);
+      if (!outcome.ok) {
+        expect(outcome.error.code).toBe(ErrorCodes.MM_BUILD_FAILED);
+        expect(outcome.error.message).toContain('Unknown error');
+      }
+    });
+
+    it('returns error when build throws exception', async () => {
+      const crash = new Error('Build process crashed');
+      vi.spyOn(mockBuildCapability, 'build').mockRejectedValue(crash);
+      const context = createMockContext({
+        buildCapability: mockBuildCapability,
+      });
+
+      const outcome = await buildTool({}, context);
+
+      expect(outcome.ok).toBe(false);
+      if (!outcome.ok) {
+        expect(outcome.error.code).toBe(ErrorCodes.MM_BUILD_FAILED);
+        expect(outcome.error.message).toContain('Build process crashed');
+      }
+    });
+  });
+
+  it('returns error when build capability is unavailable', async () => {
+    // No capability is supplied, so the session manager returns undefined.
+    const context = createMockContext();
+
+    const outcome = await buildTool({}, context);
+
+    expect(outcome.ok).toBe(false);
+    if (!outcome.ok) {
+      expect(outcome.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
+      expect(outcome.error.message).toContain('BuildCapability not available');
+    }
+  });
+});
diff --git a/src/tools/build.ts b/src/tools/build.ts
new file mode 100644
index 0000000..316b756
--- /dev/null
+++ b/src/tools/build.ts
@@ -0,0 +1,52 @@
+import type { BuildInput, BuildToolResult } from './types';
+import { ErrorCodes } from './types';
+import { createToolError, createToolSuccess } from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+import { extractErrorMessage } from '../utils';
+
+/**
+ * Builds the extension through the available build capability.
+ *
+ * The capability on `workflowContext.build` is preferred; otherwise the one
+ * returned by the session manager is used. A build that reports failure and
+ * a build that throws both map to `MM_BUILD_FAILED`.
+ *
+ * @param input - The build configuration options.
+ * @param context - The tool execution context.
+ * @returns The build type and resolved extension path, or a tool error.
+ */
+export async function buildTool(
+  input: BuildInput,
+  context: ToolContext,
+): Promise<ToolResponse<BuildToolResult>> {
+  const capability =
+    context.workflowContext.build ??
+    context.sessionManager.getBuildCapability();
+
+  if (!capability) {
+    return createToolError(
+      ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE,
+      'BuildCapability not available. The mm_build tool requires either: (1) running in e2e mode with the MetaMask extension wrapper, or (2) running directly in the metamask-extension repository with dependencies installed.',
+    );
+  }
+
+  try {
+    const { buildType, force } = input;
+    const outcome = await capability.build({ buildType, force });
+
+    if (outcome.success) {
+      // Report the default build type when the caller did not pick one.
+      return createToolSuccess({
+        buildType: buildType ?? 'build:test',
+        extensionPathResolved: outcome.extensionPath,
+      });
+    }
+
+    const reason = outcome.error ?? 'Unknown error';
+    return createToolError(
+      ErrorCodes.MM_BUILD_FAILED,
+      `Build failed: ${reason}`,
+    );
+  } catch (failure) {
+    const reason = extractErrorMessage(failure);
+    return createToolError(
+      ErrorCodes.MM_BUILD_FAILED,
+      `Build failed: ${reason}`,
+    );
+  }
+}
diff --git a/src/tools/cleanup.test.ts b/src/tools/cleanup.test.ts
new file mode 100644
index 0000000..5348703
--- /dev/null
+++ b/src/tools/cleanup.test.ts
@@ -0,0 +1,74 @@
+/**
+ * Unit tests for cleanup tool handler.
+ *
+ * Tests session cleanup with various session states.
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+import { cleanupTool } from './cleanup.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import type { ToolContext } from '../types/http.js';
+
+/** Creates a tool context backed by a mock session manager. */
+function createMockContext(hasActive = false): ToolContext {
+  const sessionManager = createMockSessionManager({ hasActive });
+  const stub = {
+    sessionManager,
+    page: {},
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  };
+
+  return stub as unknown as ToolContext;
+}
+
+describe('cleanupTool', () => {
+  type CleanupOutcome = Awaited<ReturnType<typeof cleanupTool>>;
+
+  /** Asserts a successful response carrying the expected `cleanedUp` flag. */
+  function expectCleanedUp(outcome: CleanupOutcome, expected: boolean) {
+    expect(outcome.ok).toBe(true);
+    if (outcome.ok) {
+      expect(outcome.result.cleanedUp).toBe(expected);
+    }
+  }
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('cleans up active session successfully', async () => {
+    const context = createMockContext(true);
+    vi.spyOn(context.sessionManager, 'cleanup').mockResolvedValue(true);
+
+    const outcome = await cleanupTool({}, context);
+
+    expectCleanedUp(outcome, true);
+    expect(context.sessionManager.cleanup).toHaveBeenCalled();
+  });
+
+  it('returns false when no session to clean up', async () => {
+    const context = createMockContext(false);
+    vi.spyOn(context.sessionManager, 'cleanup').mockResolvedValue(false);
+
+    const outcome = await cleanupTool({}, context);
+
+    expectCleanedUp(outcome, false);
+  });
+
+  it('cleans up multiple times without error', async () => {
+    const context = createMockContext(true);
+    // First call tears the session down; the second finds nothing left.
+    vi.spyOn(context.sessionManager, 'cleanup')
+      .mockResolvedValueOnce(true)
+      .mockResolvedValueOnce(false);
+
+    const first = await cleanupTool({}, context);
+    const second = await cleanupTool({}, context);
+
+    expectCleanedUp(first, true);
+    expectCleanedUp(second, false);
+    expect(context.sessionManager.cleanup).toHaveBeenCalledTimes(2);
+  });
+});
diff --git a/src/tools/cleanup.ts b/src/tools/cleanup.ts
new file mode 100644
index 0000000..cae4b36
--- /dev/null
+++ b/src/tools/cleanup.ts
@@ -0,0 +1,19 @@
+import type { CleanupInput, CleanupResult } from './types';
+import { createToolSuccess } from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Asks the session manager to clean up and reports what it returned.
+ *
+ * @param _input - Unused input parameters.
+ * @param context - The tool execution context.
+ * @returns A success response whose `cleanedUp` field is the value resolved
+ * by the session manager's `cleanup()`.
+ */
+export async function cleanupTool(
+  _input: CleanupInput,
+  context: ToolContext,
+): Promise<ToolResponse<CleanupResult>> {
+  const { sessionManager } = context;
+
+  return createToolSuccess({ cleanedUp: await sessionManager.cleanup() });
+}
diff --git a/src/tools/clipboard.test.ts b/src/tools/clipboard.test.ts
new file mode 100644
index 0000000..d067712
--- /dev/null
+++ b/src/tools/clipboard.test.ts
@@ -0,0 +1,215 @@
+/**
+ * Unit tests for clipboard tool handler.
+ *
+ * Tests CDP-based clipboard operations (read/write) with proper mocking.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+import { clipboardTool } from './clipboard.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
+
+/**
+ * Creates a tool context whose page hands out the given CDP session (or a
+ * default no-op one) from `page.context().newCDPSession()`.
+ */
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    cdpSession?: {
+      send: ReturnType<typeof vi.fn>;
+      detach: ReturnType<typeof vi.fn>;
+    };
+  } = {},
+): ToolContext {
+  const hasActive = options.hasActive ?? true;
+  const cdp = options.cdpSession ?? {
+    send: vi.fn().mockResolvedValue(undefined),
+    detach: vi.fn().mockResolvedValue(undefined),
+  };
+
+  const browserContext = { newCDPSession: vi.fn().mockResolvedValue(cdp) };
+  const page = { context: vi.fn().mockReturnValue(browserContext) };
+
+  const stub = {
+    sessionManager: createMockSessionManager({ hasActive }),
+    page,
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  };
+
+  return stub as unknown as ToolContext;
+}
+
+describe('clipboardTool', () => {
+  /** Creates a fake CDP session around the given `send` mock. */
+  function fakeCdpSession(send: ReturnType<typeof vi.fn>) {
+    return { send, detach: vi.fn().mockResolvedValue(undefined) };
+  }
+
+  describe('write action', () => {
+    it('writes text to clipboard via CDP', async () => {
+      const cdpSession = fakeCdpSession(vi.fn().mockResolvedValue(undefined));
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool(
+        { action: 'write', text: 'test content' },
+        context,
+      );
+
+      expect(outcome.ok).toBe(true);
+      if (outcome.ok) {
+        const { action, success, text } = outcome.result;
+        expect(action).toBe('write');
+        expect(success).toBe(true);
+        expect(text).toBe('test content');
+      }
+      expect(cdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', {
+        expression: 'navigator.clipboard.writeText("test content")',
+        awaitPromise: true,
+        userGesture: true,
+      });
+      expect(cdpSession.detach).toHaveBeenCalled();
+    });
+
+    it('detaches CDP session even if write fails', async () => {
+      const cdpSession = fakeCdpSession(
+        vi.fn().mockRejectedValue(new Error('Write failed')),
+      );
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool(
+        { action: 'write', text: 'test' },
+        context,
+      );
+
+      expect(outcome.ok).toBe(false);
+      expect(cdpSession.detach).toHaveBeenCalled();
+    });
+  });
+
+  describe('read action', () => {
+    it('reads text from clipboard via CDP', async () => {
+      const cdpSession = fakeCdpSession(
+        vi.fn().mockResolvedValue({ result: { value: 'clipboard content' } }),
+      );
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool({ action: 'read' }, context);
+
+      expect(outcome.ok).toBe(true);
+      if (outcome.ok) {
+        const { action, success, text } = outcome.result;
+        expect(action).toBe('read');
+        expect(success).toBe(true);
+        expect(text).toBe('clipboard content');
+      }
+      expect(cdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', {
+        expression: 'navigator.clipboard.readText()',
+        awaitPromise: true,
+        userGesture: true,
+      });
+    });
+
+    it('uses description when value is missing', async () => {
+      const cdpSession = fakeCdpSession(
+        vi
+          .fn()
+          .mockResolvedValue({ result: { description: 'fallback content' } }),
+      );
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool({ action: 'read' }, context);
+
+      expect(outcome.ok).toBe(true);
+      if (outcome.ok) {
+        expect(outcome.result.text).toBe('fallback content');
+      }
+    });
+
+    it('returns empty string when result is missing', async () => {
+      const cdpSession = fakeCdpSession(
+        vi.fn().mockResolvedValue({ result: {} }),
+      );
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool({ action: 'read' }, context);
+
+      expect(outcome.ok).toBe(true);
+      if (outcome.ok) {
+        expect(outcome.result.text).toBe('');
+      }
+    });
+  });
+
+  describe('error classification', () => {
+    it('classifies permission denied errors', async () => {
+      const cdpSession = fakeCdpSession(
+        vi.fn().mockRejectedValue(new Error('permissions denied')),
+      );
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool({ action: 'read' }, context);
+
+      expect(outcome.ok).toBe(false);
+      if (!outcome.ok) {
+        expect(outcome.error.code).toBe('MM_CLIPBOARD_PERMISSION_DENIED');
+        expect(outcome.error.message).toContain('Clipboard permission denied');
+      }
+    });
+
+    it('classifies LavaMoat blocked errors', async () => {
+      const cdpSession = fakeCdpSession(
+        vi.fn().mockRejectedValue(new Error('LavaMoat policy violation')),
+      );
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool(
+        { action: 'write', text: 'test' },
+        context,
+      );
+
+      expect(outcome.ok).toBe(false);
+      if (!outcome.ok) {
+        expect(outcome.error.code).toBe('MM_CLIPBOARD_LAVAMOAT_BLOCKED');
+        expect(outcome.error.message).toContain(
+          'Clipboard blocked by LavaMoat policy',
+        );
+      }
+    });
+
+    it('classifies generic clipboard errors', async () => {
+      const cdpSession = fakeCdpSession(
+        vi.fn().mockRejectedValue(new Error('Unknown error')),
+      );
+      const context = createMockContext({ cdpSession });
+
+      const outcome = await clipboardTool({ action: 'read' }, context);
+
+      expect(outcome.ok).toBe(false);
+      if (!outcome.ok) {
+        expect(outcome.error.code).toBe('MM_CLIPBOARD_FAILED');
+        expect(outcome.error.message).toContain('Clipboard operation failed');
+      }
+    });
+  });
+
+  describe('session validation', () => {
+    it('returns error when no active session', async () => {
+      const context = createMockContext({ hasActive: false });
+
+      const outcome = await clipboardTool({ action: 'read' }, context);
+
+      expect(outcome.ok).toBe(false);
+      if (!outcome.ok) {
+        expect(outcome.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+  });
+});
diff --git a/src/tools/clipboard.ts b/src/tools/clipboard.ts
new file mode 100644
index 0000000..ade71a3
--- /dev/null
+++ b/src/tools/clipboard.ts
@@ -0,0 +1,82 @@
+import type { ClipboardInput, ClipboardResult } from './types';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Reads from or writes to the clipboard by evaluating
+ * `navigator.clipboard` calls in the page over a CDP session.
+ *
+ * The CDP session is always detached afterwards, on success or failure.
+ * Failures are classified from the error message into
+ * `MM_CLIPBOARD_PERMISSION_DENIED`, `MM_CLIPBOARD_LAVAMOAT_BLOCKED` or
+ * `MM_CLIPBOARD_FAILED`. A clipboard promise that rejects inside the page is
+ * treated as a failure too, rather than being reported as success.
+ *
+ * @param input - The clipboard action and optional text payload.
+ * @param context - The tool execution context.
+ * @returns The clipboard operation result with the text content.
+ */
+export async function clipboardTool(
+  input: ClipboardInput,
+  context: ToolContext,
+): Promise<ToolResponse<ClipboardResult>> {
+  const missingSession = requireActiveSession<ClipboardResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  try {
+    const cdpSession = await context.page.context().newCDPSession(context.page);
+
+    try {
+      if (input.action === 'write') {
+        // JSON.stringify yields a safely quoted/escaped JS string literal.
+        const writeResponse = await cdpSession.send('Runtime.evaluate', {
+          expression: `navigator.clipboard.writeText(${JSON.stringify(input.text)})`,
+          awaitPromise: true,
+          userGesture: true,
+        });
+        throwIfEvaluationFailed(writeResponse);
+
+        return createToolSuccess({
+          action: 'write',
+          success: true,
+          text: input.text,
+        });
+      }
+
+      const readResponse = await cdpSession.send('Runtime.evaluate', {
+        expression: 'navigator.clipboard.readText()',
+        awaitPromise: true,
+        userGesture: true,
+      });
+      throwIfEvaluationFailed(readResponse);
+
+      const clipboardText =
+        readResponse.result?.value ?? readResponse.result?.description ?? '';
+
+      return createToolSuccess({
+        action: 'read',
+        success: true,
+        text: clipboardText as string,
+      });
+    } finally {
+      // Best-effort detach: a detach failure must not mask the real outcome.
+      await cdpSession.detach().catch(() => undefined);
+    }
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+
+    if (message.includes('permissions') || message.includes('denied')) {
+      return createToolError(
+        'MM_CLIPBOARD_PERMISSION_DENIED',
+        `Clipboard permission denied: ${message}`,
+      );
+    }
+
+    if (message.includes('LavaMoat') || message.includes('policy')) {
+      return createToolError(
+        'MM_CLIPBOARD_LAVAMOAT_BLOCKED',
+        `Clipboard blocked by LavaMoat policy: ${message}`,
+      );
+    }
+
+    return createToolError(
+      'MM_CLIPBOARD_FAILED',
+      `Clipboard operation failed: ${message}`,
+    );
+  }
+}
+
+/**
+ * Throws when a `Runtime.evaluate` response reports an in-page exception.
+ *
+ * With `awaitPromise: true`, a rejected promise does not make the CDP call
+ * itself fail; the rejection is reported through `exceptionDetails` on an
+ * otherwise successful response, so it has to be surfaced explicitly.
+ *
+ * @param response - The `Runtime.evaluate` response, if any.
+ */
+function throwIfEvaluationFailed(
+  response:
+    | {
+        exceptionDetails?: {
+          text: string;
+          exception?: { description?: string };
+        };
+      }
+    | undefined,
+): void {
+  const details = response?.exceptionDetails;
+  if (!details) {
+    return;
+  }
+
+  // Prefer the thrown value's description (e.g. "NotAllowedError: ...") over
+  // the generic exception text so error classification sees the real cause.
+  throw new Error(details.exception?.description ?? details.text);
+}
diff --git a/src/tools/context.test.ts b/src/tools/context.test.ts
new file mode 100644
index 0000000..2af9de5
--- /dev/null
+++ b/src/tools/context.test.ts
@@ -0,0 +1,176 @@
+/**
+ * Unit tests for context tool handlers.
+ *
+ * Tests context switching (e2e/prod) and context info retrieval.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+import { setContextTool, getContextTool } from './context.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
+
+/**
+ * Creates a tool context whose mock session manager is configured with the
+ * given options.
+ */
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    sessionId?: string;
+    environmentMode?: 'e2e' | 'prod';
+  } = {},
+): ToolContext {
+  const sessionManager = createMockSessionManager(options);
+  const page = {} as ToolContext['page'];
+  const stub = {
+    sessionManager,
+    page,
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  };
+
+  return stub as unknown as ToolContext;
+}
+
+describe('setContextTool', () => {
+  type ContextInfo = ReturnType<
+    ToolContext['sessionManager']['getContextInfo']
+  >;
+
+  /**
+   * Makes `getContextInfo` report an idle, switchable session with the given
+   * context and capabilities.
+   */
+  function stubIdleContextInfo(
+    context: ToolContext,
+    currentContext: ContextInfo['currentContext'],
+    available: ContextInfo['capabilities']['available'],
+  ) {
+    vi.mocked(context.sessionManager.getContextInfo).mockReturnValue({
+      currentContext,
+      hasActiveSession: false,
+      sessionId: null,
+      capabilities: { available },
+      canSwitchContext: true,
+    });
+  }
+
+  it('switches context from e2e to prod', async () => {
+    const context = createMockContext({ environmentMode: 'e2e' });
+    stubIdleContextInfo(context, 'prod', ['build', 'fixture']);
+
+    const outcome = await setContextTool({ context: 'prod' }, context);
+
+    expect(outcome.ok).toBe(true);
+    if (outcome.ok) {
+      const { previousContext, newContext, availableCapabilities } =
+        outcome.result;
+      expect(previousContext).toBe('e2e');
+      expect(newContext).toBe('prod');
+      expect(availableCapabilities).toStrictEqual(['build', 'fixture']);
+    }
+    expect(context.sessionManager.setContext).toHaveBeenCalledWith(
+      'prod',
+      undefined,
+    );
+  });
+
+  it('forwards context options to session manager', async () => {
+    const context = createMockContext({ environmentMode: 'e2e' });
+    stubIdleContextInfo(context, 'e2e', ['build', 'fixture', 'chain']);
+    const contextOptions = { mockServer: { enabled: true, port: 18000 } };
+
+    const outcome = await setContextTool(
+      { context: 'e2e', options: contextOptions },
+      context,
+    );
+
+    expect(outcome.ok).toBe(true);
+    expect(context.sessionManager.setContext).toHaveBeenCalledWith(
+      'e2e',
+      contextOptions,
+    );
+  });
+
+  it('switches context from prod to e2e', async () => {
+    const context = createMockContext({ environmentMode: 'prod' });
+    stubIdleContextInfo(context, 'e2e', [
+      'build',
+      'fixture',
+      'chain',
+      'seeding',
+    ]);
+
+    const outcome = await setContextTool({ context: 'e2e' }, context);
+
+    expect(outcome.ok).toBe(true);
+    if (outcome.ok) {
+      const { previousContext, newContext, availableCapabilities } =
+        outcome.result;
+      expect(previousContext).toBe('prod');
+      expect(newContext).toBe('e2e');
+      expect(availableCapabilities).toStrictEqual([
+        'build',
+        'fixture',
+        'chain',
+        'seeding',
+      ]);
+    }
+  });
+
+  it('classifies context switch blocked errors', async () => {
+    const context = createMockContext({ environmentMode: 'e2e' });
+    // The error message itself is the error code.
+    vi.mocked(context.sessionManager.setContext).mockImplementation(() => {
+      throw new Error(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED);
+    });
+
+    const outcome = await setContextTool({ context: 'prod' }, context);
+
+    expect(outcome.ok).toBe(false);
+    if (!outcome.ok) {
+      const { code, message } = outcome.error;
+      expect(code).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED);
+      expect(message).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED);
+    }
+  });
+
+  it('classifies generic context errors', async () => {
+    const context = createMockContext({ environmentMode: 'e2e' });
+    vi.mocked(context.sessionManager.setContext).mockImplementation(() => {
+      throw new Error('Unknown error');
+    });
+
+    const outcome = await setContextTool({ context: 'prod' }, context);
+
+    expect(outcome.ok).toBe(false);
+    if (!outcome.ok) {
+      const { code, message } = outcome.error;
+      expect(code).toBe(ErrorCodes.MM_SET_CONTEXT_FAILED);
+      expect(message).toContain('Context switch failed');
+    }
+  });
+});
+
+describe('getContextTool', () => {
+  it('returns context info when getContextInfo is available', async () => {
+    const sessionId = 'test-session-123';
+    const context = createMockContext({
+      hasActive: true,
+      sessionId,
+      environmentMode: 'e2e',
+    });
+    vi.mocked(context.sessionManager.getContextInfo).mockReturnValue({
+      currentContext: 'e2e',
+      hasActiveSession: true,
+      sessionId,
+      capabilities: { available: ['build', 'fixture', 'chain'] },
+      canSwitchContext: false,
+    });
+
+    const outcome = await getContextTool({}, context);
+
+    expect(outcome.ok).toBe(true);
+    if (outcome.ok) {
+      // The tool passes the session manager's info through unchanged.
+      const info = outcome.result;
+      expect(info.currentContext).toBe('e2e');
+      expect(info.hasActiveSession).toBe(true);
+      expect(info.sessionId).toBe('test-session-123');
+      expect(info.capabilities.available).toStrictEqual([
+        'build',
+        'fixture',
+        'chain',
+      ]);
+      expect(info.canSwitchContext).toBe(false);
+    }
+  });
+});
diff --git a/src/tools/context.ts b/src/tools/context.ts
new file mode 100644
index 0000000..65f501d
--- /dev/null
+++ b/src/tools/context.ts
@@ -0,0 +1,55 @@
+import { classifyContextError } from './error-classification.js';
+import type { SetContextInput } from './types/tool-inputs.js';
+import type {
+  SetContextResult,
+  GetContextResult,
+} from './types/tool-outputs.js';
+import { createToolError, createToolSuccess } from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+export type { SetContextInput } from './types/tool-inputs.js';
+export type {
+  SetContextResult,
+  GetContextResult,
+} from './types/tool-outputs.js';
+
+/**
+ * Moves the session into the requested environment context (e2e or prod).
+ *
+ * @param input - The context to switch to, plus optional switch options.
+ * @param context - The tool execution context.
+ * @returns The previous and new context plus the available capabilities.
+ */
+export async function setContextTool(
+  input: SetContextInput,
+  context: ToolContext,
+): Promise<ToolResponse<SetContextResult>> {
+  try {
+    const { sessionManager } = context;
+    const previousContext = sessionManager.getEnvironmentMode();
+    sessionManager.setContext(input.context, input.options);
+    const contextInfo = sessionManager.getContextInfo();
+    return createToolSuccess({
+      previousContext,
+      newContext: input.context,
+      availableCapabilities: contextInfo.capabilities.available,
+    });
+  } catch (error) {
+    const { code, message } = classifyContextError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Retrieves the current session context, capabilities, and status.
+ *
+ * @param _input - Unused; this tool takes no input parameters.
+ * @param context - The tool execution context; only `sessionManager` is read.
+ * @returns A success response wrapping the session manager's context info.
+ */
+export async function getContextTool(
+  _input: Record<string, never>,
+  context: ToolContext,
+): Promise<ToolResponse<GetContextResult>> {
+  return createToolSuccess(context.sessionManager.getContextInfo());
+}
diff --git a/src/mcp-server/tools/discovery-tools.test.ts b/src/tools/discovery-tools.test.ts
similarity index 58%
rename from src/mcp-server/tools/discovery-tools.test.ts
rename to src/tools/discovery-tools.test.ts
index 8b5a248..683a7af 100644
--- a/src/mcp-server/tools/discovery-tools.test.ts
+++ b/src/tools/discovery-tools.test.ts
@@ -8,19 +8,18 @@
  */
 
 import type { Page } from '@playwright/test';
-import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
 
 import {
-  handleListTestIds,
-  handleAccessibilitySnapshot,
-  handleDescribeScreen,
+  accessibilitySnapshotTool,
+  describeScreenTool,
+  listTestIdsTool,
 } from './discovery-tools.js';
-import { ScreenshotResult } from '../../capabilities/types.js';
-import * as discoveryModule from '../discovery.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils/mock-factories.js';
-import type { TestIdItem, A11yNodeTrimmed } from '../types';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import type { A11yNodeTrimmed, TestIdItem } from './types';
+import { ErrorCodes } from './types/errors.js';
+import * as discoveryModule from './utils/discovery.js';
+import type { ToolContext } from '../types/http.js';
 
 function createMockPage(): Page {
   return {
@@ -28,12 +27,16 @@ function createMockPage(): Page {
   } as unknown as Page;
 }
 
-describe('discovery-tools', () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+  } = {},
+): ToolContext {
+  const { hasActive = true } = options;
 
-    const mockSessionManager = createMockSessionManager({
-      hasActive: true,
+  return {
+    sessionManager: createMockSessionManager({
+      hasActive,
       sessionId: 'test-session-123',
       sessionMetadata: {
         schemaVersion: 1,
@@ -43,34 +46,27 @@ describe('discovery-tools', () => {
         flowTags: ['discovery'],
         tags: [],
         launch: {
-          stateMode: 'default' as const,
+          stateMode: 'default',
         },
       },
-    });
-
-    vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(createMockPage());
-
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({
-      recordStep: vi.fn().mockResolvedValue(undefined),
-      getLastSteps: vi.fn().mockResolvedValue([]),
-      searchSteps: vi.fn().mockResolvedValue([]),
-      summarizeSession: vi.fn().mockResolvedValue({
-        sessionId: 'test-session-123',
-        stepCount: 0,
-        recipe: [],
-      }),
-      listSessions: vi.fn().mockResolvedValue([]),
+    }),
+    page: createMockPage(),
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {
       generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session-123'),
-    } as any);
+    },
+  } as unknown as ToolContext;
+}
+
+describe('discovery-tools', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
   });
 
-  describe('handleListTestIds', () => {
+  describe('listTestIdsTool', () => {
     it('returns list of test IDs with default limit', async () => {
+      const context = createMockContext();
       const mockItems: TestIdItem[] = [
         { testId: 'button-1', tag: 'button', text: 'Click', visible: true },
         { testId: 'input-1', tag: 'input', visible: true },
@@ -84,24 +80,24 @@ describe('discovery-tools', () => {
         },
       );
 
-      const result = await handleListTestIds({});
+      const result = await listTestIdsTool({}, context);
 
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.items).toStrictEqual(mockItems);
-        expect(discoveryModule.collectTestIds).toHaveBeenCalledWith(
-          expect.anything(),
-          150,
-        );
       }
+      expect(discoveryModule.collectTestIds).toHaveBeenCalledWith(
+        context.page,
+        150,
+      );
     });
 
     it('respects custom limit', async () => {
-      const mockItems: TestIdItem[] = [
-        { testId: 'item-1', tag: 'div', visible: true },
-      ];
+      const context = createMockContext();
 
-      vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue(mockItems);
+      vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([
+        { testId: 'item-1', tag: 'div', visible: true },
+      ]);
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
         {
           nodes: [],
@@ -109,18 +105,17 @@ describe('discovery-tools', () => {
         },
       );
 
-      const result = await handleListTestIds({ limit: 50 });
+      const result = await listTestIdsTool({ limit: 50 }, context);
 
       expect(result.ok).toBe(true);
-      if (result.ok) {
-        expect(discoveryModule.collectTestIds).toHaveBeenCalledWith(
-          expect.anything(),
-          50,
-        );
-      }
+      expect(discoveryModule.collectTestIds).toHaveBeenCalledWith(
+        context.page,
+        50,
+      );
     });
 
     it('updates refMap in session manager', async () => {
+      const context = createMockContext();
       const mockRefMap = new Map([['e1', 'role=button[name="Submit"]']]);
 
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
@@ -131,53 +126,30 @@ describe('discovery-tools', () => {
         },
       );
 
-      const sessionManager = sessionManagerModule.getSessionManager();
+      await listTestIdsTool({}, context);
 
-      await handleListTestIds({});
-
-      expect(sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap);
-    });
-
-    it('records step to knowledge store', async () => {
-      const mockItems: TestIdItem[] = [
-        { testId: 'test-1', tag: 'button', visible: true },
-      ];
-
-      vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue(mockItems);
-      vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
-        {
-          nodes: [],
-          refMap: new Map(),
-        },
-      );
-
-      const { knowledgeStore } = knowledgeStoreModule;
-
-      await handleListTestIds({});
-
-      expect(knowledgeStore.recordStep).toHaveBeenCalled();
+      expect(context.sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap);
     });
 
     it('returns error when no active session', async () => {
-      const mockSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
+      const context = createMockContext({ hasActive: false });
 
-      const result = await handleListTestIds({});
+      const result = await listTestIdsTool({}, context);
 
       expect(result.ok).toBe(false);
       if (!result.ok) {
-        expect(result.error.code).toBe('MM_NO_ACTIVE_SESSION');
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
       }
     });
 
     it('handles discovery errors', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTestIds').mockRejectedValue(
         new Error('Page closed'),
       );
 
-      const result = await handleListTestIds({});
+      const result = await listTestIdsTool({}, context);
 
       expect(result.ok).toBe(false);
       if (!result.ok) {
@@ -186,8 +158,9 @@ describe('discovery-tools', () => {
     });
   });
 
-  describe('handleAccessibilitySnapshot', () => {
+  describe('accessibilitySnapshotTool', () => {
     it('returns accessibility tree with refs', async () => {
+      const context = createMockContext();
       const mockNodes: A11yNodeTrimmed[] = [
         { ref: 'e1', role: 'button', name: 'Submit', path: [] },
         { ref: 'e2', role: 'link', name: 'Cancel', path: [] },
@@ -205,7 +178,7 @@ describe('discovery-tools', () => {
       );
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
 
-      const result = await handleAccessibilitySnapshot({});
+      const result = await accessibilitySnapshotTool({}, context);
 
       expect(result.ok).toBe(true);
       if (result.ok) {
@@ -214,6 +187,8 @@ describe('discovery-tools', () => {
     });
 
     it('uses root selector when provided', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
         {
           nodes: [],
@@ -222,15 +197,16 @@ describe('discovery-tools', () => {
       );
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
 
-      await handleAccessibilitySnapshot({ rootSelector: '.modal' });
+      await accessibilitySnapshotTool({ rootSelector: '.modal' }, context);
 
       expect(discoveryModule.collectTrimmedA11ySnapshot).toHaveBeenCalledWith(
-        expect.anything(),
+        context.page,
         '.modal',
       );
     });
 
     it('updates refMap in session manager', async () => {
+      const context = createMockContext();
       const mockRefMap = new Map([['e1', 'role=button[name="OK"]']]);
 
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
@@ -241,14 +217,14 @@ describe('discovery-tools', () => {
       );
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
 
-      const sessionManager = sessionManagerModule.getSessionManager();
-
-      await handleAccessibilitySnapshot({});
+      await accessibilitySnapshotTool({}, context);
 
-      expect(sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap);
+      expect(context.sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap);
     });
 
-    it('records step to knowledge store', async () => {
+    it('collects test ids with observation limit', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
         {
           nodes: [],
@@ -257,33 +233,33 @@ describe('discovery-tools', () => {
       );
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
 
-      const { knowledgeStore } = knowledgeStoreModule;
+      await accessibilitySnapshotTool({}, context);
 
-      await handleAccessibilitySnapshot({});
-
-      expect(knowledgeStore.recordStep).toHaveBeenCalled();
+      expect(discoveryModule.collectTestIds).toHaveBeenCalledWith(
+        context.page,
+        50,
+      );
     });
 
     it('returns error when no active session', async () => {
-      const mockSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
+      const context = createMockContext({ hasActive: false });
 
-      const result = await handleAccessibilitySnapshot({});
+      const result = await accessibilitySnapshotTool({}, context);
 
       expect(result.ok).toBe(false);
       if (!result.ok) {
-        expect(result.error.code).toBe('MM_NO_ACTIVE_SESSION');
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
       }
     });
 
     it('handles discovery errors', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockRejectedValue(
         new Error('Discovery failed'),
       );
 
-      const result = await handleAccessibilitySnapshot({});
+      const result = await accessibilitySnapshotTool({}, context);
 
       expect(result.ok).toBe(false);
       if (!result.ok) {
@@ -292,8 +268,9 @@ describe('discovery-tools', () => {
     });
   });
 
-  describe('handleDescribeScreen', () => {
+  describe('describeScreenTool', () => {
     it('returns comprehensive screen state', async () => {
+      const context = createMockContext();
       const mockTestIds: TestIdItem[] = [
         { testId: 'button-1', tag: 'button', visible: true },
       ];
@@ -311,7 +288,7 @@ describe('discovery-tools', () => {
         },
       );
 
-      const result = await handleDescribeScreen({});
+      const result = await describeScreenTool({}, context);
 
       expect(result.ok).toBe(true);
       if (result.ok) {
@@ -323,6 +300,8 @@ describe('discovery-tools', () => {
     });
 
     it('includes screenshot when requested', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
         {
@@ -330,19 +309,20 @@ describe('discovery-tools', () => {
           refMap: new Map(),
         },
       );
-
-      const sessionManager = sessionManagerModule.getSessionManager();
-      vi.spyOn(sessionManager, 'screenshot').mockResolvedValue({
+      vi.mocked(context.sessionManager.screenshot).mockResolvedValue({
         path: '/path/to/screenshot.png',
         width: 1280,
         height: 720,
         base64: 'base64data',
       });
 
-      const result = await handleDescribeScreen({
-        includeScreenshot: true,
-        screenshotName: 'test-screen',
-      });
+      const result = await describeScreenTool(
+        {
+          includeScreenshot: true,
+          screenshotName: 'test-screen',
+        },
+        context,
+      );
 
       expect(result.ok).toBe(true);
       if (result.ok) {
@@ -352,14 +332,16 @@ describe('discovery-tools', () => {
           height: 720,
           base64: null,
         });
-        expect(sessionManager.screenshot).toHaveBeenCalledWith({
-          name: 'test-screen',
-          fullPage: true,
-        });
       }
+      expect(context.sessionManager.screenshot).toHaveBeenCalledWith({
+        name: 'test-screen',
+        fullPage: true,
+      });
     });
 
     it('includes base64 in screenshot when requested', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
         {
@@ -367,19 +349,20 @@ describe('discovery-tools', () => {
           refMap: new Map(),
         },
       );
-
-      const sessionManager = sessionManagerModule.getSessionManager();
-      vi.spyOn(sessionManager, 'screenshot').mockResolvedValue({
+      vi.mocked(context.sessionManager.screenshot).mockResolvedValue({
         path: '/path/to/screenshot.png',
         width: 1280,
         height: 720,
         base64: 'base64data',
       });
 
-      const result = await handleDescribeScreen({
-        includeScreenshot: true,
-        includeScreenshotBase64: true,
-      });
+      const result = await describeScreenTool(
+        {
+          includeScreenshot: true,
+          includeScreenshotBase64: true,
+        },
+        context,
+      );
 
       expect(result.ok).toBe(true);
       if (result.ok) {
@@ -388,6 +371,8 @@ describe('discovery-tools', () => {
     });
 
     it('uses default screenshot name when not provided', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
       vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
         {
@@ -396,30 +381,36 @@ describe('discovery-tools', () => {
         },
       );
 
-      const sessionManager = sessionManagerModule.getSessionManager();
-      const mockedScreenshot = vi
-        .spyOn(sessionManager, 'screenshot')
-        .mockResolvedValue({
-          path: '/path/to/screenshot.png',
-          width: 1280,
-          height: 720,
-        } as ScreenshotResult);
+      await describeScreenTool({ includeScreenshot: true }, context);
 
-      await handleDescribeScreen({ includeScreenshot: true });
-
-      expect(mockedScreenshot).toHaveBeenCalledWith({
+      expect(context.sessionManager.screenshot).toHaveBeenCalledWith({
         name: 'describe-screen',
         fullPage: true,
       });
     });
 
     it('generates prior knowledge from context', async () => {
+      const context = createMockContext();
       const mockTestIds: TestIdItem[] = [
         { testId: 'send-btn', tag: 'button', visible: true },
       ];
       const mockNodes: A11yNodeTrimmed[] = [
         { ref: 'e1', role: 'button', name: 'Send', path: [] },
       ];
+      const mockPriorKnowledge = {
+        schemaVersion: 1 as const,
+        generatedAt: '2026-02-04T00:00:00.000Z',
+        query: {
+          currentScreen: 'home',
+          currentUrl: 'chrome-extension://ext-123/home.html',
+          visibleTestIds: mockTestIds,
+          a11yNodes: mockNodes,
+          currentSessionFlowTags: ['discovery'],
+        },
+        relatedSessions: [],
+        similarSteps: [],
+        suggestedNextActions: [],
+      };
 
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue(
         mockTestIds,
@@ -430,37 +421,31 @@ describe('discovery-tools', () => {
           refMap: new Map([['e1', 'role=button[name="Send"]']]),
         },
       );
+      vi.mocked(
+        context.knowledgeStore.generatePriorKnowledge,
+      ).mockResolvedValue(mockPriorKnowledge as any);
 
-      const mockPriorKnowledge = {
-        version: 1 as const,
-        hints: [
-          { type: 'similar_flow' as const, content: 'Previous send flow' },
-        ],
-      };
-
-      const { knowledgeStore } = knowledgeStoreModule;
-      vi.spyOn(knowledgeStore, 'generatePriorKnowledge').mockResolvedValue(
-        mockPriorKnowledge as any,
-      );
-
-      const result = await handleDescribeScreen({});
+      const result = await describeScreenTool({}, context);
 
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.priorKnowledge).toStrictEqual(mockPriorKnowledge);
-        expect(knowledgeStore.generatePriorKnowledge).toHaveBeenCalledWith(
-          expect.objectContaining({
-            currentScreen: 'home',
-            visibleTestIds: mockTestIds,
-            a11yNodes: mockNodes,
-            currentSessionFlowTags: ['discovery'],
-          }),
-          'test-session-123',
-        );
       }
+      expect(
+        context.knowledgeStore.generatePriorKnowledge,
+      ).toHaveBeenCalledWith(
+        expect.objectContaining({
+          currentScreen: 'home',
+          visibleTestIds: mockTestIds,
+          a11yNodes: mockNodes,
+          currentSessionFlowTags: ['discovery'],
+        }),
+        'test-session-123',
+      );
     });
 
     it('updates refMap in session manager', async () => {
+      const context = createMockContext();
       const mockRefMap = new Map([['e1', 'role=button[name="OK"]']]);
 
       vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
@@ -471,49 +456,30 @@ describe('discovery-tools', () => {
         },
       );
 
-      const sessionManager = sessionManagerModule.getSessionManager();
-
-      await handleDescribeScreen({});
+      await describeScreenTool({}, context);
 
-      expect(sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap);
-    });
-
-    it('records step to knowledge store', async () => {
-      vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]);
-      vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue(
-        {
-          nodes: [],
-          refMap: new Map(),
-        },
-      );
-
-      const { knowledgeStore } = knowledgeStoreModule;
-
-      await handleDescribeScreen({});
-
-      expect(knowledgeStore.recordStep).toHaveBeenCalled();
+      expect(context.sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap);
     });
 
     it('returns error when no active session', async () => {
-      const mockSessionManager = createMockSessionManager({ hasActive: false });
-      vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-        mockSessionManager,
-      );
+      const context = createMockContext({ hasActive: false });
 
-      const result = await handleDescribeScreen({});
+      const result = await describeScreenTool({}, context);
 
       expect(result.ok).toBe(false);
       if (!result.ok) {
-        expect(result.error.code).toBe('MM_NO_ACTIVE_SESSION');
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
       }
     });
 
     it('handles discovery errors', async () => {
+      const context = createMockContext();
+
       vi.spyOn(discoveryModule, 'collectTestIds').mockRejectedValue(
         new Error('Page closed'),
       );
 
-      const result = await handleDescribeScreen({});
+      const result = await describeScreenTool({}, context);
 
       expect(result.ok).toBe(false);
       if (!result.ok) {
diff --git a/src/tools/discovery-tools.ts b/src/tools/discovery-tools.ts
new file mode 100644
index 0000000..60962fc
--- /dev/null
+++ b/src/tools/discovery-tools.ts
@@ -0,0 +1,163 @@
+import { classifyDiscoveryError } from './error-classification.js';
+import type {
+  AccessibilitySnapshotInput,
+  AccessibilitySnapshotResult,
+  DescribeScreenInput,
+  DescribeScreenResult,
+  ListTestIdsInput,
+  ListTestIdsResult,
+  PriorKnowledgeContext,
+} from './types';
+import {
+  DEFAULT_TESTID_LIMIT,
+  OBSERVATION_TESTID_LIMIT,
+} from './utils/constants.js';
+import {
+  collectTestIds,
+  collectTrimmedA11ySnapshot,
+} from './utils/discovery.js';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Lists the test IDs found on the current page, up to an optional limit.
+ *
+ * @param input - Collection options; `limit` defaults to DEFAULT_TESTID_LIMIT.
+ * @param context - The tool execution context.
+ * @returns The collected test ID items, or a classified discovery error.
+ */
+export async function listTestIdsTool(
+  input: ListTestIdsInput,
+  context: ToolContext,
+): Promise<ToolResponse<ListTestIdsResult>> {
+  const sessionError = requireActiveSession<ListTestIdsResult>(context);
+  if (sessionError) {
+    return sessionError;
+  }
+
+  const maxItems = input.limit ?? DEFAULT_TESTID_LIMIT;
+
+  try {
+    const items = await collectTestIds(context.page, maxItems);
+
+    // A snapshot is taken as well, but only its refMap is used here.
+    const { refMap: latestRefMap } = await collectTrimmedA11ySnapshot(context.page);
+    context.sessionManager.setRefMap(latestRefMap);
+    return createToolSuccess({ items });
+  } catch (error) {
+    const { code, message } = classifyDiscoveryError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Takes a trimmed accessibility snapshot of the current page.
+ *
+ * @param input - Snapshot options; `rootSelector` is forwarded to the collector.
+ * @param context - The tool execution context.
+ * @returns The snapshot nodes, or a classified discovery error.
+ */
+export async function accessibilitySnapshotTool(
+  input: AccessibilitySnapshotInput,
+  context: ToolContext,
+): Promise<ToolResponse<AccessibilitySnapshotResult>> {
+  const sessionError =
+    requireActiveSession<AccessibilitySnapshotResult>(context);
+  if (sessionError) {
+    return sessionError;
+  }
+
+  try {
+    const { nodes, refMap: latestRefMap } = await collectTrimmedA11ySnapshot(
+      context.page,
+      input.rootSelector,
+    );
+    context.sessionManager.setRefMap(latestRefMap);
+
+    // The items returned by this call are discarded.
+    await collectTestIds(context.page, OBSERVATION_TESTID_LIMIT);
+    return createToolSuccess({ nodes });
+  } catch (error) {
+    const { code, message } = classifyDiscoveryError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Builds a composite description of the current screen for the caller.
+ *
+ * @param input - Options controlling the optional screenshot and its base64.
+ * @param context - The tool execution context.
+ * @returns State, active tab, test IDs, a11y nodes, screenshot, prior knowledge.
+ */
+export async function describeScreenTool(
+  input: DescribeScreenInput,
+  context: ToolContext,
+): Promise<ToolResponse<DescribeScreenResult>> {
+  const sessionError = requireActiveSession<DescribeScreenResult>(context);
+  if (sessionError) {
+    return sessionError;
+  }
+
+  try {
+    const state = await context.sessionManager.getExtensionState();
+    const testIds = await collectTestIds(context.page, DEFAULT_TESTID_LIMIT);
+    const { nodes, refMap } = await collectTrimmedA11ySnapshot(context.page);
+    context.sessionManager.setRefMap(refMap);
+
+    // Report the active tab only when the session page is a tracked page.
+    const tracked = context.sessionManager.getTrackedPages();
+    const activePage = context.sessionManager.getPage();
+    const activeEntry = tracked.find((entry) => entry.page === activePage);
+    const activeTab = activeEntry
+      ? { role: activeEntry.role, url: activePage.url() }
+      : undefined;
+
+    let screenshot: DescribeScreenResult['screenshot'] = null;
+
+    if (input.includeScreenshot) {
+      const capture = await context.sessionManager.screenshot({
+        name: input.screenshotName ?? 'describe-screen',
+        fullPage: true,
+      });
+
+      // base64 data is only passed through when explicitly requested.
+      screenshot = {
+        path: capture.path,
+        width: capture.width,
+        height: capture.height,
+        base64: input.includeScreenshotBase64 ? capture.base64 : null,
+      };
+    }
+
+    const metadata = context.sessionManager.getSessionMetadata();
+    const knowledgeQuery: PriorKnowledgeContext = {
+      currentScreen: state.currentScreen,
+      currentUrl: state.currentUrl,
+      visibleTestIds: testIds,
+      a11yNodes: nodes,
+      currentSessionFlowTags: metadata?.flowTags,
+    };
+
+    const priorKnowledge = await context.knowledgeStore.generatePriorKnowledge(
+      knowledgeQuery,
+      context.sessionManager.getSessionId(),
+    );
+
+    return createToolSuccess({
+      state,
+      activeTab,
+      testIds: { items: testIds },
+      a11y: { nodes },
+      screenshot,
+      priorKnowledge,
+    });
+  } catch (error) {
+    const { code, message } = classifyDiscoveryError(error);
+    return createToolError(code, message);
+  }
+}
diff --git a/src/mcp-server/tools/error-classification.test.ts b/src/tools/error-classification.test.ts
similarity index 99%
rename from src/mcp-server/tools/error-classification.test.ts
rename to src/tools/error-classification.test.ts
index b9ba1bb..e141e12 100644
--- a/src/mcp-server/tools/error-classification.test.ts
+++ b/src/tools/error-classification.test.ts
@@ -20,8 +20,8 @@ import {
   classifyStateError,
   classifySeedingError,
   classifyContextError,
-} from './error-classification';
-import { ErrorCodes } from '../types';
+} from './error-classification.js';
+import { ErrorCodes } from './types';
 
 describe('error-classification', () => {
   describe('isPageClosedError', () => {
diff --git a/src/mcp-server/tools/error-classification.ts b/src/tools/error-classification.ts
similarity index 99%
rename from src/mcp-server/tools/error-classification.ts
rename to src/tools/error-classification.ts
index c424d91..9b844f3 100644
--- a/src/mcp-server/tools/error-classification.ts
+++ b/src/tools/error-classification.ts
@@ -5,7 +5,7 @@
  * based on error message patterns.
  */
 
-import { ErrorCodes } from '../types';
+import { ErrorCodes } from './types';
 import { extractErrorMessage } from '../utils';
 
 const ERROR_PATTERNS = {
diff --git a/src/tools/index.ts b/src/tools/index.ts
new file mode 100644
index 0000000..c75bad2
--- /dev/null
+++ b/src/tools/index.ts
@@ -0,0 +1,15 @@
+export * from './batch.js';
+export * from './build.js';
+export * from './cleanup.js';
+export * from './clipboard.js';
+export * from './context.js';
+export * from './discovery-tools.js';
+export * from './interaction.js';
+export * from './knowledge.js';
+export * from './launch.js';
+export * from './navigation.js';
+export * from './registry.js';
+export * from './screenshot.js';
+export * from './seeding.js';
+export * from './state.js';
+export * from './utils.js';
diff --git a/src/tools/interaction.test.ts b/src/tools/interaction.test.ts
new file mode 100644
index 0000000..4099384
--- /dev/null
+++ b/src/tools/interaction.test.ts
@@ -0,0 +1,894 @@
+/**
+ * Unit tests for interaction tool handlers.
+ *
+ * Tests clickTool, typeTool, waitForTool, and getTextTool with various target
+ * types, error scenarios, and page closure detection.
+ */
+
+import { describe, it, expect, vi, afterEach } from 'vitest';
+
+import {
+  clickTool,
+  getTextTool,
+  typeTool,
+  waitForTool,
+} from './interaction.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import * as discoveryModule from './utils/discovery.js';
+import * as targetsModule from './utils/targets.js';
+import type { ToolContext } from '../types/http.js';
+/** Builds a stub locator: click/fill/waitFor resolve to undefined, textContent resolves to 'Hello World'. */
+function createMockLocator() {
+  return {
+    click: vi.fn().mockResolvedValue(undefined),
+    fill: vi.fn().mockResolvedValue(undefined),
+    waitFor: vi.fn().mockResolvedValue(undefined),
+    textContent: vi.fn().mockResolvedValue('Hello World'),
+  };
+}
+/** Builds a partial ToolContext stub (cast via unknown); hasActive defaults to true, page to {}, refMap to an empty Map. */
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    page?: object;
+    refMap?: Map<string, string>;
+  } = {},
+): ToolContext {
+  return {
+    sessionManager: createMockSessionManager({
+      hasActive: options.hasActive ?? true,
+    }),
+    page: (options.page ?? {}) as ToolContext['page'],
+    refMap: options.refMap ?? new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  } as unknown as ToolContext;
+}
+
+describe('interaction', () => {
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  describe('clickTool', () => {
+    it('clicks element by testId', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+      // Stub target resolution; the spy also records the arguments asserted below.
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await clickTool({ testId: 'my-button' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.clicked).toBe(true);
+        expect(result.result.target).toBe('testId:my-button');
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'my-button',
+        context.refMap,
+        15000,
+        undefined,
+      );
+      expect(locator.click).toHaveBeenCalled();
+    });
+
+    it('uses custom timeout when provided', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      await clickTool({ testId: 'my-button', timeoutMs: 5000 }, context);
+
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'my-button',
+        context.refMap,
+        5000,
+        undefined,
+      );
+    });
+
+    it('passes within scope to waitForTarget', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await clickTool(
+        { testId: 'btn', within: { testId: 'parent' } },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'btn',
+        context.refMap,
+        15000,
+        { type: 'testId', value: 'parent' },
+      );
+    });
+
+    it('clicks element by CSS selector', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await clickTool({ selector: 'button.primary' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.clicked).toBe(true);
+        expect(result.result.target).toBe('selector:button.primary');
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'selector',
+        'button.primary',
+        context.refMap,
+        15000,
+        undefined,
+      );
+    });
+
+    it('clicks element by accessibility reference', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const refMap = new Map([['e5', 'button[aria-label="Submit"]']]);
+      const context = createMockContext({ page, refMap });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await clickTool({ a11yRef: 'e5' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.clicked).toBe(true);
+        expect(result.result.target).toBe('a11yRef:e5');
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'a11yRef',
+        'e5',
+        refMap,
+        15000,
+        undefined,
+      );
+    });
+
+    it('returns error when no target specified', async () => {
+      const result = await clickTool({} as any, createMockContext());
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('Exactly one');
+      }
+    });
+
+    it('returns error when multiple targets specified', async () => {
+      const result = await clickTool(
+        { testId: 'button', selector: '.button' } as any,
+        createMockContext(),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('Exactly one');
+      }
+    });
+
+    it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => {
+      vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({
+        valid: true,
+      } as any);
+      // The stub claims validity but carries nothing else, exercising the tool's fallback guard.
+      const result = await clickTool({ testId: 'button' }, createMockContext());
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toBe('Invalid target selection');
+      }
+    });
+
+    it('handles page closure gracefully', async () => {
+      const locator = createMockLocator();
+      locator.click.mockRejectedValue(
+        new Error('Target page, context or browser has been closed'),
+      );
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await clickTool({ testId: 'close-btn' }, context);
+      // A page-closed error thrown by click() is reported as a successful click.
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.clicked).toBe(true);
+        expect(result.result.pageClosedAfterClick).toBe(true);
+        expect(result.result.target).toBe('testId:close-btn');
+      }
+    });
+
+    it('handles browser closed error gracefully', async () => {
+      const locator = createMockLocator();
+      locator.click.mockRejectedValue(new Error('browser has been closed'));
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await clickTool({ testId: 'close-btn' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.pageClosedAfterClick).toBe(true);
+      }
+    });
+
+    it('returns error when click fails with non-closure error', async () => {
+      const locator = createMockLocator();
+      locator.click.mockRejectedValue(new Error('Element is not clickable'));
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await clickTool({ testId: 'my-button' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_CLICK_FAILED);
+      }
+    });
+
+    it('returns error when element not found', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
+        new Error('Timeout waiting for element'),
+      );
+
+      const result = await clickTool({ testId: 'nonexistent' }, context);
+      // A waitForTarget rejection is expected to surface as MM_WAIT_TIMEOUT.
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
+      }
+    });
+
+    it('returns error when no session active', async () => {
+      const result = await clickTool(
+        { testId: 'my-button' },
+        createMockContext({ hasActive: false }),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+  });
+
+  describe('typeTool', () => {
+    it('types text into element by testId', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await typeTool(
+        { testId: 'amount-input', text: '0.5' },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.typed).toBe(true);
+        expect(result.result.target).toBe('testId:amount-input');
+        expect(result.result.textLength).toBe(3);
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'amount-input',
+        context.refMap,
+        15000,
+        undefined,
+      );
+      expect(locator.fill).toHaveBeenCalledWith('0.5');
+    });
+
+    it('uses custom timeout when provided', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      await typeTool(
+        { testId: 'input', text: 'test', timeoutMs: 3000 },
+        context,
+      );
+
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'input',
+        context.refMap,
+        3000,
+        undefined,
+      );
+    });
+
+    it('passes within scope to waitForTarget', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await typeTool(
+        { testId: 'input', text: 'hello', within: { selector: '.form' } },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'input',
+        context.refMap,
+        15000,
+        { type: 'selector', value: '.form' },
+      );
+    });
+
+    it('types text into element by CSS selector', async () => {
+      const locator = createMockLocator();
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await typeTool(
+        { selector: 'input[name="email"]', text: 'test@example.com' },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.typed).toBe(true);
+        expect(result.result.target).toBe('selector:input[name="email"]');
+        expect(result.result.textLength).toBe(16);
+      }
+      expect(locator.fill).toHaveBeenCalledWith('test@example.com');
+    });
+
+    it('types text into element by accessibility reference', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const refMap = new Map([['e3', 'input[aria-label="Amount"]']]);
+      const context = createMockContext({ page, refMap });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await typeTool({ a11yRef: 'e3', text: '100' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.typed).toBe(true);
+        expect(result.result.target).toBe('a11yRef:e3');
+        expect(result.result.textLength).toBe(3);
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'a11yRef',
+        'e3',
+        refMap,
+        15000,
+        undefined,
+      );
+    });
+
+    it('types empty string and reports zero length', async () => {
+      const locator = createMockLocator();
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await typeTool({ testId: 'input', text: '' }, context);
+      // An empty string is still passed to fill() and reported as typed.
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.typed).toBe(true);
+        expect(result.result.textLength).toBe(0);
+      }
+      expect(locator.fill).toHaveBeenCalledWith('');
+    });
+
+    it('returns error when no target specified', async () => {
+      const result = await typeTool(
+        { text: 'test' } as any,
+        createMockContext(),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('Exactly one');
+      }
+    });
+
+    it('returns error when multiple targets specified', async () => {
+      const result = await typeTool(
+        { testId: 'input', selector: 'input', text: 'test' } as any,
+        createMockContext(),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('Exactly one');
+      }
+    });
+
+    it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => {
+      vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({
+        valid: true,
+      } as any);
+      // The stub claims validity but carries nothing else, exercising the tool's fallback guard.
+      const result = await typeTool(
+        { testId: 'input', text: 'test' },
+        createMockContext(),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toBe('Invalid target selection');
+      }
+    });
+
+    it('returns error when fill fails', async () => {
+      const locator = createMockLocator();
+      locator.fill.mockRejectedValue(new Error('Element is not editable'));
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+      // The target resolves, so the failure comes from fill() itself.
+      const result = await typeTool({ testId: 'input', text: 'test' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_TYPE_FAILED);
+      }
+    });
+
+    it('returns error when element not found', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
+        new Error('Timeout waiting for element'),
+      );
+
+      const result = await typeTool(
+        { testId: 'nonexistent', text: 'test' },
+        context,
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
+      }
+    });
+
+    it('returns error when no session active', async () => {
+      const result = await typeTool(
+        { testId: 'input', text: 'test' },
+        createMockContext({ hasActive: false }),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+  });
+
+  describe('waitForTool', () => {
+    it('waits for element by testId', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await waitForTool({ testId: 'loading-spinner' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.found).toBe(true);
+        expect(result.result.target).toBe('testId:loading-spinner');
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'loading-spinner',
+        context.refMap,
+        15000,
+        undefined,
+      );
+    });
+
+    it('uses custom timeout when provided', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      await waitForTool({ testId: 'element', timeoutMs: 30000 }, context);
+
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'testId',
+        'element',
+        context.refMap,
+        30000,
+        undefined,
+      );
+    });
+
+    it('passes within scope to waitForTarget', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await waitForTool(
+        { a11yRef: 'e5', within: { a11yRef: 'e1' } },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'a11yRef',
+        'e5',
+        context.refMap,
+        15000,
+        { type: 'a11yRef', value: 'e1' },
+      );
+    });
+
+    it('waits for element by CSS selector', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const context = createMockContext({ page });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await waitForTool(
+        { selector: '.success-message' },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.found).toBe(true);
+        expect(result.result.target).toBe('selector:.success-message');
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'selector',
+        '.success-message',
+        context.refMap,
+        15000,
+        undefined,
+      );
+    });
+
+    it('waits for element by accessibility reference', async () => {
+      const page = {};
+      const locator = createMockLocator();
+      const refMap = new Map([['e10', 'button[aria-label="Confirm"]']]);
+      const context = createMockContext({ page, refMap });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await waitForTool({ a11yRef: 'e10' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.found).toBe(true);
+        expect(result.result.target).toBe('a11yRef:e10');
+      }
+      expect(discoveryModule.waitForTarget).toHaveBeenCalledWith(
+        page,
+        'a11yRef',
+        'e10',
+        refMap,
+        15000,
+        undefined,
+      );
+    });
+
+    it('returns error when no target specified', async () => {
+      const result = await waitForTool({} as any, createMockContext());
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('Exactly one');
+      }
+    });
+
+    it('returns error when multiple targets specified', async () => {
+      const result = await waitForTool(
+        { testId: 'element', selector: '.element' } as any,
+        createMockContext(),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('Exactly one');
+      }
+    });
+
+    it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => {
+      vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({
+        valid: true,
+      } as any);
+      // The stub claims validity but carries nothing else, exercising the tool's fallback guard.
+      const result = await waitForTool(
+        { testId: 'element' },
+        createMockContext(),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toBe('Invalid target selection');
+      }
+    });
+
+    it('returns error when element not found within timeout', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
+        new Error('Timeout 15000ms exceeded'),
+      );
+
+      const result = await waitForTool({ testId: 'nonexistent' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
+      }
+    });
+
+    it('returns error when page closed during wait', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
+        new Error('Target page has been closed'),
+      );
+
+      const result = await waitForTool({ testId: 'element' }, context);
+      // Unlike a click, a page-closed error while waiting is an error (MM_WAIT_TIMEOUT), not success.
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT);
+      }
+    });
+
+    it('returns error when no session active', async () => {
+      const result = await waitForTool(
+        { testId: 'element' },
+        createMockContext({ hasActive: false }),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+  });
+
+  describe('getTextTool', () => {
+    it('returns textContent by testId', async () => {
+      const locator = createMockLocator();
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await getTextTool({ testId: 'my-element' }, context);
+      // toHaveLength below reads the result object's own `length` field (the text length).
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.text).toBe('Hello World');
+        expect(result.result).toHaveLength(11);
+        expect(result.result.target).toBe('testId:my-element');
+      }
+    });
+
+    it('returns empty string when textContent is null', async () => {
+      const locator = createMockLocator();
+      locator.textContent.mockResolvedValue(null);
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await getTextTool({ testId: 'empty-node' }, context);
+      // A null textContent is expected to be normalised to '' with length 0.
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.text).toBe('');
+        expect(result.result).toHaveLength(0);
+      }
+    });
+
+    it('returns textContent by a11yRef', async () => {
+      const locator = createMockLocator();
+      locator.textContent.mockResolvedValue('Ref content');
+      const context = createMockContext({
+        refMap: new Map([['e1', 'button[name="Submit"]']]),
+      });
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await getTextTool({ a11yRef: 'e1' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.text).toBe('Ref content');
+      }
+    });
+
+    it('returns textContent by CSS selector', async () => {
+      const locator = createMockLocator();
+      locator.textContent.mockResolvedValue('Selector content');
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue(
+        locator as any,
+      );
+
+      const result = await getTextTool({ selector: '#result-text' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.text).toBe('Selector content');
+        expect(result.result.target).toBe('selector:#result-text');
+      }
+    });
+
+    it('returns error when element not found', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue(
+        new Error('Timeout waiting for selector'),
+      );
+
+      const result = await getTextTool({ testId: 'missing' }, context);
+
+      expect(result.ok).toBe(false);
+    });
+
+    it('returns error with invalid target selection', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({
+        valid: false,
+        error: 'No target provided',
+      } as any);
+
+      const result = await getTextTool({} as any, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+      }
+    });
+
+    it('returns error when no session active', async () => {
+      const result = await getTextTool(
+        { testId: 'element' },
+        createMockContext({ hasActive: false }),
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+
+    it('supports --within scoping', async () => {
+      const locator = createMockLocator();
+      locator.textContent.mockResolvedValue('Scoped text');
+      const context = createMockContext();
+      const spy = vi
+        .spyOn(discoveryModule, 'waitForTarget')
+        .mockResolvedValue(locator as any);
+
+      const result = await getTextTool(
+        {
+          testId: 'child-element',
+          within: { testId: 'parent-container' },
+        },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(spy).toHaveBeenCalledWith(
+        expect.anything(),
+        'testId',
+        'child-element',
+        expect.any(Map),
+        expect.any(Number),
+        { type: 'testId', value: 'parent-container' },
+      );
+    });
+  });
+});
diff --git a/src/tools/interaction.ts b/src/tools/interaction.ts
new file mode 100644
index 0000000..5e3c3af
--- /dev/null
+++ b/src/tools/interaction.ts
@@ -0,0 +1,283 @@
+import {
+  classifyClickError,
+  classifyTypeError,
+  classifyWaitError,
+  isPageClosedError,
+} from './error-classification.js';
+import type {
+  ClickInput,
+  ClickResult,
+  GetTextInput,
+  GetTextResult,
+  TypeInput,
+  TypeResult,
+  WaitForInput,
+  WaitForResult,
+  WithinTarget,
+} from './types';
+import { ErrorCodes } from './types';
+import { DEFAULT_INTERACTION_TIMEOUT_MS } from './utils/constants.js';
+import { waitForTarget } from './utils/discovery.js';
+import type { WithinScope } from './utils/discovery.js';
+import { validateTargetSelection } from './utils/targets.js';
+import {
+  isInvalidTargetSelection,
+  isValidTargetSelection,
+} from './utils/type-guards.js';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Converts a WithinTarget into the WithinScope passed to waitForTarget, using the first truthy field in the order a11yRef, testId, selector.
+ *
+ * @param within - The optional within target from tool input.
+ * @returns The `{ type, value }` scope for that field, or undefined if `within` is missing or has no truthy field.
+ */
+function resolveWithinScope(
+  within: WithinTarget | undefined,
+): WithinScope | undefined {
+  if (!within) {
+    return undefined;
+  }
+  if (within.a11yRef) {
+    return { type: 'a11yRef', value: within.a11yRef };
+  }
+  if (within.testId) {
+    return { type: 'testId', value: within.testId };
+  }
+  if (within.selector) {
+    return { type: 'selector', value: within.selector };
+  }
+  return undefined; // no field was truthy, so the lookup stays unscoped
+}
+
+/**
+ * Waits for the target element and clicks it; a page-closed error thrown by the click itself is reported as success with `pageClosedAfterClick: true`.
+ *
+ * @param input - The click target, optional `within` scope, and optional `timeoutMs` (falls back to DEFAULT_INTERACTION_TIMEOUT_MS).
+ * @param context - The tool execution context; used for the active-session check and supplies `page` and `refMap`.
+ * @returns A success response with `clicked` and `target` (`type:value`), or a tool error for a missing session, an invalid target, or a failure classified by classifyClickError.
+ */
+export async function clickTool(
+  input: ClickInput,
+  context: ToolContext,
+): Promise<ToolResponse<ClickResult>> {
+  const missingSession = requireActiveSession<ClickResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
+  const validation = validateTargetSelection(input);
+
+  if (isInvalidTargetSelection(validation)) {
+    return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error);
+  }
+  // Fallback guard: the result was not flagged invalid, yet is not a well-formed valid selection either.
+  if (!isValidTargetSelection(validation)) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Invalid target selection',
+    );
+  }
+
+  const { type: targetType, value: targetValue } = validation;
+
+  try {
+    const locator = await waitForTarget(
+      context.page,
+      targetType,
+      targetValue,
+      context.refMap,
+      timeoutMs,
+      resolveWithinScope(input.within),
+    );
+    // Inner try isolates click() so only its page-closed errors are treated as success.
+    try {
+      await locator.click();
+      return createToolSuccess({
+        clicked: true,
+        target: `${targetType}:${targetValue}`,
+      });
+    } catch (clickError) {
+      if (isPageClosedError(clickError)) {
+        return createToolSuccess({
+          clicked: true,
+          target: `${targetType}:${targetValue}`,
+          pageClosedAfterClick: true,
+        });
+      }
+      // Any other click failure is rethrown to the outer catch for classification.
+      throw clickError;
+    }
+  } catch (error) {
+    const errorInfo = classifyClickError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
+
+/**
+ * Waits for the target element and fills it with `input.text` via `locator.fill`.
+ *
+ * @param input - The type target, text to fill, optional `within` scope, and optional `timeoutMs` (falls back to DEFAULT_INTERACTION_TIMEOUT_MS).
+ * @param context - The tool execution context; used for the active-session check and supplies `page` and `refMap`.
+ * @returns A success response with `typed`, `target` (`type:value`), and `textLength` (`input.text.length`), or a tool error for a missing session, an invalid target, or a failure classified by classifyTypeError.
+ */
+export async function typeTool(
+  input: TypeInput,
+  context: ToolContext,
+): Promise<ToolResponse<TypeResult>> {
+  const missingSession = requireActiveSession<TypeResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
+  const validation = validateTargetSelection(input);
+
+  if (isInvalidTargetSelection(validation)) {
+    return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error);
+  }
+  // Fallback guard: the result was not flagged invalid, yet is not a well-formed valid selection either.
+  if (!isValidTargetSelection(validation)) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Invalid target selection',
+    );
+  }
+
+  const { type: targetType, value: targetValue } = validation;
+
+  try {
+    const locator = await waitForTarget(
+      context.page,
+      targetType,
+      targetValue,
+      context.refMap,
+      timeoutMs,
+      resolveWithinScope(input.within),
+    );
+    // Errors from both waitForTarget and fill() reach the catch below and go through classifyTypeError.
+    await locator.fill(input.text);
+
+    return createToolSuccess({
+      typed: true,
+      target: `${targetType}:${targetValue}`,
+      textLength: input.text.length,
+    });
+  } catch (error) {
+    const errorInfo = classifyTypeError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
+
+/**
+ * Waits until waitForTarget resolves for the requested element within the timeout.
+ *
+ * @param input - The wait target, optional `within` scope, and optional `timeoutMs` (falls back to DEFAULT_INTERACTION_TIMEOUT_MS).
+ * @param context - The tool execution context; used for the active-session check and supplies `page` and `refMap`.
+ * @returns A success response with `found: true` and `target` (`type:value`); any failure is a tool error (missing session, invalid target, or classifyWaitError), never `found: false`.
+ */
+export async function waitForTool(
+  input: WaitForInput,
+  context: ToolContext,
+): Promise<ToolResponse<WaitForResult>> {
+  const missingSession = requireActiveSession<WaitForResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
+  const validation = validateTargetSelection(input);
+
+  if (isInvalidTargetSelection(validation)) {
+    return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error);
+  }
+  // Fallback guard: the result was not flagged invalid, yet is not a well-formed valid selection either.
+  if (!isValidTargetSelection(validation)) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Invalid target selection',
+    );
+  }
+
+  const { type: targetType, value: targetValue } = validation;
+
+  try {
+    await waitForTarget(
+      context.page,
+      targetType,
+      targetValue,
+      context.refMap,
+      timeoutMs,
+      resolveWithinScope(input.within),
+    );
+
+    return createToolSuccess({
+      found: true,
+      target: `${targetType}:${targetValue}`,
+    });
+  } catch (error) {
+    const errorInfo = classifyWaitError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
+
+/**
+ * Waits for the target element and reads its `textContent`, substituting an empty string when that is null.
+ *
+ * @param input - The target element, optional `within` scope, and optional `timeoutMs` (falls back to DEFAULT_INTERACTION_TIMEOUT_MS).
+ * @param context - The tool execution context; used for the active-session check and supplies `page` and `refMap`.
+ * @returns A success response with `text`, `target` (`type:value`), and `length` (`text.length`), or a tool error for a missing session, an invalid target, or a failure classified by classifyWaitError.
+ */
+export async function getTextTool(
+  input: GetTextInput,
+  context: ToolContext,
+): Promise<ToolResponse<GetTextResult>> {
+  const missingSession = requireActiveSession<GetTextResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
+  const validation = validateTargetSelection(input);
+
+  if (isInvalidTargetSelection(validation)) {
+    return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error);
+  }
+  // Fallback guard: the result was not flagged invalid, yet is not a well-formed valid selection either.
+  if (!isValidTargetSelection(validation)) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Invalid target selection',
+    );
+  }
+
+  const { type: targetType, value: targetValue } = validation;
+
+  try {
+    const locator = await waitForTarget(
+      context.page,
+      targetType,
+      targetValue,
+      context.refMap,
+      timeoutMs,
+      resolveWithinScope(input.within),
+    );
+    // textContent() may resolve to null; normalise that to an empty string.
+    const text = (await locator.textContent()) ?? '';
+
+    return createToolSuccess({
+      text,
+      target: `${targetType}:${targetValue}`,
+      length: text.length,
+    });
+  } catch (error) {
+    const errorInfo = classifyWaitError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
diff --git a/src/mcp-server/tools/knowledge.test.ts b/src/tools/knowledge.test.ts
similarity index 53%
rename from src/mcp-server/tools/knowledge.test.ts
rename to src/tools/knowledge.test.ts
index afb0233..5eaa60c 100644
--- a/src/mcp-server/tools/knowledge.test.ts
+++ b/src/tools/knowledge.test.ts
@@ -5,25 +5,21 @@
  * summarize, and session listing with various filter combinations.
  */
 
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { describe, it, expect, vi, beforeEach } from 'vitest';
 
 import {
-  handleKnowledgeLast,
-  handleKnowledgeSearch,
-  handleKnowledgeSummarize,
-  handleKnowledgeSessions,
+  knowledgeLastTool,
+  knowledgeSearchTool,
+  knowledgeSummarizeTool,
+  knowledgeSessionsTool,
 } from './knowledge.js';
-import * as knowledgeStoreModule from '../knowledge-store.js';
-import * as sessionManagerModule from '../session-manager.js';
-import { createMockSessionManager } from '../test-utils';
-import { ErrorCodes } from '../types/errors.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
 
-describe('knowledge', () => {
-  let mockSessionManager: ReturnType<typeof createMockSessionManager>;
-  let mockKnowledgeStore: any;
-
-  beforeEach(() => {
-    mockSessionManager = createMockSessionManager({
+function createMockContext(): ToolContext {
+  return {
+    sessionManager: createMockSessionManager({
       hasActive: true,
       sessionId: 'test-session-123',
       sessionMetadata: {
@@ -34,14 +30,11 @@ describe('knowledge', () => {
         tags: [],
         launch: { stateMode: 'default' },
       },
-    });
-    vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue(
-      mockSessionManager,
-    );
-
-    // Mock knowledge store to prevent "not initialized" errors
-    mockKnowledgeStore = {
-      recordStep: vi.fn().mockResolvedValue(undefined),
+    }),
+    page: {},
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {
       getLastSteps: vi.fn().mockResolvedValue([]),
       searchSteps: vi.fn().mockResolvedValue([]),
       summarizeSession: vi.fn().mockResolvedValue({
@@ -50,59 +43,71 @@ describe('knowledge', () => {
         recipe: [],
       }),
       listSessions: vi.fn().mockResolvedValue([]),
-      generatePriorKnowledge: vi.fn().mockResolvedValue(undefined),
-      writeSessionMetadata: vi.fn().mockResolvedValue('test-session'),
-    };
-    vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue(
-      mockKnowledgeStore,
-    );
-  });
+    },
+  } as unknown as ToolContext;
+}
+
+describe('knowledge', () => {
+  let context: ToolContext;
 
-  afterEach(() => {
-    vi.restoreAllMocks();
+  beforeEach(() => {
+    context = createMockContext();
   });
 
-  describe('handleKnowledgeLast', () => {
+  describe('knowledgeLastTool', () => {
     it('retrieves last N steps with default parameters', async () => {
-      // Arrange
       const mockSteps = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' },
-        { timestamp: '2026-02-04T10:01:00Z', tool: 'mm_type', screen: 'home' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'click',
+          screen: 'home',
+          snippet: 'Clicked send',
+        },
+        {
+          timestamp: '2026-02-04T10:01:00Z',
+          tool: 'type',
+          screen: 'home',
+          snippet: 'Entered amount',
+        },
       ];
-      mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps);
+      vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue(
+        mockSteps,
+      );
 
-      // Act
-      const result = await handleKnowledgeLast({});
+      const result = await knowledgeLastTool({}, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.steps).toStrictEqual(mockSteps);
       }
-      expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith(
-        20, // default n
-        'current', // default scope
+      expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith(
+        20,
+        'current',
         'test-session-123',
-        undefined, // no filters
+        undefined,
       );
     });
 
     it('retrieves last N steps with custom n parameter', async () => {
-      // Arrange
       const mockSteps = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'click',
+          screen: 'home',
+          snippet: 'Clicked send',
+        },
       ];
-      mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps);
+      vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue(
+        mockSteps,
+      );
 
-      // Act
-      const result = await handleKnowledgeLast({ n: 5 });
+      const result = await knowledgeLastTool({ n: 5 }, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.steps).toStrictEqual(mockSteps);
       }
-      expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith(
         5,
         'current',
         'test-session-123',
@@ -111,18 +116,22 @@ describe('knowledge', () => {
     });
 
     it('retrieves steps with scope "all"', async () => {
-      // Arrange
       const mockSteps = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'click',
+          screen: 'home',
+          snippet: 'Clicked send',
+        },
       ];
-      mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps);
+      vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue(
+        mockSteps,
+      );
 
-      // Act
-      const result = await handleKnowledgeLast({ scope: 'all' });
+      const result = await knowledgeLastTool({ scope: 'all' }, context);
 
-      // Assert
       expect(result.ok).toBe(true);
-      expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith(
         20,
         'all',
         'test-session-123',
@@ -131,26 +140,30 @@ describe('knowledge', () => {
     });
 
     it('retrieves steps with filters', async () => {
-      // Arrange
       const mockSteps = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'send' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'click',
+          screen: 'send',
+          snippet: 'Clicked confirm',
+        },
       ];
-      mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps);
       const filters = {
         flowTag: 'send',
         screen: 'send',
         sinceHours: 24,
       };
+      vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue(
+        mockSteps,
+      );
 
-      // Act
-      const result = await handleKnowledgeLast({ n: 10, filters });
+      const result = await knowledgeLastTool({ n: 10, filters }, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.steps).toStrictEqual(mockSteps);
       }
-      expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith(
         10,
         'current',
         'test-session-123',
@@ -159,13 +172,10 @@ describe('knowledge', () => {
     });
 
     it('returns empty array when no steps found', async () => {
-      // Arrange
-      mockKnowledgeStore.getLastSteps.mockResolvedValue([]);
+      vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue([]);
 
-      // Act
-      const result = await handleKnowledgeLast({ n: 10 });
+      const result = await knowledgeLastTool({ n: 10 }, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.steps).toStrictEqual([]);
@@ -173,15 +183,12 @@ describe('knowledge', () => {
     });
 
     it('returns error when knowledge store fails', async () => {
-      // Arrange
-      mockKnowledgeStore.getLastSteps.mockRejectedValue(
+      vi.mocked(context.knowledgeStore.getLastSteps).mockRejectedValue(
         new Error('Database connection failed'),
       );
 
-      // Act
-      const result = await handleKnowledgeLast({ n: 10 });
+      const result = await knowledgeLastTool({ n: 10 }, context);
 
-      // Assert
       expect(result.ok).toBe(false);
       if (!result.ok) {
         expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR);
@@ -191,52 +198,60 @@ describe('knowledge', () => {
     });
   });
 
-  describe('handleKnowledgeSearch', () => {
+  describe('knowledgeSearchTool', () => {
     it('searches steps with default parameters', async () => {
-      // Arrange
       const mockMatches = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'click',
+          screen: 'home',
+          snippet: 'Clicked send',
+        },
       ];
-      mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches);
+      vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue(
+        mockMatches,
+      );
 
-      // Act
-      const result = await handleKnowledgeSearch({ query: 'mm_click' });
+      const result = await knowledgeSearchTool({ query: 'click' }, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.matches).toStrictEqual(mockMatches);
-        expect(result.result.query).toBe('mm_click');
+        expect(result.result.query).toBe('click');
       }
-      expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith(
-        'mm_click',
-        20, // default limit
-        'all', // default scope
+      expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith(
+        'click',
+        20,
+        'all',
         'test-session-123',
-        undefined, // no filters
+        undefined,
       );
     });
 
     it('searches steps with custom limit', async () => {
-      // Arrange
       const mockMatches = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_type', screen: 'send' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'type',
+          screen: 'send',
+          snippet: 'Entered recipient',
+        },
       ];
-      mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches);
+      vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue(
+        mockMatches,
+      );
 
-      // Act
-      const result = await handleKnowledgeSearch({
-        query: 'mm_type',
-        limit: 50,
-      });
+      const result = await knowledgeSearchTool(
+        { query: 'type', limit: 50 },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.matches).toStrictEqual(mockMatches);
       }
-      expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith(
-        'mm_type',
+      expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith(
+        'type',
         50,
         'all',
         'test-session-123',
@@ -245,22 +260,26 @@ describe('knowledge', () => {
     });
 
     it('searches steps with scope "current"', async () => {
-      // Arrange
       const mockMatches = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'click',
+          screen: 'home',
+          snippet: 'Clicked send',
+        },
       ];
-      mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches);
+      vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue(
+        mockMatches,
+      );
 
-      // Act
-      const result = await handleKnowledgeSearch({
-        query: 'mm_click',
-        scope: 'current',
-      });
+      const result = await knowledgeSearchTool(
+        { query: 'click', scope: 'current' },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
-      expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith(
-        'mm_click',
+      expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith(
+        'click',
         20,
         'current',
         'test-session-123',
@@ -269,30 +288,33 @@ describe('knowledge', () => {
     });
 
     it('searches steps with filters', async () => {
-      // Arrange
       const mockMatches = [
-        { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'send' },
+        {
+          timestamp: '2026-02-04T10:00:00Z',
+          tool: 'click',
+          screen: 'send',
+          snippet: 'Confirmed transaction',
+        },
       ];
-      mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches);
       const filters = {
         flowTag: 'send',
         tag: 'transaction',
         screen: 'send',
       };
+      vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue(
+        mockMatches,
+      );
 
-      // Act
-      const result = await handleKnowledgeSearch({
-        query: 'confirm',
-        limit: 10,
-        filters,
-      });
+      const result = await knowledgeSearchTool(
+        { query: 'confirm', limit: 10, filters },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.matches).toStrictEqual(mockMatches);
       }
-      expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith(
         'confirm',
         10,
         'all',
@@ -302,13 +324,13 @@ describe('knowledge', () => {
     });
 
     it('returns empty array when no matches found', async () => {
-      // Arrange
-      mockKnowledgeStore.searchSteps.mockResolvedValue([]);
+      vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue([]);
 
-      // Act
-      const result = await handleKnowledgeSearch({ query: 'nonexistent' });
+      const result = await knowledgeSearchTool(
+        { query: 'nonexistent' },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.matches).toStrictEqual([]);
@@ -317,15 +339,12 @@ describe('knowledge', () => {
     });
 
     it('returns error when search fails', async () => {
-      // Arrange
-      mockKnowledgeStore.searchSteps.mockRejectedValue(
+      vi.mocked(context.knowledgeStore.searchSteps).mockRejectedValue(
         new Error('Search index corrupted'),
       );
 
-      // Act
-      const result = await handleKnowledgeSearch({ query: 'test' });
+      const result = await knowledgeSearchTool({ query: 'test' }, context);
 
-      // Assert
       expect(result.ok).toBe(false);
       if (!result.ok) {
         expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR);
@@ -335,109 +354,106 @@ describe('knowledge', () => {
     });
   });
 
-  describe('handleKnowledgeSummarize', () => {
+  describe('knowledgeSummarizeTool', () => {
     it('summarizes current session by default', async () => {
-      // Arrange
       const mockSummary = {
         sessionId: 'test-session-123',
         stepCount: 5,
         recipe: [
-          { stepNumber: 1, tool: 'mm_click', notes: 'Clicked send button' },
-          { stepNumber: 2, tool: 'mm_type', notes: 'Entered amount' },
+          { stepNumber: 1, tool: 'click', notes: 'Clicked send button' },
+          { stepNumber: 2, tool: 'type', notes: 'Entered amount' },
         ],
       };
-      mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary);
+      vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue(
+        mockSummary,
+      );
 
-      // Act
-      const result = await handleKnowledgeSummarize({});
+      const result = await knowledgeSummarizeTool({}, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result).toStrictEqual(mockSummary);
       }
-      expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith(
         'test-session-123',
       );
     });
 
     it('summarizes current session with scope "current"', async () => {
-      // Arrange
       const mockSummary = {
         sessionId: 'test-session-123',
         stepCount: 3,
         recipe: [],
       };
-      mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary);
+      vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue(
+        mockSummary,
+      );
 
-      // Act
-      const result = await handleKnowledgeSummarize({ scope: 'current' });
+      const result = await knowledgeSummarizeTool(
+        { scope: 'current' },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result).toStrictEqual(mockSummary);
       }
-      expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith(
         'test-session-123',
       );
     });
 
     it('summarizes specific session by sessionId', async () => {
-      // Arrange
       const mockSummary = {
         sessionId: 'other-session-456',
         stepCount: 10,
-        recipe: [
-          { stepNumber: 1, tool: 'mm_launch', notes: 'Launched browser' },
-        ],
+        recipe: [{ stepNumber: 1, tool: 'launch', notes: 'Launched browser' }],
       };
-      mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary);
+      vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue(
+        mockSummary,
+      );
 
-      // Act
-      const result = await handleKnowledgeSummarize({
-        sessionId: 'other-session-456',
-      });
+      const result = await knowledgeSummarizeTool(
+        { sessionId: 'other-session-456' },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result).toStrictEqual(mockSummary);
       }
-      expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith(
         'other-session-456',
       );
     });
 
     it('summarizes session with scope object containing sessionId', async () => {
-      // Arrange
       const mockSummary = {
         sessionId: 'scoped-session-789',
         stepCount: 7,
         recipe: [],
       };
-      mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary);
+      vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue(
+        mockSummary,
+      );
 
-      // Act
-      const result = await handleKnowledgeSummarize({
-        scope: { sessionId: 'scoped-session-789' },
-      });
+      const result = await knowledgeSummarizeTool(
+        { scope: { sessionId: 'scoped-session-789' } },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result).toStrictEqual(mockSummary);
       }
-      expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith(
         'scoped-session-789',
       );
     });
 
     it('returns error when scope is "all"', async () => {
-      // Act
-      const result = await handleKnowledgeSummarize({ scope: 'all' });
+      const result = await knowledgeSummarizeTool({ scope: 'all' }, context);
 
-      // Assert
       expect(result.ok).toBe(false);
       if (!result.ok) {
         expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
@@ -446,13 +462,10 @@ describe('knowledge', () => {
     });
 
     it('returns error when no sessionId can be determined', async () => {
-      // Arrange
-      vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined);
+      vi.mocked(context.sessionManager.getSessionId).mockReturnValue(undefined);
 
-      // Act
-      const result = await handleKnowledgeSummarize({});
+      const result = await knowledgeSummarizeTool({}, context);
 
-      // Assert
       expect(result.ok).toBe(false);
       if (!result.ok) {
         expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
@@ -461,17 +474,15 @@ describe('knowledge', () => {
     });
 
     it('returns error when summarize fails', async () => {
-      // Arrange
-      mockKnowledgeStore.summarizeSession.mockRejectedValue(
+      vi.mocked(context.knowledgeStore.summarizeSession).mockRejectedValue(
         new Error('Session not found'),
       );
 
-      // Act
-      const result = await handleKnowledgeSummarize({
-        sessionId: 'nonexistent-session',
-      });
+      const result = await knowledgeSummarizeTool(
+        { sessionId: 'nonexistent-session' },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(false);
       if (!result.ok) {
         expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR);
@@ -481,9 +492,8 @@ describe('knowledge', () => {
     });
   });
 
-  describe('handleKnowledgeSessions', () => {
+  describe('knowledgeSessionsTool', () => {
     it('lists sessions with default limit', async () => {
-      // Arrange
       const mockSessions = [
         {
           sessionId: 'session-1',
@@ -499,24 +509,23 @@ describe('knowledge', () => {
           tags: ['test'],
         },
       ];
-      mockKnowledgeStore.listSessions.mockResolvedValue(mockSessions);
+      vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue(
+        mockSessions,
+      );
 
-      // Act
-      const result = await handleKnowledgeSessions({});
+      const result = await knowledgeSessionsTool({}, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.sessions).toStrictEqual(mockSessions);
       }
-      expect(mockKnowledgeStore.listSessions).toHaveBeenCalledWith(
-        10, // default limit
-        undefined, // no filters
+      expect(context.knowledgeStore.listSessions).toHaveBeenCalledWith(
+        10,
+        undefined,
       );
     });
 
     it('lists sessions with custom limit', async () => {
-      // Arrange
       const mockSessions = [
         {
           sessionId: 'session-1',
@@ -525,24 +534,23 @@ describe('knowledge', () => {
           tags: [],
         },
       ];
-      mockKnowledgeStore.listSessions.mockResolvedValue(mockSessions);
+      vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue(
+        mockSessions,
+      );
 
-      // Act
-      const result = await handleKnowledgeSessions({ limit: 25 });
+      const result = await knowledgeSessionsTool({ limit: 25 }, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.sessions).toStrictEqual(mockSessions);
       }
-      expect(mockKnowledgeStore.listSessions).toHaveBeenCalledWith(
+      expect(context.knowledgeStore.listSessions).toHaveBeenCalledWith(
         25,
         undefined,
       );
     });
 
     it('lists sessions with filters', async () => {
-      // Arrange
       const mockSessions = [
         {
           sessionId: 'session-1',
@@ -551,31 +559,34 @@ describe('knowledge', () => {
           tags: [],
         },
       ];
-      mockKnowledgeStore.listSessions.mockResolvedValue(mockSessions);
       const filters = {
         flowTag: 'send',
         sinceHours: 48,
       };
+      vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue(
+        mockSessions,
+      );
 
-      // Act
-      const result = await handleKnowledgeSessions({ limit: 20, filters });
+      const result = await knowledgeSessionsTool(
+        { limit: 20, filters },
+        context,
+      );
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.sessions).toStrictEqual(mockSessions);
       }
-      expect(mockKnowledgeStore.listSessions).toHaveBeenCalledWith(20, filters);
+      expect(context.knowledgeStore.listSessions).toHaveBeenCalledWith(
+        20,
+        filters,
+      );
     });
 
     it('returns empty array when no sessions found', async () => {
-      // Arrange
-      mockKnowledgeStore.listSessions.mockResolvedValue([]);
+      vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue([]);
 
-      // Act
-      const result = await handleKnowledgeSessions({ limit: 10 });
+      const result = await knowledgeSessionsTool({ limit: 10 }, context);
 
-      // Assert
       expect(result.ok).toBe(true);
       if (result.ok) {
         expect(result.result.sessions).toStrictEqual([]);
@@ -583,15 +594,12 @@ describe('knowledge', () => {
     });
 
     it('returns error when listing fails', async () => {
-      // Arrange
-      mockKnowledgeStore.listSessions.mockRejectedValue(
+      vi.mocked(context.knowledgeStore.listSessions).mockRejectedValue(
         new Error('Database unavailable'),
       );
 
-      // Act
-      const result = await handleKnowledgeSessions({});
+      const result = await knowledgeSessionsTool({}, context);
 
-      // Assert
       expect(result.ok).toBe(false);
       if (!result.ok) {
         expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR);
diff --git a/src/tools/knowledge.ts b/src/tools/knowledge.ts
new file mode 100644
index 0000000..27a3939
--- /dev/null
+++ b/src/tools/knowledge.ts
@@ -0,0 +1,164 @@
+import { extractErrorMessage } from '../utils';
+import type {
+  KnowledgeLastInput,
+  KnowledgeLastResult,
+  KnowledgeScope,
+  KnowledgeSearchInput,
+  KnowledgeSearchResult,
+  KnowledgeSessionsInput,
+  KnowledgeSessionsResult,
+  KnowledgeSummarizeInput,
+  KnowledgeSummarizeResult,
+} from './types';
+import { ErrorCodes } from './types';
+import { createToolError, createToolSuccess } from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Retrieves the most recent knowledge steps from the store.
+ *
+ * @param input - Optional `n` (default 20), `scope` (default 'current'), and `filters`.
+ * @param context - Tool context supplying `sessionManager.getSessionId()` and `knowledgeStore`.
+ * @returns `{ steps }` on success; an `MM_KNOWLEDGE_ERROR` tool error if the store call rejects.
+ */
+export async function knowledgeLastTool(
+  input: KnowledgeLastInput,
+  context: ToolContext,
+): Promise<ToolResponse<KnowledgeLastResult>> {
+  const sessionId = context.sessionManager.getSessionId();
+  const nSteps = input.n ?? 20;
+  const scope: KnowledgeScope = input.scope ?? 'current';
+
+  try {
+    const steps = await context.knowledgeStore.getLastSteps(
+      nSteps,
+      scope,
+      sessionId,
+      input.filters,
+    );
+
+    return createToolSuccess({ steps });
+  } catch (error) {
+    return createToolError(
+      ErrorCodes.MM_KNOWLEDGE_ERROR,
+      `Failed to retrieve steps: ${extractErrorMessage(error)}`,
+    );
+  }
+}
+
+/**
+ * Searches knowledge steps by query string.
+ *
+ * @param input - Required `query`, plus optional `limit` (default 20), `scope` (default 'all'), and `filters`.
+ * @param context - Tool context supplying `sessionManager.getSessionId()` and `knowledgeStore`.
+ * @returns `{ matches, query }` echoing the query; an `MM_KNOWLEDGE_ERROR` tool error if the search rejects.
+ */
+export async function knowledgeSearchTool(
+  input: KnowledgeSearchInput,
+  context: ToolContext,
+): Promise<ToolResponse<KnowledgeSearchResult>> {
+  const sessionId = context.sessionManager.getSessionId();
+  const limit = input.limit ?? 20;
+  const scope: KnowledgeScope = input.scope ?? 'all';
+
+  try {
+    const matches = await context.knowledgeStore.searchSteps(
+      input.query,
+      limit,
+      scope,
+      sessionId,
+      input.filters,
+    );
+
+    return createToolSuccess({
+      matches,
+      query: input.query,
+    });
+  } catch (error) {
+    return createToolError(
+      ErrorCodes.MM_KNOWLEDGE_ERROR,
+      `Search failed: ${extractErrorMessage(error)}`,
+    );
+  }
+}
+
+/**
+ * Summarizes one knowledge session: an explicit `sessionId` wins, then `scope`, then the active session.
+ *
+ * @param input - Optional `sessionId`, or `scope` of 'current' / `{ sessionId }`; scope 'all' is rejected.
+ * @param context - Tool context supplying `sessionManager.getSessionId()` and `knowledgeStore`.
+ * @returns The store's summary; `MM_INVALID_INPUT` if no session resolves, `MM_KNOWLEDGE_ERROR` if the store rejects.
+ */
+export async function knowledgeSummarizeTool(
+  input: KnowledgeSummarizeInput,
+  context: ToolContext,
+): Promise<ToolResponse<KnowledgeSummarizeResult>> {
+  const currentSessionId = context.sessionManager.getSessionId();
+
+  let targetSessionId: string | undefined;
+
+  if (input.sessionId) {
+    targetSessionId = input.sessionId;
+  } else if (input.scope) {
+    if (input.scope === 'all') {
+      return createToolError(
+        ErrorCodes.MM_INVALID_INPUT,
+        'Cannot summarize all sessions. Use scope="current" or provide a specific sessionId.',
+      );
+    }
+
+    if (input.scope === 'current') {
+      targetSessionId = currentSessionId;
+    } else if (typeof input.scope === 'object' && 'sessionId' in input.scope) {
+      targetSessionId = input.scope.sessionId;
+    }
+  } else {
+    targetSessionId = currentSessionId;
+  }
+
+  if (!targetSessionId) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'No sessionId provided and no active session',
+    );
+  }
+
+  try {
+    const summary =
+      await context.knowledgeStore.summarizeSession(targetSessionId);
+    return createToolSuccess(summary);
+  } catch (error) {
+    return createToolError(
+      ErrorCodes.MM_KNOWLEDGE_ERROR,
+      `Summarize failed: ${extractErrorMessage(error)}`,
+    );
+  }
+}
+
+/**
+ * Lists available knowledge sessions with optional filters.
+ *
+ * @param input - Optional `limit` (default 10) and `filters`, both forwarded to the store.
+ * @param context - Tool context; only `knowledgeStore.listSessions` is used here.
+ * @returns `{ sessions }` on success; an `MM_KNOWLEDGE_ERROR` tool error if listing rejects.
+ */
+export async function knowledgeSessionsTool(
+  input: KnowledgeSessionsInput,
+  context: ToolContext,
+): Promise<ToolResponse<KnowledgeSessionsResult>> {
+  const limit = input.limit ?? 10;
+
+  try {
+    const sessions = await context.knowledgeStore.listSessions(
+      limit,
+      input.filters,
+    );
+
+    return createToolSuccess({ sessions });
+  } catch (error) {
+    return createToolError(
+      ErrorCodes.MM_KNOWLEDGE_ERROR,
+      `Failed to list sessions: ${extractErrorMessage(error)}`,
+    );
+  }
+}
diff --git a/src/tools/launch.test.ts b/src/tools/launch.test.ts
new file mode 100644
index 0000000..b198fa0
--- /dev/null
+++ b/src/tools/launch.test.ts
@@ -0,0 +1,295 @@
+/**
+ * Unit tests for launch tool handler.
+ *
+ * Tests session launch with various states and error scenarios.
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+import { launchTool } from './launch.js';
+import type { LaunchInput } from './types';
+import type { ExtensionState } from '../capabilities/types.js';
+import type { SessionLaunchResult } from '../server/session-manager.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
+
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    launchResult?: SessionLaunchResult;
+    environmentMode?: 'e2e' | 'prod';
+  } = {},
+): ToolContext {
+  const writeSessionMetadata = vi.fn().mockResolvedValue('test-session-123');
+  const stub: unknown = {
+    sessionManager: createMockSessionManager(options),
+    page: {},
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: { writeSessionMetadata },
+  };
+  return stub as ToolContext; // minimal stub, not a complete ToolContext
+}
+
+describe('launchTool', () => {
+  beforeEach(() => {
+    vi.clearAllMocks(); // reset recorded calls on every mock between tests
+  });
+
+  describe('successful launch', () => { // result shape, prod prerequisites, input/context forwarding
+    it('returns session info on successful launch', async () => {
+      const mockState: ExtensionState = {
+        isLoaded: true,
+        currentUrl: 'chrome-extension://ext-123/home.html',
+        extensionId: 'ext-123',
+        isUnlocked: false,
+        currentScreen: 'home',
+        accountAddress: null,
+        networkName: null,
+        chainId: null,
+        balance: null,
+      };
+
+      const mockLaunchResult: SessionLaunchResult = {
+        sessionId: 'test-session-123',
+        extensionId: 'ext-123',
+        state: mockState,
+      };
+
+      const context = createMockContext({
+        hasActive: false,
+        launchResult: mockLaunchResult,
+      });
+      const input: LaunchInput = { stateMode: 'default' };
+
+      const result = await launchTool(input, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.sessionId).toBe('test-session-123');
+        expect(result.result.extensionId).toBe('ext-123');
+        expect(result.result.state).toStrictEqual(mockState);
+      }
+      expect(context.sessionManager.launch).toHaveBeenCalledWith(input); // input is forwarded as-is
+    });
+
+    it('includes prerequisites in prod mode', async () => {
+      const mockState: ExtensionState = {
+        isLoaded: true,
+        currentUrl: 'chrome-extension://ext-456/home.html',
+        extensionId: 'ext-456',
+        isUnlocked: true,
+        currentScreen: 'home',
+        accountAddress: '0x1234',
+        networkName: 'Ethereum Mainnet',
+        chainId: 1,
+        balance: '10 ETH',
+      };
+
+      const mockLaunchResult: SessionLaunchResult = {
+        sessionId: 'prod-session-456',
+        extensionId: 'ext-456',
+        state: mockState,
+      };
+
+      const context = createMockContext({
+        hasActive: false,
+        launchResult: mockLaunchResult,
+        environmentMode: 'prod',
+      });
+
+      const result = await launchTool({ stateMode: 'default' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.prerequisites).toBeDefined();
+        expect(result.result.prerequisites).toHaveLength(3); // one per PROD_MODE_PREREQUISITES entry in launch.ts
+        expect(result.result.prerequisites?.[0].step).toBe('Unlock Wallet');
+        expect(result.result.prerequisites?.[1].step).toBe('Configure Network');
+        expect(result.result.prerequisites?.[2].step).toBe('Set Up Accounts');
+      }
+    });
+
+    it('does not include prerequisites in e2e mode', async () => {
+      const mockState: ExtensionState = {
+        isLoaded: true,
+        currentUrl: 'chrome-extension://ext-123/home.html',
+        extensionId: 'ext-123',
+        isUnlocked: false,
+        currentScreen: 'home',
+        accountAddress: null,
+        networkName: null,
+        chainId: null,
+        balance: null,
+      };
+
+      const mockLaunchResult: SessionLaunchResult = {
+        sessionId: 'e2e-session-789',
+        extensionId: 'ext-123',
+        state: mockState,
+      };
+
+      const context = createMockContext({
+        hasActive: false,
+        launchResult: mockLaunchResult,
+        environmentMode: 'e2e',
+      });
+
+      const result = await launchTool({ stateMode: 'default' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.prerequisites).toBeUndefined();
+      }
+    });
+
+    it('passes through all launch input parameters', async () => {
+      const context = createMockContext({ hasActive: false });
+      const input: LaunchInput = {
+        stateMode: 'custom',
+        fixturePreset: 'test-preset',
+        autoBuild: false,
+        slowMo: 100,
+        goal: 'Test send flow',
+        flowTags: ['send', 'transaction'],
+        tags: ['smoke-test'],
+        seedContracts: ['hst', 'nfts'],
+        ports: {
+          anvil: 8546,
+          fixtureServer: 12346,
+        },
+      };
+
+      const result = await launchTool(input, context);
+
+      expect(result.ok).toBe(true);
+      expect(context.sessionManager.launch).toHaveBeenCalledWith(input);
+    });
+
+    it('calls setContext before launch when context is provided', async () => {
+      const context = createMockContext({ hasActive: false });
+      const input: LaunchInput = {
+        context: 'prod',
+        stateMode: 'onboarding',
+      };
+
+      await launchTool(input, context);
+
+      expect(context.sessionManager.setContext).toHaveBeenCalledWith('prod');
+      expect(context.sessionManager.launch).toHaveBeenCalledWith(input);
+      const setContextOrder = (
+        context.sessionManager.setContext as ReturnType<typeof vi.fn>
+      ).mock.invocationCallOrder[0]; // call-order index of the first setContext call
+      const launchOrder = (
+        context.sessionManager.launch as ReturnType<typeof vi.fn>
+      ).mock.invocationCallOrder[0]; // call-order index of the first launch call
+      expect(setContextOrder).toBeLessThan(launchOrder); // setContext ran first
+    });
+
+    it('does not call setContext when context is not provided', async () => {
+      const context = createMockContext({ hasActive: false });
+      const input: LaunchInput = { stateMode: 'default' };
+
+      await launchTool(input, context);
+
+      expect(context.sessionManager.setContext).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('session already running', () => { // launch requested while a session is active
+    it('returns error when session already active', async () => {
+      const context = createMockContext({ hasActive: true });
+
+      const result = await launchTool({ stateMode: 'default' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_SESSION_ALREADY_RUNNING);
+        expect(result.error.message).toBe(
+          'A session is already running. Call cleanup first, or use --force.',
+        );
+      }
+      expect(context.sessionManager.launch).not.toHaveBeenCalled(); // rejected before any launch attempt
+    });
+
+    it('cleans up and relaunches when force is true', async () => {
+      const context = createMockContext({ hasActive: true });
+      vi.spyOn(context.sessionManager, 'cleanup').mockResolvedValue(true); // launchTool awaits cleanup() on the force path
+
+      const result = await launchTool(
+        { stateMode: 'default', force: true },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(context.sessionManager.cleanup).toHaveBeenCalled();
+      expect(context.sessionManager.launch).toHaveBeenCalled();
+    });
+  });
+
+  describe('launch failures', () => { // how rejections from sessionManager.launch map to error codes
+    it('returns port conflict error for EADDRINUSE', async () => {
+      const context = createMockContext({ hasActive: false });
+      vi.spyOn(context.sessionManager, 'launch').mockRejectedValue(
+        new Error('listen EADDRINUSE: address already in use :::8545'),
+      );
+
+      const input: LaunchInput = { stateMode: 'default' };
+      const result = await launchTool(input, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE);
+        expect(result.error.message).toContain('Port conflict');
+        expect(result.error.message).toContain('EADDRINUSE'); // original message is appended to the prefix
+      }
+    });
+
+    it('returns port conflict error for port keyword in message', async () => {
+      const context = createMockContext({ hasActive: false });
+      vi.spyOn(context.sessionManager, 'launch').mockRejectedValue(
+        new Error('port 8545 is already in use'), // no EADDRINUSE token, only the word "port"
+      );
+
+      const result = await launchTool({ stateMode: 'default' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE);
+        expect(result.error.message).toContain('Port conflict');
+      }
+    });
+
+    it('returns generic launch failed error for other errors', async () => {
+      const context = createMockContext({ hasActive: false });
+      vi.spyOn(context.sessionManager, 'launch').mockRejectedValue(
+        new Error('Browser failed to start'),
+      );
+
+      const result = await launchTool({ stateMode: 'default' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED);
+        expect(result.error.message).toContain('Launch failed');
+        expect(result.error.message).toContain('Browser failed to start');
+      }
+    });
+
+    it('handles non-Error exceptions', async () => {
+      const context = createMockContext({ hasActive: false });
+      vi.spyOn(context.sessionManager, 'launch').mockRejectedValue(
+        'string error', // rejection value that is not an Error instance
+      );
+
+      const result = await launchTool({ stateMode: 'default' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED);
+        expect(result.error.message).toContain('Launch failed');
+      }
+    });
+  });
+});
diff --git a/src/tools/launch.ts b/src/tools/launch.ts
new file mode 100644
index 0000000..16a7bc2
--- /dev/null
+++ b/src/tools/launch.ts
@@ -0,0 +1,76 @@
+import type { LaunchInput, LaunchPrerequisite, LaunchResult } from './types';
+import { ErrorCodes } from './types';
+import { createToolError, createToolSuccess } from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+import { extractErrorMessage } from '../utils';
+
+// Manual steps returned alongside every successful prod-mode launch.
+const PROD_MODE_PREREQUISITES: LaunchPrerequisite[] = (
+  [
+    [
+      'Unlock Wallet',
+      'The wallet must be unlocked before interacting with it. Use the extension UI to enter your password.',
+    ],
+    [
+      'Configure Network',
+      'Ensure the correct network is selected (e.g., Ethereum Mainnet, Sepolia, or custom network).',
+    ],
+    [
+      'Set Up Accounts',
+      'Import or create accounts as needed. Ensure the active account has sufficient funds for transactions.',
+    ],
+  ] as const
+).map(([step, description]) => ({ step, description }));
+
+/**
+ * Launches a new browser session with the configured extension.
+ *
+ * @param input - The launch configuration options; `force` cleans up a running
+ * session first and `context` is applied before the launch.
+ * @param context - The tool execution context.
+ * @returns The launch result with session details and prerequisites.
+ */
+export async function launchTool(
+  input: LaunchInput,
+  context: ToolContext,
+): Promise<ToolResponse<LaunchResult>> {
+  const { sessionManager } = context;
+
+  try {
+    if (sessionManager.hasActiveSession()) {
+      if (!input.force) {
+        return createToolError(
+          ErrorCodes.MM_SESSION_ALREADY_RUNNING,
+          'A session is already running. Call cleanup first, or use --force.',
+        );
+      }
+      await sessionManager.cleanup();
+    }
+
+    if (input.context) {
+      sessionManager.setContext(input.context);
+    }
+
+    const result = await sessionManager.launch(input);
+    const isProdMode = sessionManager.getEnvironmentMode() === 'prod';
+
+    return createToolSuccess({
+      ...result,
+      ...(isProdMode && { prerequisites: PROD_MODE_PREREQUISITES }),
+    });
+  } catch (error) {
+    const message = extractErrorMessage(error);
+    // Match "port"/"ports" only as a whole word (any case): a plain substring
+    // test also flags unrelated text such as "viewport" or "unsupported".
+    if (/EADDRINUSE|\bports?\b/iu.test(message)) {
+      return createToolError(
+        ErrorCodes.MM_PORT_IN_USE,
+        `Port conflict: ${message}`,
+      );
+    }
+    return createToolError(
+      ErrorCodes.MM_LAUNCH_FAILED,
+      `Launch failed: ${message}`,
+    );
+  }
+}
diff --git a/src/tools/navigation.test.ts b/src/tools/navigation.test.ts
new file mode 100644
index 0000000..c3ef76b
--- /dev/null
+++ b/src/tools/navigation.test.ts
@@ -0,0 +1,471 @@
+/**
+ * Unit tests for navigation tool handlers.
+ *
+ * Tests navigateTool, waitForNotificationTool, switchToTabTool, and closeTabTool
+ * with various navigation targets, tab operations, and error scenarios.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+import {
+  navigateTool,
+  waitForNotificationTool,
+  switchToTabTool,
+  closeTabTool,
+} from './navigation.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
+
+// Page stub exposing just url(), bringToFront() and close() as vi mocks.
+function createMockPage(url = 'about:blank') {
+  const resolved = () => vi.fn().mockResolvedValue(undefined);
+  const urlMock = vi.fn().mockReturnValue(url);
+
+  return { url: urlMock, bringToFront: resolved(), close: resolved() };
+}
+
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    page?: ReturnType<typeof createMockPage>;
+    trackedPages?: { page: unknown; role: string; url: string }[];
+  } = {},
+): ToolContext {
+  // Defaults: an active session and a blank mock page.
+  const activePage = options.page ?? createMockPage();
+  const stub: unknown = {
+    sessionManager: createMockSessionManager({
+      hasActive: options.hasActive ?? true,
+      trackedPages: options.trackedPages as never,
+    }),
+    page: activePage,
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  };
+  return stub as ToolContext;
+}
+
+describe('navigation', () => {
+  describe('navigateTool', () => { // one case per screen, then input validation and failure paths
+    it('navigates to home screen', async () => {
+      const page = createMockPage('chrome-extension://ext-123/home.html');
+      const context = createMockContext({ page });
+
+      const result = await navigateTool({ screen: 'home' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.navigated).toBe(true);
+        expect(result.result.currentUrl).toBe(
+          'chrome-extension://ext-123/home.html',
+        );
+      }
+      expect(context.sessionManager.navigateToHome).toHaveBeenCalled();
+    });
+
+    it('navigates to settings screen', async () => {
+      const page = createMockPage('chrome-extension://ext-123/settings.html');
+      const context = createMockContext({ page });
+
+      const result = await navigateTool({ screen: 'settings' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.currentUrl).toBe(
+          'chrome-extension://ext-123/settings.html',
+        );
+      }
+      expect(context.sessionManager.navigateToSettings).toHaveBeenCalled();
+    });
+
+    it('navigates to notification screen', async () => {
+      const page = createMockPage(
+        'chrome-extension://ext-123/notification.html',
+      );
+      const context = createMockContext({ page });
+
+      const result = await navigateTool({ screen: 'notification' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.currentUrl).toBe(
+          'chrome-extension://ext-123/notification.html',
+        );
+      }
+      expect(context.sessionManager.navigateToNotification).toHaveBeenCalled();
+    });
+
+    it('navigates to a custom URL', async () => {
+      const page = createMockPage('https://app.uniswap.org');
+      const context = createMockContext({ page });
+      vi.spyOn(context.sessionManager, 'navigateToUrl').mockResolvedValue(
+        page as never, // same mock page as context.page, which the tool reads the URL from
+      );
+
+      const result = await navigateTool(
+        { screen: 'url', url: 'https://app.uniswap.org' },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.currentUrl).toBe('https://app.uniswap.org');
+      }
+      expect(context.sessionManager.navigateToUrl).toHaveBeenCalledWith(
+        'https://app.uniswap.org',
+      );
+    });
+
+    it('returns error when URL is missing', async () => {
+      const context = createMockContext();
+
+      const result = await navigateTool({ screen: 'url' } as never, context); // cast: url deliberately omitted
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('url is required');
+      }
+    });
+
+    it('returns error for unknown screen', async () => {
+      const context = createMockContext();
+
+      const result = await navigateTool(
+        { screen: 'invalid' } as never, // cast: value outside the supported screens
+        context,
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+        expect(result.error.message).toContain('Unknown screen');
+      }
+    });
+
+    it('classifies navigation failures', async () => {
+      const context = createMockContext();
+      vi.spyOn(context.sessionManager, 'navigateToHome').mockRejectedValue(
+        new Error('Navigation failed'),
+      );
+
+      const result = await navigateTool({ screen: 'home' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NAVIGATION_FAILED);
+      }
+    });
+
+    it('returns no active session error when session is missing', async () => {
+      const context = createMockContext({ hasActive: false });
+
+      const result = await navigateTool({ screen: 'home' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+  });
+
+  describe('waitForNotificationTool', () => { // timeout selection, timeout classification, missing session
+    it('waits for notification popup with default timeout', async () => {
+      const notificationPage = createMockPage(
+        'chrome-extension://ext-123/notification.html',
+      );
+      const context = createMockContext();
+      vi.spyOn(
+        context.sessionManager,
+        'waitForNotificationPage',
+      ).mockResolvedValue(notificationPage as never);
+
+      const result = await waitForNotificationTool({}, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.found).toBe(true);
+        expect(result.result.pageUrl).toBe(
+          'chrome-extension://ext-123/notification.html',
+        );
+      }
+      expect(
+        context.sessionManager.waitForNotificationPage,
+      ).toHaveBeenCalledWith(15000); // default the tool applies when timeoutMs is omitted
+    });
+
+    it('uses custom timeout value', async () => {
+      const notificationPage = createMockPage(
+        'chrome-extension://ext-123/notification.html',
+      );
+      const context = createMockContext();
+      vi.spyOn(
+        context.sessionManager,
+        'waitForNotificationPage',
+      ).mockResolvedValue(notificationPage as never);
+
+      const result = await waitForNotificationTool(
+        { timeoutMs: 30000 },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(
+        context.sessionManager.waitForNotificationPage,
+      ).toHaveBeenCalledWith(30000); // caller-supplied timeout is passed through
+    });
+
+    it('classifies notification timeout errors', async () => {
+      const context = createMockContext();
+      vi.spyOn(
+        context.sessionManager,
+        'waitForNotificationPage',
+      ).mockRejectedValue(new Error('Timeout 15000ms exceeded'));
+
+      const result = await waitForNotificationTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NOTIFICATION_TIMEOUT);
+      }
+    });
+
+    it('returns no active session error when session is missing', async () => {
+      const context = createMockContext({ hasActive: false });
+
+      const result = await waitForNotificationTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+  });
+
+  describe('switchToTabTool', () => { // selection by role or URL prefix, plus invalid-input and not-found paths
+    it('switches to tab by role', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const dappPage = createMockPage('https://app.uniswap.org');
+      const context = createMockContext({
+        page: extensionPage,
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+          { page: dappPage, role: 'dapp', url: 'https://app.uniswap.org' },
+        ],
+      });
+
+      const result = await switchToTabTool({ role: 'dapp' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.switched).toBe(true);
+        expect(result.result.activeTab.role).toBe('dapp');
+        expect(result.result.activeTab.url).toBe('https://app.uniswap.org');
+      }
+      expect(dappPage.bringToFront).toHaveBeenCalled();
+      expect(context.sessionManager.setActivePage).toHaveBeenCalledWith(
+        dappPage,
+      );
+    });
+
+    it('switches to tab by URL prefix', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const dappPage = createMockPage('https://app.uniswap.org/swap');
+      const context = createMockContext({
+        page: extensionPage,
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+          {
+            page: dappPage,
+            role: 'dapp',
+            url: 'https://app.uniswap.org/swap',
+          },
+        ],
+      });
+
+      const result = await switchToTabTool(
+        { url: 'https://app.uniswap.org' }, // prefix of the tracked /swap URL
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.activeTab.url).toBe(
+          'https://app.uniswap.org/swap',
+        );
+      }
+      expect(dappPage.bringToFront).toHaveBeenCalled();
+    });
+
+    it('returns invalid input when neither role nor url is provided', async () => {
+      const context = createMockContext();
+
+      const result = await switchToTabTool({} as never, context); // cast: empty selection criteria
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+      }
+    });
+
+    it('returns tab not found when no matching tab exists', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const context = createMockContext({
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+        ],
+      });
+
+      const result = await switchToTabTool({ role: 'dapp' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND);
+        expect(result.error.message).toContain('No tab found matching: dapp');
+      }
+    });
+  });
+
+  describe('closeTabTool', () => { // closing by role or URL prefix, active-tab fallback, error paths
+    it('closes tab by role', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const dappPage = createMockPage('https://app.uniswap.org');
+      const context = createMockContext({
+        page: extensionPage,
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+          { page: dappPage, role: 'dapp', url: 'https://app.uniswap.org' },
+        ],
+      });
+
+      const result = await closeTabTool({ role: 'dapp' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.closed).toBe(true);
+        expect(result.result.closedUrl).toBe('https://app.uniswap.org');
+      }
+      expect(dappPage.close).toHaveBeenCalled();
+    });
+
+    it('closes tab by URL prefix', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const dappPage = createMockPage('https://app.uniswap.org/swap');
+      const context = createMockContext({
+        page: extensionPage,
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+          {
+            page: dappPage,
+            role: 'dapp',
+            url: 'https://app.uniswap.org/swap',
+          },
+        ],
+      });
+
+      const result = await closeTabTool(
+        { url: 'https://app.uniswap.org' }, // prefix of the tracked /swap URL
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.closedUrl).toBe('https://app.uniswap.org/swap');
+      }
+      expect(dappPage.close).toHaveBeenCalled();
+    });
+
+    it('switches to extension tab when closing the active tab', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const dappPage = createMockPage('https://app.uniswap.org');
+      const context = createMockContext({
+        page: dappPage, // the dapp tab is the active page in this case
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+          { page: dappPage, role: 'dapp', url: 'https://app.uniswap.org' },
+        ],
+      });
+
+      const result = await closeTabTool({ role: 'dapp' }, context);
+
+      expect(result.ok).toBe(true);
+      expect(extensionPage.bringToFront).toHaveBeenCalled();
+      expect(context.sessionManager.setActivePage).toHaveBeenCalledWith(
+        extensionPage,
+      );
+      expect(dappPage.close).toHaveBeenCalled();
+    });
+
+    it('returns invalid input when neither role nor url is provided', async () => {
+      const context = createMockContext();
+
+      const result = await closeTabTool({} as never, context); // cast: empty selection criteria
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT);
+      }
+    });
+
+    it('returns tab not found when no matching tab exists', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const context = createMockContext({
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+        ],
+      });
+
+      const result = await closeTabTool({ role: 'dapp' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND);
+        expect(result.error.message).toContain('No tab found matching: dapp');
+      }
+    });
+  });
+});
diff --git a/src/tools/navigation.ts b/src/tools/navigation.ts
new file mode 100644
index 0000000..5844a5f
--- /dev/null
+++ b/src/tools/navigation.ts
@@ -0,0 +1,247 @@
+import {
+  classifyNavigationError,
+  classifyNotificationError,
+  classifyTabError,
+} from './error-classification.js';
+import type {
+  CloseTabInput,
+  CloseTabResult,
+  NavigateInput,
+  NavigateResult,
+  SwitchToTabInput,
+  SwitchToTabResult,
+  WaitForNotificationInput,
+  WaitForNotificationResult,
+} from './types';
+import { ErrorCodes } from './types';
+import { DEFAULT_INTERACTION_TIMEOUT_MS } from './utils/constants.js';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Navigates the active session to a built-in screen or to an arbitrary URL.
+ *
+ * Invalid input (missing url, unknown screen) yields `MM_INVALID_INPUT`;
+ * errors thrown while navigating go through `classifyNavigationError`.
+ *
+ * @param input - The target screen; `url` is required when screen is "url".
+ * @param context - The tool execution context.
+ * @returns The navigation result with the URL reported by `context.page`.
+ */
+export async function navigateTool(
+  input: NavigateInput,
+  context: ToolContext,
+): Promise<ToolResponse<NavigateResult>> {
+  const sessionError = requireActiveSession<NavigateResult>(context);
+  if (sessionError) {
+    return sessionError;
+  }
+
+  if (input.screen === 'url' && !input.url) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'url is required when screen is "url"',
+    );
+  }
+
+  try {
+    const { sessionManager } = context;
+    if (input.screen === 'url') {
+      await sessionManager.navigateToUrl(input.url as string);
+    } else {
+      // Built-in screens map one-to-one onto session manager calls.
+      const builtInScreens = new Map<string, () => Promise<unknown>>([
+        ['home', async () => sessionManager.navigateToHome()],
+        ['settings', async () => sessionManager.navigateToSettings()],
+        ['notification', async () => sessionManager.navigateToNotification()],
+      ]);
+      const openScreen = builtInScreens.get(input.screen);
+      if (!openScreen) {
+        return createToolError(
+          ErrorCodes.MM_INVALID_INPUT,
+          `Unknown screen: ${String(input.screen)}`,
+        );
+      }
+      await openScreen();
+    }
+
+    // NOTE(review): the URL comes from context.page rather than from the
+    // value returned by the session manager; confirm they are the same page.
+    return createToolSuccess({
+      navigated: true,
+      currentUrl: context.page.url(),
+    });
+  } catch (error) {
+    const { code, message } = classifyNavigationError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Waits up to the given timeout for the session manager to hand back a
+ * notification page.
+ *
+ * @param input - Wait options; `timeoutMs` falls back to
+ * `DEFAULT_INTERACTION_TIMEOUT_MS` when not set.
+ * @param context - The tool execution context.
+ * @returns `found: true` with the notification page URL, or a classified error.
+ */
+export async function waitForNotificationTool(
+  input: WaitForNotificationInput,
+  context: ToolContext,
+): Promise<ToolResponse<WaitForNotificationResult>> {
+  const noSession = requireActiveSession<WaitForNotificationResult>(context);
+  if (noSession) {
+    return noSession;
+  }
+
+  // A nullish timeout falls back to the shared interaction default.
+  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
+
+  try {
+    const { sessionManager } = context;
+    const popup = await sessionManager.waitForNotificationPage(timeoutMs);
+
+    return createToolSuccess({ found: true, pageUrl: popup.url() });
+  } catch (error) {
+    // The classifier decides the error code, e.g. a notification timeout.
+    const { code, message } = classifyNotificationError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Makes the tracked tab matching the given role or URL prefix the active page.
+ *
+ * When both are supplied the role is used and the URL is ignored. The matched
+ * page is brought to the front before the session manager is updated.
+ *
+ * @param input - The tab selection criteria (role or URL prefix).
+ * @param context - The tool execution context.
+ * @returns The active tab info after switching.
+ */
+export async function switchToTabTool(
+  input: SwitchToTabInput,
+  context: ToolContext,
+): Promise<ToolResponse<SwitchToTabResult>> {
+  const sessionError = requireActiveSession<SwitchToTabResult>(context);
+  if (sessionError) {
+    return sessionError;
+  }
+
+  const { role, url } = input;
+  if (!role && !url) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Either role or url must be provided',
+    );
+  }
+
+  try {
+    const { sessionManager } = context;
+    const tabs = sessionManager.getTrackedPages();
+    // A role, when given, wins over the URL prefix.
+    const match = tabs.find((tab) =>
+      role ? tab.role === role : Boolean(url && tab.url.startsWith(url)),
+    );
+
+    if (!match) {
+      // List every tracked tab so the caller can pick a valid target.
+      const availableTabs = JSON.stringify(
+        tabs.map((tab) => ({ role: tab.role, url: tab.url })),
+      );
+      throw new Error(
+        `No tab found matching: ${role ?? url}. Available tabs: ${availableTabs}`,
+      );
+    }
+
+    const { page } = match;
+    await page.bringToFront();
+    sessionManager.setActivePage(page);
+
+    // Look the page up again so the role is reported from the tracked list as
+    // it stands after activation; 'other' is used if it is no longer listed.
+    const current = sessionManager
+      .getTrackedPages()
+      .find((tab) => tab.page === page);
+
+    return createToolSuccess({
+      switched: true,
+      activeTab: { role: current?.role ?? 'other', url: page.url() },
+    });
+  } catch (error) {
+    const { code, message } = classifyTabError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Closes the tracked tab that matches the given role or URL prefix.
+ *
+ * When both are supplied the role is used and the URL is ignored. If the tab
+ * being closed is the active page, another tracked tab (the extension tab
+ * when available) is brought to the front and made active first.
+ *
+ * @param input - The tab selection criteria (role or URL prefix).
+ * @param context - The tool execution context.
+ * @returns The close result with the tracked URL of the closed tab.
+ */
+export async function closeTabTool(
+  input: CloseTabInput,
+  context: ToolContext,
+): Promise<ToolResponse<CloseTabResult>> {
+  const sessionError = requireActiveSession<CloseTabResult>(context);
+  if (sessionError) {
+    return sessionError;
+  }
+
+  const { role, url } = input;
+  if (!role && !url) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Either role or url must be provided',
+    );
+  }
+
+  try {
+    const { sessionManager } = context;
+    const tabs = sessionManager.getTrackedPages();
+    // A role, when given, wins over the URL prefix.
+    const target = tabs.find((tab) =>
+      role ? tab.role === role : Boolean(url && tab.url.startsWith(url)),
+    );
+    if (!target) {
+      throw new Error(`No tab found matching: ${role ?? url}`);
+    }
+
+    // Capture the tracked URL up front, before the tab goes away.
+    const closedUrl = target.url;
+
+    if (target.page === context.page) {
+      // Hand focus to a surviving tab, preferring the extension one.
+      const survivors = tabs.filter((tab) => tab.page !== target.page);
+      const fallback =
+        survivors.find((tab) => tab.role === 'extension') ?? survivors[0];
+      // With no other tracked tab, the active page is left as it is.
+      if (fallback) {
+        await fallback.page.bringToFront();
+        sessionManager.setActivePage(fallback.page);
+      }
+    }
+
+    // Close last, once any focus hand-over has been done.
+    await target.page.close();
+
+    return createToolSuccess({
+      closed: true,
+      closedUrl,
+    });
+  } catch (error) {
+    const { code, message } = classifyTabError(error);
+    return createToolError(code, message);
+  }
+}
diff --git a/src/tools/registry.test.ts b/src/tools/registry.test.ts
new file mode 100644
index 0000000..98f54ff
--- /dev/null
+++ b/src/tools/registry.test.ts
@@ -0,0 +1,82 @@
+import { describe, expect, it } from 'vitest';
+
+import { toolRegistry, TOOL_CATEGORIES, getToolCategory } from './registry.js';
+
+describe('toolRegistry', () => {
+  it('has expected tool entries', () => {
+    const expectedTools = [
+      'build',
+      'launch',
+      'cleanup',
+      'click',
+      'type',
+      'navigate',
+      'screenshot',
+      'describe_screen',
+      'clipboard',
+      'run_steps',
+    ];
+
+    for (const toolName of expectedTools) {
+      expect(toolRegistry.has(toolName)).toBe(true);
+    }
+  });
+
+  it('returns a function for launch', () => {
+    expect(typeof toolRegistry.get('launch')).toBe('function');
+  });
+
+  it('returns undefined for a nonexistent tool', () => {
+    expect(toolRegistry.get('nonexistent')).toBeUndefined();
+  });
+
+  it('has the expected number of entries', () => {
+    expect(toolRegistry.size).toBe(28);
+  });
+
+  it('stores only functions as values', () => {
+    for (const handler of toolRegistry.values()) {
+      expect(typeof handler).toBe('function');
+    }
+  });
+
+  it('uses unprefixed keys', () => {
+    for (const key of toolRegistry.keys()) {
+      expect(key.startsWith('mm_')).toBe(false);
+    }
+  });
+});
+
+describe('TOOL_CATEGORIES and getToolCategory', () => {
+  it('every key in toolRegistry exists in TOOL_CATEGORIES', () => {
+    for (const key of toolRegistry.keys()) {
+      expect(TOOL_CATEGORIES).toHaveProperty(key);
+    }
+  });
+
+  it('every key in TOOL_CATEGORIES exists in toolRegistry', () => {
+    for (const key of Object.keys(TOOL_CATEGORIES)) {
+      expect(toolRegistry.has(key)).toBe(true);
+    }
+  });
+
+  it('getToolCategory returns mutating for nonexistent tool', () => {
+    expect(getToolCategory('nonexistent_tool')).toBe('mutating');
+  });
+
+  it('getToolCategory returns mutating for click', () => {
+    expect(getToolCategory('click')).toBe('mutating');
+  });
+
+  it('getToolCategory returns readonly for knowledge_last', () => {
+    expect(getToolCategory('knowledge_last')).toBe('readonly');
+  });
+
+  it('getToolCategory returns discovery for describe_screen', () => {
+    expect(getToolCategory('describe_screen')).toBe('discovery');
+  });
+
+  it('getToolCategory returns batch for run_steps', () => {
+    expect(getToolCategory('run_steps')).toBe('batch');
+  });
+});
diff --git a/src/tools/registry.ts b/src/tools/registry.ts
new file mode 100644
index 0000000..0df43dd
--- /dev/null
+++ b/src/tools/registry.ts
@@ -0,0 +1,125 @@
+import { runStepsTool } from './batch.js';
+import { buildTool } from './build.js';
+import { cleanupTool } from './cleanup.js';
+import { clipboardTool } from './clipboard.js';
+import { getContextTool, setContextTool } from './context.js';
+import {
+  accessibilitySnapshotTool,
+  describeScreenTool,
+  listTestIdsTool,
+} from './discovery-tools.js';
+import {
+  clickTool,
+  getTextTool,
+  typeTool,
+  waitForTool,
+} from './interaction.js';
+import {
+  knowledgeLastTool,
+  knowledgeSearchTool,
+  knowledgeSessionsTool,
+  knowledgeSummarizeTool,
+} from './knowledge.js';
+import { launchTool } from './launch.js';
+import {
+  closeTabTool,
+  navigateTool,
+  switchToTabTool,
+  waitForNotificationTool,
+} from './navigation.js';
+import { screenshotTool } from './screenshot.js';
+import {
+  getContractAddressTool,
+  listContractsTool,
+  seedContractTool,
+  seedContractsTool,
+} from './seeding.js';
+import { getStateTool } from './state.js';
+import type { ToolFunction } from '../types/http.js';
+
+// This map holds tools with heterogeneous parameter types. Function parameters
+// are contravariant, so ToolFunction<SpecificInput, ...> is not assignable to
+// ToolFunction<unknown, ...>; `any` is the standard pattern for type-erased
+// function maps. Input safety is enforced at the Zod validation boundary.
+export const toolRegistry = new Map<string, ToolFunction<any, any>>([
+  ['build', buildTool],
+  ['launch', launchTool],
+  ['cleanup', cleanupTool],
+  ['get_state', getStateTool],
+  ['navigate', navigateTool],
+  ['wait_for_notification', waitForNotificationTool],
+  ['switch_to_tab', switchToTabTool],
+  ['close_tab', closeTabTool],
+  ['list_testids', listTestIdsTool],
+  ['accessibility_snapshot', accessibilitySnapshotTool],
+  ['describe_screen', describeScreenTool],
+  ['screenshot', screenshotTool],
+  ['click', clickTool],
+  ['type', typeTool],
+  ['wait_for', waitForTool],
+  ['get_text', getTextTool],
+  ['knowledge_last', knowledgeLastTool],
+  ['knowledge_search', knowledgeSearchTool],
+  ['knowledge_summarize', knowledgeSummarizeTool],
+  ['knowledge_sessions', knowledgeSessionsTool],
+  ['seed_contract', seedContractTool],
+  ['seed_contracts', seedContractsTool],
+  ['get_contract_address', getContractAddressTool],
+  ['list_contracts', listContractsTool],
+  ['run_steps', runStepsTool],
+  ['set_context', setContextTool],
+  ['get_context', getContextTool],
+  ['clipboard', clipboardTool],
+]);
+
+export type ToolCategory = 'mutating' | 'readonly' | 'discovery' | 'batch';
+
+export const TOOL_CATEGORIES: Record<string, ToolCategory> = {
+  // MUTATING (13)
+  click: 'mutating',
+  type: 'mutating',
+  navigate: 'mutating',
+  launch: 'mutating',
+  cleanup: 'mutating',
+  switch_to_tab: 'mutating',
+  close_tab: 'mutating',
+  clipboard: 'mutating',
+  build: 'mutating',
+  wait_for: 'mutating',
+  wait_for_notification: 'mutating',
+  seed_contract: 'mutating',
+  seed_contracts: 'mutating',
+  // READONLY (10)
+  knowledge_last: 'readonly',
+  knowledge_search: 'readonly',
+  knowledge_summarize: 'readonly',
+  knowledge_sessions: 'readonly',
+  get_text: 'readonly',
+  get_state: 'readonly',
+  get_context: 'readonly',
+  // set_context is blocked while a session is active (MM_CONTEXT_SWITCH_BLOCKED),
+  // so Playwright observations would never be collected. Classified as readonly
+  // since it never runs in a state where page observations are meaningful.
+  set_context: 'readonly',
+  list_contracts: 'readonly',
+  get_contract_address: 'readonly',
+  // DISCOVERY (4)
+  describe_screen: 'discovery',
+  list_testids: 'discovery',
+  accessibility_snapshot: 'discovery',
+  screenshot: 'discovery',
+  // BATCH (1)
+  run_steps: 'batch',
+};
+
+/**
+ * Returns a tool's category. Unknown names (including inherited object keys
+ * like 'constructor') default to 'mutating' so new tools get observations.
+ *
+ * @param toolName - The registered tool name to look up.
+ * @returns The tool's category, or 'mutating' for unknown tools.
+ */
+export function getToolCategory(toolName: string): ToolCategory {
+  const known = Object.prototype.hasOwnProperty.call(TOOL_CATEGORIES, toolName);
+  return (known && TOOL_CATEGORIES[toolName]) || 'mutating';
+}
diff --git a/src/tools/screenshot.test.ts b/src/tools/screenshot.test.ts
new file mode 100644
index 0000000..1b2ee2e
--- /dev/null
+++ b/src/tools/screenshot.test.ts
@@ -0,0 +1,261 @@
+/**
+ * Unit tests for screenshot tool handler.
+ *
+ * Tests screenshotTool with various options including base64 encoding,
+ * selector scoping, and error handling.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+import { screenshotTool } from './screenshot.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
+
+// Minimal ToolContext stub; `hasActive` (default true) feeds the session mock.
+function createMockContext(
+  options: { hasActive?: boolean } = {},
+): ToolContext {
+  const { hasActive = true } = options;
+  const sessionManager = createMockSessionManager({ hasActive });
+  const stub = {
+    sessionManager,
+    page: {} as ToolContext['page'],
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  };
+  return stub as unknown as ToolContext;
+}
+
+describe('screenshotTool', () => {
+  describe('basic screenshot', () => {
+    it('captures full page screenshot by default', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({
+        path: '/path/to/screenshot.png',
+        width: 1280,
+        height: 720,
+        base64: 'mock-base64',
+      });
+
+      const result = await screenshotTool({ name: 'test-screenshot' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.path).toBe('/path/to/screenshot.png');
+        expect(result.result.width).toBe(1280);
+        expect(result.result.height).toBe(720);
+        expect(result.result.base64).toBeUndefined();
+      }
+      expect(context.sessionManager.screenshot).toHaveBeenCalledWith({
+        name: 'test-screenshot',
+        fullPage: true,
+        selector: undefined,
+      });
+    });
+
+    it('captures viewport-only screenshot when fullPage is false', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({
+        path: '/path/to/screenshot.png',
+        width: 1280,
+        height: 720,
+        base64: 'mock-base64',
+      });
+
+      const result = await screenshotTool(
+        {
+          name: 'viewport-screenshot',
+          fullPage: false,
+        },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(context.sessionManager.screenshot).toHaveBeenCalledWith({
+        name: 'viewport-screenshot',
+        fullPage: false,
+        selector: undefined,
+      });
+    });
+  });
+
+  describe('with base64 encoding', () => {
+    it('includes base64 when includeBase64 is true', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({
+        path: '/path/to/screenshot.png',
+        width: 1280,
+        height: 720,
+        base64:
+          'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
+      });
+
+      const result = await screenshotTool(
+        {
+          name: 'base64-screenshot',
+          includeBase64: true,
+        },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.base64).toBe(
+          'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
+        );
+      }
+    });
+
+    it('excludes base64 when includeBase64 is false', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({
+        path: '/path/to/screenshot.png',
+        width: 1280,
+        height: 720,
+        base64:
+          'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
+      });
+
+      const result = await screenshotTool(
+        {
+          name: 'no-base64-screenshot',
+          includeBase64: false,
+        },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.base64).toBeUndefined();
+      }
+    });
+  });
+
+  describe('with selector scoping', () => {
+    it('captures screenshot of specific element', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({
+        path: '/path/to/element-screenshot.png',
+        width: 400,
+        height: 200,
+        base64: 'mock-base64',
+      });
+
+      const result = await screenshotTool(
+        {
+          name: 'element-screenshot',
+          selector: '[data-testid="account-menu"]',
+        },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.width).toBe(400);
+        expect(result.result.height).toBe(200);
+      }
+      expect(context.sessionManager.screenshot).toHaveBeenCalledWith({
+        name: 'element-screenshot',
+        fullPage: true,
+        selector: '[data-testid="account-menu"]',
+      });
+    });
+
+    it('combines selector with fullPage false', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({
+        path: '/path/to/element-screenshot.png',
+        width: 400,
+        height: 200,
+        base64: 'mock-base64',
+      });
+
+      const result = await screenshotTool(
+        {
+          name: 'element-viewport-screenshot',
+          selector: '.modal-content',
+          fullPage: false,
+        },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(context.sessionManager.screenshot).toHaveBeenCalledWith({
+        name: 'element-viewport-screenshot',
+        fullPage: false,
+        selector: '.modal-content',
+      });
+    });
+  });
+
+  describe('error handling', () => {
+    it('generates default name when not provided', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({
+        path: '/path/to/screenshot.png',
+        width: 1280,
+        height: 720,
+      });
+
+      const result = await screenshotTool({}, context);
+
+      expect(result.ok).toBe(true);
+      expect(context.sessionManager.screenshot).toHaveBeenCalledWith(
+        expect.objectContaining({
+          name: expect.stringMatching(/^screenshot-\d+$/u),
+        }),
+      );
+    });
+
+    it('returns error when no active session', async () => {
+      const context = createMockContext({ hasActive: false });
+
+      const result = await screenshotTool({ name: 'test-screenshot' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+
+    it('returns error when screenshot fails', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockRejectedValue(
+        new Error('Screenshot failed'),
+      );
+
+      const result = await screenshotTool({ name: 'test-screenshot' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_SCREENSHOT_FAILED);
+        expect(result.error.message).toContain('Screenshot failed');
+      }
+    });
+
+    it('returns error when page is closed', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockRejectedValue(
+        new Error('Target page, context or browser has been closed'),
+      );
+
+      const result = await screenshotTool({ name: 'test-screenshot' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED);
+      }
+    });
+  });
+});
diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts
new file mode 100644
index 0000000..5a842c4
--- /dev/null
+++ b/src/tools/screenshot.ts
@@ -0,0 +1,49 @@
+import { classifyScreenshotError } from './error-classification.js';
+import type { ScreenshotInput, ScreenshotToolResult } from './types';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Takes a screenshot through the session manager and returns its metadata.
+ * A timestamp-based name is generated when none is supplied and `fullPage`
+ * defaults to true.
+ *
+ * @param input - The screenshot options including name, selector, and base64 flag.
+ * @param context - The tool execution context.
+ * @returns The screenshot path and dimensions, plus base64 data when requested.
+ */
+export async function screenshotTool(
+  input: ScreenshotInput,
+  context: ToolContext,
+): Promise<ToolResponse<ScreenshotToolResult>> {
+  const sessionError = requireActiveSession<ScreenshotToolResult>(context);
+  if (sessionError) {
+    return sessionError;
+  }
+
+  try {
+    // Unnamed screenshots get a `screenshot-<timestamp>` name.
+    const { name, fullPage, selector } = input;
+    const capture = await context.sessionManager.screenshot({
+      name: name ?? `screenshot-${Date.now()}`,
+      fullPage: fullPage ?? true,
+      selector,
+    });
+
+    // Base64 data is only forwarded when the caller opted in.
+    const { path, width, height } = capture;
+    const payload: ScreenshotToolResult = input.includeBase64
+      ? { path, width, height, base64: capture.base64 }
+      : { path, width, height };
+
+    return createToolSuccess(payload);
+  } catch (error) {
+    // Failures are classified into an error code and message for the caller.
+    const { code, message } = classifyScreenshotError(error);
+    return createToolError(code, message);
+  }
+}
diff --git a/src/tools/seeding.test.ts b/src/tools/seeding.test.ts
new file mode 100644
index 0000000..81738a1
--- /dev/null
+++ b/src/tools/seeding.test.ts
@@ -0,0 +1,382 @@
+/**
+ * Unit tests for seeding tool handlers.
+ *
+ * Tests contract deployment handlers including single/multiple contract deployment,
+ * address lookup, and contract listing with ContractSeedingCapability.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+import {
+  seedContractTool,
+  seedContractsTool,
+  getContractAddressTool,
+  listContractsTool,
+} from './seeding.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types';
+import type { ContractSeedingCapability } from '../capabilities/types.js';
+import type { ToolContext } from '../types/http.js';
+
+// Capability stub whose methods are all bare vi.fn() mocks; individual tests
+// spy on the methods they exercise.
+const createMockSeedingCapability = (): ContractSeedingCapability => ({
+  deployContract: vi.fn(),
+  deployContracts: vi.fn(),
+  getContractAddress: vi.fn(),
+  listDeployedContracts: vi.fn(),
+  getAvailableContracts: vi.fn(),
+  clearRegistry: vi.fn(),
+  initialize: vi.fn(),
+});
+
+// ToolContext stub for seeding tests: `workflowCapability` is exposed as
+// workflowContext.contractSeeding, `sessionCapability` via the session mock.
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    workflowCapability?: ContractSeedingCapability;
+    sessionCapability?: ContractSeedingCapability;
+  } = {},
+): ToolContext {
+  const { hasActive = true, workflowCapability, sessionCapability } = options;
+  const sessionManager = createMockSessionManager({ hasActive });
+  sessionManager.getContractSeedingCapability.mockReturnValue(
+    sessionCapability,
+  );
+
+  const workflowContext = {
+    config: { environment: 'e2e', extensionName: 'MetaMask' },
+    contractSeeding: workflowCapability,
+  };
+  const stub = {
+    sessionManager,
+    page: {} as ToolContext['page'],
+    refMap: new Map(),
+    workflowContext,
+    knowledgeStore: {} as ToolContext['knowledgeStore'],
+    toolRegistry: new Map(),
+  };
+  return stub as unknown as ToolContext;
+}
+
+describe('seeding tools', () => {
+  describe('seedContractTool', () => {
+    it('deploys a single contract using workflowContext capability', async () => {
+      const deployedAt = new Date().toISOString();
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'deployContract').mockResolvedValue({
+        name: 'hst',
+        address: '0x1234567890123456789012345678901234567890',
+        deployedAt,
+      });
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await seedContractTool({ contractName: 'hst' }, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result).toStrictEqual({
+          contractName: 'hst',
+          contractAddress: '0x1234567890123456789012345678901234567890',
+          deployedAt,
+        });
+      }
+      expect(capability.deployContract).toHaveBeenCalledWith('hst', {
+        hardfork: undefined,
+        deployerOptions: undefined,
+      });
+      expect(
+        context.sessionManager.getContractSeedingCapability,
+      ).not.toHaveBeenCalled();
+    });
+
+    it('falls back to session manager capability when workflowContext lacks one', async () => {
+      const deployedAt = new Date().toISOString();
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'deployContract').mockResolvedValue({
+        name: 'nfts',
+        address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd',
+        deployedAt,
+      });
+      const context = createMockContext({ sessionCapability: capability });
+
+      const result = await seedContractTool(
+        { contractName: 'nfts', hardfork: 'shanghai' },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      expect(capability.deployContract).toHaveBeenCalledWith('nfts', {
+        hardfork: 'shanghai',
+        deployerOptions: undefined,
+      });
+      expect(
+        context.sessionManager.getContractSeedingCapability,
+      ).toHaveBeenCalled();
+    });
+
+    it('returns contract not found errors from deployment failures', async () => {
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'deployContract').mockRejectedValue(
+        new Error('Contract not found: unknown'),
+      );
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await seedContractTool({ contractName: 'hst' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_CONTRACT_NOT_FOUND);
+        expect(result.error.message).toContain('Contract not found');
+      }
+    });
+
+    it('returns capability unavailable when no seeding capability exists', async () => {
+      const context = createMockContext();
+
+      const result = await seedContractTool({ contractName: 'hst' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
+      }
+    });
+  });
+
+  describe('seedContractsTool', () => {
+    it('deploys multiple contracts and maps deployed and failed results', async () => {
+      const deployedAt = new Date().toISOString();
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'deployContracts').mockResolvedValue({
+        deployed: [
+          {
+            name: 'hst',
+            address: '0x1234567890123456789012345678901234567890',
+            deployedAt,
+          },
+        ],
+        failed: [
+          {
+            name: 'nfts',
+            error: 'Contract deployment failed',
+          },
+        ],
+      });
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await seedContractsTool(
+        { contracts: ['hst', 'nfts'] },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result).toStrictEqual({
+          deployed: [
+            {
+              contractName: 'hst',
+              contractAddress: '0x1234567890123456789012345678901234567890',
+              deployedAt,
+            },
+          ],
+          failed: [
+            {
+              contractName: 'nfts',
+              error: 'Contract deployment failed',
+            },
+          ],
+        });
+      }
+      expect(capability.deployContracts).toHaveBeenCalledWith(['hst', 'nfts'], {
+        hardfork: undefined,
+      });
+    });
+
+    it('returns seed failed errors for complete deployment failures', async () => {
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'deployContracts').mockRejectedValue(
+        new Error('Anvil not running'),
+      );
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await seedContractsTool({ contracts: ['hst'] }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED);
+        expect(result.error.message).toContain('Anvil not running');
+      }
+    });
+
+    it('returns capability unavailable when no seeding capability exists', async () => {
+      const context = createMockContext();
+
+      const result = await seedContractsTool({ contracts: ['hst'] }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
+      }
+    });
+  });
+
+  describe('getContractAddressTool', () => {
+    it('returns the contract address when found', async () => {
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'getContractAddress').mockReturnValue(
+        '0x1234567890123456789012345678901234567890',
+      );
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await getContractAddressTool(
+        { contractName: 'hst' },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result).toStrictEqual({
+          contractName: 'hst',
+          contractAddress: '0x1234567890123456789012345678901234567890',
+        });
+      }
+      expect(capability.getContractAddress).toHaveBeenCalledWith('hst');
+    });
+
+    it('returns null when the contract address is missing', async () => {
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'getContractAddress').mockReturnValue(null);
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await getContractAddressTool(
+        { contractName: 'nfts' },
+        context,
+      );
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result).toStrictEqual({
+          contractName: 'nfts',
+          contractAddress: null,
+        });
+      }
+    });
+
+    it('returns error when getContractAddress throws', async () => {
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'getContractAddress').mockImplementation(() => {
+        throw new Error('Connection lost');
+      });
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await getContractAddressTool(
+        { contractName: 'hst' },
+        context,
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED);
+        expect(result.error.message).toContain('Connection lost');
+      }
+    });
+
+    it('returns capability unavailable when no seeding capability exists', async () => {
+      const context = createMockContext();
+
+      const result = await getContractAddressTool(
+        { contractName: 'hst' },
+        context,
+      );
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
+      }
+    });
+  });
+
+  describe('listContractsTool', () => {
+    it('returns the list of deployed contracts', async () => {
+      const deployedAt1 = new Date().toISOString();
+      const deployedAt2 = new Date(Date.now() + 1000).toISOString();
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'listDeployedContracts').mockReturnValue([
+        {
+          name: 'hst',
+          address: '0x1234567890123456789012345678901234567890',
+          deployedAt: deployedAt1,
+        },
+        {
+          name: 'nfts',
+          address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd',
+          deployedAt: deployedAt2,
+        },
+      ]);
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await listContractsTool({}, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result).toStrictEqual({
+          contracts: [
+            {
+              contractName: 'hst',
+              contractAddress: '0x1234567890123456789012345678901234567890',
+              deployedAt: deployedAt1,
+            },
+            {
+              contractName: 'nfts',
+              contractAddress: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd',
+              deployedAt: deployedAt2,
+            },
+          ],
+        });
+      }
+      expect(capability.listDeployedContracts).toHaveBeenCalled();
+    });
+
+    it('returns capability unavailable when no seeding capability exists', async () => {
+      const context = createMockContext();
+
+      const result = await listContractsTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE);
+        expect(result.error.message).toContain(
+          'ContractSeedingCapability not available',
+        );
+      }
+    });
+
+    it('returns no active session when the session is missing', async () => {
+      const context = createMockContext({ hasActive: false });
+
+      const result = await listContractsTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+
+    it('returns error when listDeployedContracts throws', async () => {
+      const capability = createMockSeedingCapability();
+      vi.spyOn(capability, 'listDeployedContracts').mockImplementation(() => {
+        throw new Error('Connection lost');
+      });
+      const context = createMockContext({ workflowCapability: capability });
+
+      const result = await listContractsTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED);
+        expect(result.error.message).toContain('Connection lost');
+      }
+    });
+  });
+});
diff --git a/src/tools/seeding.ts b/src/tools/seeding.ts
new file mode 100644
index 0000000..1a36cbb
--- /dev/null
+++ b/src/tools/seeding.ts
@@ -0,0 +1,187 @@
+import { classifySeedingError } from './error-classification.js';
+import type {
+  GetContractAddressInput,
+  GetContractAddressResult,
+  ListDeployedContractsInput,
+  ListDeployedContractsResult,
+  SeedContractInput,
+  SeedContractResult,
+  SeedContractsInput,
+  SeedContractsResult,
+} from './types';
+import { ErrorCodes } from './types';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ContractSeedingCapability } from '../capabilities/types.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Resolves the contract seeding capability shared by all seeding tools.
+ *
+ * @param context - The tool execution context.
+ * @returns The capability (workflow context first, then session manager), or an error response.
+ */
+function getSeedingCapability(
+  context: ToolContext,
+): ContractSeedingCapability | ToolResponse<never> {
+  const missingSession = requireActiveSession<never>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  const capability =
+    context.workflowContext.contractSeeding ??
+    context.sessionManager.getContractSeedingCapability();
+
+  if (!capability) {
+    return createToolError(
+      ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE,
+      'ContractSeedingCapability not available. Contract seeding tools require running in e2e mode with the MetaMask extension wrapper, which provides Anvil chain and contract deployment support.',
+    );
+  }
+
+  return capability;
+}
+
+/**
+ * Distinguishes an error response from a resolved seeding capability.
+ *
+ * @param value - The result of resolving the seeding capability.
+ * @returns True when the value has an `ok` property, i.e. is a ToolResponse.
+ */
+function isToolResponse(
+  value: ContractSeedingCapability | ToolResponse<never>,
+): value is ToolResponse<never> {
+  return Reflect.has(value, 'ok');
+}
+
+/**
+ * Seeds one smart contract by deploying it through the seeding capability.
+ *
+ * @param input - The contract name plus hardfork and deployer options.
+ * @param context - The tool execution context.
+ * @returns The deployed contract's name, address and `deployedAt` value, or a tool error.
+ */
+export async function seedContractTool(
+  input: SeedContractInput,
+  context: ToolContext,
+): Promise<ToolResponse<SeedContractResult>> {
+  const seeding = getSeedingCapability(context);
+  if (isToolResponse(seeding)) {
+    return seeding;
+  }
+
+  try {
+    const receipt = await seeding.deployContract(input.contractName, {
+      hardfork: input.hardfork,
+      deployerOptions: input.deployerOptions,
+    });
+
+    return createToolSuccess({
+      contractName: receipt.name,
+      contractAddress: receipt.address,
+      deployedAt: receipt.deployedAt,
+    });
+  } catch (error) {
+    const { code, message } = classifySeedingError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Seeds several smart contracts in one batch through the seeding capability.
+ *
+ * @param input - The contracts to deploy and the hardfork shared by all of them.
+ * @param context - The tool execution context.
+ * @returns The deployed contracts and the failed deployments, or a tool error.
+ */
+export async function seedContractsTool(
+  input: SeedContractsInput,
+  context: ToolContext,
+): Promise<ToolResponse<SeedContractsResult>> {
+  const seeding = getSeedingCapability(context);
+  if (isToolResponse(seeding)) {
+    return seeding;
+  }
+
+  try {
+    const batch = await seeding.deployContracts(input.contracts, {
+      hardfork: input.hardfork,
+    });
+
+    const deployed = batch.deployed.map((entry) => ({
+      contractName: entry.name,
+      contractAddress: entry.address,
+      deployedAt: entry.deployedAt,
+    }));
+    const failed = batch.failed.map((entry) => ({
+      contractName: entry.name,
+      error: entry.error,
+    }));
+
+    return createToolSuccess({ deployed, failed });
+  } catch (error) {
+    const { code, message } = classifySeedingError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Resolves the address recorded for a contract name by the seeding capability.
+ *
+ * @param input - The contract name to look up.
+ * @param context - The tool execution context.
+ * @returns The requested contract name paired with its address, or a tool error.
+ */
+export async function getContractAddressTool(
+  input: GetContractAddressInput,
+  context: ToolContext,
+): Promise<ToolResponse<GetContractAddressResult>> {
+  const seeding = getSeedingCapability(context);
+  if (isToolResponse(seeding)) {
+    return seeding;
+  }
+
+  try {
+    const { contractName } = input;
+    const contractAddress = seeding.getContractAddress(contractName);
+
+    return createToolSuccess({ contractName, contractAddress });
+  } catch (error) {
+    const { code, message } = classifySeedingError(error);
+    return createToolError(code, message);
+  }
+}
+
+/**
+ * Reports every contract the seeding capability currently lists as deployed.
+ *
+ * @param _input - Unused input parameters.
+ * @param context - The tool execution context.
+ * @returns The deployed contracts with names, addresses and `deployedAt` values.
+ */
+export async function listContractsTool(
+  _input: ListDeployedContractsInput,
+  context: ToolContext,
+): Promise<ToolResponse<ListDeployedContractsResult>> {
+  const seeding = getSeedingCapability(context);
+  if (isToolResponse(seeding)) {
+    return seeding;
+  }
+
+  try {
+    const contracts = seeding.listDeployedContracts().map((entry) => ({
+      contractName: entry.name,
+      contractAddress: entry.address,
+      deployedAt: entry.deployedAt,
+    }));
+
+    return createToolSuccess({ contracts });
+  } catch (error) {
+    const { code, message } = classifySeedingError(error);
+    return createToolError(code, message);
+  }
+}
diff --git a/src/tools/state.test.ts b/src/tools/state.test.ts
new file mode 100644
index 0000000..3969f5a
--- /dev/null
+++ b/src/tools/state.test.ts
@@ -0,0 +1,319 @@
+/**
+ * Unit tests for the state tool handler.
+ *
+ * Exercises getStateTool across scenarios including the state snapshot
+ * capability, tab tracking, and error handling.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+import { getStateTool } from './state.js';
+import type { StateSnapshotCapability } from '../capabilities/types.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import type { MockSessionManagerOptions } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types/errors.js';
+import type { ToolContext } from '../types/http.js';
+
+/** Builds a minimal page stub that only exposes a mocked `url()`. */
+function createMockPage(url = 'chrome-extension://ext-123/home.html') {
+  const pageStub = { url: vi.fn().mockReturnValue(url) };
+  return pageStub as never;
+}
+
+/** Builds a ToolContext stub around a mocked session manager. */
+function createMockContext(
+  options: MockSessionManagerOptions & {
+    page?: ReturnType<typeof createMockPage>;
+    stateSnapshotCapability?: StateSnapshotCapability;
+  } = {},
+): ToolContext & {
+  sessionManager: ReturnType<typeof createMockSessionManager>;
+} {
+  const activePage = options.page ?? createMockPage();
+  const snapshotCapability = options.stateSnapshotCapability;
+  const sessionManager = createMockSessionManager(options);
+
+  sessionManager.getPage.mockReturnValue(activePage);
+  sessionManager.getStateSnapshotCapability.mockReturnValue(snapshotCapability);
+
+  return {
+    sessionManager,
+    page: activePage,
+    refMap: new Map(),
+    workflowContext: {},
+    knowledgeStore: {},
+  } as unknown as ToolContext & {
+    sessionManager: ReturnType<typeof createMockSessionManager>;
+  };
+}
+
+describe('getStateTool', () => {
+  describe('without state snapshot capability', () => {
+    it('returns extension state from session manager', async () => {
+      const page = createMockPage('chrome-extension://ext-123/home.html');
+      const context = createMockContext({
+        hasActive: true,
+        page,
+        extensionState: {
+          isLoaded: true,
+          currentUrl: 'chrome-extension://ext-123/home.html',
+          extensionId: 'ext-123',
+          isUnlocked: true,
+          currentScreen: 'home',
+          accountAddress: '0x1234567890123456789012345678901234567890',
+          networkName: 'Ethereum Mainnet',
+          chainId: 1,
+          balance: '1.5 ETH',
+        },
+        trackedPages: [
+          {
+            page,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+        ],
+      });
+
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.state).toStrictEqual({
+          isLoaded: true,
+          currentUrl: 'chrome-extension://ext-123/home.html',
+          extensionId: 'ext-123',
+          isUnlocked: true,
+          currentScreen: 'home',
+          accountAddress: '0x1234567890123456789012345678901234567890',
+          networkName: 'Ethereum Mainnet',
+          chainId: 1,
+          balance: '1.5 ETH',
+        });
+        expect(result.result.tabs).toStrictEqual({
+          active: {
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+          tracked: [
+            {
+              role: 'extension',
+              url: 'chrome-extension://ext-123/home.html',
+            },
+          ],
+        });
+      }
+      expect(context.sessionManager.getExtensionState).toHaveBeenCalled();
+    });
+
+    it('includes multiple tracked pages in tabs', async () => {
+      const extensionPage = createMockPage(
+        'chrome-extension://ext-123/home.html',
+      );
+      const dappPage = createMockPage('https://app.uniswap.org');
+      const context = createMockContext({
+        hasActive: true,
+        page: extensionPage,
+        extensionState: {
+          isLoaded: true,
+          currentUrl: 'chrome-extension://ext-123/home.html',
+          extensionId: 'ext-123',
+          isUnlocked: true,
+          currentScreen: 'home',
+          accountAddress: '0x1234567890123456789012345678901234567890',
+          networkName: 'Ethereum Mainnet',
+          chainId: 1,
+          balance: '1.5 ETH',
+        },
+        trackedPages: [
+          {
+            page: extensionPage,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+          {
+            page: dappPage,
+            role: 'dapp',
+            url: 'https://app.uniswap.org',
+          },
+        ],
+      });
+
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.tabs).toBeDefined();
+        expect(result.result.tabs?.tracked).toHaveLength(2);
+        expect(result.result.tabs?.tracked).toStrictEqual([
+          { role: 'extension', url: 'chrome-extension://ext-123/home.html' },
+          { role: 'dapp', url: 'https://app.uniswap.org' },
+        ]);
+      }
+    });
+
+    it('handles active page without tracked page info', async () => {
+      const page = createMockPage('chrome-extension://ext-123/home.html');
+      const context = createMockContext({
+        hasActive: true,
+        page,
+        extensionState: {
+          isLoaded: true,
+          currentUrl: 'chrome-extension://ext-123/home.html',
+          extensionId: 'ext-123',
+          isUnlocked: false,
+          currentScreen: 'home',
+          accountAddress: null,
+          networkName: null,
+          chainId: null,
+          balance: null,
+        },
+        trackedPages: [],
+      });
+      // trackedPages is empty, so the active tab is expected to report 'other'.
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.tabs).toBeDefined();
+        expect(result.result.tabs?.active.role).toBe('other');
+        expect(result.result.tabs?.active.url).toBe(
+          'chrome-extension://ext-123/home.html',
+        );
+      }
+    });
+  });
+
+  describe('with state snapshot capability', () => {
+    it('uses state snapshot capability when provided', async () => {
+      const page = createMockPage('chrome-extension://ext-123/home.html');
+      const stateSnapshotCapability: StateSnapshotCapability = {
+        getState: vi.fn().mockResolvedValue({
+          isLoaded: true,
+          currentUrl: 'chrome-extension://ext-123/home.html',
+          extensionId: 'ext-123',
+          isUnlocked: true,
+          currentScreen: 'home',
+          accountAddress: '0x1234567890123456789012345678901234567890',
+          networkName: 'Localhost 8545',
+          chainId: 1337,
+          balance: '25 ETH',
+        }),
+        detectCurrentScreen: vi.fn().mockResolvedValue('home'),
+      };
+      const context = createMockContext({
+        hasActive: true,
+        page,
+        sessionState: {
+          extensionId: 'ext-123',
+          ports: { anvil: 8545 },
+        } as never,
+        trackedPages: [
+          {
+            page,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+        ],
+        stateSnapshotCapability,
+      });
+      // ports.anvil is set, so the capability should be asked for chainId 1337.
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(true);
+      if (result.ok) {
+        expect(result.result.state.chainId).toBe(1337);
+        expect(result.result.state.networkName).toBe('Localhost 8545');
+        expect(result.result.state.balance).toBe('25 ETH');
+      }
+      expect(stateSnapshotCapability.getState).toHaveBeenCalledWith(page, {
+        extensionId: 'ext-123',
+        chainId: 1337,
+      });
+      expect(context.sessionManager.getExtensionState).not.toHaveBeenCalled();
+    });
+
+    it('uses chainId 1 when anvil port not present', async () => {
+      const page = createMockPage('chrome-extension://ext-123/home.html');
+      const stateSnapshotCapability: StateSnapshotCapability = {
+        getState: vi.fn().mockResolvedValue({
+          isLoaded: true,
+          currentUrl: 'chrome-extension://ext-123/home.html',
+          extensionId: 'ext-123',
+          isUnlocked: true,
+          currentScreen: 'home',
+          accountAddress: '0x1234567890123456789012345678901234567890',
+          networkName: 'Ethereum Mainnet',
+          chainId: 1,
+          balance: '1.5 ETH',
+        }),
+        detectCurrentScreen: vi.fn().mockResolvedValue('home'),
+      };
+      const context = createMockContext({
+        hasActive: true,
+        page,
+        sessionState: {
+          extensionId: 'ext-123',
+          ports: {},
+        } as never,
+        trackedPages: [
+          {
+            page,
+            role: 'extension',
+            url: 'chrome-extension://ext-123/home.html',
+          },
+        ],
+        stateSnapshotCapability,
+      });
+      // No anvil port in the session state, so chainId 1 is expected instead.
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(true);
+      expect(stateSnapshotCapability.getState).toHaveBeenCalledWith(page, {
+        extensionId: 'ext-123',
+        chainId: 1,
+      });
+    });
+  });
+
+  describe('error handling', () => {
+    it('returns error when no active session', async () => {
+      const context = createMockContext({ hasActive: false });
+
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION);
+      }
+    });
+
+    it('returns error when getExtensionState fails', async () => {
+      const context = createMockContext({ hasActive: true });
+      context.sessionManager.getExtensionState.mockRejectedValue(
+        new Error('Failed to get state'),
+      );
+
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_STATE_FAILED);
+        expect(result.error.message).toContain('Failed to get state');
+      }
+    });
+
+    it('returns error when page is closed', async () => {
+      const context = createMockContext({ hasActive: true });
+      context.sessionManager.getExtensionState.mockRejectedValue(
+        new Error('Target page, context or browser has been closed'),
+      );
+      // A closed-target rejection is expected to surface as MM_PAGE_CLOSED.
+      const result = await getStateTool({}, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED);
+      }
+    });
+  });
+});
diff --git a/src/tools/state.ts b/src/tools/state.ts
new file mode 100644
index 0000000..c974cce
--- /dev/null
+++ b/src/tools/state.ts
@@ -0,0 +1,88 @@
+import type { Page } from '@playwright/test';
+
+import { classifyStateError } from './error-classification.js';
+import type { GetStateResult } from './types';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type {
+  ExtensionState,
+  StateSnapshotCapability,
+} from '../capabilities/types.js';
+import type { ISessionManager } from '../server/session-manager.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Reads the extension state, preferring the snapshot capability when present.
+ *
+ * @param page - The active Playwright page.
+ * @param sessionManager - Supplies the session state (extension id, Anvil port) and the fallback state.
+ * @param stateSnapshotCapability - Optional capability for direct state snapshots.
+ * @returns The current extension state.
+ */
+async function getState(
+  page: Page,
+  sessionManager: ISessionManager,
+  stateSnapshotCapability?: StateSnapshotCapability,
+): Promise<ExtensionState> {
+  if (!stateSnapshotCapability) {
+    return sessionManager.getExtensionState();
+  }
+
+  const sessionState = sessionManager.getSessionState();
+  return stateSnapshotCapability.getState(page, {
+    extensionId: sessionState?.extensionId,
+    chainId: sessionState?.ports?.anvil ? 1337 : 1,
+  });
+}
+
+/**
+ * Reports the extension state together with the active and tracked tabs.
+ *
+ * @param _input - Unused input parameters.
+ * @param context - The tool execution context.
+ * @returns The extension state and tab details, or a classified tool error.
+ */
+export async function getStateTool(
+  _input: Record<string, never>,
+  context: ToolContext,
+): Promise<ToolResponse<GetStateResult>> {
+  const noSession = requireActiveSession<GetStateResult>(context);
+  if (noSession) {
+    return noSession;
+  }
+
+  const { sessionManager } = context;
+
+  try {
+    // Prefer the workflow-injected capability over the session manager's.
+    const snapshot =
+      context.workflowContext.stateSnapshot ??
+      sessionManager.getStateSnapshotCapability();
+    const state = await getState(context.page, sessionManager, snapshot);
+
+    const trackedPages = sessionManager.getTrackedPages();
+    const activePage = sessionManager.getPage();
+    // The active page may be untracked; its role then falls back to 'other'.
+    const activeTab = trackedPages.find((entry) => entry.page === activePage);
+
+    return createToolSuccess({
+      state,
+      tabs: {
+        active: {
+          role: activeTab?.role ?? 'other',
+          url: activePage.url(),
+        },
+        tracked: trackedPages.map((entry) => ({
+          role: entry.role,
+          url: entry.url,
+        })),
+      },
+    });
+  } catch (error) {
+    const { code, message } = classifyStateError(error);
+    return createToolError(code, message);
+  }
+}
diff --git a/src/mcp-server/test-utils/index.ts b/src/tools/test-utils/index.ts
similarity index 86%
rename from src/mcp-server/test-utils/index.ts
rename to src/tools/test-utils/index.ts
index 4959175..76200db 100644
--- a/src/mcp-server/test-utils/index.ts
+++ b/src/tools/test-utils/index.ts
@@ -13,5 +13,3 @@ export {
   type MockLocatorOptions,
   type MockBrowserContextOptions,
 } from './mock-playwright.js';
-
-export { flushPromises } from './flush-promises.js';
diff --git a/src/mcp-server/test-utils/mock-factories.test.ts b/src/tools/test-utils/mock-factories.test.ts
similarity index 99%
rename from src/mcp-server/test-utils/mock-factories.test.ts
rename to src/tools/test-utils/mock-factories.test.ts
index 9d490cf..0987102 100644
--- a/src/mcp-server/test-utils/mock-factories.test.ts
+++ b/src/tools/test-utils/mock-factories.test.ts
@@ -136,7 +136,7 @@ describe('mock-factories', () => {
     });
 
     it('allows customization via options', async () => {
-      const customSteps = [{ tool: 'mm_click', screen: 'home' }];
+      const customSteps = [{ tool: 'click', screen: 'home' }];
       const mock = createMockKnowledgeStore({
         lastSteps: customSteps,
       });
diff --git a/src/mcp-server/test-utils/mock-factories.ts b/src/tools/test-utils/mock-factories.ts
similarity index 96%
rename from src/mcp-server/test-utils/mock-factories.ts
rename to src/tools/test-utils/mock-factories.ts
index 8540852..f19ee1c 100644
--- a/src/mcp-server/test-utils/mock-factories.ts
+++ b/src/tools/test-utils/mock-factories.ts
@@ -11,8 +11,11 @@
 import { vi } from 'vitest';
 
 import type { ExtensionState } from '../../capabilities/types.js';
-import type { KnowledgeStore } from '../knowledge-store.js';
-import type { TrackedPage, SessionLaunchResult } from '../session-manager.js';
+import type { KnowledgeStore } from '../../knowledge-store/knowledge-store.js';
+import type {
+  TrackedPage,
+  SessionLaunchResult,
+} from '../../server/session-manager.js';
 import type { SessionState } from '../types/session.js';
 import type { SessionMetadata } from '../types/step-record.js';
 
@@ -130,6 +133,7 @@ export function createMockSessionManager(
     getStateSnapshotCapability: vi.fn().mockReturnValue(undefined),
 
     // Environment
+    setWorkflowContext: vi.fn(),
     getEnvironmentMode: vi
       .fn()
       .mockReturnValue(options.environmentMode ?? 'e2e'),
diff --git a/src/mcp-server/test-utils/mock-playwright.ts b/src/tools/test-utils/mock-playwright.ts
similarity index 100%
rename from src/mcp-server/test-utils/mock-playwright.ts
rename to src/tools/test-utils/mock-playwright.ts
diff --git a/src/mcp-server/types/discovery.ts b/src/tools/types/discovery.ts
similarity index 78%
rename from src/mcp-server/types/discovery.ts
rename to src/tools/types/discovery.ts
index 397196d..4d5f35f 100644
--- a/src/mcp-server/types/discovery.ts
+++ b/src/tools/types/discovery.ts
@@ -9,6 +9,16 @@ export const ACTIONABLE_ROLES = [
   'menuitem',
 ] as const;
 
+export const STRUCTURAL_ROLES = [
+  'menu',
+  'listbox',
+  'option',
+  'tab',
+  'tabpanel',
+  'list',
+  'listitem',
+] as const;
+
 export const IMPORTANT_ROLES = [
   'dialog',
   'alert',
@@ -18,10 +28,12 @@ export const IMPORTANT_ROLES = [
 
 export const INCLUDED_ROLES = [
   ...ACTIONABLE_ROLES,
+  ...STRUCTURAL_ROLES,
   ...IMPORTANT_ROLES,
 ] as const;
 
 export type ActionableRole = (typeof ACTIONABLE_ROLES)[number];
+export type StructuralRole = (typeof STRUCTURAL_ROLES)[number];
 export type ImportantRole = (typeof IMPORTANT_ROLES)[number];
 export type IncludedRole = (typeof INCLUDED_ROLES)[number];
 
@@ -40,6 +52,8 @@ export type A11yNodeTrimmed = {
   checked?: boolean;
   expanded?: boolean;
   path: string[];
+  testId?: string;
+  textContent?: string;
 };
 
 export type RawA11yNode = {
diff --git a/src/mcp-server/types/errors.ts b/src/tools/types/errors.ts
similarity index 100%
rename from src/mcp-server/types/errors.ts
rename to src/tools/types/errors.ts
diff --git a/src/mcp-server/types/index.ts b/src/tools/types/index.ts
similarity index 88%
rename from src/mcp-server/types/index.ts
rename to src/tools/types/index.ts
index 7a9edd6..e4c7ce1 100644
--- a/src/mcp-server/types/index.ts
+++ b/src/tools/types/index.ts
@@ -1,4 +1,3 @@
-export type * from './responses.js';
 export * from './errors.js';
 export * from './seeding.js';
 export type * from './tool-inputs.js';
diff --git a/src/mcp-server/types/knowledge.ts b/src/tools/types/knowledge.ts
similarity index 100%
rename from src/mcp-server/types/knowledge.ts
rename to src/tools/types/knowledge.ts
diff --git a/src/mcp-server/types/seeding.ts b/src/tools/types/seeding.ts
similarity index 100%
rename from src/mcp-server/types/seeding.ts
rename to src/tools/types/seeding.ts
diff --git a/src/mcp-server/types/session.ts b/src/tools/types/session.ts
similarity index 100%
rename from src/mcp-server/types/session.ts
rename to src/tools/types/session.ts
diff --git a/src/mcp-server/types/step-record.ts b/src/tools/types/step-record.ts
similarity index 93%
rename from src/mcp-server/types/step-record.ts
rename to src/tools/types/step-record.ts
index 23d220b..175ad89 100644
--- a/src/mcp-server/types/step-record.ts
+++ b/src/tools/types/step-record.ts
@@ -65,6 +65,12 @@ export type StepRecordObservation = {
   testIds: TestIdItem[];
   a11y: {
     nodes: A11yNodeTrimmed[];
+    /** Present only in diff-mode compact observations (Phase 2). */
+    diff?: {
+      added: string[];
+      removed: string[];
+      unchanged: number;
+    };
   };
   priorKnowledge?: PriorKnowledgeV1;
 };
diff --git a/src/mcp-server/types/tool-inputs.ts b/src/tools/types/tool-inputs.ts
similarity index 85%
rename from src/mcp-server/types/tool-inputs.ts
rename to src/tools/types/tool-inputs.ts
index 65bd1ac..fdf5747 100644
--- a/src/mcp-server/types/tool-inputs.ts
+++ b/src/tools/types/tool-inputs.ts
@@ -2,13 +2,6 @@ import type { SmartContractName } from './seeding.js';
 
 export type TabRole = 'extension' | 'notification' | 'dapp' | 'other';
 
-export type ObservationPolicyOverride = 'default' | 'none' | 'failures';
-
-export type HandlerOptions = {
-  signal?: AbortSignal;
-  observationPolicy?: ObservationPolicyOverride;
-};
-
 export type BuildInput = {
   buildType?: 'build:test';
   force?: boolean;
@@ -16,6 +9,7 @@ export type BuildInput = {
 
 export type LaunchInput = {
   autoBuild?: boolean;
+  context?: 'e2e' | 'prod';
   stateMode?: 'default' | 'onboarding' | 'custom';
   fixturePreset?: string;
   fixture?: Record<string, unknown>;
@@ -29,6 +23,7 @@ export type LaunchInput = {
   flowTags?: string[];
   tags?: string[];
   seedContracts?: SmartContractName[];
+  force?: boolean;
 };
 
 export type CleanupInput = {
@@ -59,7 +54,7 @@ export type DescribeScreenInput = {
 };
 
 export type ScreenshotInput = {
-  name: string;
+  name?: string;
   fullPage?: boolean;
   selector?: string;
   includeBase64?: boolean;
@@ -71,17 +66,31 @@ export type TargetSelection = {
   selector?: string;
 };
 
+export type WithinTarget = {
+  a11yRef?: string;
+  testId?: string;
+  selector?: string;
+};
+
 export type ClickInput = TargetSelection & {
   timeoutMs?: number;
+  within?: WithinTarget;
 };
 
 export type TypeInput = TargetSelection & {
   text: string;
   timeoutMs?: number;
+  within?: WithinTarget;
 };
 
 export type WaitForInput = TargetSelection & {
   timeoutMs?: number;
+  within?: WithinTarget;
+};
+
+export type GetTextInput = TargetSelection & {
+  timeoutMs?: number;
+  within?: WithinTarget;
 };
 
 export type KnowledgeScope =
@@ -128,6 +137,7 @@ export type RunStepsInput = {
   }[];
   stopOnError?: boolean;
   includeObservations?: 'none' | 'failures' | 'all';
+  batchTimeoutMs?: number;
 };
 
 export type SwitchToTabInput = {
@@ -144,3 +154,8 @@ export type ClipboardInput = {
   action: 'write' | 'read';
   text?: string;
 };
+
+export type SetContextInput = {
+  context: 'e2e' | 'prod';
+  options?: Record<string, unknown>;
+};
diff --git a/src/mcp-server/types/tool-outputs.ts b/src/tools/types/tool-outputs.ts
similarity index 83%
rename from src/mcp-server/types/tool-outputs.ts
rename to src/tools/types/tool-outputs.ts
index 541bd51..05e56c5 100644
--- a/src/mcp-server/types/tool-outputs.ts
+++ b/src/tools/types/tool-outputs.ts
@@ -59,6 +59,7 @@ export type ScreenshotInfo = {
 
 export type DescribeScreenResult = {
   state: ExtensionState;
+  activeTab?: TabInfo;
   testIds: {
     items: TestIdItem[];
   };
@@ -93,6 +94,12 @@ export type WaitForResult = {
   target: string;
 };
 
+export type GetTextResult = {
+  text: string;
+  target: string;
+  length: number;
+};
+
 export type StepResult = {
   tool: string;
   ok: boolean;
@@ -105,6 +112,7 @@ export type StepResult = {
   meta: {
     durationMs: number;
     timestamp: string;
+    skipped?: boolean;
   };
 };
 
@@ -115,6 +123,7 @@ export type RunStepsResult = {
     total: number;
     succeeded: number;
     failed: number;
+    skipped: number;
     durationMs: number;
   };
 };
@@ -139,3 +148,19 @@ export type ClipboardResult = {
   success: boolean;
   text?: string;
 };
+
+export type SetContextResult = {
+  previousContext: 'e2e' | 'prod';
+  newContext: 'e2e' | 'prod';
+  availableCapabilities: string[];
+};
+
+export type GetContextResult = {
+  currentContext: 'e2e' | 'prod';
+  hasActiveSession: boolean;
+  sessionId: string | null;
+  capabilities: {
+    available: string[];
+  };
+  canSwitchContext: boolean;
+};
diff --git a/src/tools/utils.ts b/src/tools/utils.ts
new file mode 100644
index 0000000..6eae6a2
--- /dev/null
+++ b/src/tools/utils.ts
@@ -0,0 +1,53 @@
+import { ErrorCodes } from './types';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Builds a successful tool response (`ok: true`) around a result payload.
+ *
+ * @param result - The result payload to return.
+ * @returns A tool response whose `result` field is the given payload.
+ */
+export function createToolSuccess<TResult>(
+  result: TResult,
+): ToolResponse<TResult> {
+  return { ok: true, result };
+}
+
+/**
+ * Builds a failed tool response from an error code and message.
+ *
+ * The type parameter only shapes the declared response type, so one error
+ * value can be returned from a tool with any result type; the object itself
+ * carries `ok: false` and the `error` payload.
+ *
+ * @param code - The error code identifying the failure type.
+ * @param message - A human-readable error description.
+ * @returns A failed tool response containing the error.
+ */
+export function createToolError<TResult = never>(
+  code: string,
+  message: string,
+): ToolResponse<TResult> {
+  const error = { code, message };
+
+  return { ok: false, error };
+}
+
+/**
+ * Guards a tool handler against running without an active session.
+ *
+ * @param context - The tool execution context.
+ * @returns `undefined` when a session is active; otherwise an MM_NO_ACTIVE_SESSION error.
+ */
+export function requireActiveSession<TResult>(
+  context: ToolContext,
+): ToolResponse<TResult> | undefined {
+  if (context.sessionManager.hasActiveSession()) {
+    return undefined;
+  }
+
+  return createToolError(
+    ErrorCodes.MM_NO_ACTIVE_SESSION,
+    'No active session. Call launch first.',
+  );
+}
diff --git a/src/mcp-server/constants.ts b/src/tools/utils/constants.ts
similarity index 83%
rename from src/mcp-server/constants.ts
rename to src/tools/utils/constants.ts
index fd1404f..8c65086 100644
--- a/src/mcp-server/constants.ts
+++ b/src/tools/utils/constants.ts
@@ -1,5 +1,5 @@
 /**
- * Constants for MCP server tool operations.
+ * Constants for HTTP daemon tool operations.
  * Centralized to ensure consistency and easy tuning.
  */
 
@@ -22,3 +22,6 @@ export const OBSERVATION_TESTID_LIMIT = 50;
 
 /** Maximum length for text content preview in discovery */
 export const TEXT_PREVIEW_MAX_LENGTH = 100;
+
+/** Minimum number of option nodes under a combobox/listbox to trigger collapsing */
+export const OPTION_COLLAPSE_MIN_COUNT = 3;
diff --git a/src/mcp-server/discovery.test.ts b/src/tools/utils/discovery.test.ts
similarity index 60%
rename from src/mcp-server/discovery.test.ts
rename to src/tools/utils/discovery.test.ts
index 048b43a..d738973 100644
--- a/src/mcp-server/discovery.test.ts
+++ b/src/tools/utils/discovery.test.ts
@@ -197,6 +197,50 @@ describe('collectTestIds', () => {
     expect(result[0].text?.length).toBeLessThanOrEqual(200);
   });
 
+  it('handles isVisible rejection gracefully', async () => {
+    const mockLocators = [
+      {
+        getAttribute: vi.fn().mockResolvedValue('btn-1'),
+        isVisible: vi.fn().mockRejectedValue(new Error('detached')),
+        textContent: vi.fn().mockResolvedValue('OK'),
+      },
+    ];
+
+    const page = {
+      waitForLoadState: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn().mockReturnValue({
+        all: vi.fn().mockResolvedValue(mockLocators),
+      }),
+    } as unknown as Page;
+
+    const result = await collectTestIds(page);
+
+    expect(result).toHaveLength(1);
+    expect(result[0].visible).toBe(false);
+  });
+
+  it('handles textContent rejection gracefully', async () => {
+    const mockLocators = [
+      {
+        getAttribute: vi.fn().mockResolvedValue('btn-1'),
+        isVisible: vi.fn().mockResolvedValue(true),
+        textContent: vi.fn().mockRejectedValue(new Error('detached')),
+      },
+    ];
+
+    const page = {
+      waitForLoadState: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn().mockReturnValue({
+        all: vi.fn().mockResolvedValue(mockLocators),
+      }),
+    } as unknown as Page;
+
+    const result = await collectTestIds(page);
+
+    expect(result).toHaveLength(1);
+    expect(result[0].text).toBeUndefined();
+  });
+
   it('handles page load state failure', async () => {
     const page = createMockPage({
       testIds: [{ testId: 'test-1', visible: true }],
@@ -308,6 +352,17 @@ describe('collectTrimmedA11ySnapshot', () => {
     expect(result.refMap.size).toBe(0);
   });
 
+  it('handles empty parsed roots from valid yaml', async () => {
+    const page = createMockPage({
+      a11ySnapshot: '- text: just text\n- /url: https://example.com',
+    });
+
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes).toHaveLength(0);
+    expect(result.refMap.size).toBe(0);
+  });
+
   it('uses root selector when provided', async () => {
     const a11yTree = `- dialog:\n  - button "Close"`;
 
@@ -339,6 +394,245 @@ describe('collectTrimmedA11ySnapshot', () => {
     expect(result.nodes[1].name).toBe('Child');
     expect(result.nodes[2].name).toBe('Grandchild');
   });
+
+  it('collapses 3+ consecutive identical nodes into summary', async () => {
+    const a11yTree = [
+      '- main:',
+      '  - button "maskicon"',
+      '  - button "maskicon"',
+      '  - button "maskicon"',
+      '  - button "maskicon"',
+      '  - button "Submit"',
+    ].join('\n');
+
+    const page = createMockPage({ a11ySnapshot: a11yTree });
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes).toHaveLength(3);
+    expect(result.nodes[0]).toMatchObject({
+      ref: 'e1',
+      role: 'button',
+      name: 'maskicon',
+    });
+    expect(result.nodes[1].name).toContain('3 more');
+    expect(result.nodes[1].name).toContain('maskicon');
+    expect(result.nodes[2]).toMatchObject({
+      ref: 'e5',
+      role: 'button',
+      name: 'Submit',
+    });
+    expect(result.refMap.has('e1')).toBe(true);
+    expect(result.refMap.has('e2')).toBe(true);
+    expect(result.refMap.has('e3')).toBe(true);
+    expect(result.refMap.has('e4')).toBe(true);
+  });
+
+  it('does not collapse nodes with same role and name but different paths', async () => {
+    const a11yTree = [
+      '- main:',
+      '  - dialog "A":',
+      '    - button "OK"',
+      '    - button "OK"',
+      '    - button "OK"',
+      '  - dialog "B":',
+      '    - button "OK"',
+      '    - button "OK"',
+      '    - button "OK"',
+    ].join('\n');
+
+    const page = createMockPage({ a11ySnapshot: a11yTree });
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    const dialogAButtons = result.nodes.filter(
+      (n) => n.role === 'button' && n.path.some((p) => p.includes('dialog:A')),
+    );
+    const dialogBButtons = result.nodes.filter(
+      (n) => n.role === 'button' && n.path.some((p) => p.includes('dialog:B')),
+    );
+    expect(dialogAButtons.length).toBeGreaterThanOrEqual(1);
+    expect(dialogBButtons.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('does not collapse fewer than 3 identical nodes', async () => {
+    const a11yTree = [
+      '- main:',
+      '  - button "maskicon"',
+      '  - button "maskicon"',
+      '  - button "Submit"',
+    ].join('\n');
+
+    const page = createMockPage({ a11ySnapshot: a11yTree });
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes).toHaveLength(3);
+    expect(result.nodes[0].name).toBe('maskicon');
+    expect(result.nodes[1].name).toBe('maskicon');
+    expect(result.nodes[2].name).toBe('Submit');
+  });
+
+  it('enriches nodes with short names using testId from DOM', async () => {
+    const a11yTree = `- main:\n  - button "x"`;
+    const mockGetAttribute = vi.fn().mockResolvedValue('action-button');
+    const mockTextContent = vi.fn().mockResolvedValue('Click me');
+    const mockBodyLocator = {
+      ariaSnapshot: vi.fn().mockResolvedValue(a11yTree),
+    };
+
+    const page = {
+      waitForLoadState: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn((selector: string) => {
+        if (selector === 'body') {
+          return { first: vi.fn().mockReturnValue(mockBodyLocator) };
+        }
+        return {
+          first: vi.fn().mockReturnValue({
+            getAttribute: mockGetAttribute,
+            textContent: mockTextContent,
+          }),
+        };
+      }),
+    } as unknown as Page;
+
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes.length).toBeGreaterThan(0);
+    expect(result.nodes[0].testId).toBe('action-button');
+    expect(result.nodes[0].textContent).toBe('Click me');
+  });
+
+  it('skips textContent enrichment when text matches the node name', async () => {
+    const a11yTree = `- main:\n  - button "maskicon"`;
+    const mockBodyLocator = {
+      ariaSnapshot: vi.fn().mockResolvedValue(a11yTree),
+    };
+
+    const page = {
+      waitForLoadState: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn((selector: string) => {
+        if (selector === 'body') {
+          return { first: vi.fn().mockReturnValue(mockBodyLocator) };
+        }
+        return {
+          first: vi.fn().mockReturnValue({
+            getAttribute: vi.fn().mockResolvedValue(null),
+            textContent: vi.fn().mockResolvedValue('maskicon'),
+          }),
+        };
+      }),
+    } as unknown as Page;
+
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes[0].textContent).toBeUndefined();
+    expect(result.nodes[0].testId).toBeUndefined();
+  });
+
+  it('skips enrichment when all node names exceed threshold', async () => {
+    const a11yTree = `- main:\n  - button "A very long button name that exceeds threshold"`;
+    const page = createMockPage({ a11ySnapshot: a11yTree });
+
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes).toHaveLength(1);
+    expect(result.nodes[0].testId).toBeUndefined();
+    expect(result.nodes[0].textContent).toBeUndefined();
+  });
+
+  it('handles enrichment errors when getAttribute/textContent reject', async () => {
+    const a11yTree = `- main:\n  - button "x"`;
+    const mockBodyLocator = {
+      ariaSnapshot: vi.fn().mockResolvedValue(a11yTree),
+    };
+
+    const page = {
+      waitForLoadState: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn((selector: string) => {
+        if (selector === 'body') {
+          return { first: vi.fn().mockReturnValue(mockBodyLocator) };
+        }
+        return {
+          first: vi.fn().mockReturnValue({
+            getAttribute: vi.fn().mockRejectedValue(new Error('detached')),
+            textContent: vi.fn().mockRejectedValue(new Error('detached')),
+          }),
+        };
+      }),
+    } as unknown as Page;
+
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes).toHaveLength(1);
+    expect(result.nodes[0].testId).toBeUndefined();
+    expect(result.nodes[0].textContent).toBeUndefined();
+  });
+
+  it('handles enrichment errors when locator.first() throws', async () => {
+    const a11yTree = `- main:\n  - button "y"`;
+    const mockBodyLocator = {
+      ariaSnapshot: vi.fn().mockResolvedValue(a11yTree),
+    };
+
+    const page = {
+      waitForLoadState: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn((selector: string) => {
+        if (selector === 'body') {
+          return { first: vi.fn().mockReturnValue(mockBodyLocator) };
+        }
+        return {
+          first: vi.fn().mockImplementation(() => {
+            throw new Error('locator disposed');
+          }),
+        };
+      }),
+    } as unknown as Page;
+
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes).toHaveLength(1);
+    expect(result.nodes[0].testId).toBeUndefined();
+    expect(result.nodes[0].textContent).toBeUndefined();
+  });
+
+  it('does not collapse nodes with different textContent', async () => {
+    const a11yTree = [
+      '- main:',
+      '  - button "maskicon"',
+      '  - button "maskicon"',
+      '  - button "maskicon"',
+      '  - button "maskicon"',
+    ].join('\n');
+
+    const textValues = ['Rename', 'Account details', 'Hide', 'Remove'];
+    let callIdx = 0;
+    const mockBodyLocator = {
+      ariaSnapshot: vi.fn().mockResolvedValue(a11yTree),
+    };
+
+    const page = {
+      waitForLoadState: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn((selector: string) => {
+        if (selector === 'body') {
+          return { first: vi.fn().mockReturnValue(mockBodyLocator) };
+        }
+        const idx = callIdx;
+        callIdx += 1;
+        return {
+          first: vi.fn().mockReturnValue({
+            getAttribute: vi.fn().mockResolvedValue(null),
+            textContent: vi
+              .fn()
+              .mockResolvedValue(textValues[idx % textValues.length]),
+          }),
+        };
+      }),
+    } as unknown as Page;
+
+    const result = await collectTrimmedA11ySnapshot(page);
+
+    expect(result.nodes).toHaveLength(4);
+    expect(result.nodes[0].textContent).toBe('Rename');
+    expect(result.nodes[1].textContent).toBe('Account details');
+  });
 });
 
 describe('resolveTarget', () => {
@@ -442,6 +736,41 @@ describe('waitForTarget', () => {
 
     expect(page.locator).toHaveBeenCalledWith('.submit-button');
   });
+
+  it('scopes target within a parent when within is provided', async () => {
+    const childLocator = createMockLocator({ visible: true });
+    const firstParentLocator = {
+      waitFor: vi.fn().mockResolvedValue(undefined),
+      locator: vi.fn().mockReturnValue(childLocator),
+    };
+    const parentLocator = {
+      first: vi.fn().mockReturnValue(firstParentLocator),
+    };
+
+    const page = {
+      locator: vi.fn().mockReturnValue(parentLocator),
+    } as unknown as Page;
+
+    const result = await waitForTarget(
+      page,
+      'testId',
+      'end-accessory',
+      new Map(),
+      5000,
+      { type: 'testId', value: 'account-cell' },
+    );
+
+    expect(page.locator).toHaveBeenCalledWith('[data-testid="account-cell"]');
+    expect(parentLocator.first).toHaveBeenCalled();
+    expect(firstParentLocator.waitFor).toHaveBeenCalledWith({
+      state: 'visible',
+      timeout: 5000,
+    });
+    expect(firstParentLocator.locator).toHaveBeenCalledWith(
+      '[data-testid="end-accessory"]',
+    );
+    expect(result).toBe(childLocator);
+  });
 });
 
 describe('parseAriaSnapshotYaml', () => {
diff --git a/src/mcp-server/discovery.ts b/src/tools/utils/discovery.ts
similarity index 59%
rename from src/mcp-server/discovery.ts
rename to src/tools/utils/discovery.ts
index 5df6159..b3d37e7 100644
--- a/src/mcp-server/discovery.ts
+++ b/src/tools/utils/discovery.ts
@@ -1,14 +1,14 @@
 import type { Page, Locator } from '@playwright/test';
 
 import { TEXT_PREVIEW_MAX_LENGTH } from './constants.js';
+import { debugWarn } from '../../utils';
 import type {
   TestIdItem,
   A11yNodeTrimmed,
   RawA11yNode,
   IncludedRole,
-} from './types';
-import { INCLUDED_ROLES } from './types';
-import { debugWarn } from './utils';
+} from '../types';
+import { INCLUDED_ROLES } from '../types';
 
 const INCLUDED_ROLES_SET = new Set<string>(INCLUDED_ROLES);
 
@@ -302,7 +302,154 @@ export async function collectTrimmedA11ySnapshot(
     traverseNode(root, []);
   }
 
-  return { nodes: trimmedNodes, refMap };
+  await enrichNodesWithDOMContext(page, trimmedNodes, refMap);
+
+  const collapsedNodes = collapseIdenticalRuns(trimmedNodes);
+
+  return { nodes: collapsedNodes, refMap };
+}
+
+const GENERIC_NAME_MAX_LENGTH = 20;
+const ENRICHMENT_BATCH_LIMIT = 100;
+const ENRICHMENT_ELEMENT_TIMEOUT_MS = 500;
+const TEXT_CONTENT_MAX_LENGTH = 60;
+
+type EnrichmentResult = {
+  ref: string;
+  testId: string | null;
+  textContent: string | null;
+};
+
+/**
+ * Enriches a11y nodes that have short or empty names with the data-testid
+ * and trimmed text content of the first DOM element matching their selector.
+ * Best-effort: failed lookups leave the node unchanged and go to debugWarn.
+ *
+ * @param page - The Playwright page to query.
+ * @param nodes - The trimmed a11y nodes to enrich (mutated in place).
+ * @param refMap - Map of a11y refs to selectors for element lookup.
+ */
+async function enrichNodesWithDOMContext(
+  page: Page,
+  nodes: A11yNodeTrimmed[],
+  refMap: Map<string, string>,
+): Promise<void> {
+  const candidates = nodes.filter(
+    (node) => !node.name || node.name.length <= GENERIC_NAME_MAX_LENGTH,
+  );
+  if (candidates.length === 0) {
+    return;
+  }
+
+  // Keep suppressed lookup errors visible when debug logging is enabled.
+  const ignoreLookupError = (error: unknown): null => {
+    debugWarn('discovery.enrichNodesWithDOMContext', error);
+    return null;
+  };
+
+  const enrichBatch = candidates.slice(0, ENRICHMENT_BATCH_LIMIT);
+  const results = await Promise.allSettled(
+    enrichBatch.map(async (node): Promise<EnrichmentResult> => {
+      const selector = refMap.get(node.ref);
+      if (!selector) {
+        return { ref: node.ref, testId: null, textContent: null };
+      }
+      try {
+        const locator = page.locator(selector).first();
+        const [testId, rawText] = await Promise.all([
+          locator
+            .getAttribute('data-testid', {
+              timeout: ENRICHMENT_ELEMENT_TIMEOUT_MS,
+            })
+            .catch(ignoreLookupError),
+          locator
+            .textContent({ timeout: ENRICHMENT_ELEMENT_TIMEOUT_MS })
+            .catch(ignoreLookupError),
+        ]);
+        const trimmedText = rawText?.trim().slice(0, TEXT_CONTENT_MAX_LENGTH);
+        const textContent =
+          trimmedText && trimmedText !== node.name ? trimmedText : null;
+        return { ref: node.ref, testId, textContent };
+      } catch (error) {
+        ignoreLookupError(error);
+        return { ref: node.ref, testId: null, textContent: null };
+      }
+    }),
+  );
+
+  results.forEach((result, index) => {
+    if (result.status !== 'fulfilled') {
+      return;
+    }
+    const node = enrichBatch[index];
+    const { testId, textContent } = result.value;
+    if (testId) {
+      node.testId = testId;
+    }
+    if (textContent) {
+      node.textContent = textContent;
+    }
+  });
+}
+
+const COLLAPSE_THRESHOLD = 3;
+
+/**
+ * Element-wise strict equality check for two string arrays; order matters.
+ * Used by collapseIdenticalRuns to compare the paths of two nodes.
+ *
+ * @param left - First array to compare.
+ * @param right - Second array to compare.
+ * @returns True when the lengths match and every index holds the same string.
+ */
+function arraysEqual(left: string[], right: string[]): boolean {
+  const sameLength = left.length === right.length;
+  return sameLength && !left.some((item, at) => item !== right[at]);
+}
+
+/**
+ * Shrinks every run of consecutive, identical a11y nodes to its first node
+ * followed by one synthetic summary node. Two nodes are identical when role,
+ * name, testId, textContent and path all match. The input array is left
+ * untouched and no refMap is involved, so individual refs are not removed.
+ *
+ * @param nodes - The flat list of trimmed a11y nodes to collapse.
+ * @returns A new array with runs of 3+ identical nodes collapsed.
+ */
+function collapseIdenticalRuns(nodes: A11yNodeTrimmed[]): A11yNodeTrimmed[] {
+  const isSameNode = (lhs: A11yNodeTrimmed, rhs: A11yNodeTrimmed): boolean =>
+    lhs.role === rhs.role &&
+    lhs.name === rhs.name &&
+    lhs.testId === rhs.testId &&
+    lhs.textContent === rhs.textContent &&
+    arraysEqual(lhs.path, rhs.path);
+
+  const output: A11yNodeTrimmed[] = [];
+  for (let start = 0, stop = 0; start < nodes.length; start = stop) {
+    const head = nodes[start];
+    // Advance `stop` to the first index whose node differs from `head`.
+    stop = start + 1;
+    while (stop < nodes.length && isSameNode(nodes[stop], head)) {
+      stop += 1;
+    }
+
+    const size = stop - start;
+    // Short runs are copied through unchanged.
+    if (size < COLLAPSE_THRESHOLD) {
+      output.push(...nodes.slice(start, stop));
+      continue;
+    }
+
+    const refRange = `${head.ref}\u2013${nodes[stop - 1].ref}`;
+    const label = head.name || head.role;
+    output.push(head, {
+      ref: refRange,
+      role: head.role,
+      name: `\u2026 ${size - 1} more "${label}" (refs ${refRange})`,
+      path: head.path,
+    });
+  }
+  return output;
 }
 
 /**
@@ -318,20 +465,33 @@ function buildA11ySelector(role: IncludedRole, name: string): string {
 }
 
 /**
- * Resolve a target element to a Playwright Locator.
+ * Target type for scoping selectors.
+ */
+export type TargetType = 'a11yRef' | 'testId' | 'selector';
+
+/**
+ * Optional parent scope for chained locator resolution.
+ */
+export type WithinScope = {
+  type: TargetType;
+  value: string;
+};
+
+/**
+ * Resolve a target element to a Playwright Locator, optionally scoped within a parent.
  *
- * @param page The Playwright page to search
+ * @param scope The Playwright Page or Locator to search within
  * @param targetType The type of target identifier (a11yRef, testId, or CSS selector)
  * @param targetValue The target value to resolve
  * @param refMap Map of a11y refs to selectors (used when targetType is 'a11yRef')
  * @returns Playwright Locator for the resolved element
  */
-export async function resolveTarget(
-  page: Page,
-  targetType: 'a11yRef' | 'testId' | 'selector',
+function resolveTargetScoped(
+  scope: Page | Locator,
+  targetType: TargetType,
   targetValue: string,
   refMap: Map<string, string>,
-): Promise<Locator> {
+): Locator {
   switch (targetType) {
     case 'a11yRef': {
       const selector = refMap.get(targetValue);
@@ -341,12 +501,12 @@ export async function resolveTarget(
             `Available refs: ${Array.from(refMap.keys()).join(', ')}`,
         );
       }
-      return page.locator(selector);
+      return scope.locator(selector);
     }
     case 'testId':
-      return page.locator(`[data-testid="${targetValue}"]`);
+      return scope.locator(`[data-testid="${targetValue}"]`);
     case 'selector':
-      return page.locator(targetValue);
+      return scope.locator(targetValue);
     default: {
       const exhaustiveCheck: never = targetType;
       throw new Error(`Unknown target type: ${exhaustiveCheck as string}`);
@@ -355,23 +515,60 @@ export async function resolveTarget(
 }
 
 /**
- * Wait for a target element to become visible.
+ * Resolve a target element to a Playwright Locator (page-level).
+ *
+ * @param page The Playwright page to search
+ * @param targetType The type of target identifier (a11yRef, testId, or CSS selector)
+ * @param targetValue The target value to resolve
+ * @param refMap Map of a11y refs to selectors (used when targetType is 'a11yRef')
+ * @returns Playwright Locator for the resolved element
+ */
+export async function resolveTarget(
+  page: Page,
+  targetType: TargetType,
+  targetValue: string,
+  refMap: Map<string, string>,
+): Promise<Locator> {
+  return resolveTargetScoped(page, targetType, targetValue, refMap);
+}
+
+/**
+ * Wait for a target element to become visible, optionally scoped within a parent.
  *
  * @param page The Playwright page to search
  * @param targetType The type of target identifier (a11yRef, testId, or CSS selector)
  * @param targetValue The target value to resolve
  * @param refMap Map of a11y refs to selectors (used when targetType is 'a11yRef')
  * @param timeoutMs Maximum time to wait in milliseconds
+ * @param within Optional parent scope — its first match is awaited (a separate timeoutMs wait) and the target is resolved inside it
  * @returns Playwright Locator for the visible element
  */
 export async function waitForTarget(
   page: Page,
-  targetType: 'a11yRef' | 'testId' | 'selector',
+  targetType: TargetType,
   targetValue: string,
   refMap: Map<string, string>,
   timeoutMs: number,
+  within?: WithinScope,
 ): Promise<Locator> {
-  const locator = await resolveTarget(page, targetType, targetValue, refMap);
+  let scope: Page | Locator = page;
+  if (within) {
+    const parentLocator = resolveTargetScoped(
+      page,
+      within.type,
+      within.value,
+      refMap,
+    );
+    await parentLocator
+      .first()
+      .waitFor({ state: 'visible', timeout: timeoutMs });
+    // Use .first() to guarantee the child search is scoped to exactly one
+    // parent element.  Without this, Playwright chains the child locator
+    // across ALL matching parents, producing phantom multi-matches
+    // (e.g. 63 "end-accessory" buttons across 63 account cells).
+    scope = parentLocator.first();
+  }
+  const locator = resolveTargetScoped(scope, targetType, targetValue, refMap);
   await locator.waitFor({ state: 'visible', timeout: timeoutMs });
   return locator;
 }
diff --git a/src/mcp-server/utils/targets.ts b/src/tools/utils/targets.ts
similarity index 100%
rename from src/mcp-server/utils/targets.ts
rename to src/tools/utils/targets.ts
diff --git a/src/mcp-server/utils/type-guards.test.ts b/src/tools/utils/type-guards.test.ts
similarity index 100%
rename from src/mcp-server/utils/type-guards.test.ts
rename to src/tools/utils/type-guards.test.ts
diff --git a/src/mcp-server/utils/type-guards.ts b/src/tools/utils/type-guards.ts
similarity index 100%
rename from src/mcp-server/utils/type-guards.ts
rename to src/tools/utils/type-guards.ts
diff --git a/src/types/http.ts b/src/types/http.ts
new file mode 100644
index 0000000..d1cac1c
--- /dev/null
+++ b/src/types/http.ts
@@ -0,0 +1,97 @@
+/**
+ * HTTP Server Type Definitions
+ *
+ * Types for standalone tool functions and HTTP response shapes.
+ */
+
+import type { Page } from '@playwright/test';
+
+import type { PortMap, WorkflowContext } from '../capabilities/context.js';
+import type { KnowledgeStore } from '../knowledge-store/knowledge-store.js';
+import type { ISessionManager } from '../server/session-manager.js';
+
+/**
+ * Context passed to standalone tool functions.
+ *
+ * This context provides access to the session manager, current page,
+ * accessibility reference map, workflow capabilities, and knowledge store.
+ */
+export type ToolContext = {
+  /** Session manager for browser session control */
+  sessionManager: ISessionManager;
+  /** Current active Playwright page (lazy — throws if no session) */
+  get page(): Page;
+  /** Accessibility reference map (lazy — returns empty map if no session) */
+  get refMap(): Map<string, string>;
+  /** Workflow context with capabilities and environment config */
+  workflowContext: WorkflowContext;
+  /** Knowledge store for session history and prior knowledge */
+  knowledgeStore: KnowledgeStore;
+  /** Tool registry for batch execution (run_steps) */
+  toolRegistry: Map<string, ToolFunction<unknown, unknown>>;
+};
+
+/**
+ * Result shape for tool responses.
+ *
+ * @template T The type of the successful result
+ */
+// eslint-disable-next-line @typescript-eslint/naming-convention
+export type ToolResponse<T = unknown> =
+  | { ok: true; result: T }
+  | { ok: false; error: { code: string; message: string } };
+
+/**
+ * Standalone tool function signature.
+ *
+ * Tool functions receive parameters and a context, and return a ToolResponse.
+ *
+ * @template TParams The type of parameters the tool accepts
+ * @template TResult The type of the successful result
+ */
+export type ToolFunction<TParams = unknown, TResult = unknown> = (
+  params: TParams,
+  context: ToolContext,
+) => Promise<ToolResponse<TResult>>;
+
+/**
+ * Configuration for createServer().
+ *
+ * This configuration is used to initialize the HTTP server with
+ * session management, context factory, and optional settings.
+ */
+export type ServerConfig = {
+  /** Session manager instance */
+  sessionManager: ISessionManager;
+  /** Factory function to create workflow context (may be sync or async) */
+  contextFactory: () => WorkflowContext | Promise<WorkflowContext>;
+  /** Shared knowledge store instance (optional — a new instance is created if omitted) */
+  knowledgeStore?: KnowledgeStore;
+  /** Idle timeout for daemon auto-shutdown in milliseconds (default: 1_800_000 = 30 min) */
+  idleShutdownMs?: number;
+  /** Per-request execution timeout in milliseconds (default: 30_000) */
+  requestTimeoutMs?: number;
+  /** Path to log file (optional) */
+  logFilePath?: string;
+};
+
+/**
+ * Shape of the .mm-server daemon state file.
+ *
+ * This file is created when the daemon starts and records the port, PID,
+ * start time, nonce, optional version, and sub-service ports of that daemon.
+ */
+export type DaemonState = {
+  /** HTTP server port */
+  port: number;
+  /** Process ID of the daemon */
+  pid: number;
+  /** ISO 8601 timestamp when daemon started */
+  startedAt: string;
+  /** Nonce for daemon identification */
+  nonce: string;
+  /** Package version of the daemon process (absent in state files written before version tracking) */
+  version?: string;
+  /** Port configuration for sub-services */
+  subPorts: PortMap;
+};
diff --git a/src/mcp-server/utils/errors.ts b/src/utils/errors.ts
similarity index 100%
rename from src/mcp-server/utils/errors.ts
rename to src/utils/errors.ts
diff --git a/src/utils/index.ts b/src/utils/index.ts
index 6eae751..99c0548 100644
--- a/src/utils/index.ts
+++ b/src/utils/index.ts
@@ -7,3 +7,6 @@ export {
   waitForServiceReady,
   type WaitForServiceReadyOptions,
 } from './service-readiness.js';
+export { generateFilesafeTimestamp, generateSessionId } from './time.js';
+export { extractErrorMessage } from './errors.js';
+export { debugWarn } from './logger.js';
diff --git a/src/mcp-server/utils/logger.test.ts b/src/utils/logger.test.ts
similarity index 82%
rename from src/mcp-server/utils/logger.test.ts
rename to src/utils/logger.test.ts
index 9824771..37ce500 100644
--- a/src/mcp-server/utils/logger.test.ts
+++ b/src/utils/logger.test.ts
@@ -13,21 +13,21 @@ describe('debugWarn', () => {
     vi.resetModules();
   });
 
-  describe('when MCP_DEBUG is true', () => {
+  describe('when DEBUG is true', () => {
     it('logs warning with context and error message', async () => {
-      process.env.MCP_DEBUG = 'true';
+      process.env.DEBUG = 'true';
       vi.resetModules();
       const { debugWarn } = await import('./logger.js');
 
       debugWarn('test.context', new Error('test error'));
 
       expect(consoleWarnSpy).toHaveBeenCalledWith(
-        expect.stringContaining('[MCP:test.context]'),
+        expect.stringContaining('[Server:test.context] test error'),
       );
     });
 
     it('extracts error message from Error objects', async () => {
-      process.env.MCP_DEBUG = 'true';
+      process.env.DEBUG = 'true';
       vi.resetModules();
       const { debugWarn } = await import('./logger.js');
 
@@ -40,7 +40,7 @@ describe('debugWarn', () => {
     });
 
     it('handles string error messages', async () => {
-      process.env.MCP_DEBUG = 'true';
+      process.env.DEBUG = 'true';
       vi.resetModules();
       const { debugWarn } = await import('./logger.js');
 
@@ -52,7 +52,7 @@ describe('debugWarn', () => {
     });
 
     it('handles unknown error types', async () => {
-      process.env.MCP_DEBUG = 'true';
+      process.env.DEBUG = 'true';
       vi.resetModules();
       const { debugWarn } = await import('./logger.js');
 
@@ -62,9 +62,9 @@ describe('debugWarn', () => {
     });
   });
 
-  describe('when MCP_DEBUG is false or unset', () => {
+  describe('when DEBUG is false or unset', () => {
     it('does not log anything', async () => {
-      delete process.env.MCP_DEBUG;
+      delete process.env.DEBUG;
       vi.resetModules();
       const { debugWarn } = await import('./logger.js');
 
@@ -73,8 +73,8 @@ describe('debugWarn', () => {
       expect(consoleWarnSpy).not.toHaveBeenCalled();
     });
 
-    it('does not log when MCP_DEBUG is empty string', async () => {
-      process.env.MCP_DEBUG = '';
+    it('does not log when DEBUG is empty string', async () => {
+      process.env.DEBUG = '';
       vi.resetModules();
       const { debugWarn } = await import('./logger.js');
 
diff --git a/src/mcp-server/utils/logger.ts b/src/utils/logger.ts
similarity index 56%
rename from src/mcp-server/utils/logger.ts
rename to src/utils/logger.ts
index f363068..0f1c414 100644
--- a/src/mcp-server/utils/logger.ts
+++ b/src/utils/logger.ts
@@ -1,16 +1,16 @@
 import { extractErrorMessage } from './errors.js';
 
 /**
- * Debug logging for MCP server operations.
- * Enabled via MCP_DEBUG=true environment variable.
+ * Debug logging for server operations.
+ * Enabled via DEBUG=true environment variable.
  *
- * By default, logging is disabled to avoid polluting MCP protocol stdout.
+ * By default, logging is disabled to avoid noise in HTTP daemon logs.
  */
 
-const DEBUG = process.env.MCP_DEBUG === 'true';
+const DEBUG = process.env.DEBUG === 'true';
 
 /**
- * Log a debug warning message. Only outputs when MCP_DEBUG=true.
+ * Log a debug warning message. Only outputs when DEBUG=true.
  * Use this for caught errors that are intentionally suppressed.
  *
  * @param context - A short identifier for where the warning occurred (e.g., "discovery.collectTestIds")
@@ -19,6 +19,6 @@ const DEBUG = process.env.MCP_DEBUG === 'true';
 export function debugWarn(context: string, error: unknown): void {
   if (DEBUG) {
     const message = extractErrorMessage(error);
-    console.warn(`[MCP:${context}] ${message}`);
+    console.warn(`[Server:${context}] ${message}`);
   }
 }
diff --git a/src/mcp-server/utils/time.test.ts b/src/utils/time.test.ts
similarity index 100%
rename from src/mcp-server/utils/time.test.ts
rename to src/utils/time.test.ts
diff --git a/src/mcp-server/utils/time.ts b/src/utils/time.ts
similarity index 100%
rename from src/mcp-server/utils/time.ts
rename to src/utils/time.ts
diff --git a/src/mcp-server/schemas.test.ts b/src/validation/schemas.test.ts
similarity index 86%
rename from src/mcp-server/schemas.test.ts
rename to src/validation/schemas.test.ts
index e2d915c..f38417a 100644
--- a/src/mcp-server/schemas.test.ts
+++ b/src/validation/schemas.test.ts
@@ -13,6 +13,7 @@ import {
   switchToTabInputSchema,
   closeTabInputSchema,
   clipboardInputSchema,
+  navigateInputSchema,
 } from './schemas.js';
 
 describe('switchToTabInputSchema', () => {
@@ -278,3 +279,47 @@ describe('clipboardInputSchema', () => {
     });
   });
 });
+
+describe('navigateInputSchema', () => {
+  describe('refine validation: url required when screen is "url"', () => {
+    it('passes with screen "home"', () => {
+      const input = { screen: 'home' as const };
+      const result = navigateInputSchema.safeParse(input);
+
+      expect(result.success).toBe(true);
+    });
+
+    it('passes with screen "settings"', () => {
+      const input = { screen: 'settings' as const };
+      const result = navigateInputSchema.safeParse(input);
+
+      expect(result.success).toBe(true);
+    });
+
+    it('passes with screen "url" and url provided', () => {
+      const input = { screen: 'url' as const, url: 'https://example.com' };
+      const result = navigateInputSchema.safeParse(input);
+
+      expect(result.success).toBe(true);
+    });
+
+    it('fails with screen "url" and no url', () => {
+      const input = { screen: 'url' as const };
+      const result = navigateInputSchema.safeParse(input);
+
+      expect(result.success).toBe(false);
+      if (!result.success) {
+        expect(result.error.issues[0].message).toBe(
+          'url is required when screen is "url"',
+        );
+      }
+    });
+
+    it('fails with screen "url" and empty url', () => {
+      const input = { screen: 'url' as const, url: '' };
+      const result = navigateInputSchema.safeParse(input);
+
+      expect(result.success).toBe(false);
+    });
+  });
+});
diff --git a/src/mcp-server/schemas.ts b/src/validation/schemas.ts
similarity index 88%
rename from src/mcp-server/schemas.ts
rename to src/validation/schemas.ts
index 489e6bf..5269cd4 100644
--- a/src/mcp-server/schemas.ts
+++ b/src/validation/schemas.ts
@@ -1,6 +1,6 @@
 import { z } from 'zod';
 
-import { SMART_CONTRACT_NAMES, HARDFORKS } from './types/seeding.js';
+import { SMART_CONTRACT_NAMES, HARDFORKS } from '../tools/types/seeding.js';
 
 export const a11yRefPattern = z
   .string()
@@ -90,6 +90,14 @@ export const launchInputSchema = z.object({
     .boolean()
     .default(true)
     .describe('Automatically run build if extension is not found'),
+  context: z
+    .enum(['e2e', 'prod'])
+    .describe(
+      'Environment context to use for this session. ' +
+        'Sets the context before launching so you can start in prod mode directly: ' +
+        'mm launch --context prod --state onboarding',
+    )
+    .optional(),
   stateMode: z
     .enum(['default', 'onboarding', 'custom'])
     .default('default')
@@ -159,6 +167,10 @@ export const launchInputSchema = z.object({
     .array(z.enum(smartContractNames))
     .describe('Smart contracts to deploy on launch (before extension loads)')
     .optional(),
+  force: z
+    .boolean()
+    .default(false)
+    .describe('Force replace an existing active session (runs cleanup first)'),
 });
 
 export const cleanupInputSchema = z.object({
@@ -241,7 +253,11 @@ export const describeScreenInputSchema = z.object({
 });
 
 export const screenshotInputSchema = z.object({
-  name: z.string().min(1).describe('Screenshot filename (without extension)'),
+  name: z
+    .string()
+    .min(1)
+    .describe('Screenshot filename (without extension)')
+    .optional(),
   fullPage: z
     .boolean()
     .default(true)
@@ -257,6 +273,29 @@ export const screenshotInputSchema = z.object({
     .describe('Include base64-encoded image in response'),
 });
 
+export const withinTargetSchema = z
+  .object({
+    a11yRef: a11yRefPattern.optional(),
+    testId: z.string().min(1).optional(),
+    selector: z.string().min(1).optional(),
+  })
+  .refine(
+    (data) => {
+      const provided = [data.a11yRef, data.testId, data.selector].filter(
+        Boolean,
+      );
+      return provided.length === 1;
+    },
+    {
+      message:
+        'Exactly one of a11yRef, testId, or selector must be provided in within',
+    },
+  )
+  .describe(
+    'Scope the target search within a parent element. ' +
+      'Accepts the same targeting options (a11yRef, testId, or selector).',
+  );
+
 export const clickInputSchema = targetSelectionSchema.and(
   z.object({
     timeoutMs: z
@@ -266,6 +305,7 @@ export const clickInputSchema = targetSelectionSchema.and(
       .max(60000)
       .default(15000)
       .describe('Timeout to wait for element to become visible'),
+    within: withinTargetSchema.optional(),
   }),
 );
 
@@ -279,6 +319,7 @@ export const typeInputSchema = targetSelectionSchema.and(
       .max(60000)
       .default(15000)
       .describe('Timeout to wait for element to become visible'),
+    within: withinTargetSchema.optional(),
   }),
 );
 
@@ -291,6 +332,20 @@ export const waitForInputSchema = targetSelectionSchema.and(
       .max(120000)
       .default(15000)
       .describe('Timeout to wait for element'),
+    within: withinTargetSchema.optional(),
+  }),
+);
+
+export const getTextInputSchema = targetSelectionSchema.and(
+  z.object({
+    timeoutMs: z
+      .number()
+      .int()
+      .min(0)
+      .max(60000)
+      .default(15000)
+      .describe('Timeout to wait for element to become visible'),
+    within: withinTargetSchema.optional(),
   }),
 );
 
@@ -441,6 +496,16 @@ export const runStepsInputSchema = z.object({
       'When to include observations in results: ' +
         'none = never (fastest), failures = only for failed steps, all = always',
     ),
+  batchTimeoutMs: z
+    .number()
+    .int()
+    .min(1000)
+    .max(300_000)
+    .describe(
+      'Overall timeout for the batch in milliseconds. ' +
+        'When exceeded, remaining steps are marked as skipped and partial results are returned.',
+    )
+    .optional(),
 });
 
 export const setContextInputSchema = z.object({
@@ -500,6 +565,7 @@ export const toolSchemas = {
   click: clickInputSchema,
   type: typeInputSchema,
   wait_for: waitForInputSchema,
+  get_text: getTextInputSchema,
   knowledge_last: knowledgeLastInputSchema,
   knowledge_search: knowledgeSearchInputSchema,
   knowledge_summarize: knowledgeSummarizeInputSchema,
@@ -533,6 +599,7 @@ export type ScreenshotInputZ = z.infer<typeof screenshotInputSchema>;
 export type ClickInputZ = z.infer<typeof clickInputSchema>;
 export type TypeInputZ = z.infer<typeof typeInputSchema>;
 export type WaitForInputZ = z.infer<typeof waitForInputSchema>;
+export type GetTextInputZ = z.infer<typeof getTextInputSchema>;
 export type KnowledgeLastInputZ = z.infer<typeof knowledgeLastInputSchema>;
 export type KnowledgeSearchInputZ = z.infer<typeof knowledgeSearchInputSchema>;
 export type KnowledgeSummarizeInputZ = z.infer<
diff --git a/vitest.config.mts b/vitest.config.mts
index 0470a62..004999a 100644
--- a/vitest.config.mts
+++ b/vitest.config.mts
@@ -27,7 +27,7 @@ export default defineConfig({
 
       // The files to exclude from the coverage report. Vitest excludes test
       // files by default, but not `test-d.ts` files.
-      exclude: ['src/**/*.test-d.ts', 'src/mcp-server/test-utils/'],
+      exclude: ['src/**/*.test-d.ts', 'src/tools/test-utils/'],
 
       // Coverage thresholds. If the coverage is below these thresholds, the
       // test will fail.
@@ -35,10 +35,10 @@ export default defineConfig({
         // Auto-update the coverage thresholds when running locally.
         // Disabled in CI to prevent non-deterministic config changes.
         autoUpdate: !process.env.CI,
-        branches: 87.38,
-        functions: 92.98,
-        lines: 94.67,
-        statements: 94.46,
+        branches: 88.46,
+        functions: 91.66,
+        lines: 94.51,
+        statements: 94.26,
       },
     },
 
diff --git a/yarn.lock b/yarn.lock
index dfdade7..4ecedee 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -727,15 +727,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"@hono/node-server@npm:^1.19.9":
-  version: 1.19.9
-  resolution: "@hono/node-server@npm:1.19.9"
-  peerDependencies:
-    hono: ^4
-  checksum: 10/d4915c2e736ee1e3934b5538cde92b19914dc71346340528a04e4c7219afc7367965080cd1a5291ac9cbda7b0780b89b6ca93472a9418aa105d6d1183033dc8a
-  languageName: node
-  linkType: hard
-
 "@humanfs/core@npm:^0.19.1":
   version: 0.19.1
   resolution: "@humanfs/core@npm:0.19.1"
@@ -930,14 +921,15 @@ __metadata:
     "@metamask/eslint-config-nodejs": "npm:^15.0.0"
     "@metamask/eslint-config-typescript": "npm:^15.0.0"
     "@metamask/eslint-config-vitest": "npm:^15.0.0"
-    "@modelcontextprotocol/sdk": "npm:^1.26.0"
     "@playwright/test": "npm:^1.49.0"
     "@ts-bridge/cli": "npm:^0.6.3"
+    "@types/express": "npm:^5.0.6"
     "@types/node": "npm:^20.0.0"
     "@typescript-eslint/utils": "npm:^8.6.0"
     "@vitest/coverage-istanbul": "npm:^3.0.7"
     "@vitest/eslint-plugin": "npm:^1.1.4"
     "@yarnpkg/types": "npm:^4.0.0-rc.52"
+    cosmiconfig: "npm:^9.0.0"
     depcheck: "npm:^1.4.3"
     eslint: "npm:^9.11.0"
     eslint-config-prettier: "npm:^9.1.0"
@@ -947,6 +939,7 @@ __metadata:
     eslint-plugin-n: "npm:^17.10.3"
     eslint-plugin-prettier: "npm:^5.2.1"
     eslint-plugin-promise: "npm:^7.1.0"
+    express: "npm:^5.2.1"
     playwright: "npm:^1.49.0"
     prettier: "npm:^3.3.3"
     prettier-plugin-packagejson: "npm:^2.5.8"
@@ -960,6 +953,8 @@ __metadata:
   peerDependencies:
     "@playwright/test": ^1.49.0
     playwright: ^1.49.0
+  bin:
+    mm: ./dist/cli/mm.cjs
   languageName: unknown
   linkType: soft
 
@@ -1026,39 +1021,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"@modelcontextprotocol/sdk@npm:^1.26.0":
-  version: 1.26.0
-  resolution: "@modelcontextprotocol/sdk@npm:1.26.0"
-  dependencies:
-    "@hono/node-server": "npm:^1.19.9"
-    ajv: "npm:^8.17.1"
-    ajv-formats: "npm:^3.0.1"
-    content-type: "npm:^1.0.5"
-    cors: "npm:^2.8.5"
-    cross-spawn: "npm:^7.0.5"
-    eventsource: "npm:^3.0.2"
-    eventsource-parser: "npm:^3.0.0"
-    express: "npm:^5.2.1"
-    express-rate-limit: "npm:^8.2.1"
-    hono: "npm:^4.11.4"
-    jose: "npm:^6.1.3"
-    json-schema-typed: "npm:^8.0.2"
-    pkce-challenge: "npm:^5.0.0"
-    raw-body: "npm:^3.0.0"
-    zod: "npm:^3.25 || ^4.0"
-    zod-to-json-schema: "npm:^3.25.1"
-  peerDependencies:
-    "@cfworker/json-schema": ^4.1.1
-    zod: ^3.25 || ^4.0
-  peerDependenciesMeta:
-    "@cfworker/json-schema":
-      optional: true
-    zod:
-      optional: false
-  checksum: 10/a206b2a4d61a23be8b8f4c886528dd9348d11b17ce36013b350edf5c082b1c1f07941d52ea098f721daf3828085b6f6276bb844c484a0e9913edbc028517a3d5
-  languageName: node
-  linkType: hard
-
 "@napi-rs/wasm-runtime@npm:^0.2.11":
   version: 0.2.12
   resolution: "@napi-rs/wasm-runtime@npm:0.2.12"
@@ -1623,6 +1585,16 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/body-parser@npm:*":
+  version: 1.19.6
+  resolution: "@types/body-parser@npm:1.19.6"
+  dependencies:
+    "@types/connect": "npm:*"
+    "@types/node": "npm:*"
+  checksum: 10/33041e88eae00af2cfa0827e951e5f1751eafab2a8b6fce06cd89ef368a988907996436b1325180edaeddd1c0c7d0d0d4c20a6c9ff294a91e0039a9db9e9b658
+  languageName: node
+  linkType: hard
+
 "@types/chai@npm:^5.2.2":
   version: 5.2.3
   resolution: "@types/chai@npm:5.2.3"
@@ -1633,6 +1605,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/connect@npm:*":
+  version: 3.4.38
+  resolution: "@types/connect@npm:3.4.38"
+  dependencies:
+    "@types/node": "npm:*"
+  checksum: 10/7eb1bc5342a9604facd57598a6c62621e244822442976c443efb84ff745246b10d06e8b309b6e80130026a396f19bf6793b7cecd7380169f369dac3bfc46fb99
+  languageName: node
+  linkType: hard
+
 "@types/deep-eql@npm:*":
   version: 4.0.2
   resolution: "@types/deep-eql@npm:4.0.2"
@@ -1647,6 +1628,29 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/express-serve-static-core@npm:^5.0.0":
+  version: 5.1.1
+  resolution: "@types/express-serve-static-core@npm:5.1.1"
+  dependencies:
+    "@types/node": "npm:*"
+    "@types/qs": "npm:*"
+    "@types/range-parser": "npm:*"
+    "@types/send": "npm:*"
+  checksum: 10/7f3d8cf7e68764c9f3e8f6a12825b69ccf5287347fc1c20b29803d4f08a4abc1153ae11d7258852c61aad50f62ef72d4c1b9c97092b0a90462c3dddec2f6026c
+  languageName: node
+  linkType: hard
+
+"@types/express@npm:^5.0.6":
+  version: 5.0.6
+  resolution: "@types/express@npm:5.0.6"
+  dependencies:
+    "@types/body-parser": "npm:*"
+    "@types/express-serve-static-core": "npm:^5.0.0"
+    "@types/serve-static": "npm:^2"
+  checksum: 10/da2cc3de1b1a4d7f20ed3fb6f0a8ee08e99feb3c2eb5a8d643db77017d8d0e70fee9e95da38a73f51bcdf5eda3bb6435073c0271dc04fb16fda92e55daf911fa
+  languageName: node
+  linkType: hard
+
 "@types/hast@npm:^3.0.0, @types/hast@npm:^3.0.4":
   version: 3.0.4
   resolution: "@types/hast@npm:3.0.4"
@@ -1656,6 +1660,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/http-errors@npm:*":
+  version: 2.0.5
+  resolution: "@types/http-errors@npm:2.0.5"
+  checksum: 10/a88da669366bc483e8f3b3eb3d34ada5f8d13eeeef851b1204d77e2ba6fc42aba4566d877cca5c095204a3f4349b87fe397e3e21288837bdd945dd514120755b
+  languageName: node
+  linkType: hard
+
 "@types/json-schema@npm:^7.0.15":
   version: 7.0.15
   resolution: "@types/json-schema@npm:7.0.15"
@@ -1679,6 +1690,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/node@npm:*":
+  version: 25.5.2
+  resolution: "@types/node@npm:25.5.2"
+  dependencies:
+    undici-types: "npm:~7.18.0"
+  checksum: 10/11782030f910ecf600cd537791980bd8b68496570ecd633d512d713b5b8a16ea3740fce85c82d0593305f809a7c205d7e86c07f179063fc98f014a7f9b013166
+  languageName: node
+  linkType: hard
+
 "@types/node@npm:^20.0.0":
   version: 20.19.31
   resolution: "@types/node@npm:20.19.31"
@@ -1695,6 +1715,39 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/qs@npm:*":
+  version: 6.15.0
+  resolution: "@types/qs@npm:6.15.0"
+  checksum: 10/871162881f1c83e61d0c8c243c65549be5dddf33a6911f3324edeebd4087207b1174644da9a3afaa20cf494c5288d2a1ece09e10e4822f755339f14a05c339ea
+  languageName: node
+  linkType: hard
+
+"@types/range-parser@npm:*":
+  version: 1.2.7
+  resolution: "@types/range-parser@npm:1.2.7"
+  checksum: 10/95640233b689dfbd85b8c6ee268812a732cf36d5affead89e806fe30da9a430767af8ef2cd661024fd97e19d61f3dec75af2df5e80ec3bea000019ab7028629a
+  languageName: node
+  linkType: hard
+
+"@types/send@npm:*":
+  version: 1.2.1
+  resolution: "@types/send@npm:1.2.1"
+  dependencies:
+    "@types/node": "npm:*"
+  checksum: 10/81ef5790037ba1d2d458392e4241501f0f8b4838cc8797e169e179e099410e12069ec68e8dbd39211cb097c4a9b1ff1682dbcea897ab4ce21dad93438b862d27
+  languageName: node
+  linkType: hard
+
+"@types/serve-static@npm:^2":
+  version: 2.2.0
+  resolution: "@types/serve-static@npm:2.2.0"
+  dependencies:
+    "@types/http-errors": "npm:*"
+    "@types/node": "npm:*"
+  checksum: 10/f2bad1304c7d0d3b7221faff3e490c40129d3803f4fb1b2fb84f31f561071c5e6a4b876c41bbbe82d5645034eea936e946bcaaf993dac1093ce68b56effad6e0
+  languageName: node
+  linkType: hard
+
 "@types/unist@npm:*, @types/unist@npm:^3.0.0":
   version: 3.0.3
   resolution: "@types/unist@npm:3.0.3"
@@ -2218,20 +2271,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"ajv-formats@npm:^3.0.1":
-  version: 3.0.1
-  resolution: "ajv-formats@npm:3.0.1"
-  dependencies:
-    ajv: "npm:^8.0.0"
-  peerDependencies:
-    ajv: ^8.0.0
-  peerDependenciesMeta:
-    ajv:
-      optional: true
-  checksum: 10/5679b9f9ced9d0213a202a37f3aa91efcffe59a6de1a6e3da5c873344d3c161820a1f11cc29899661fee36271fd2895dd3851b6461c902a752ad661d1c1e8722
-  languageName: node
-  linkType: hard
-
 "ajv@npm:^6.12.4":
   version: 6.12.6
   resolution: "ajv@npm:6.12.6"
@@ -2244,18 +2283,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"ajv@npm:^8.0.0, ajv@npm:^8.17.1":
-  version: 8.17.1
-  resolution: "ajv@npm:8.17.1"
-  dependencies:
-    fast-deep-equal: "npm:^3.1.3"
-    fast-uri: "npm:^3.0.1"
-    json-schema-traverse: "npm:^1.0.0"
-    require-from-string: "npm:^2.0.2"
-  checksum: 10/ee3c62162c953e91986c838f004132b6a253d700f1e51253b99791e2dbfdb39161bc950ebdc2f156f8568035bb5ed8be7bd78289cd9ecbf3381fe8f5b82e3f33
-  languageName: node
-  linkType: hard
-
 "ansi-escapes@npm:^7.0.0":
   version: 7.2.0
   resolution: "ansi-escapes@npm:7.2.0"
@@ -2773,16 +2800,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"cors@npm:^2.8.5":
-  version: 2.8.6
-  resolution: "cors@npm:2.8.6"
-  dependencies:
-    object-assign: "npm:^4"
-    vary: "npm:^1"
-  checksum: 10/aa7174305b21ceb90f9c84f4eaa32f04432d333addbfdc0d1eb7310393c48902e5364aada5ac2f5d054528d63b3179238444475426fcb74e1e345077de485727
-  languageName: node
-  linkType: hard
-
 "cosmiconfig@npm:^7.1.0":
   version: 7.1.0
   resolution: "cosmiconfig@npm:7.1.0"
@@ -2796,6 +2813,23 @@ __metadata:
   languageName: node
   linkType: hard
 
+"cosmiconfig@npm:^9.0.0":
+  version: 9.0.1
+  resolution: "cosmiconfig@npm:9.0.1"
+  dependencies:
+    env-paths: "npm:^2.2.1"
+    import-fresh: "npm:^3.3.0"
+    js-yaml: "npm:^4.1.0"
+    parse-json: "npm:^5.2.0"
+  peerDependencies:
+    typescript: ">=4.9.5"
+  peerDependenciesMeta:
+    typescript:
+      optional: true
+  checksum: 10/89fcac84d062f0710091bb2d6a6175bcde22f5448877db9c43429694408191d3d4e215193b3ac4d54f7f89ef188d55cd481c7a2295b0dc572e65b528bf6fec01
+  languageName: node
+  linkType: hard
+
 "create-require@npm:^1.1.0":
   version: 1.1.1
   resolution: "create-require@npm:1.1.1"
@@ -2803,7 +2837,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"cross-spawn@npm:^7.0.3, cross-spawn@npm:^7.0.5, cross-spawn@npm:^7.0.6":
+"cross-spawn@npm:^7.0.3, cross-spawn@npm:^7.0.6":
   version: 7.0.6
   resolution: "cross-spawn@npm:7.0.6"
   dependencies:
@@ -3045,7 +3079,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"env-paths@npm:^2.2.0":
+"env-paths@npm:^2.2.0, env-paths@npm:^2.2.1":
   version: 2.2.1
   resolution: "env-paths@npm:2.2.1"
   checksum: 10/65b5df55a8bab92229ab2b40dad3b387fad24613263d103a97f91c9fe43ceb21965cd3392b1ccb5d77088021e525c4e0481adb309625d0cb94ade1d1fb8dc17e
@@ -3623,22 +3657,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"eventsource-parser@npm:^3.0.0, eventsource-parser@npm:^3.0.1":
-  version: 3.0.6
-  resolution: "eventsource-parser@npm:3.0.6"
-  checksum: 10/febf7058b9c2168ecbb33e92711a1646e06bd1568f60b6eb6a01a8bf9f8fcd29cc8320d57247059cacf657a296280159f21306d2e3ff33309a9552b2ef889387
-  languageName: node
-  linkType: hard
-
-"eventsource@npm:^3.0.2":
-  version: 3.0.7
-  resolution: "eventsource@npm:3.0.7"
-  dependencies:
-    eventsource-parser: "npm:^3.0.1"
-  checksum: 10/e034915bc97068d1d38617951afd798e6776d6a3a78e36a7569c235b177c7afc2625c9fe82656f7341ab72c7eeecb3fd507b7f88e9328f2448872ff9c4742bb6
-  languageName: node
-  linkType: hard
-
 "execa@npm:^5.1.1":
   version: 5.1.1
   resolution: "execa@npm:5.1.1"
@@ -3679,17 +3697,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"express-rate-limit@npm:^8.2.1":
-  version: 8.2.1
-  resolution: "express-rate-limit@npm:8.2.1"
-  dependencies:
-    ip-address: "npm:10.0.1"
-  peerDependencies:
-    express: ">= 4.11"
-  checksum: 10/7cbf70df2e88e590e463d2d8f93380775b2ea181d97f2c50c2ff9f2c666c247f83109a852b21d9c99ccc5762119101f281f54a27252a2f1a0a918be6d71f955b
-  languageName: node
-  linkType: hard
-
 "express@npm:^5.2.1":
   version: 5.2.1
   resolution: "express@npm:5.2.1"
@@ -3754,13 +3761,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"fast-uri@npm:^3.0.1":
-  version: 3.1.0
-  resolution: "fast-uri@npm:3.1.0"
-  checksum: 10/818b2c96dc913bcf8511d844c3d2420e2c70b325c0653633f51821e4e29013c2015387944435cd0ef5322c36c9beecc31e44f71b257aeb8e0b333c1d62bb17c2
-  languageName: node
-  linkType: hard
-
 "fdir@npm:^6.4.4, fdir@npm:^6.5.0":
   version: 6.5.0
   resolution: "fdir@npm:6.5.0"
@@ -4166,13 +4166,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"hono@npm:^4.11.4":
-  version: 4.11.7
-  resolution: "hono@npm:4.11.7"
-  checksum: 10/16f5a715f70430bd4050b250207adf7c567774c1d91386d5454577fbc191fc4a50b912628845ce8392fae0e3fd9f364a947412961e3747a9f0b2f714790b738e
-  languageName: node
-  linkType: hard
-
 "hosted-git-info@npm:^9.0.0":
   version: 9.0.2
   resolution: "hosted-git-info@npm:9.0.2"
@@ -4275,7 +4268,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"import-fresh@npm:^3.2.1":
+"import-fresh@npm:^3.2.1, import-fresh@npm:^3.3.0":
   version: 3.3.1
   resolution: "import-fresh@npm:3.3.1"
   dependencies:
@@ -4313,13 +4306,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"ip-address@npm:10.0.1":
-  version: 10.0.1
-  resolution: "ip-address@npm:10.0.1"
-  checksum: 10/09731acda32cd8e14c46830c137e7e5940f47b36d63ffb87c737331270287d631cf25aa95570907a67d3f919fdb25f4470c404eda21e62f22e0a55927f4dd0fb
-  languageName: node
-  linkType: hard
-
 "ip-address@npm:^10.0.1":
   version: 10.1.0
   resolution: "ip-address@npm:10.1.0"
@@ -4496,13 +4482,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"jose@npm:^6.1.3":
-  version: 6.1.3
-  resolution: "jose@npm:6.1.3"
-  checksum: 10/9626c51e8c3792b505e954f3094698c182208617b62dfb27269230f31e57560b083985ed8128b8a9753aa92daf18d3a2341cc826d149503f14569abe87d42389
-  languageName: node
-  linkType: hard
-
 "js-tokens@npm:^4.0.0":
   version: 4.0.0
   resolution: "js-tokens@npm:4.0.0"
@@ -4529,7 +4508,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"js-yaml@npm:^4.1.1":
+"js-yaml@npm:^4.1.0, js-yaml@npm:^4.1.1":
   version: 4.1.1
   resolution: "js-yaml@npm:4.1.1"
   dependencies:
@@ -4584,20 +4563,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"json-schema-traverse@npm:^1.0.0":
-  version: 1.0.0
-  resolution: "json-schema-traverse@npm:1.0.0"
-  checksum: 10/02f2f466cdb0362558b2f1fd5e15cce82ef55d60cd7f8fa828cf35ba74330f8d767fcae5c5c2adb7851fa811766c694b9405810879bc4e1ddd78a7c0e03658ad
-  languageName: node
-  linkType: hard
-
-"json-schema-typed@npm:^8.0.2":
-  version: 8.0.2
-  resolution: "json-schema-typed@npm:8.0.2"
-  checksum: 10/fa866d1fe91e3a94aa4fe007861475cd03dcaf47b719861cab171ef2f8598478007c634d29ae45de94ee34ddff4e13414c63ea5ff06c5b868b613142c699d511
-  languageName: node
-  linkType: hard
-
 "json-stable-stringify-without-jsonify@npm:^1.0.1":
   version: 1.0.1
   resolution: "json-stable-stringify-without-jsonify@npm:1.0.1"
@@ -5214,7 +5179,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"object-assign@npm:^4, object-assign@npm:^4.0.1":
+"object-assign@npm:^4.0.1":
   version: 4.1.1
   resolution: "object-assign@npm:4.1.1"
   checksum: 10/fcc6e4ea8c7fe48abfbb552578b1c53e0d194086e2e6bbbf59e0a536381a292f39943c6e9628af05b5528aa5e3318bb30d6b2e53cadaf5b8fe9e12c4b69af23f
@@ -5330,7 +5295,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"parse-json@npm:^5.0.0":
+"parse-json@npm:^5.0.0, parse-json@npm:^5.2.0":
   version: 5.2.0
   resolution: "parse-json@npm:5.2.0"
   dependencies:
@@ -5476,13 +5441,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"pkce-challenge@npm:^5.0.0":
-  version: 5.0.1
-  resolution: "pkce-challenge@npm:5.0.1"
-  checksum: 10/51d11f68d5a78617cfb2e9c2706dadcc2cbe55ffb55b21d42a6ed848ac5159db2657bf6c966a5a414119aa839ceb64240afea35e9e1c06946b57606ed0b43789
-  languageName: node
-  linkType: hard
-
 "playwright-core@npm:1.58.1":
   version: 1.58.1
   resolution: "playwright-core@npm:1.58.1"
@@ -5630,7 +5588,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"raw-body@npm:^3.0.0, raw-body@npm:^3.0.1":
+"raw-body@npm:^3.0.1":
   version: 3.0.2
   resolution: "raw-body@npm:3.0.2"
   dependencies:
@@ -5691,13 +5649,6 @@ __metadata:
   languageName: node
   linkType: hard
 
-"require-from-string@npm:^2.0.2":
-  version: 2.0.2
-  resolution: "require-from-string@npm:2.0.2"
-  checksum: 10/839a3a890102a658f4cb3e7b2aa13a1f80a3a976b512020c3d1efc418491c48a886b6e481ea56afc6c4cb5eef678f23b2a4e70575e7534eccadf5e30ed2e56eb
-  languageName: node
-  linkType: hard
-
 "require-package-name@npm:^2.0.1":
   version: 2.0.1
   resolution: "require-package-name@npm:2.0.1"
@@ -6649,6 +6600,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"undici-types@npm:~7.18.0":
+  version: 7.18.2
+  resolution: "undici-types@npm:7.18.2"
+  checksum: 10/e61a5918f624d68420c3ca9d301e9f15b61cba6e97be39fe2ce266dd6151e4afe424d679372638826cb506be33952774e0424141200111a9857e464216c009af
+  languageName: node
+  linkType: hard
+
 "unicode-emoji-modifier-base@npm:^1.0.0":
   version: 1.0.0
   resolution: "unicode-emoji-modifier-base@npm:1.0.0"
@@ -6857,7 +6815,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"vary@npm:^1, vary@npm:^1.1.2":
+"vary@npm:^1.1.2":
   version: 1.1.2
   resolution: "vary@npm:1.1.2"
   checksum: 10/31389debef15a480849b8331b220782230b9815a8e0dbb7b9a8369559aed2e9a7800cd904d4371ea74f4c3527db456dc8e7ac5befce5f0d289014dbdf47b2242
@@ -7258,16 +7216,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"zod-to-json-schema@npm:^3.25.1":
-  version: 3.25.1
-  resolution: "zod-to-json-schema@npm:3.25.1"
-  peerDependencies:
-    zod: ^3.25 || ^4
-  checksum: 10/744dd370f4452c8db120de1475ea4d484a11df884c4636111d630e5e1351b8a7590d99cf14a2b9f21e7906f8b78721d958663a7973a40994e7d28770876674cc
-  languageName: node
-  linkType: hard
-
-"zod@npm:^3.25 || ^4.0, zod@npm:^4.3.5":
+"zod@npm:^4.3.5":
   version: 4.3.6
   resolution: "zod@npm:4.3.6"
   checksum: 10/25fc0f62e01b557b4644bf0b393bbaf47542ab30877c37837ea8caf314a8713d220c7d7fe51f68ffa72f0e1018ddfa34d96f1973d23033f5a2a5a9b6b9d9da01