diff --git a/README.md b/README.md index ef9fbea..a36e491 100644 --- a/README.md +++ b/README.md @@ -1,219 +1,312 @@ # @metamask/client-mcp-core -MCP (Model Context Protocol) server for MetaMask Extension visual testing with LLM agents. +HTTP daemon and CLI architecture for agent-driven browser extension testing with Playwright. ## Overview -This package provides the core MCP server infrastructure for enabling LLM agents to interact with the MetaMask browser extension through Playwright. +This package provides the core infrastructure for enabling LLM agents to interact with browser extensions through Playwright. It ships a persistent HTTP daemon that manages browser lifecycle and a unified `mm` CLI that agents (and developers) use to drive sessions. + +The design is **consumer-agnostic**: the core handles protocol, tooling, and knowledge — consumers provide extension-specific logic by implementing the `ISessionManager` interface and injecting capabilities. + +``` + ┌─────────────────────────────────┐ + │ LLM Agent / Dev │ + └────────────┬────────────────────┘ + │ mm CLI commands + ▼ + ┌─────────────────────────────────┐ + │ mm CLI (src/cli/mm.ts) │ + │ discover / auto-start daemon │ + └────────────┬────────────────────┘ + │ HTTP (127.0.0.1) + ▼ + ┌───────────────────────────────────────────────────────────────────┐ + │ HTTP Daemon (createServer) │ + │ │ + │ ┌──────────┐ ┌──────────────┐ ┌────────────┐ ┌────────────┐ │ + │ │ Routes │ │ RequestQueue │ │ Tool │ │ Knowledge │ │ + │ │ /health │ │ (async mutex)│ │ Registry │ │ Store │ │ + │ │ /status │ │ │ │ 25+ tools │ │ │ │ + │ │ /launch │ └──────────────┘ └─────┬──────┘ └────────────┘ │ + │ │ /cleanup │ │ │ + │ │ /tool/:n │ ▼ │ + │ └──────────┘ ┌──────────────────┐ │ + │ │ ToolContext │ │ + │ │ sessionManager │ │ + │ │ page / refMap │ │ + │ │ workflowContext │ │ + │ │ knowledgeStore │ │ + │ └────────┬─────────┘ │ + └──────────────────────────────────────┼───────────────────────────┘ + │ + 
┌─────────────────────┼─────────────────────┐ + │ ISessionManager │ + │ (consumer implementation) │ + │ │ + │ Session lifecycle Page management │ + │ Extension state A11y reference map │ + │ Navigation Screenshots │ + │ Capabilities (opt) Environment config │ + └─────────────────────┬─────────────────────┘ + │ + ┌─────────────────────┼─────────────────────┐ + │ WorkflowContext │ + │ │ + │ build? fixture? │ + │ chain? contractSeeding? │ + │ stateSnapshot? mockServer? │ + │ config: EnvironmentConfig │ + └─────────────────────┬─────────────────────┘ + │ + ▼ + ┌───────────────────────────────────────────┐ + │ Playwright → Chrome Browser │ + │ Browser Extension │ + └───────────────────────────────────────────┘ +``` ## Requirements -- **Node.js ^20 || ^22 || >=24** (required) -- **TypeScript >=5.0** (for consumer type definitions) -- Playwright `^1.49.0` (peer dependency) +- **Node.js** `^20 || ^22 || >=24` +- **TypeScript** `>=5.0` (for consumer type definitions) +- **Playwright** `^1.49.0` (peer dependency) ## Installation +As a project dependency (the CLI is available via `npx mm` or `yarn mm`): + ```bash yarn add @metamask/client-mcp-core ``` -## Architecture +As a global CLI (puts `mm` directly on your PATH — recommended for LLM agents): + +```bash +npm install -g @metamask/client-mcp-core +``` + +The global CLI can target any project via `--project` or `MM_PROJECT` (see [Project Targeting](#project-targeting)). + +## Getting Started -### High-Level Overview +Consuming this package requires two things: a **daemon entry point** and a **configuration file**. + +### 1. 
Create a daemon entry point + +```typescript +// daemon.ts +import { createServer, allocatePort } from '@metamask/client-mcp-core'; +import { MySessionManager } from './my-session-manager'; +import { createMyContext } from './my-context'; + +const server = createServer({ + sessionManager: new MySessionManager(), + contextFactory: async () => { + // Consumer owns port allocation — use the allocatePort() helper + // or any other strategy that fits your infrastructure. + const anvil = await allocatePort(); + const fixture = await allocatePort(); + await Promise.all([ + new Promise((r) => anvil.server.close(() => r())), + new Promise((r) => fixture.server.close(() => r())), + ]); + + return createMyContext({ + ports: { anvil: anvil.port, fixture: fixture.port }, + }); + }, +}); +server.start().then((state) => { + console.error(`Daemon started on port ${state.port}`); +}); ``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ LLM Agent │ -│ (Claude, GPT, etc.) │ -└─────────────────────────────────────────────────────────────────────────┘ - │ - │ MCP Protocol (stdio) - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ @metamask/client-mcp-core │ -│ │ -│ Core MCP Server + Generic Tools │ -│ - Session management │ -│ - Element interaction (click, type, wait) │ -│ - Discovery (testIds, accessibility tree) │ -│ - Screenshots │ -│ - Knowledge store (cross-session learning) │ -└─────────────────────────────────────────────────────────────────────────┘ - │ - │ Capability Injection - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ MetaMask Extension Provider │ -│ │ -│ - Build capability (yarn build:test) │ -│ - Fixture/state management │ -│ - Anvil blockchain integration │ -│ - Contract seeding │ -└─────────────────────────────────────────────────────────────────────────┘ - │ - │ Playwright - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ Headed 
Chrome Browser │ -│ + MetaMask Extension │ -└─────────────────────────────────────────────────────────────────────────┘ + +### 2. Create a configuration file + +Create `mm-client-cli.config.ts` in your project root: + +```typescript +export default { + daemon: 'path/to/daemon.ts', + runtime: 'tsx', +}; ``` -### Detailed Architecture +The `daemon` field tells the CLI where the daemon entry point lives. The `runtime` field specifies the TypeScript runner (defaults to `tsx`). -The package follows a **capability-based dependency injection** pattern that separates concerns between: +The CLI uses [cosmiconfig](https://github.com/cosmiconfig/cosmiconfig) for config discovery, so you can also use `mm-client-cli.config.js`, `.mm-client-clirc.json`, or other supported formats. -1. **Core MCP Server** - Protocol handling, tool routing, and generic browser interactions -2. **Session Manager Interface** - Abstract contract for extension-specific session management -3. **Capabilities** - Optional features injected by consumer implementations +### 3. Use the CLI +```bash +mm launch # auto-starts daemon, opens browser session +mm describe-screen # get element references +mm click e3 # interact using a11y refs +mm cleanup --shutdown # stop browser and daemon ``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ createMcpServer() │ -│ │ -│ ┌─────────────────────┐ ┌─────────────────────────────────────┐ │ -│ │ Tool Definitions │───▶│ Tool Handlers │ │ -│ │ (mm_click, etc.) 
│ │ (registry.ts + individual tools) │ │ -│ └─────────────────────┘ └──────────────┬──────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ ISessionManager Interface │ │ -│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌───────────┐ │ │ -│ │ │ Page Mgmt │ │ Navigation │ │ Screenshots │ │ A11y Refs │ │ │ -│ │ └─────────────┘ └─────────────┘ └─────────────┘ └───────────┘ │ │ -│ │ ┌──────────────────────────────────────────────────────────┐ │ │ -│ │ │ Optional Capabilities │ │ │ -│ │ │ • BuildCapability • FixtureCapability │ │ │ -│ │ │ • ChainCapability • ContractSeedingCapability │ │ │ -│ │ │ • StateSnapshotCapability │ │ │ -│ │ └──────────────────────────────────────────────────────────┘ │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────┘ - │ - │ setSessionManager() - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ Consumer Implementation (e.g., MetaMask) │ -│ │ -│ class MetaMaskSessionManager implements ISessionManager { │ -│ // Browser context, page tracking, extension-specific logic │ -│ // Capability implementations for build, fixtures, chain, etc. 
│ -│ } │ -└─────────────────────────────────────────────────────────────────────────┘ + +If running from outside the project directory (e.g., a parent folder containing multiple repos): + +```bash +mm --project ./my-extension launch +mm --project ./my-extension describe-screen + +# Or set once via environment variable +export MM_PROJECT=/path/to/my-extension +mm launch ``` -### Core Components +## Core Concepts -| Component | Description | -| --------------------- | -------------------------------------------------------------- | -| `createMcpServer()` | Factory function that creates the MCP server instance | -| `ISessionManager` | Interface that consumers must implement for session management | -| `setSessionManager()` | Injects the consumer's session manager into the core | -| `WorkflowContext` | Container for browser capability and optional capabilities | -| `EnvironmentConfig` | Configuration discriminated by `'e2e'` or `'prod'` mode | +### Daemon Model -### Capability System +The architecture relies on a persistent background HTTP daemon that manages the browser lifecycle: -The package defines several capabilities that consumers can provide. +- **Worktree Isolation**: Each git worktree runs its own daemon instance, tracked via a `.mm-server` state file in the project root. This allows parallel work across branches. +- **Port Allocation**: The daemon allocates its own HTTP port automatically. Sub-service ports (Anvil, fixture server, etc.) are allocated by the consumer's `contextFactory` and reported back via `allocatedPorts`. The `allocatePort()` helper is exported for convenience. +- **Auto-Start**: The daemon starts automatically on `mm launch` if not already running, and shuts down after a period of inactivity (default: 30 minutes). +- **Request Serialization**: A `RequestQueue` (async mutex) ensures only one tool executes at a time, preventing race conditions on shared browser state. +- **Health Checks**: Each daemon generates a unique nonce on startup. 
The CLI verifies daemon identity via `GET /health` to detect stale `.mm-server` files from crashed processes. +- **Logs**: Daemon activity is logged to `.mm-daemon.log`. -#### BuildCapability (Optional) +### Session Manager Interface -Enables the `mm_build` tool. Implement this to allow LLM agents to build the extension from source. +`ISessionManager` is the core abstraction boundary between this package and consumer implementations. Consumers must implement this interface to provide extension-specific browser control. ```typescript -type BuildCapability = { - // Build the extension (e.g., yarn build:test) - build(options?: BuildOptions): Promise; +type ISessionManager = { + // Session Lifecycle + hasActiveSession(): boolean; + getSessionId(): string | undefined; + launch(input: SessionLaunchInput): Promise; + cleanup(): Promise; - // Get path to built extension directory - getExtensionPath(): string; + // Page Management + getPage(): Page; + setActivePage(page: Page): void; + getTrackedPages(): TrackedPage[]; + classifyPageRole(page: Page, extensionId?: string): TabRole; + getContext(): BrowserContext; - // Check if extension is already built - isBuilt(): Promise; -}; + // Extension State + getExtensionState(): Promise; + + // A11y Reference Map + setRefMap(map: Map): void; + getRefMap(): Map; + resolveA11yRef(ref: string): string | undefined; -type BuildOptions = { - buildType?: string; // e.g., "build:test" - force?: boolean; // Force rebuild even if exists + // Navigation + navigateToHome(): Promise; + navigateToSettings(): Promise; + navigateToUrl(url: string): Promise; + navigateToNotification(): Promise; + waitForNotificationPage(timeoutMs: number): Promise; + + // Screenshots + screenshot(options: SessionScreenshotOptions): Promise; + + // Capabilities (optional, extension-specific) + getBuildCapability(): BuildCapability | undefined; + getFixtureCapability(): FixtureCapability | undefined; + getChainCapability(): ChainCapability | undefined; + 
getContractSeedingCapability(): ContractSeedingCapability | undefined; + getStateSnapshotCapability(): StateSnapshotCapability | undefined; + + // Environment + getEnvironmentMode(): EnvironmentMode; + setContext(context: 'e2e' | 'prod', options?: Record): void; + getContextInfo(): { currentContext: 'e2e' | 'prod'; ... }; }; +``` -type BuildResult = { - success: boolean; - extensionPath: string; - durationMs: number; - error?: string; +### Workflow Context & Capabilities + +The `WorkflowContext` aggregates optional capabilities that consumers inject through the `contextFactory`. The tool system checks for capabilities at runtime — tools that depend on missing capabilities return clear errors. + +```typescript +type WorkflowContext = { + build?: BuildCapability; + fixture?: FixtureCapability; + chain?: ChainCapability; + contractSeeding?: ContractSeedingCapability; + stateSnapshot?: StateSnapshotCapability; + mockServer?: MockServerCapability; + config: EnvironmentConfig; + allocatedPorts?: PortMap; // reported to /status and persisted in .mm-server }; ``` ---- +Capabilities are created by the consumer's `contextFactory` function. The factory is responsible for allocating any sub-service ports it needs (the `allocatePort()` helper is exported for convenience): -#### FixtureCapability (Optional) +```typescript +async function createMyContext(options: { + ports: { anvil: number; fixture: number }; +}): Promise { + return { + build: new MyBuildCapability(), + fixture: new MyFixtureCapability(options.ports.fixture), + chain: new MyChainCapability(options.ports.anvil), + allocatedPorts: { + anvil: options.ports.anvil, + fixture: options.ports.fixture, + }, + config: { + environment: 'e2e', + extensionName: 'MyExtension', + defaultPassword: 'test-password', + artifactsDir: './test-artifacts', + defaultChainId: 1337, + ports: { + anvil: options.ports.anvil, + fixtureServer: options.ports.fixture, + }, + }, + }; +} +``` -Enables wallet state management through fixtures. 
Essential for E2E testing where you need reproducible wallet states. +### Capability Reference + +| Capability | Purpose | Enables Tools | +| --------------------------- | --------------------------------------- | --------------------------------------------------------------------------- | +| `BuildCapability` | Build extension from source | `build` | +| `FixtureCapability` | Manage wallet state via fixtures | `launch` (state modes) | +| `ChainCapability` | Local blockchain (Anvil) lifecycle | Chain interactions | +| `ContractSeedingCapability` | Deploy smart contracts to Anvil | `seed_contract`, `seed_contracts`, `get_contract_address`, `list_contracts` | +| `StateSnapshotCapability` | Read extension state and detect screens | `get_state` | +| `MockServerCapability` | HTTP mock server for API stubbing | Mock-dependent tests | + +Each capability interface is defined in `src/capabilities/types.ts`: ```typescript +type BuildCapability = { + build(options?: BuildOptions): Promise; + getExtensionPath(): string; + isBuilt(): Promise; +}; + type FixtureCapability = { - // Start fixture server with given wallet state start(state: WalletState): Promise; - - // Stop fixture server stop(): Promise; - - // Get default pre-onboarded wallet state (25 ETH, unlocked) getDefaultState(): WalletState; - - // Get fresh onboarding state (no wallet configured) getOnboardingState(): WalletState; - - // Resolve a named preset to fixture data resolvePreset(presetName: string): WalletState; }; -type WalletState = { - data: Record; // Extension storage state - meta?: { version: number }; -}; -``` - ---- - -#### ChainCapability (Optional) - -Manages local blockchain (Anvil) for E2E testing. Required for contract interactions. 
- -```typescript type ChainCapability = { - // Start the local Anvil node start(): Promise; - - // Stop the Anvil node stop(): Promise; - - // Check if Anvil is running isRunning(): boolean; - - // Set the port for the Anvil node setPort(port: number): void; }; -``` - ---- -#### ContractSeedingCapability (Optional) - -Enables smart contract deployment tools (`mm_seed_contract`, `mm_seed_contracts`, etc.). - -```typescript type ContractSeedingCapability = { - // Deploy a single contract deployContract( name: string, options?: DeployOptions, ): Promise; - - // Deploy multiple contracts in sequence deployContracts( names: string[], options?: DeployOptions, @@ -221,1264 +314,317 @@ type ContractSeedingCapability = { deployed: ContractDeployment[]; failed: { name: string; error: string }[]; }>; - - // Get deployed contract address by name getContractAddress(name: string): string | null; - - // List all deployed contracts in this session listDeployedContracts(): ContractInfo[]; - - // Get available contract names getAvailableContracts(): string[]; - - // Clear the deployment registry clearRegistry(): void; - - // Initialize the capability (called during session launch) initialize(): void; }; -type DeployOptions = { - hardfork?: string; // EVM hardfork (default: "prague") - deployerOptions?: { - fromAddress?: string; // Impersonate address - fromPrivateKey?: string; // Deploy from specific key - }; -}; -``` - ---- - -#### StateSnapshotCapability (Optional) - -```typescript type StateSnapshotCapability = { - // Get detailed state snapshot getState(page: Page, options: StateOptions): Promise; - - // Detect current screen from page content detectCurrentScreen(page: Page): Promise; }; -type StateOptions = { - extensionId?: string; - chainId?: number; -}; -``` - ---- - -#### MockServerCapability (Optional) - -Enables mock server for API testing scenarios. 
- -```typescript type MockServerCapability = { - // Start the mock server start(): Promise; - - // Stop the mock server stop(): Promise; - - // Check if mock server is running isRunning(): boolean; - - // Get the server instance getServer(): unknown; - - // Get the port the server is running on getPort(): number; }; ``` -## Client Integration - -### How to Consume the Package - -Consumers must: - -1. **Implement `ISessionManager`** - The core interface for session management -2. **Inject the session manager** - Call `setSessionManager()` before starting the server -3. **Start the MCP server** - Call `server.start()` - -### McpServerConfig - -The `createMcpServer()` function accepts a configuration object: +### Tool System -```typescript -export type McpServerConfig = { - name: string; - version: string; - onCleanup?: () => Promise; - logger?: (message: string) => void; -}; -``` - -### Minimal Integration Example +Tools are standalone functions registered in a central `toolRegistry`. Each tool receives a `ToolContext` and returns a `ToolResponse`. ```typescript -import { - createMcpServer, - setSessionManager, - ISessionManager, - type McpServerConfig, -} from '@metamask/client-mcp-core'; - -// 1. Implement the ISessionManager interface -class MyExtensionSessionManager implements ISessionManager { - // ... implement all required methods - // See ISessionManager interface for full contract -} - -// 2. Create and inject your session manager -const sessionManager = new MyExtensionSessionManager(); -setSessionManager(sessionManager); +type ToolFunction = ( + params: TParams, + context: ToolContext, +) => Promise>; -// 3. 
Create and start the MCP server -const config: McpServerConfig = { - name: 'my-extension-mcp', - version: '1.0.0', - onCleanup: async () => { - // Optional cleanup logic - }, +type ToolContext = { + sessionManager: ISessionManager; + page: Page; + refMap: Map; + workflowContext: WorkflowContext; + knowledgeStore: KnowledgeStore; }; - -const server = createMcpServer(config); -await server.start(); ``` -### Full Integration Example - -```typescript -import { - createMcpServer, - setSessionManager, - ISessionManager, - SessionLaunchInput, - SessionLaunchResult, - TrackedPage, - type ExtensionState, - type BuildCapability, - type FixtureCapability, - type ChainCapability, - type ContractSeedingCapability, - type EnvironmentMode, -} from '@metamask/client-mcp-core'; -import type { Page, BrowserContext } from '@playwright/test'; - -class MetaMaskSessionManager implements ISessionManager { - private context?: BrowserContext; - private activePage?: Page; - private extensionId?: string; - private sessionId?: string; - private refMap = new Map(); - - // Capabilities (inject via constructor or lazy-load) - private buildCapability?: BuildCapability; - private fixtureCapability?: FixtureCapability; - private chainCapability?: ChainCapability; - private contractSeedingCapability?: ContractSeedingCapability; - - // Session Lifecycle - hasActiveSession(): boolean { - return this.context !== undefined; - } - - getSessionId(): string | undefined { - return this.sessionId; - } - - async launch(input: SessionLaunchInput): Promise { - // 1. Start local chain if needed - if (this.chainCapability) { - await this.chainCapability.start(); - } - - // 2. Start fixture server if needed - if (this.fixtureCapability && input.stateMode !== 'onboarding') { - const fixture = input.fixture ?? this.fixtureCapability.getDefaultState(); - await this.fixtureCapability.start(fixture); - } - - // 3. Launch browser with extension - // ... Playwright browser launch logic - - // 4. 
Return session info - return { - sessionId: this.sessionId!, - extensionId: this.extensionId!, - state: await this.getExtensionState(), - }; - } - - async cleanup(): Promise { - if (!this.hasActiveSession()) return false; - - // Close browser, stop services - await this.context?.close(); - await this.chainCapability?.stop(); - await this.fixtureCapability?.stop(); - - this.context = undefined; - this.activePage = undefined; - return true; - } - - // Page Management - getPage(): Page { - if (!this.activePage) throw new Error('No active session'); - return this.activePage; - } - - setActivePage(page: Page): void { - this.activePage = page; - } - - getTrackedPages(): TrackedPage[] { - // Return all tracked pages with roles - return []; - } +The daemon routes `POST /tool/:name` requests through the registry, applies Zod validation on inputs, executes the tool through the request queue, and captures observations (extension state, test IDs, a11y snapshot) after each execution. - getContext(): BrowserContext { - if (!this.context) throw new Error('No active session'); - return this.context; - } +**Registered tools:** - // Extension State - async getExtensionState(): Promise { - // Query extension for current state - return { - isLoaded: true, - currentUrl: this.activePage?.url() ?? '', - extensionId: this.extensionId ?? 
'', - isUnlocked: false, - currentScreen: 'unknown', - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - } +| Tool | Description | +| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Lifecycle** | | +| `build` | Triggers an extension build using the configured `BuildCapability`. Accepts build type and force options. | +| `launch` | Launches a new browser session with the configured extension. Supports state modes (`default`, `onboarding`, `custom`), fixture presets, goal/tag metadata, and optional contract seeding on start. | +| `cleanup` | Tears down the active browser session and cleans up all resources (browser, services, fixtures). | +| **Interaction** | | +| `click` | Clicks an element identified by a11y ref, test ID, or CSS selector. Waits for the element to be visible before clicking. Supports `within` to scope the target inside a parent element. | +| `type` | Types text into an input element identified by a11y ref, test ID, or CSS selector. Clears the field first, then sets the new value (uses Playwright's `fill()`). Supports `within` scoping. | +| `wait_for` | Waits for an element to become visible on the page within a configurable timeout. Supports `within` to scope the target inside a parent element. | +| `get_text` | Reads the text content of an element identified by a11y ref, test ID, or CSS selector. Returns the text, target descriptor, and character length. Supports `within` scoping. Categorized as read-only (no observations in response). | +| `clipboard` | Reads from or writes to the system clipboard via Chrome DevTools Protocol. 
Useful for pasting seed phrases or copying addresses. | +| **Navigation** | | +| `navigate` | Navigates the browser to a named screen (`home`, `settings`, `notification`) or an arbitrary URL. | +| `switch_to_tab` | Switches the active page to a tab matching a given role (e.g., `extension`, `dapp`) or URL prefix. | +| `close_tab` | Closes a browser tab matching a given role or URL. Falls back to the extension tab if the active tab is closed. | +| `wait_for_notification` | Waits for the extension notification popup to appear within a timeout. Returns the notification page URL. | +| **Discovery** | | +| `describe_screen` | Captures a comprehensive screen snapshot: extension state, visible test IDs, trimmed a11y tree with refs, optional screenshot, and prior knowledge from historical sessions. | +| `accessibility_snapshot` | Captures a trimmed accessibility tree of the current page with deterministic refs (`e1`, `e2`, ...). Supports scoping to a root CSS selector. | +| `list_testids` | Collects all visible `data-testid` attributes from the current page with text previews and visibility status. | +| **State** | | +| `get_state` | Retrieves the current extension state (URL, screen, network, balance, account) and tracked tab information. | +| `get_context` | Returns the current environment context (`e2e` or `prod`), session status, available capabilities, and whether context switching is allowed. | +| `set_context` | Switches the session environment between `e2e` and `prod` modes. Blocked while a session is active. | +| **Screenshots** | | +| `screenshot` | Captures a screenshot of the current page. Supports naming, full-page capture, scoping to a CSS selector, and optional base64 output. | +| **Knowledge** | | +| `knowledge_last` | Retrieves the N most recent step records from the knowledge store, with optional scope and filter parameters. | +| `knowledge_search` | Searches step records by query string with token-based matching and synonym expansion. 
Scores results by relevance to screen, URL, test IDs, and a11y nodes. | +| `knowledge_summarize` | Generates a recipe-style summary of a session's tool invocations, showing the step sequence with targets and outcomes. | +| `knowledge_sessions` | Lists available knowledge sessions with metadata (goal, flow tags, timestamps), with optional filtering. | +| **Contracts** | | +| `seed_contract` | Deploys a single smart contract to the local Anvil chain by name. Requires `ContractSeedingCapability`. | +| `seed_contracts` | Deploys multiple smart contracts in sequence. Returns both successful deployments and individual failures. | +| `get_contract_address` | Looks up the deployed address of a contract by name from the session's deployment registry. | +| `list_contracts` | Lists all contracts deployed in the current session with addresses and deployment timestamps. | +| **Batching** | | +| `run_steps` | Executes a batch of tool invocations sequentially. Supports `stopOnError` to halt on first failure, `includeObservations` (`'all'`, `'none'`, `'failures'`) to control observations, and `batchTimeoutMs` to set an overall deadline (remaining steps are skipped on timeout). Accepts tool aliases like `navigate_home` / `navigate-home`. Returns per-step results with timing. | - // A11y Reference Map - setRefMap(map: Map): void { - this.refMap = map; - } +### Accessibility References - getRefMap(): Map { - return this.refMap; - } +The core uses Playwright's `ariaSnapshot()` to build a deterministic reference map of interactive elements. Each element gets a short ref like `e1`, `e2`, etc., mapped to an ARIA selector. 
- clearRefMap(): void { - this.refMap.clear(); - } - - resolveA11yRef(ref: string): string | undefined { - return this.refMap.get(ref); - } - - // Navigation - async navigateToHome(): Promise { - // Navigate to extension home page - } +Agents call `describe_screen` to get the current reference map, then use refs for interaction: - async navigateToSettings(): Promise { - // Navigate to extension settings page - } +``` +mm describe-screen → { ..., a11y: [{ ref: "e1", role: "button", name: "Submit" }, ...] } +mm click e1 → clicks the "Submit" button +mm type e3 "hello" → types into the element mapped to e3 +``` - async navigateToUrl(url: string): Promise { - // Open URL in new tab and return the page - return this.activePage!; - } +This accessibility-first approach provides reliable element targeting that survives minor UI changes. - async navigateToNotification(): Promise { - // Navigate to notification page - return this.activePage!; - } +### Knowledge Store - async waitForNotificationPage(timeoutMs: number): Promise { - // Wait for notification popup to appear - return this.activePage!; - } +The `KnowledgeStore` provides cross-session learning by recording every tool execution as a structured step record: - // Screenshots - async screenshot(options: { name: string; fullPage?: boolean }) { - // ... screenshot logic - return { path: '', base64: '', width: 0, height: 0 }; - } - - // Capabilities - getBuildCapability() { - return this.buildCapability; - } - getFixtureCapability() { - return this.fixtureCapability; - } - getChainCapability() { - return this.chainCapability; - } - getContractSeedingCapability() { - return this.contractSeedingCapability; - } - getStateSnapshotCapability() { - return undefined; - } +- **Step Recording**: Each tool invocation captures the tool name, input, outcome, observation (extension state, visible test IDs, a11y nodes), and timing. +- **Session Metadata**: Sessions are tagged with goals, flow tags, and free-form tags for filtering. 
+- **Prior Knowledge**: Before tool execution, the store can generate context from historical sessions — similar steps, suggested actions, and patterns to avoid — based on the current screen state. +- **Search**: Token-based search with synonym expansion across sessions, scored by relevance to screen, URL, test IDs, and a11y nodes. +- **Sensitive Data Handling**: Input text for password fields and other sensitive inputs is automatically redacted. - // Environment - getEnvironmentMode(): EnvironmentMode { - return 'e2e'; - } - - // Required by interface but implementation-specific - classifyPageRole( - page: Page, - ): 'extension' | 'notification' | 'dapp' | 'other' { - return 'extension'; - } - getSessionState() { - return undefined; - } - getSessionMetadata() { - return undefined; - } - - // Context Management - setContext(context: 'e2e' | 'prod', options?: Record): void { - if (this.hasActiveSession()) { - throw new Error('Cannot switch context while session is active'); - } - // Switch environment context and apply optional context-specific config - void options; - } - - getContextInfo() { - return { - currentContext: this.getEnvironmentMode(), - hasActiveSession: this.hasActiveSession(), - sessionId: this.sessionId ?? null, - capabilities: { - available: [ - this.buildCapability && 'build', - this.fixtureCapability && 'fixture', - this.chainCapability && 'chain', - this.contractSeedingCapability && 'contractSeeding', - ].filter(Boolean) as string[], - }, - canSwitchContext: !this.hasActiveSession(), - }; - } -} +Knowledge artifacts are stored on disk at `test-artifacts/llm-knowledge/` organized by session ID. 
-// Bootstrap the server -async function main() { - const sessionManager = new MetaMaskSessionManager(); - setSessionManager(sessionManager); +### Environment Modes - const server = createMcpServer({ - name: 'metamask-mcp', - version: '1.0.0', - }); +The package supports two environment modes via discriminated union configuration: - await server.start(); -} - -main().catch(console.error); -``` - -### Environment Configuration - -The package supports two environment modes: +**E2E Testing** — Full test infrastructure with local chain, fixtures, and contract seeding: ```typescript -// E2E Testing Environment const e2eConfig: E2EEnvironmentConfig = { environment: 'e2e', extensionName: 'MetaMask', defaultPassword: 'password123', - toolPrefix: 'mm', artifactsDir: './test-artifacts', defaultChainId: 1337, - ports: { - anvil: 8545, - fixtureServer: 12345, - }, + ports: { anvil: 8545, fixtureServer: 12345 }, }; +``` + +**Production-like** — Minimal configuration without test infrastructure: -// Production-like Environment +```typescript const prodConfig: ProdEnvironmentConfig = { environment: 'prod', extensionName: 'MetaMask', - toolPrefix: 'mm', }; ``` -### Context Switching Options +Use `set_context` / `get_context` tools to switch between modes at runtime (requires no active session). -`mm_set_context` supports an optional `options` payload that is forwarded to the session manager's `setContext(context, options)` implementation. +## Server Configuration -```typescript -type SetContextInput = { - context: 'e2e' | 'prod'; - options?: Record; -}; - -// Example: switch to e2e and pass context-specific overrides -await handleSetContext({ - context: 'e2e', - options: { - mockServer: { - enabled: true, - port: 18000, - }, - }, -}); -``` - -Use `options` only for context-specific configuration your `ISessionManager` implementation understands. - -### Custom Tool Definitions - -The package provides a fixed set of tools prefixed with `mm_`. 
Custom tool injection is currently not supported. You can inspect the available tool definitions using `getToolDefinitions()`: +The `createServer()` function accepts a `ServerConfig` object: ```typescript -import { getToolDefinitions } from '@metamask/client-mcp-core'; - -const tools = getToolDefinitions(); -console.log(`Available tools: ${tools.map((t) => t.name).join(', ')}`); -``` - -### Registering Custom Tool Handlers - -Custom tool handlers are not supported. The server uses a fixed set of handlers for the provided tools. - -## Available Tools - -All tools are prefixed with `mm_` and return a standardized response format: - -```typescript -type ToolResponse = - | { - ok: true; - meta: { - timestamp: string; // ISO timestamp - sessionId?: string; // Current session ID - durationMs: number; // Operation duration - }; - result: Result; // Success payload - } - | { - ok: false; - meta: { - timestamp: string; - sessionId?: string; - durationMs: number; - }; - error: { - code: string; - message: string; - details?: Record; - }; - }; -``` - ---- - -### Session Management Tools - -#### `mm_build` - -Build the extension from source. Requires `BuildCapability`. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `buildType` | `"build:test"` | `"build:test"` | Build script to run | -| `force` | `boolean` | `false` | Force rebuild even if build exists | - -**Output:** - -```typescript -{ - buildType: 'build:test'; - extensionPathResolved: string; // Absolute path to built extension -} -``` - -**Example:** - -```json -{ "buildType": "build:test", "force": true } -``` - ---- - -#### `mm_launch` - -Launch a headed Chrome browser with the extension loaded. This is typically the first tool called. 
- -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `autoBuild` | `boolean` | `true` | Auto-build if extension not found | -| `stateMode` | `"default" \| "onboarding" \| "custom"` | `"default"` | Wallet initialization mode | -| `fixturePreset` | `string` | - | Named preset when `stateMode="custom"` | -| `fixture` | `object` | - | Direct fixture object when `stateMode="custom"` | -| `ports.anvil` | `number` | `8545` | Anvil RPC port | -| `ports.fixtureServer` | `number` | `12345` | Fixture server port | -| `slowMo` | `number` | `0` | Slow down actions (ms) for debugging | -| `extensionPath` | `string` | - | Custom extension directory path | -| `goal` | `string` | - | Session goal for knowledge store | -| `flowTags` | `string[]` | - | Flow categorization tags | -| `tags` | `string[]` | - | Free-form tags | -| `seedContracts` | `string[]` | - | Contracts to deploy on launch | - -**State Modes:** - -- `default` - Pre-onboarded wallet with 25 ETH, ready to use -- `onboarding` - Fresh state, requires wallet setup flow -- `custom` - Use provided fixture or preset - -**Output:** - -```typescript -{ - sessionId: string; // Unique session identifier - extensionId: string; // Extension's Chrome ID - state: ExtensionState; // Initial extension state - prerequisites?: [{ // Steps taken before launch - step: string; - description: string; - }]; -} -``` - -**Example:** - -```json -{ - "stateMode": "default", - "goal": "Test send flow", - "flowTags": ["send"], - "seedContracts": ["hst"] -} -``` - ---- - -#### `mm_cleanup` - -Stop the browser and all services (Anvil, fixture server). Always call when done. 
- -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `sessionId` | `string` | - | Optional session ID to clean up | - -**Output:** - -```typescript -{ - cleanedUp: boolean; // Whether cleanup was performed -} -``` - ---- - -### Discovery Tools - -#### `mm_get_state` - -Get current extension state including screen, balance, network, and account. - -**Input:** None - -**Output:** - -```typescript -{ - state: { - isLoaded: boolean; - currentUrl: string; - extensionId: string; - isUnlocked: boolean; - currentScreen: ScreenName; - accountAddress: string | null; - networkName: string | null; - chainId: number | null; - balance: string | null; - }; - tabs?: { - active: { role: TabRole; url: string }; - tracked: { role: TabRole; url: string }[]; - }; -} -``` - ---- - -#### `mm_list_testids` - -List all visible `data-testid` attributes on the current page. Use to discover interaction targets. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `limit` | `number` | `150` | Maximum items to return (1-500) | - -**Output:** - -```typescript -{ - items: [{ - testId: string; // The data-testid value - tag: string; // HTML tag (button, input, div, etc.) - text?: string; // Visible text content - visible: boolean; // Whether element is visible - }]; -} -``` - -**Example Output:** - -```json -{ - "items": [ - { - "testId": "account-menu-icon", - "tag": "button", - "text": "", - "visible": true - }, - { - "testId": "eth-overview-send", - "tag": "button", - "text": "Send", - "visible": true - }, - { - "testId": "token-balance", - "tag": "span", - "text": "25 ETH", - "visible": true - } - ] -} -``` - ---- - -#### `mm_accessibility_snapshot` - -Get a trimmed accessibility tree with deterministic refs (e1, e2, ...). Refs can be used with `mm_click` and `mm_type`. 
- -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `rootSelector` | `string` | - | CSS selector to scope the snapshot | - -**Included Roles:** - -- **Actionable:** button, link, checkbox, radio, switch, textbox, combobox, menuitem -- **Important:** dialog, alert, status, heading - -**Output:** - -```typescript -{ - nodes: [{ - ref: string; // Deterministic ref (e1, e2, e3, ...) - role: string; // ARIA role - name: string; // Accessible name - disabled?: boolean; - checked?: boolean; - expanded?: boolean; - path: string[]; // Ancestor path for context - }]; -} -``` - -**Example Output:** - -```json -{ - "nodes": [ - { "ref": "e1", "role": "button", "name": "Send", "path": ["main", "div"] }, - { "ref": "e2", "role": "button", "name": "Swap", "path": ["main", "div"] }, - { "ref": "e3", "role": "textbox", "name": "Amount", "path": ["form"] } - ] -} -``` - ---- - -#### `mm_describe_screen` - -Comprehensive screen state combining extension state, testIds, and accessibility snapshot. Optionally includes screenshot. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `includeScreenshot` | `boolean` | `false` | Capture screenshot | -| `screenshotName` | `string` | - | Screenshot filename | -| `includeScreenshotBase64` | `boolean` | `false` | Include base64 in response | - -**Output:** - -```typescript -{ - state: ExtensionState; - testIds: { items: TestIdItem[] }; - a11y: { nodes: A11yNodeTrimmed[] }; - screenshot: { - path: string; - width: number; - height: number; - base64?: string; - } | null; - priorKnowledge?: PriorKnowledgeV1; // Past session hints -} -``` - ---- - -### Interaction Tools - -#### `mm_click` - -Click an element. Specify exactly ONE of: `a11yRef`, `testId`, or `selector`. 
- -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `a11yRef` | `string` | - | Accessibility ref from `mm_accessibility_snapshot` (e.g., "e5") | -| `testId` | `string` | - | `data-testid` attribute value | -| `selector` | `string` | - | CSS selector | -| `timeoutMs` | `number` | `15000` | Max wait time (0-60000) | - -**Output:** - -```typescript -{ - clicked: boolean; - target: string; // Resolved selector - pageClosedAfterClick?: boolean; // True if click caused page close -} -``` - -**Examples:** - -```json -{ "a11yRef": "e5" } -{ "testId": "confirm-btn" } -{ "selector": "button.primary" } -``` - ---- - -#### `mm_type` - -Type text into an input element. Specify exactly ONE of: `a11yRef`, `testId`, or `selector`. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `a11yRef` | `string` | - | Accessibility ref | -| `testId` | `string` | - | `data-testid` value | -| `selector` | `string` | - | CSS selector | -| `text` | `string` | **required** | Text to type | -| `timeoutMs` | `number` | `15000` | Max wait time | - -**Output:** - -```typescript -{ - typed: boolean; - target: string; - textLength: number; -} -``` - -**Example:** - -```json -{ "testId": "amount-input", "text": "0.5" } -``` - ---- - -#### `mm_wait_for` - -Wait for an element to become visible. Specify exactly ONE of: `a11yRef`, `testId`, or `selector`. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `a11yRef` | `string` | - | Accessibility ref | -| `testId` | `string` | - | `data-testid` value | -| `selector` | `string` | - | CSS selector | -| `timeoutMs` | `number` | `15000` | Max wait time (100-120000) | - -**Output:** - -```typescript -{ - found: boolean; - target: string; -} -``` - ---- - -#### `mm_clipboard` - -Read from or write to the browser clipboard. 
Useful for pasting content (e.g., Secret Recovery Phrase) into components that support paste functionality. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `action` | `"write" \| "read"` | **required** | Clipboard action | -| `text` | `string` | - | Text to write (required when `action="write"`) | - -**Output:** - -```typescript -{ - action: "write" | "read"; - success: boolean; - text?: string; // Present when action="read" and successful -} -``` - -**Examples:** - -```json -{ "action": "write", "text": "word1 word2 word3 word4 word5 word6 word7 word8 word9 word10 word11 word12" } -{ "action": "read" } -``` - -**Use Case - Fast SRP Entry:** - -``` -1. mm_clipboard { "action": "write", "text": "abandon abandon ... about" } -2. mm_click { "testId": "srp-input-import__paste-button" } -→ All 12 words populated instantly via paste -``` - ---- - -#### `mm_navigate` - -Navigate to a specific screen in the extension. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `screen` | `"home" \| "settings" \| "notification" \| "url"` | **required** | Target screen | -| `url` | `string` | - | Required when `screen="url"` | - -**Output:** - -```typescript -{ - navigated: boolean; - currentUrl: string; -} -``` - -**Examples:** - -```json -{ "screen": "home" } -{ "screen": "settings" } -{ "screen": "url", "url": "https://app.uniswap.org" } -``` - ---- - -### Multi-Tab Tools - -#### `mm_wait_for_notification` - -Wait for a notification popup to appear (e.g., after dApp interaction). Sets the notification page as active. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `timeoutMs` | `number` | `15000` | Max wait time (1000-60000) | - -**Output:** - -```typescript -{ - found: boolean; - pageUrl: string; -} -``` - ---- - -#### `mm_switch_to_tab` - -Switch the active page for subsequent interactions. 
- -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `role` | `"extension" \| "notification" \| "dapp" \| "other"` | - | Tab role to switch to | -| `url` | `string` | - | URL prefix to match | - -**Output:** - -```typescript -{ - switched: boolean; - activeTab: { - role: TabRole; - url: string; - } -} -``` - -**Example:** - -```json -{ "role": "dapp" } -{ "url": "https://app.uniswap.org" } -``` - ---- - -#### `mm_close_tab` - -Close a specific tab. Cannot close the extension home page. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `role` | `"notification" \| "dapp" \| "other"` | - | Tab role to close | -| `url` | `string` | - | URL prefix to match | - -**Output:** - -```typescript -{ - closed: boolean; - closedUrl: string; -} -``` - ---- - -### Screenshot Tools - -#### `mm_screenshot` - -Capture a screenshot and save to `test-artifacts/screenshots/`. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `name` | `string` | **required** | Filename (without extension) | -| `fullPage` | `boolean` | `true` | Capture full page | -| `selector` | `string` | - | Capture specific element only | -| `includeBase64` | `boolean` | `false` | Include base64 in response | - -**Output:** - -```typescript -{ - path: string; // File path - width: number; - height: number; - base64?: string; // If includeBase64=true -} -``` - ---- - -### Smart Contract Tools - -#### `mm_seed_contract` - -Deploy a smart contract to the local Anvil node. Requires `ContractSeedingCapability`. 
- -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `contractName` | `string` | **required** | Contract to deploy (see list below) | -| `hardfork` | `string` | `"prague"` | EVM hardfork | -| `deployerOptions.fromAddress` | `string` | - | Impersonate address | -| `deployerOptions.fromPrivateKey` | `string` | - | Deploy from specific key | - -**Available Contracts:** -| Name | Description | -|------|-------------| -| `hst` | ERC-20 TST token | -| `nfts` | ERC-721 NFT collection | -| `erc1155` | ERC-1155 multi-token | -| `piggybank` | Simple ETH storage | -| `failing` | Always reverts (error testing) | -| `multisig` | Multi-signature wallet | -| `entrypoint` | ERC-4337 EntryPoint | -| `simpleAccountFactory` | ERC-4337 account factory | -| `verifyingPaymaster` | ERC-4337 paymaster | - -**Output:** - -```typescript -{ - contractName: string; - contractAddress: string; - deployedAt: string; // ISO timestamp -} -``` - ---- - -#### `mm_seed_contracts` - -Deploy multiple contracts in sequence. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `contracts` | `string[]` | **required** | Contracts to deploy (1-9) | -| `hardfork` | `string` | `"prague"` | EVM hardfork | - -**Output:** - -```typescript -{ - deployed: [{ contractName, contractAddress, deployedAt }]; - failed: [{ contractName, error }]; -} -``` - ---- - -#### `mm_get_contract_address` - -Get the deployed address of a contract. - -**Input:** -| Parameter | Type | Description | -|-----------|------|-------------| -| `contractName` | `string` | Contract name to look up | - -**Output:** - -```typescript -{ - contractName: string; - contractAddress: string | null; -} -``` - ---- - -#### `mm_list_contracts` - -List all contracts deployed in this session. 
- -**Input:** None - -**Output:** - -```typescript -{ - contracts: [{ - contractName: string; - contractAddress: string; - deployedAt: string; - }]; -} -``` - ---- - -### Knowledge Store Tools - -The knowledge store enables cross-session learning by recording tool invocations and their context. - -#### `mm_knowledge_last` - -Get the last N step records from the knowledge store. - -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `n` | `number` | `20` | Number of steps (1-200) | -| `scope` | `"current" \| "all" \| { sessionId }` | `"current"` | Which sessions to query | -| `filters.flowTag` | `string` | - | Filter by flow tag | -| `filters.tag` | `string` | - | Filter by tag | -| `filters.screen` | `string` | - | Filter by screen | -| `filters.sinceHours` | `number` | - | Only steps from last N hours | - -**Output:** - -```typescript -{ - steps: [{ - timestamp: string; - tool: string; - screen: ScreenName; - snippet: string; // Human-readable summary - sessionId?: string; - matchedFields?: string[]; - sessionGoal?: string; - }]; -} +type ServerConfig = { + /** Session manager instance (required) */ + sessionManager: ISessionManager; + /** Factory function to create workflow context (may be sync or async) */ + contextFactory: () => WorkflowContext | Promise<WorkflowContext>; + /** Idle timeout in milliseconds (optional, defaults to 1_800_000 = 30 min) */ + idleShutdownMs?: number; + /** Per-request execution timeout in milliseconds (default: 30_000) */ + requestTimeoutMs?: number; + /** Path to log file (optional) */ + logFilePath?: string; +}; ``` ---- - -#### `mm_knowledge_search` +The `contextFactory` is called once during `start()`. It is responsible for allocating any sub-service ports and returning a `WorkflowContext`. The core validates the returned shape at runtime — `config.environment` must be a string and every value in `allocatedPorts` (if provided) must be a finite number.
-Search step records by tool name, screen, testId, or accessibility names. +The `allocatePort()` utility is exported as a convenience for consumers who need ephemeral port allocation inside their factory. -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `query` | `string` | **required** | Search query (1-200 chars) | -| `limit` | `number` | `20` | Max results (1-100) | -| `scope` | `"current" \| "all" \| { sessionId }` | `"all"` | Which sessions to search | -| `filters` | `KnowledgeFilters` | - | Additional filters | - -**Output:** - -```typescript -{ - matches: KnowledgeStepSummary[]; - query: string; -} -``` +The returned `ServerInstance` exposes: ---- +- `start(): Promise<void>` — Calls `contextFactory`, starts HTTP server, writes `.mm-server` state, sets up idle timeout and signal handlers. +- `stop(): Promise<void>` — Stops accepting connections, cleans up session, removes `.mm-server` state. -#### `mm_knowledge_summarize` +## HTTP API -Generate a recipe-like summary of steps taken in a session.
+The daemon exposes the following endpoints on `127.0.0.1`: -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `scope` | `"current" \| { sessionId }` | `"current"` | Session to summarize | +| Method | Path | Description | +| ------ | ------------- | -------------------------------------------- | +| `GET` | `/health` | Health check with nonce verification | +| `GET` | `/status` | Daemon status (PID, port, uptime, sub-ports) | +| `POST` | `/launch` | Start a browser session | +| `POST` | `/cleanup` | Stop the current browser session | +| `POST` | `/tool/:name` | Execute a registered tool with JSON body | -**Output:** +All responses follow a consistent shape: ```typescript -{ - sessionId: string; - stepCount: number; - recipe: [{ - stepNumber: number; - tool: string; - notes: string; - }]; -} -``` - ---- - -#### `mm_knowledge_sessions` - -List recent sessions with metadata. +// Success +{ ok: true, result: T, observations?: { state, testIds, a11y } } -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `limit` | `number` | `10` | Max sessions (1-50) | -| `filters` | `KnowledgeFilters` | - | Filter options | - -**Output:** - -```typescript -{ - sessions: [{ - sessionId: string; - createdAt: string; - goal?: string; - flowTags: string[]; - tags: string[]; - }]; -} +// Error +{ ok: false, error: { code: string, message: string } } ``` ---- +The `observations` field is included for **mutating** tools (click, type, navigate, launch, cleanup, build, etc.) and for `run_steps` when its `includeObservations` parameter is `'all'` (default) or `'failures'`. **Read-only** and **discovery** tools omit observations from the response. -### Batching Tools +## CLI Reference -#### `mm_run_steps` +The `mm` CLI provides a unified interface for agents and developers. 
All commands communicate with the daemon over HTTP — the daemon is auto-started on `mm launch` if not already running. -Execute multiple tools in sequence. Reduces round trips for multi-step flows. +### Global Options -**Input:** -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `steps` | `array` | **required** | Tool calls to execute (1-50) | -| `steps[].tool` | `string` | **required** | Tool name (e.g., `mm_click`) | -| `steps[].args` | `object` | `{}` | Tool arguments | -| `stopOnError` | `boolean` | `false` | Stop on first error | -| `includeObservations` | `"none" \| "failures" \| "all"` | `"all"` | When to include state observations | +| Option | Description | +| ------------------ | ----------------------------------------------------------------------------------------------------------- | +| `--project ` | Target a specific project directory (absolute or relative). Overrides `MM_PROJECT` and git-based discovery. | -**Output:** +| Environment Variable | Description | +| -------------------- | -------------------------------------------------------------------------------------------------------- | +| `MM_PROJECT` | Default project directory when `--project` is not provided. Falls back to the current git worktree root. | -```typescript -{ - steps: [{ - tool: string; - ok: boolean; - result?: unknown; - error?: { code: string; message: string; details?: unknown }; - meta: { durationMs: number; timestamp: string }; - }]; - summary: { - ok: boolean; // All steps succeeded - total: number; - succeeded: number; - failed: number; - durationMs: number; - }; -} -``` +### Project Targeting -**Example:** +By default, the CLI resolves the target project from the current git worktree. This works when running from inside the project directory. 
For other scenarios, the resolution order is: -```json -{ - "steps": [ - { "tool": "mm_click", "args": { "testId": "send-button" } }, - { "tool": "mm_type", "args": { "testId": "amount-input", "text": "0.1" } }, - { "tool": "mm_click", "args": { "testId": "confirm-button" } } - ], - "stopOnError": true -} -``` - -## Development - -### Building +1. **`--project `** — Explicit flag, highest priority. Accepts absolute or relative paths. +2. **`MM_PROJECT`** — Environment variable. Useful for setting once in agent config or shell profile. +3. **Git worktree** — `git rev-parse --show-toplevel` from the current working directory (existing behavior). ```bash -yarn build -``` +# From inside the project (unchanged) +mm launch + +# From a parent folder containing multiple repos +mm --project ./metamask-extension launch + +# Via environment variable +export MM_PROJECT=/path/to/metamask-extension +mm describe-screen +``` + +### Lifecycle + +| Command | Description | +| ------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `mm launch [--context e2e\|prod] [--state default\|onboarding\|custom] [--extension-path ] [--goal ] [--force] [--flow-tags ]` | Auto-starts the daemon if needed, then launches a headed Chrome session with the configured extension. Use `--context` to set the environment context before launching. Use `--state` to control wallet initialization. Use `--extension-path` to override the extension directory. Use `--goal` and `--flow-tags` for knowledge tagging. 
Use `--force` to replace an existing session. | +| `mm cleanup [--shutdown]` | Stops the browser, tears down test services (fixture server, Anvil, mock server), and releases session resources. Add `--shutdown` to also terminate the daemon process. | +| `mm status` | Displays the daemon's current status: PID, port, uptime, allocated sub-ports, and whether a browser session is active. | +| `mm serve [--background]` | Manually starts the HTTP daemon without launching a browser session. Use `--background` to detach the process. Fails if a daemon is already running for this worktree. | + +### Interaction + +| Command | Description | +| ------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `mm click [--selector ] [--testid ] [--within ]` | Clicks an element by its accessibility reference (e.g., `e3`). The ref comes from a prior `describe-screen` call. Waits for the element to be visible before clicking. Use `--within` to scope the target inside a parent element (`testid:`, `selector:`, or a bare a11y ref). | +| `mm type [--selector ] [--testid ] [--within ]` | Types text into an input element identified by its accessibility reference. Clears the field first, then sets the new value (uses Playwright's `fill()`). Use `--within` to scope the target inside a parent element. | +| `mm get-text [--selector ] [--testid ] [--within ]` | Reads the text content of an element. Returns the inner text, target descriptor, and character length. Useful for asserting visible values without screenshots. 
| +| `mm describe-screen` | Captures the full screen state: extension info, visible test IDs, a trimmed accessibility tree with deterministic refs (`e1`, `e2`, ...), and prior knowledge from historical sessions. This is the primary command for understanding what's on screen before interacting. | +| `mm screenshot [--name ]` | Takes a full-page screenshot of the current page. Saves to the artifacts directory. Use `--name` to set a descriptive filename. | +| `mm wait-for [--timeout ] [--selector ] [--testid ] [--within ]` | Blocks until an element identified by its accessibility reference becomes visible, or the timeout expires. Default timeout is 15 seconds. Use `--within` to scope the target inside a parent element. | +| `mm wait-for-notification [--timeout ]` | Waits for the extension notification popup to appear within a timeout. Returns the notification page URL. | +| `mm clipboard [text]` | Reads from or writes to the system clipboard via Chrome DevTools Protocol. Useful for pasting seed phrases or copying addresses. | + +### Navigation + +| Command | Description | +| --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| `mm navigate ` | Opens a new tab and navigates to the given URL. Useful for navigating to dApps or external pages. | +| `mm navigate-home` | Navigates the extension tab to the wallet home screen. | +| `mm navigate-settings` | Navigates the extension tab to the settings page. | +| `mm switch-to-tab \| --role \| --url ` | Switches the active page to a tab matching a given role (e.g., `extension`, `dapp`) or URL prefix. Supports a positional role as first argument. | +| `mm close-tab --role \| --url ` | Closes a browser tab matching a given role or URL. Falls back to the extension tab if the active tab is closed. 
| + +### State & Context + +| Command | Description | +| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `mm get-state` | Returns the current extension state: loaded status, current URL, screen name, network, chain ID, account address, and balance. Also lists all tracked browser tabs. | +| `mm get-context` | Returns the current environment context (`e2e` or `prod`), session status, available capabilities, and whether context switching is allowed. | +| `mm set-context ` | Switches the session environment between `e2e` and `prod` modes. Blocked while a session is active — run `mm cleanup` first. | + +### Knowledge + +| Command | Description | +| ----------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | +| `mm knowledge-search ` | Searches the knowledge store for past tool invocations matching the query. Results are scored by relevance to screen, URL, test IDs, and a11y nodes. | +| `mm knowledge-last` | Retrieves the most recent step records from the current session's knowledge store. | +| `mm knowledge-sessions` | Lists recent knowledge sessions with metadata (goal, flow tags, timestamps). | +| `mm knowledge-summarize [--session ]` | Generates a recipe-style summary of a session's tool invocations, showing the step sequence with targets and outcomes. | + +### Batching + +| Command | Description | +| --------------------- | ------------------------------------------------------------------------------------------------------------------------ | +| `mm run-steps ` | Executes a batch of tool invocations sequentially from a JSON definition. Each step specifies a tool name and arguments. | + +For the full agent-facing reference and workflow guidelines, see [SKILL.md](./SKILL.md). 
+ +## Error Classification + +Tool errors are classified into specific error codes for structured handling: + +| Code | Meaning | +| --------------------------- | ----------------------------------------------- | +| `MM_TARGET_NOT_FOUND` | Element not found by ref, testId, or selector | +| `MM_WAIT_TIMEOUT` | Timeout waiting for element or condition | +| `MM_CLICK_FAILED` | Click operation failed | +| `MM_TYPE_FAILED` | Type operation failed | +| `MM_NAVIGATION_FAILED` | Navigation error or network failure | +| `MM_PAGE_CLOSED` | Browser page was closed unexpectedly | +| `MM_NOTIFICATION_TIMEOUT` | Notification popup did not appear | +| `MM_TAB_NOT_FOUND` | Tab not found by role or URL | +| `MM_DISCOVERY_FAILED` | Discovery tool failure | +| `MM_SCREENSHOT_FAILED` | Screenshot capture failure | +| `MM_BATCH_TIMEOUT` | `batchTimeoutMs` deadline exceeded in run_steps | +| `MM_CONTRACT_NOT_FOUND` | Unknown contract name | +| `MM_SEED_FAILED` | Contract deployment failure | +| `MM_CONTEXT_SWITCH_BLOCKED` | Context switch while session is active | -### Testing - -```bash -yarn test -``` - -### Local Development with yalc +## Development ```bash -# In this repo -yarn build && yalc publish - -# In consumer repo -yalc add @metamask/client-mcp-core +yarn build # Build the package +yarn test # Run tests and type checks +yarn lint # Lint everything +yarn lint:fix # Auto-fix lint issues ``` ## License -MIT +(MIT OR Apache-2.0) diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..c0855a9 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,538 @@ +# mm CLI — Agent Reference + +You control a browser extension through the `mm` CLI. Every command talks to a local HTTP daemon that manages Playwright and the extension lifecycle. The daemon auto-starts when you run `mm launch`. + +If you are running outside the target project directory, use `--project <path>` or set the `MM_PROJECT` environment variable to point at the project root.
All commands accept `--project` before the command name (e.g., `mm --project ../metamask-extension launch`). + +## Core Loop + +``` +mm launch # 1. Start browser + extension +mm describe-screen # 2. See what's on screen (ALWAYS do this before interacting) +mm click # 3. Interact using refs from describe-screen +mm describe-screen # 4. Re-describe after every action to get fresh refs +mm cleanup --shutdown # 5. Clean up when done +``` + +**Critical rules:** + +- **Always `describe-screen` before interacting.** Refs like `e1`, `e2` are ephemeral — they change after every action. +- **Always `describe-screen` after interacting** — OR use inline `observations` from mutating tool responses. Mutating tools (click, type, navigate, etc.) return an `observations` object with fresh `state`, `testIds`, and `a11y` refs. You can use these refs directly for the next interaction without calling `describe-screen`. Call `describe-screen` when you need `priorKnowledge` or screenshots. +- **One target per command.** Specify exactly ONE of: a11y ref (`e5`), testId, or CSS selector. +- **Errors are structured.** Check the `error.code` field to decide recovery strategy (see Error Codes below). + +## Observation Behavior + +Tool responses include different data based on the tool's category: + +| Category | Examples | Observations in response? 
| +| ------------- | ----------------------------------------------------------------- | ---------------------------------------------- | +| **Mutating** | click, type, navigate, launch, cleanup, build, clipboard | Yes — `state` + `a11y` (compacted) + `testIds` | +| **Read-only** | get_state, get_text, knowledge\_\*, get_context, set_context | No — faster response | +| **Discovery** | describe_screen, list_testids, accessibility_snapshot, screenshot | Data is already in `result` | +| **Batch** | run_steps | Controlled by `includeObservations` param | + +**Observation Compaction:** Mutating tool observations are **compacted** before returning: option runs of 3 or more under a combobox or listbox are replaced with a single summary node (e.g., `"55 options (refs e2–e56)"`). The `describe-screen` tool always returns the **full, unfiltered** a11y tree — use it when you need the complete option list or `priorKnowledge`. + +**Diff-Based Observations:** After the first mutating tool call sets a baseline, subsequent mutations return **diff-based** observations. The `observations.a11y.diff` field (when present) shows what changed: + +```json +{ + "added": ["e4", "e5"], // new node refs + "removed": ["e2"], // disappeared node refs + "unchanged": 3 // count of unchanged nodes +} +``` + +The `observations.a11y.nodes` field contains **only the changed and new nodes** (not all nodes). The baseline resets after `describe-screen`, `launch`, or `cleanup` — the next mutation returns a full compact observation (no `diff` field). When the diff would be larger than the full observation, the full option-filtered observation is returned instead (no `diff` field). + +### Using inline observations (mutating tools) + +After a mutating action, the response includes fresh screen state: + +```json +{ + "ok": true, + "result": { ... 
}, + "observations": { + "state": { "screen": "send", "url": "...", "balance": "1.5 ETH" }, + "testIds": ["send-amount-input", "send-button"], + "a11y": { + "nodes": [ + { "ref": "e1", "role": "textbox", "name": "Amount" }, + { "ref": "e2", "role": "button", "name": "Send" } + ] + } + } +} +``` + +You can use the `ref` values from `observations.a11y.nodes` for the next interaction — no `describe-screen` needed. Note that refs in compacted observations may be summary nodes (e.g., `"55 options (refs e2–e56)"`) when there are 3+ options under a combobox or listbox. + +**Quick reference:** + +- Use `observations.state` for quick checks (screen name, loading status, balance, etc.) +- Use `observations.a11y.nodes` with the compact refs for the next interaction +- Call `describe-screen` only when you need the full tree or `priorKnowledge` + +```bash +mm click e3 # mutating: response includes fresh observations +# observations.a11y.nodes has updated refs — use them directly: +mm type e1 "0.01" # use ref from previous response +``` + +Call `describe-screen` explicitly when you need: + +- `priorKnowledge` (historical actions for this screen) +- A screenshot via `includeScreenshot` +- Full context after unexpected navigation +- The complete, unfiltered a11y tree (e.g., all options in a dropdown) + +### `run_steps` and `includeObservations` + +The `run_steps` tool collects observations once after all steps complete. 
Control inclusion with the `includeObservations` parameter: + +| Value | Behavior | +| ----------------- | --------------------------------------------- | +| `'all'` (default) | Always include final state observations | +| `'none'` | Never include observations (fastest response) | +| `'failures'` | Include observations only if any step failed | + +```json +{ + "steps": [ + { "tool": "click", "args": { "a11yRef": "e3" } }, + { "tool": "type", "args": { "a11yRef": "e5", "text": "0.01" } } + ], + "includeObservations": "failures" +} +``` + +## Commands + +### Session Lifecycle + +#### `mm launch` + +Starts the daemon (if not running) and launches a headed Chrome session with the extension. + +``` +mm launch [--context e2e|prod] [--state default|onboarding|custom] [--extension-path ] [--goal ] [--force] [--flow-tags ] +``` + +| Flag | Description | +| ------------------------- | --------------------------------------------------------------- | +| `--context e2e\|prod` | Set the environment context before launching | +| `--state default` | Pre-onboarded wallet with 25 ETH on local Anvil chain (default) | +| `--state onboarding` | Fresh wallet requiring manual onboarding setup | +| `--state custom` | Use a custom fixture for wallet state | +| `--extension-path ` | Override the extension build directory | +| `--goal ` | Tag the session with a goal for knowledge store | +| `--force` | Replace an existing active session | +| `--flow-tags ` | Comma-separated flow tags for cross-session knowledge | + +Returns: `sessionId`, `extensionId`, `state` (current extension state). + +#### `mm cleanup` + +Stops the browser, tears down test services, and releases session resources. + +``` +mm cleanup [--shutdown] +``` + +| Flag | Description | +| ------------ | --------------------------------- | +| `--shutdown` | Also terminate the daemon process | + +Without `--shutdown`, the daemon stays running for the next `mm launch`. 
+ +#### `mm status` + +Shows daemon status: PID, port, uptime, allocated sub-ports. + +``` +mm status +``` + +#### `mm serve` + +Manually starts the daemon without launching a browser. Useful for debugging. + +``` +mm serve [--background] +``` + +### Screen Discovery + +#### `mm describe-screen` + +**Your primary observation tool.** Returns the complete screen state: + +- **Extension state**: current URL, screen name, network, account, balance +- **Active tab**: the currently focused tab's role and URL (if tracked) +- **Test IDs**: visible `data-testid` attributes with text previews +- **A11y tree**: interactive elements with deterministic refs (`e1`, `e2`, ...) +- **Prior knowledge**: suggested actions from past sessions on this screen + +``` +mm describe-screen +``` + +The a11y tree includes actionable roles: `button`, `link`, `checkbox`, `radio`, `switch`, `textbox`, `combobox`, `menuitem`; structural roles: `menu`, `listbox`, `option`, `tab`, `tabpanel`, `list`, `listitem`; and important roles: `dialog`, `alert`, `status`, `heading`. + +Each node looks like: + +```json +{ + "ref": "e3", + "role": "button", + "name": "Confirm", + "path": ["dialog:Transaction"], + "testId": "confirm-footer-button", + "textContent": "Confirm" +} +``` + +The `testId` and `textContent` fields appear only on nodes with short or generic names — they provide extra context from the DOM to help identify ambiguous elements. Nodes with clear names omit these fields. + +When 3+ consecutive identical nodes appear (same role, name, and path), they are collapsed into a summary like `… 3 more "maskicon" (refs e2–e4)` to reduce token waste. Individual refs still work for targeting. + +Use the `ref` value (`e3`) for click/type/get-text/wait-for commands. + +#### `mm screenshot` + +Captures a screenshot of the current page. + +``` +mm screenshot [--name ] +``` + +Returns: file path, dimensions. 
+ +### Element Interaction + +All interaction commands accept an element reference from `describe-screen`. + +#### `mm click ` + +Clicks an element. Waits up to 15s for it to become visible. + +``` +mm click e3 +mm click --testid end-accessory --within "testid:account-list-item/0" +``` + +Use `--within` to scope the target inside a parent element. Values use the format `testid:`, `selector:`, or a bare a11y ref (`e5`). + +If the page closes after clicking (e.g., confirmation popup), the response includes `pageClosedAfterClick: true` — this is normal, not an error. + +#### `mm type ` + +Types text into an input field. **Clears the field first**, then sets the new value (uses Playwright's `fill()`). No `clearFirst` flag needed — clearing is always implicit. + +``` +mm type e5 "0x1234abcd..." +``` + +#### `mm get-text ` + +Reads the text content of an element. Returns the inner text, target descriptor, and character length. Useful for asserting visible values without screenshots. Categorized as read-only (no observations in response). + +``` +mm get-text e5 +mm get-text --testid balance-amount +mm get-text --testid amount --within "testid:tx-row" +``` + +Returns: `text` (string content), `target` (descriptor like `testId:balance-amount`), `length` (character count). + +#### `mm wait-for ` + +Blocks until an element becomes visible. Default timeout: 15s. + +``` +mm wait-for e7 [--timeout ] +mm wait-for --testid confirm-btn --within "testid:dialog-container" +``` + +#### `mm wait-for-notification` + +Waits for the extension notification popup to appear within a timeout. Returns the notification page URL. + +``` +mm wait-for-notification [--timeout ] +``` + +#### `mm clipboard` + +Reads from or writes to the system clipboard via Chrome DevTools Protocol. Useful for pasting seed phrases or copying addresses. + +``` +mm clipboard read +mm clipboard write "0x1234abcd..." +``` + +### Navigation + +#### `mm navigate ` + +Opens a new tab and navigates to the given URL. 
+ +``` +mm navigate https://app.uniswap.org +``` + +#### `mm navigate-home` + +Navigates the extension tab to the wallet home screen. + +``` +mm navigate-home +``` + +#### `mm navigate-settings` + +Navigates the extension tab to the settings page. + +``` +mm navigate-settings +``` + +#### `mm switch-to-tab` + +Switches the active page to a tab matching a given role or URL prefix. Supports a positional role as the first argument. + +``` +mm switch-to-tab dapp +mm switch-to-tab --role extension +mm switch-to-tab --url https://app.uniswap.org +``` + +#### `mm close-tab` + +Closes a browser tab matching a given role or URL. Falls back to the extension tab if the active tab is closed. + +``` +mm close-tab --role dapp +mm close-tab --url https://app.uniswap.org +``` + +### State & Context + +#### `mm get-state` + +Returns extension state and tracked tabs without the full a11y tree. + +``` +mm get-state +``` + +Returns: `state` (extension state) and `tabs` (active + tracked tabs with roles and URLs). + +#### `mm get-context` + +Returns the current environment context (`e2e` or `prod`), session status, available capabilities, and whether context switching is allowed. + +``` +mm get-context +``` + +#### `mm set-context` + +Switches the session environment between `e2e` and `prod` modes. Blocked while a session is active — run `mm cleanup` first. + +``` +mm set-context +``` + +### Knowledge Store + +The knowledge store records every tool invocation and uses past sessions to suggest actions. + +#### `mm knowledge-search ` + +Searches past sessions for steps matching the query. Matches against tool names, screen names, test IDs, and a11y node names. + +``` +mm knowledge-search "confirm transaction" +``` + +#### `mm knowledge-last` + +Gets the most recent step records from the current session. + +``` +mm knowledge-last +``` + +#### `mm knowledge-sessions` + +Lists recent sessions with metadata (goal, flow tags, timestamps). 
+ +``` +mm knowledge-sessions +``` + +### Batch Execution + +#### `mm run-steps ` + +Executes multiple tool invocations in sequence from a JSON object containing a `steps` array. Each step specifies a tool name and arguments. + +``` +mm run-steps '{"steps":[{"tool":"click","args":{"a11yRef":"e3"}},{"tool":"wait_for","args":{"a11yRef":"e5"}}]}' +``` + +Supports `stopOnError` (halt on first failure) and returns per-step results with timing. The `includeObservations` param controls whether final-state observations appear in the response: `'all'` (default), `'none'`, or `'failures'` (only if any step failed). Use `batchTimeoutMs` to set an overall deadline — if exceeded, remaining steps are marked as skipped and partial results are returned immediately. The summary includes a `skipped` count alongside `succeeded` and `failed`. + +Tool aliases are supported in steps: `navigate_home` / `navigate-home`, `navigate_settings` / `navigate-settings`, and `navigate_notification` / `navigate-notification` resolve to `navigate` with the appropriate `screen` argument. You can also use `ref` as shorthand for `a11yRef` in step args and within targets. + +## Element Targeting + +Every interaction command (`click`, `type`, `get-text`, `wait-for`) needs a target. You must provide exactly ONE of: + +| Method | Format | Stability | When to use | +| ---------------- | ------------------- | ------------------------------- | ---------------------------------------------------- | +| **a11y ref** | `e1`, `e2`, ... | Ephemeral (per describe-screen) | Default — use refs from the latest `describe-screen` | +| **testId** | `data-testid` value | Stable across sessions | When you know the testId from prior knowledge | +| **CSS selector** | Any CSS selector | Fragile | Last resort fallback | + +**Prefer a11y refs.** They come directly from the accessibility tree and map to ARIA selectors, making them the most reliable for the current screen state.
+ +## Prior Knowledge + +When you call `describe-screen`, the response may include a `priorKnowledge` section with: + +- **`similarSteps`**: Past tool invocations on the same screen with confidence scores +- **`suggestedNextActions`**: Ranked actions based on historical success (e.g., "click confirm button") +- **`avoid`**: Targets that frequently fail on this screen — skip these + +Use prior knowledge to guide your actions, but always verify against the current a11y tree. + +## Error Codes + +When a command fails, the response includes `error.code`. Use this to decide what to do: + +| Code | Meaning | Recovery | +| ----------------------------- | -------------------------------------------- | --------------------------------------------------------- | +| `MM_NO_ACTIVE_SESSION` | No browser session running | Run `mm launch` first | +| `MM_SESSION_ALREADY_RUNNING` | Session already exists | Run `mm cleanup` first, or use `--force` | +| `MM_TARGET_NOT_FOUND` | Element ref/testId/selector not found | Run `mm describe-screen` to get fresh refs | +| `MM_WAIT_TIMEOUT` | Element didn't appear in time | Increase timeout or verify you're on the right screen | +| `MM_CLICK_FAILED` | Click failed after finding element | Element may be obscured; try waiting or scrolling | +| `MM_TYPE_FAILED` | Type failed after finding element | Element may not be an input; verify with describe-screen | +| `MM_PAGE_CLOSED` | Page was closed unexpectedly | Normal after some confirmations; run describe-screen | +| `MM_NAVIGATION_FAILED` | Navigation error or network failure | Check URL validity; retry once | +| `MM_NOTIFICATION_TIMEOUT` | Extension notification popup didn't appear | Action may not have triggered a notification; check state | +| `MM_TAB_NOT_FOUND` | Tab role/URL not found | Run `mm get-state` to see available tabs | +| `MM_CAPABILITY_NOT_AVAILABLE` | Feature requires a capability not configured | Check environment mode (e2e vs prod) | +| `MM_CONTEXT_SWITCH_BLOCKED` | Can't switch 
context with active session | Run `mm cleanup` first | +| `MM_INVALID_INPUT` | Bad parameters | Fix input and retry | +| `MM_BATCH_TIMEOUT` | `batchTimeoutMs` deadline exceeded | Remaining steps were skipped; check partial results | +| `MM_CONTRACT_NOT_FOUND` | Unknown contract name for seeding | See available contracts below | + +## Available Contracts (E2E only) + +These contracts can be deployed to the local Anvil chain via `seed_contract` / `seed_contracts`: + +| Name | Type | +| ---------------------- | --------------------------------------------------- | +| `hst` | ERC-20 token | +| `nfts` | ERC-721 NFT | +| `erc1155` | ERC-1155 multi-token | +| `piggybank` | Simple deposit contract | +| `failing` | Contract that always reverts (for testing failures) | +| `multisig` | Multi-signature wallet | +| `entrypoint` | ERC-4337 EntryPoint | +| `simpleAccountFactory` | ERC-4337 account factory | +| `verifyingPaymaster` | ERC-4337 paymaster | + +## Flow Tags + +When launching, tag your session with flow tags for cross-session knowledge: + +| Tag | Use for | +| ----------------- | ------------------------------ | +| `send` | Token send flows | +| `swap` | Token swap flows | +| `connect` | dApp connection flows | +| `sign` | Message/transaction signing | +| `onboarding` | Wallet setup/onboarding | +| `settings` | Settings configuration | +| `tx-confirmation` | Transaction confirmation flows | + +## Daemon Model + +- Daemon runs per project, state tracked in `.mm-server` at the project root +- Auto-starts on `mm launch` if not running +- Shuts down after 30 minutes of inactivity +- Logs to `.mm-daemon.log` +- One tool executes at a time (requests are queued) +- Project resolution: `--project` flag → `MM_PROJECT` env var → current git worktree + +## Workflow Examples + +### Basic Interaction + +```bash +mm launch --state default +mm describe-screen +# Response includes a11y nodes: [{ ref: "e1", role: "button", name: "Send" }, ...] 
+mm click e1 +mm describe-screen +# Now on send screen — get new refs +mm type e3 "0.01" +mm click e5 +mm cleanup --shutdown +``` + +### Transaction with Notification + +```bash +mm launch --state default +mm navigate https://app.uniswap.org +mm describe-screen +# Interact with dApp... +mm click e4 # triggers wallet popup +mm wait-for e2 --timeout 10000 # wait for confirm button in notification +mm click e2 # confirm +mm describe-screen # check result +mm cleanup --shutdown +``` + +### Running From a Parent Folder + +```bash +# Set once — all subsequent mm commands target this project +export MM_PROJECT=/path/to/metamask-extension + +mm launch --state default +mm describe-screen +mm click e1 +mm cleanup --shutdown + +# Or use --project per command +mm --project ../metamask-extension launch +mm --project ../metamask-extension describe-screen +``` + +### Using Prior Knowledge + +```bash +mm launch --state default --goal "Test send flow" --flow-tags send +mm describe-screen +# Response includes priorKnowledge.suggestedNextActions: +# [{ action: "click", preferredTarget: { type: "testId", value: "send-button" }, confidence: 0.85 }] +# Use the suggestion but verify the target exists in the current a11y tree +mm click e3 +mm cleanup --shutdown +``` + +## Project-Specific Commands + + + +## Project-Specific Workflow Examples + + diff --git a/package.json b/package.json index e7a0560..8221fe2 100644 --- a/package.json +++ b/package.json @@ -1,13 +1,13 @@ { "name": "@metamask/client-mcp-core", "version": "0.1.1", - "description": "MCP server for MetaMask Extension visual testing with LLM agents", + "description": "HTTP daemon and CLI for agent-driven browser extension testing with Playwright", "keywords": [ - "mcp", "playwright", "llm", "visual-testing", - "browser-extension" + "browser-extension", + "cli" ], "homepage": "https://github.com/MetaMask/client-mcp-core#readme", "bugs": { @@ -35,6 +35,9 @@ "main": "./dist/index.cjs", "module": "./dist/index.mjs", "types": 
"./dist/index.d.cts", + "bin": { + "mm": "./dist/cli/mm.cjs" + }, "files": [ "dist" ], @@ -57,7 +60,8 @@ "@isaacs/brace-expansion": "5.0.1" }, "dependencies": { - "@modelcontextprotocol/sdk": "^1.26.0", + "cosmiconfig": "^9.0.0", + "express": "^5.2.1", "zod": "^4.3.5" }, "devDependencies": { @@ -71,6 +75,7 @@ "@metamask/eslint-config-vitest": "^15.0.0", "@playwright/test": "^1.49.0", "@ts-bridge/cli": "^0.6.3", + "@types/express": "^5.0.6", "@types/node": "^20.0.0", "@typescript-eslint/utils": "^8.6.0", "@vitest/coverage-istanbul": "^3.0.7", diff --git a/scripts/prepack.sh b/scripts/prepack.sh index ad99af5..e741983 100755 --- a/scripts/prepack.sh +++ b/scripts/prepack.sh @@ -9,3 +9,5 @@ if [[ -n $SKIP_PREPACK ]]; then fi yarn build + +chmod +x dist/cli/mm.cjs diff --git a/src/capabilities/context.test.ts b/src/capabilities/context.test.ts index 25410cc..e953b93 100644 --- a/src/capabilities/context.test.ts +++ b/src/capabilities/context.test.ts @@ -20,7 +20,6 @@ describe('isE2EConfig', () => { environment: 'e2e', extensionName: 'MetaMask', defaultPassword: 'password123', - toolPrefix: 'mm', artifactsDir: './test-artifacts', defaultChainId: 1337, ports: { @@ -54,7 +53,6 @@ describe('isE2EConfig', () => { environment: 'prod', extensionName: 'MetaMask', defaultPassword: 'password123', - toolPrefix: 'mm', defaultChainId: 1, }; @@ -93,7 +91,6 @@ describe('isProdConfig', () => { environment: 'prod', extensionName: 'MetaMask', defaultPassword: 'password123', - toolPrefix: 'mm', artifactsDir: './artifacts', defaultChainId: 1, }; @@ -119,7 +116,6 @@ describe('isProdConfig', () => { environment: 'e2e', extensionName: 'MetaMask', defaultPassword: 'password123', - toolPrefix: 'mm', artifactsDir: './test-artifacts', defaultChainId: 1337, ports: { @@ -438,3 +434,34 @@ describe('hasCapability', () => { expect(hasCapability(context, 'mockServer')).toBe(true); }); }); + +describe('WorkflowContext with allocatedPorts', () => { + it('accepts allocatedPorts with port mappings', () => 
{ + const context: WorkflowContext = { + config: { + environment: 'e2e', + extensionName: 'MetaMask', + }, + allocatedPorts: { + anvil: 3000, + fixture: 4000, + }, + }; + + expect(context.allocatedPorts).toStrictEqual({ + anvil: 3000, + fixture: 4000, + }); + }); + + it('allows WorkflowContext without allocatedPorts (field is optional)', () => { + const context: WorkflowContext = { + config: { + environment: 'e2e', + extensionName: 'MetaMask', + }, + }; + + expect(context.allocatedPorts).toBeUndefined(); + }); +}); diff --git a/src/capabilities/context.ts b/src/capabilities/context.ts index a88fa1b..25b4d9e 100644 --- a/src/capabilities/context.ts +++ b/src/capabilities/context.ts @@ -7,6 +7,9 @@ import type { MockServerCapability, } from './types.js'; +/** Sparse port-name → port-number map. `Partial` ensures lookups resolve to `number | undefined`. */ +export type PortMap = Partial>; + /** * Environment mode discriminator. * - 'e2e': End-to-end testing environment with local chain, fixtures, and contract seeding @@ -22,8 +25,6 @@ export type BaseEnvironmentConfig = { extensionName: string; /** Default password for wallet unlock operations */ defaultPassword?: string; - /** Prefix for MCP tool names (e.g., "mm" -> "mm_build", "mm_launch") */ - toolPrefix?: string; /** Directory for storing screenshots and other artifacts */ artifactsDir?: string; }; @@ -112,6 +113,8 @@ export type WorkflowContext = { stateSnapshot?: StateSnapshotCapability; mockServer?: MockServerCapability; config: EnvironmentConfig; + /** Port metadata reported back to core from the contextFactory. Used for DaemonState persistence and /status endpoint. 
*/ + allocatedPorts?: PortMap; }; /** diff --git a/src/capabilities/types.ts b/src/capabilities/types.ts index 6ac77c5..8a15177 100644 --- a/src/capabilities/types.ts +++ b/src/capabilities/types.ts @@ -55,6 +55,11 @@ export type ExtensionState = { networkName: string | null; chainId: number | null; balance: string | null; + activeTab?: { + role: string; + url: string; + title?: string; + }; }; export type LaunchOptions = { diff --git a/src/cli/mm.test.ts b/src/cli/mm.test.ts new file mode 100644 index 0000000..a73a2b4 --- /dev/null +++ b/src/cli/mm.test.ts @@ -0,0 +1,1973 @@ +/* eslint-disable n/no-unsupported-features/node-builtins */ +/* eslint-disable n/no-process-env */ +/* eslint-disable n/no-sync */ +/* eslint-disable require-atomic-updates */ +import { cosmiconfig } from 'cosmiconfig'; +import { existsSync } from 'node:fs'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import type { MockInstance } from 'vitest'; + +import { + extractProjectFlag, + resolveTargetFromArgs, + resolveWithinFromArgs, + getPositionalTarget, + isTransientError, + parseIntFlag, + parseStringFlag, + parseLaunchArgs, + printHelp, + resolveRuntime, + sendRequest, + routeCommand, + resolveWorktreeRoot, + readDaemonConfig, + shutdownDaemon, + waitForDaemon, + discoverDaemon, + autoStartDaemon, + handleServe, + sleep, + main, +} from './mm.js'; + +vi.mock('node:child_process', () => ({ + execSync: vi.fn(() => Buffer.from('/mock/worktree\n')), + spawn: vi.fn(() => { + const child = { + unref: vi.fn(), + on: vi.fn( + (event: string, handler: (code: number | null) => void) => + event === 'exit' && setTimeout(() => handler(0), 10), + ), + }; + return child; + }), +})); + +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { ...actual, existsSync: vi.fn(() => true) }; +}); + +vi.mock('node:fs/promises', async (importOriginal) => { + const 
actual = await importOriginal(); + return { + ...actual, + realpath: vi.fn(async (p: string) => p), + stat: vi.fn(async () => ({ isDirectory: () => true })), + readFile: vi.fn(), + }; +}); + +vi.mock('../server/daemon-state.js', () => ({ + readDaemonState: vi.fn(async () => null), + isDaemonAlive: vi.fn(async () => false), + isDaemonVersionMatch: vi.fn(() => true), + removeDaemonState: vi.fn(async () => {}), + acquireStartupLock: vi.fn(async () => true), + releaseStartupLock: vi.fn(async () => {}), +})); + +const mockSearch = vi.fn(); + +vi.mock('cosmiconfig', () => ({ + cosmiconfig: vi.fn(() => ({ + search: mockSearch, + })), +})); + +let exitSpy: MockInstance; +let stderrSpy: MockInstance; +let stdoutSpy: MockInstance; + +// eslint-disable-next-line vitest/require-top-level-describe +beforeEach(() => { + vi.clearAllMocks(); + mockSearch.mockResolvedValue({ + config: { daemon: './daemon.ts', runtime: 'tsx' }, + filepath: '/mock/worktree/mm-client-cli.config.ts', + isEmpty: false, + }); + exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => { + throw new Error('process.exit'); + }) as never); + stderrSpy = vi.spyOn(process.stderr, 'write').mockReturnValue(true); + stdoutSpy = vi.spyOn(process.stdout, 'write').mockReturnValue(true); +}); + +// eslint-disable-next-line vitest/require-top-level-describe +afterEach(() => { + exitSpy.mockRestore(); + stderrSpy.mockRestore(); + stdoutSpy.mockRestore(); + vi.restoreAllMocks(); +}); + +describe('extractProjectFlag', () => { + it('returns args unchanged when no --project flag', () => { + const result = extractProjectFlag(['launch', '--force']); + expect(result).toStrictEqual({ + args: ['launch', '--force'], + projectPath: undefined, + }); + }); + + it('extracts project path and removes flag from args', () => { + const result = extractProjectFlag([ + '--project', + '/path/to/project', + 'launch', + ]); + expect(result).toStrictEqual({ + args: ['launch'], + projectPath: '/path/to/project', + }); + }); + + it('handles 
--project in the middle of args', () => { + const result = extractProjectFlag([ + 'launch', + '--project', + '/my/path', + '--force', + ]); + expect(result).toStrictEqual({ + args: ['launch', '--force'], + projectPath: '/my/path', + }); + }); + + it('exits when --project has no value', () => { + expect(() => extractProjectFlag(['--project'])).toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --project requires a path value\n', + ); + }); + + it('exits when --project value starts with --', () => { + expect(() => extractProjectFlag(['--project', '--force'])).toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --project requires a path value\n', + ); + }); +}); + +describe('resolveTargetFromArgs', () => { + it('returns selector for --selector flag', () => { + expect(resolveTargetFromArgs(['--selector', '.my-button'])).toStrictEqual({ + selector: '.my-button', + }); + }); + + it('returns testId for --testid flag', () => { + expect(resolveTargetFromArgs(['--testid', 'my-btn'])).toStrictEqual({ + testId: 'my-btn', + }); + }); + + it('returns a11yRef for e-number patterns', () => { + expect(resolveTargetFromArgs(['e3'])).toStrictEqual({ a11yRef: 'e3' }); + expect(resolveTargetFromArgs(['e123'])).toStrictEqual({ a11yRef: 'e123' }); + }); + + it('returns testId for non-e-number strings', () => { + expect(resolveTargetFromArgs(['submit-button'])).toStrictEqual({ + testId: 'submit-button', + }); + expect(resolveTargetFromArgs(['eabc'])).toStrictEqual({ + testId: 'eabc', + }); + }); + + it('exits when --selector has no value', () => { + expect(() => resolveTargetFromArgs(['--selector'])).toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --selector requires a value\n', + ); + }); + + it('exits when --selector value starts with --', () => { + expect(() => resolveTargetFromArgs(['--selector', '--other'])).toThrowError( + 'process.exit', + ); + }); + + it('exits when 
--testid has no value', () => { + expect(() => resolveTargetFromArgs(['--testid'])).toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --testid requires a value\n', + ); + }); + + it('exits when no target provided', () => { + expect(() => resolveTargetFromArgs([])).toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: element target is required\n', + ); + }); +}); + +describe('resolveWithinFromArgs', () => { + it('returns undefined when --within is not present', () => { + expect(resolveWithinFromArgs(['e1', '--timeout', '5000'])).toBeUndefined(); + expect(resolveWithinFromArgs([])).toBeUndefined(); + }); + + it('returns testId when value starts with "testid:"', () => { + expect(resolveWithinFromArgs(['--within', 'testid:parent'])).toStrictEqual({ + testId: 'parent', + }); + }); + + it('returns selector when value starts with "selector:"', () => { + expect( + resolveWithinFromArgs(['--within', 'selector:.container']), + ).toStrictEqual({ + selector: '.container', + }); + }); + + it('returns a11yRef when value matches /^e\\d+$/', () => { + expect(resolveWithinFromArgs(['--within', 'e1'])).toStrictEqual({ + a11yRef: 'e1', + }); + expect(resolveWithinFromArgs(['--within', 'e123'])).toStrictEqual({ + a11yRef: 'e123', + }); + }); + + it('returns testId for bare non-ref value', () => { + expect(resolveWithinFromArgs(['--within', 'parent-id'])).toStrictEqual({ + testId: 'parent-id', + }); + expect(resolveWithinFromArgs(['--within', 'eabc'])).toStrictEqual({ + testId: 'eabc', + }); + }); + + it('exits when --within has no value', () => { + expect(() => resolveWithinFromArgs(['--within'])).toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --within requires a value\n', + ); + }); + + it('exits when --within value starts with --', () => { + expect(() => resolveWithinFromArgs(['--within', '--other'])).toThrowError( + 'process.exit', + ); + }); +}); + 
+describe('getPositionalTarget', () => { + it('returns first non-flag argument', () => { + expect(getPositionalTarget(['e1', '--timeout', '5000'])).toBe('e1'); + }); + + it('skips flag-value pairs', () => { + expect(getPositionalTarget(['--timeout', '5000', 'e1'])).toBe('e1'); + }); + + it('returns undefined for empty args', () => { + expect(getPositionalTarget([])).toBeUndefined(); + }); + + it('returns undefined when only flags present', () => { + expect(getPositionalTarget(['--timeout', '5000'])).toBeUndefined(); + }); +}); + +describe('isTransientError', () => { + it('returns true for ECONNREFUSED', () => { + expect(isTransientError(new Error('ECONNREFUSED'))).toBe(true); + }); + + it('returns true for ECONNRESET', () => { + expect(isTransientError(new Error('ECONNRESET'))).toBe(true); + }); + + it('returns true for EPIPE', () => { + expect(isTransientError(new Error('EPIPE'))).toBe(true); + }); + + it('returns true for UND_ERR_SOCKET', () => { + expect(isTransientError(new Error('UND_ERR_SOCKET'))).toBe(true); + }); + + it('returns true for fetch failed', () => { + expect(isTransientError(new Error('fetch failed'))).toBe(true); + }); + + it('returns false for other errors', () => { + expect(isTransientError(new Error('timeout'))).toBe(false); + expect(isTransientError(new Error('404 not found'))).toBe(false); + }); +}); + +describe('parseIntFlag', () => { + it('returns parsed integer value', () => { + expect(parseIntFlag(['--timeout', '5000'], '--timeout')).toBe(5000); + }); + + it('returns undefined when flag is absent', () => { + expect(parseIntFlag(['--other', '5000'], '--timeout')).toBeUndefined(); + }); + + it('returns undefined for NaN values', () => { + expect(parseIntFlag(['--timeout', 'abc'], '--timeout')).toBeUndefined(); + }); + + it('returns undefined when no value follows flag', () => { + expect(parseIntFlag(['--timeout'], '--timeout')).toBeUndefined(); + }); +}); + +describe('parseStringFlag', () => { + it('returns string value', () => { + 
expect(parseStringFlag(['--role', 'extension'], '--role')).toBe( + 'extension', + ); + }); + + it('returns undefined when flag is absent', () => { + expect(parseStringFlag(['--other', 'val'], '--role')).toBeUndefined(); + }); + + it('returns undefined when value starts with --', () => { + expect(parseStringFlag(['--role', '--other'], '--role')).toBeUndefined(); + }); + + it('returns undefined when no value follows', () => { + expect(parseStringFlag(['--role'], '--role')).toBeUndefined(); + }); +}); + +describe('parseLaunchArgs', () => { + it('returns empty object for no args', () => { + expect(parseLaunchArgs([])).toStrictEqual({}); + }); + + it('parses --force flag', () => { + expect(parseLaunchArgs(['--force'])).toStrictEqual({ force: true }); + }); + + it('parses --state value', () => { + expect(parseLaunchArgs(['--state', 'onboarding'])).toStrictEqual({ + stateMode: 'onboarding', + }); + }); + + it('parses --extension-path value', () => { + expect(parseLaunchArgs(['--extension-path', '/ext'])).toStrictEqual({ + extensionPath: '/ext', + }); + }); + + it('parses --goal value', () => { + expect(parseLaunchArgs(['--goal', 'test swap'])).toStrictEqual({ + goal: 'test swap', + }); + }); + + it('parses --flow-tags as comma-separated array', () => { + expect(parseLaunchArgs(['--flow-tags', 'send, swap'])).toStrictEqual({ + flowTags: ['send', 'swap'], + }); + }); + + it('parses multiple flags together', () => { + expect( + parseLaunchArgs(['--state', 'default', '--force', '--goal', 'test it']), + ).toStrictEqual({ + stateMode: 'default', + force: true, + goal: 'test it', + }); + }); + + it('exits for --state without value', () => { + expect(() => parseLaunchArgs(['--state'])).toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --state requires a value (default|onboarding|custom)\n', + ); + }); + + it('exits for --state with flag as value', () => { + expect(() => parseLaunchArgs(['--state', '--force'])).toThrowError( + 'process.exit', + ); + 
}); + + it('exits for --extension-path without value', () => { + expect(() => parseLaunchArgs(['--extension-path'])).toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --extension-path requires a value\n', + ); + }); + + it('exits for --goal without value', () => { + expect(() => parseLaunchArgs(['--goal'])).toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith('Error: --goal requires a value\n'); + }); + + it('exits for --flow-tags without value', () => { + expect(() => parseLaunchArgs(['--flow-tags'])).toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --flow-tags requires a comma-separated value\n', + ); + }); + + it('parses --context value', () => { + expect(parseLaunchArgs(['--context', 'prod'])).toStrictEqual({ + context: 'prod', + }); + }); + + it('parses --context with other flags', () => { + expect( + parseLaunchArgs(['--context', 'prod', '--state', 'onboarding']), + ).toStrictEqual({ + context: 'prod', + stateMode: 'onboarding', + }); + }); + + it('exits for --context without value', () => { + expect(() => parseLaunchArgs(['--context'])).toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + 'Error: --context requires a value (e2e|prod)\n', + ); + }); + + it('exits for --context with flag as value', () => { + expect(() => parseLaunchArgs(['--context', '--force'])).toThrowError( + 'process.exit', + ); + }); + + it('writes warning for unknown flags', () => { + parseLaunchArgs(['--unknown']); + expect(stderrSpy).toHaveBeenCalledWith( + "Warning: unknown launch flag '--unknown'\n", + ); + }); +}); + +describe('printHelp', () => { + it('writes help text to stdout', () => { + printHelp(); + expect(stdoutSpy).toHaveBeenCalledTimes(1); + const output = (stdoutSpy.mock.calls[0] as string[])[0]; + expect(output).toContain('mm — MetaMask CLI'); + expect(output).toContain('Usage:'); + expect(output).toContain('mm launch'); + }); +}); + +describe('resolveRuntime', 
() => { + it('returns node for node runtime', () => { + expect(resolveRuntime('/root', 'node')).toBe('node'); + }); + + it('returns bin path when runtime exists', () => { + vi.mocked(existsSync).mockReturnValue(true); + const result = resolveRuntime('/root', 'tsx'); + expect(result).toBe(path.join('/root', 'node_modules', '.bin', 'tsx')); + }); + + it('exits when runtime binary not found', () => { + vi.mocked(existsSync).mockReturnValue(false); + expect(() => resolveRuntime('/root', 'tsx')).toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining("Runtime 'tsx' not found"), + ); + }); +}); + +describe('sleep', () => { + it('resolves after delay', async () => { + vi.useFakeTimers(); + const promise = sleep(100); + vi.advanceTimersByTime(100); + expect(await promise).toBeUndefined(); + vi.useRealTimers(); + }); +}); + +describe('shutdownDaemon', () => { + it('sends SIGTERM and removes state', async () => { + const { removeDaemonState } = await import('../server/daemon-state.js'); + const killSpy = vi + .spyOn(process, 'kill') + .mockImplementation(vi.fn() as unknown as typeof process.kill); + + await shutdownDaemon('/root', { + port: 3000, + pid: 12345, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }); + + expect(killSpy).toHaveBeenCalledWith(12345, 'SIGTERM'); + expect(removeDaemonState).toHaveBeenCalledWith('/root'); + killSpy.mockRestore(); + }); + + it('ignores kill errors for dead processes', async () => { + const killSpy = vi.spyOn(process, 'kill').mockImplementation((() => { + throw new Error('ESRCH'); + }) as unknown as typeof process.kill); + + await shutdownDaemon('/root', { + port: 3000, + pid: 12345, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }); + + expect(killSpy).toHaveBeenCalled(); + killSpy.mockRestore(); + }); + + it('skips kill when pid is falsy', async () => { + const killSpy = vi + 
.spyOn(process, 'kill') + .mockImplementation(vi.fn() as unknown as typeof process.kill); + + await shutdownDaemon('/root', { + port: 3000, + pid: 0, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }); + + expect(killSpy).not.toHaveBeenCalled(); + killSpy.mockRestore(); + }); +}); + +describe('readDaemonConfig', () => { + it('reads and parses config from cosmiconfig', async () => { + mockSearch.mockResolvedValueOnce({ + config: { daemon: './my-daemon.ts', runtime: 'tsx' }, + filepath: '/project/mm-client-cli.config.ts', + isEmpty: false, + }); + + const result = await readDaemonConfig('/project'); + + expect(result).toStrictEqual({ + daemonPath: './my-daemon.ts', + runtime: 'tsx', + }); + expect(cosmiconfig).toHaveBeenCalledWith('mm-client-cli', { + searchPlaces: [ + 'mm-client-cli.config.ts', + 'mm-client-cli.config.js', + 'mm-client-cli.config.cjs', + 'mm-client-cli.config.mjs', + '.mm-client-clirc', + '.mm-client-clirc.json', + '.mm-client-clirc.yaml', + '.mm-client-clirc.yml', + '.mm-client-clirc.js', + '.mm-client-clirc.ts', + '.mm-client-clirc.cjs', + ], + stopDir: '/project', + }); + expect(mockSearch).toHaveBeenCalledWith('/project'); + }); + + it('defaults runtime to tsx when not specified', async () => { + mockSearch.mockResolvedValueOnce({ + config: { daemon: './d.ts' }, + filepath: '/project/mm-client-cli.config.ts', + isEmpty: false, + }); + + const result = await readDaemonConfig('/project'); + + expect(result.runtime).toBe('tsx'); + }); + + it('exits when no config file is found', async () => { + mockSearch.mockResolvedValueOnce(null); + + await expect(readDaemonConfig('/project')).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('No mm-client-cli config found'), + ); + }); + + it('exits when config file is empty', async () => { + mockSearch.mockResolvedValueOnce({ + config: undefined, + filepath: '/project/mm-client-cli.config.ts', + 
isEmpty: true, + }); + + await expect(readDaemonConfig('/project')).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('No mm-client-cli config found'), + ); + }); + + it('exits when daemon is not configured', async () => { + mockSearch.mockResolvedValueOnce({ + config: { runtime: 'tsx' }, + filepath: '/project/mm-client-cli.config.ts', + isEmpty: false, + }); + + await expect(readDaemonConfig('/project')).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('No daemon entry point configured'), + ); + }); +}); + +describe('resolveWorktreeRoot', () => { + it('resolves path from --project flag', async () => { + vi.mocked(fs.realpath).mockResolvedValueOnce('/resolved/path'); + vi.mocked(fs.stat).mockResolvedValueOnce({ + isDirectory: () => true, + } as any); + + const result = await resolveWorktreeRoot('/some/path'); + expect(result).toBe('/resolved/path'); + }); + + it('resolves path from MM_PROJECT env when no flag', async () => { + const origEnv = process.env.MM_PROJECT; + process.env.MM_PROJECT = '/env/path'; + + vi.mocked(fs.realpath).mockResolvedValueOnce('/env/path'); + vi.mocked(fs.stat).mockResolvedValueOnce({ + isDirectory: () => true, + } as any); + + const result = await resolveWorktreeRoot(undefined); + expect(result).toBe('/env/path'); + + process.env.MM_PROJECT = origEnv; + }); + + it('exits when path does not exist', async () => { + vi.mocked(fs.realpath).mockRejectedValueOnce(new Error('ENOENT')); + + await expect(resolveWorktreeRoot('/bad/path')).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('project path does not exist'), + ); + }); + + it('exits when path is not a directory', async () => { + vi.mocked(fs.realpath).mockResolvedValueOnce('/some/file.txt'); + vi.mocked(fs.stat).mockResolvedValueOnce({ + isDirectory: () => false, + } as any); + + await 
expect(resolveWorktreeRoot('/some/file.txt')).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('project path is not a directory'), + ); + }); + + it('exits when stat fails', async () => { + vi.mocked(fs.realpath).mockResolvedValueOnce('/some/path'); + vi.mocked(fs.stat).mockRejectedValueOnce(new Error('EACCES')); + + await expect(resolveWorktreeRoot('/some/path')).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('cannot access project path'), + ); + }); + + it('falls back to git worktree when no explicit path', async () => { + const origEnv = process.env.MM_PROJECT; + delete process.env.MM_PROJECT; + + const { execSync } = await import('node:child_process'); + vi.mocked(execSync).mockReturnValueOnce(Buffer.from('/git/root\n')); + + const result = await resolveWorktreeRoot(undefined); + expect(result).toBe('/git/root'); + + process.env.MM_PROJECT = origEnv; + }); + + it('exits when not in a git repository', async () => { + const origEnv = process.env.MM_PROJECT; + delete process.env.MM_PROJECT; + + const { execSync } = await import('node:child_process'); + vi.mocked(execSync).mockImplementation(() => { + throw new Error('not a git repo'); + }); + + await expect(resolveWorktreeRoot(undefined)).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('not in a git repository'), + ); + + process.env.MM_PROJECT = origEnv; + }); +}); + +describe('sendRequest', () => { + const originalFetch = globalThis.fetch; + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it('sends GET request and prints JSON result', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ({ ok: true, result: { status: 'running' } }), + } as Response); + + await sendRequest(3000, 'GET', '/status', null); + + expect(globalThis.fetch).toHaveBeenCalledWith( + 
'http://127.0.0.1:3000/status', + expect.objectContaining({ method: 'GET' }), + ); + expect(stdoutSpy).toHaveBeenCalled(); + }); + + it('sends POST request with JSON body', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ({ ok: true, result: 'launched' }), + } as Response); + + await sendRequest(3000, 'POST', '/launch', { state: 'default' }); + + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/launch', + expect.objectContaining({ + method: 'POST', + body: '{"state":"default"}', + headers: { 'Content-Type': 'application/json' }, + }), + ); + }); + + it('prints string results directly', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ({ ok: true, result: 'simple string' }), + } as Response); + + await sendRequest(3000, 'GET', '/status', null); + + expect(stdoutSpy).toHaveBeenCalledWith('simple string\n'); + }); + + it('exits on error response', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: false, + json: async () => ({ + ok: false, + error: { message: 'No session' }, + }), + } as Response); + + await expect( + sendRequest(3000, 'POST', '/tool/click', {}), + ).rejects.toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith('Error: No session\n'); + }); + + it('exits on ok:false in response body', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ({ + ok: false, + error: { message: 'Tool failed' }, + }), + } as Response); + + await expect( + sendRequest(3000, 'POST', '/tool/click', {}), + ).rejects.toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith('Error: Tool failed\n'); + }); + + it('retries transient errors', async () => { + let attempts = 0; + vi.spyOn(globalThis, 'fetch').mockImplementation(async () => { + attempts += 1; + if (attempts <= 2) { + throw new Error('ECONNREFUSED'); + } + return { + ok: true, + json: async () => ({ ok: 
true, result: 'ok' }), + } as Response; + }); + + await sendRequest(3000, 'GET', '/health', null); + + expect(attempts).toBe(3); + }); + + it('exits after max retries for transient errors', async () => { + vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('ECONNREFUSED')); + + await expect( + sendRequest(3000, 'GET', '/health', null), + ).rejects.toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('ECONNREFUSED'), + ); + }); + + it('exits immediately for non-transient errors', async () => { + vi.spyOn(globalThis, 'fetch').mockRejectedValue( + new Error('some other error'), + ); + + await expect( + sendRequest(3000, 'GET', '/health', null), + ).rejects.toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('some other error'), + ); + }); + + it('exits on request timeout (AbortError)', async () => { + const abortError = new Error('The operation was aborted'); + abortError.name = 'AbortError'; + vi.spyOn(globalThis, 'fetch').mockRejectedValue(abortError); + + await expect(sendRequest(3000, 'POST', '/launch', {})).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('request timed out'), + ); + }); + + it('falls back to data when no result key', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ({ ok: true, status: 'running' }), + } as Response); + + await sendRequest(3000, 'GET', '/status', null); + + expect(stdoutSpy).toHaveBeenCalled(); + const output = (stdoutSpy.mock.calls[0] as string[])[0]; + expect(output).toContain('running'); + }); + + it('falls back to "Request failed" when error has no message', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: false, + json: async () => ({ ok: false }), + } as Response); + + await expect(sendRequest(3000, 'POST', '/tool/x', {})).rejects.toThrowError( + 'process.exit', + ); + 
expect(stderrSpy).toHaveBeenCalledWith('Error: Request failed\n'); + }); + + it('reaches the final fallback after repeated transient failures when exit does not throw', async () => { + vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('ECONNREFUSED')); + exitSpy.mockRestore(); + exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => { + return undefined as never; + }) as never); + + await sendRequest(3000, 'GET', '/health', null); + + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('request failed after 4 attempts'), + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); +}); + +describe('routeCommand', () => { + const originalFetch = globalThis.fetch; + + beforeEach(() => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ({ ok: true, result: {} }), + } as Response); + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it('routes status to GET /status', async () => { + await routeCommand('status', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/status', + expect.objectContaining({ method: 'GET' }), + ); + }); + + it('routes click with a11y ref', async () => { + await routeCommand('click', ['e3'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/click', + expect.objectContaining({ + body: JSON.stringify({ a11yRef: 'e3' }), + }), + ); + }); + + it('routes click with --selector', async () => { + await routeCommand('click', ['--selector', '.btn'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/click', + expect.objectContaining({ + body: JSON.stringify({ selector: '.btn' }), + }), + ); + }); + + it('exits when click has no target', async () => { + await expect(routeCommand('click', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('Usage: mm click'), + ); + }); + + it('routes type with ref and 
text', async () => { + await routeCommand('type', ['e1', 'hello'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/type', + expect.objectContaining({ + body: JSON.stringify({ a11yRef: 'e1', text: 'hello' }), + }), + ); + }); + + it('routes type with --testid', async () => { + await routeCommand('type', ['--testid', 'input', 'text'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/type', + expect.objectContaining({ + body: JSON.stringify({ testId: 'input', text: 'text' }), + }), + ); + }); + + it('exits when type has no target', async () => { + await expect(routeCommand('type', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('exits when type has no text', async () => { + await expect(routeCommand('type', ['e1'], 3000)).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith('Usage: mm type \n'); + }); + + it('routes describe-screen', async () => { + await routeCommand('describe-screen', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/describe_screen', + expect.anything(), + ); + }); + + it('routes screenshot', async () => { + await routeCommand('screenshot', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/screenshot', + expect.objectContaining({ body: JSON.stringify({}) }), + ); + }); + + it('routes screenshot with --name', async () => { + await routeCommand('screenshot', ['--name', 'my-shot'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/screenshot', + expect.objectContaining({ + body: JSON.stringify({ name: 'my-shot' }), + }), + ); + }); + + it('routes wait-for with ref', async () => { + await routeCommand('wait-for', ['e5'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/wait_for', + expect.objectContaining({ + body: JSON.stringify({ a11yRef: 'e5' }), + }), + ); + }); + + 
it('routes wait-for with --timeout', async () => { + await routeCommand('wait-for', ['e5', '--timeout', '10000'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/wait_for', + expect.objectContaining({ + body: JSON.stringify({ a11yRef: 'e5', timeoutMs: 10000 }), + }), + ); + }); + + it('exits when wait-for has no target', async () => { + await expect(routeCommand('wait-for', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes navigate with url', async () => { + await routeCommand('navigate', ['http://example.com'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/navigate', + expect.objectContaining({ + body: JSON.stringify({ screen: 'url', url: 'http://example.com' }), + }), + ); + }); + + it('exits when navigate has no url', async () => { + await expect(routeCommand('navigate', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes navigate-home', async () => { + await routeCommand('navigate-home', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/navigate', + expect.objectContaining({ + body: JSON.stringify({ screen: 'home' }), + }), + ); + }); + + it('routes navigate-settings', async () => { + await routeCommand('navigate-settings', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/navigate', + expect.objectContaining({ + body: JSON.stringify({ screen: 'settings' }), + }), + ); + }); + + it('routes get-state', async () => { + await routeCommand('get-state', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/get_state', + expect.anything(), + ); + }); + + it('routes get-context', async () => { + await routeCommand('get-context', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/get_context', + expect.anything(), + ); + }); + + it('routes set-context with e2e', async () => { + await 
routeCommand('set-context', ['e2e'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/set_context', + expect.objectContaining({ + body: JSON.stringify({ context: 'e2e' }), + }), + ); + }); + + it('routes set-context with prod', async () => { + await routeCommand('set-context', ['prod'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/set_context', + expect.objectContaining({ + body: JSON.stringify({ context: 'prod' }), + }), + ); + }); + + it('exits when set-context has invalid value', async () => { + await expect( + routeCommand('set-context', ['other'], 3000), + ).rejects.toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + 'Usage: mm set-context \n', + ); + }); + + it('exits when set-context has no value', async () => { + await expect(routeCommand('set-context', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes build', async () => { + await routeCommand('build', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/build', + expect.objectContaining({ body: JSON.stringify({}) }), + ); + }); + + it('routes build with --force', async () => { + await routeCommand('build', ['--force'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/build', + expect.objectContaining({ + body: JSON.stringify({ force: true }), + }), + ); + }); + + it('routes wait-for-notification', async () => { + await routeCommand('wait-for-notification', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/wait_for_notification', + expect.objectContaining({ body: JSON.stringify({}) }), + ); + }); + + it('routes wait-for-notification with --timeout', async () => { + await routeCommand('wait-for-notification', ['--timeout', '5000'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/wait_for_notification', + expect.objectContaining({ + 
body: JSON.stringify({ timeoutMs: 5000 }), + }), + ); + }); + + it('routes switch-to-tab with --role', async () => { + await routeCommand('switch-to-tab', ['--role', 'extension'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/switch_to_tab', + expect.objectContaining({ + body: JSON.stringify({ role: 'extension' }), + }), + ); + }); + + it('routes switch-to-tab with --url', async () => { + await routeCommand('switch-to-tab', ['--url', 'http://dapp.io'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/switch_to_tab', + expect.objectContaining({ + body: JSON.stringify({ url: 'http://dapp.io' }), + }), + ); + }); + + it('routes switch-to-tab with positional role', async () => { + await routeCommand('switch-to-tab', ['dapp'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/switch_to_tab', + expect.objectContaining({ + body: JSON.stringify({ role: 'dapp' }), + }), + ); + }); + + it('exits when switch-to-tab has no flags', async () => { + await expect(routeCommand('switch-to-tab', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes get-text with positional ref', async () => { + await routeCommand('get-text', ['e1'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/get_text', + expect.objectContaining({ + body: JSON.stringify({ a11yRef: 'e1' }), + }), + ); + }); + + it('routes get-text with --testid', async () => { + await routeCommand('get-text', ['--testid', 'result-box'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/get_text', + expect.objectContaining({ + body: JSON.stringify({ testId: 'result-box' }), + }), + ); + }); + + it('routes get-text with --selector', async () => { + await routeCommand('get-text', ['--selector', '#output'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/get_text', + 
expect.objectContaining({ + body: JSON.stringify({ selector: '#output' }), + }), + ); + }); + + it('routes get-text with --within scoping', async () => { + await routeCommand( + 'get-text', + ['--testid', 'amount', '--within', 'testid:tx-row'], + 3000, + ); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/get_text', + expect.objectContaining({ + body: JSON.stringify({ + testId: 'amount', + within: { testId: 'tx-row' }, + }), + }), + ); + }); + + it('exits when get-text has no target', async () => { + await expect(routeCommand('get-text', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes close-tab with --role', async () => { + await routeCommand('close-tab', ['--role', 'dapp'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/close_tab', + expect.objectContaining({ + body: JSON.stringify({ role: 'dapp' }), + }), + ); + }); + + it('routes close-tab with --url', async () => { + await routeCommand('close-tab', ['--url', 'http://x.io'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/close_tab', + expect.objectContaining({ + body: JSON.stringify({ url: 'http://x.io' }), + }), + ); + }); + + it('exits when close-tab has no flags', async () => { + await expect(routeCommand('close-tab', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes clipboard read', async () => { + await routeCommand('clipboard', ['read'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/clipboard', + expect.objectContaining({ + body: JSON.stringify({ action: 'read' }), + }), + ); + }); + + it('routes clipboard write with text', async () => { + await routeCommand('clipboard', ['write', 'hello'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/clipboard', + expect.objectContaining({ + body: JSON.stringify({ action: 'write', text: 'hello' }), + }), + ); + }); + + it('exits 
when clipboard has invalid action', async () => { + await expect( + routeCommand('clipboard', ['invalid'], 3000), + ).rejects.toThrowError('process.exit'); + }); + + it('exits when clipboard has no action', async () => { + await expect(routeCommand('clipboard', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('exits when clipboard write has no text', async () => { + await expect( + routeCommand('clipboard', ['write'], 3000), + ).rejects.toThrowError('process.exit'); + }); + + it('routes seed-contract', async () => { + await routeCommand('seed-contract', ['hst'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/seed_contract', + expect.objectContaining({ + body: JSON.stringify({ contractName: 'hst' }), + }), + ); + }); + + it('routes seed-contract with --hardfork', async () => { + await routeCommand( + 'seed-contract', + ['hst', '--hardfork', 'shanghai'], + 3000, + ); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/seed_contract', + expect.objectContaining({ + body: JSON.stringify({ contractName: 'hst', hardfork: 'shanghai' }), + }), + ); + }); + + it('exits when seed-contract has no name', async () => { + await expect(routeCommand('seed-contract', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes seed-contracts with multiple names', async () => { + await routeCommand('seed-contracts', ['hst', 'nfts'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/seed_contracts', + expect.objectContaining({ + body: JSON.stringify({ contracts: ['hst', 'nfts'] }), + }), + ); + }); + + it('routes seed-contracts with --hardfork', async () => { + await routeCommand( + 'seed-contracts', + ['hst', '--hardfork', 'shanghai'], + 3000, + ); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/seed_contracts', + expect.objectContaining({ + body: expect.stringContaining('"hardfork":"shanghai"'), + }), + ); + }); + + 
it('exits when seed-contracts has no names', async () => { + await expect(routeCommand('seed-contracts', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('routes get-contract-address', async () => { + await routeCommand('get-contract-address', ['hst'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/get_contract_address', + expect.objectContaining({ + body: JSON.stringify({ contractName: 'hst' }), + }), + ); + }); + + it('exits when get-contract-address has no name', async () => { + await expect( + routeCommand('get-contract-address', [], 3000), + ).rejects.toThrowError('process.exit'); + }); + + it('routes list-contracts', async () => { + await routeCommand('list-contracts', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/list_contracts', + expect.anything(), + ); + }); + + it('routes list-testids', async () => { + await routeCommand('list-testids', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/list_testids', + expect.objectContaining({ body: JSON.stringify({}) }), + ); + }); + + it('routes list-testids with --limit', async () => { + await routeCommand('list-testids', ['--limit', '50'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/list_testids', + expect.objectContaining({ + body: JSON.stringify({ limit: 50 }), + }), + ); + }); + + it('routes accessibility-snapshot', async () => { + await routeCommand('accessibility-snapshot', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/accessibility_snapshot', + expect.objectContaining({ body: JSON.stringify({}) }), + ); + }); + + it('routes accessibility-snapshot with --root', async () => { + await routeCommand('accessibility-snapshot', ['--root', '#main'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/accessibility_snapshot', + expect.objectContaining({ + body: 
JSON.stringify({ rootSelector: '#main' }), + }), + ); + }); + + it('routes knowledge-search', async () => { + await routeCommand('knowledge-search', ['send flow'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/knowledge_search', + expect.objectContaining({ + body: JSON.stringify({ query: 'send flow' }), + }), + ); + }); + + it('exits when knowledge-search has no query', async () => { + await expect( + routeCommand('knowledge-search', [], 3000), + ).rejects.toThrowError('process.exit'); + }); + + it('routes knowledge-last', async () => { + await routeCommand('knowledge-last', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/knowledge_last', + expect.anything(), + ); + }); + + it('routes knowledge-sessions', async () => { + await routeCommand('knowledge-sessions', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/knowledge_sessions', + expect.anything(), + ); + }); + + it('routes knowledge-summarize', async () => { + await routeCommand('knowledge-summarize', [], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/knowledge_summarize', + expect.objectContaining({ body: JSON.stringify({}) }), + ); + }); + + it('routes knowledge-summarize with --session', async () => { + await routeCommand('knowledge-summarize', ['--session', 'sid'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/knowledge_summarize', + expect.objectContaining({ + body: JSON.stringify({ scope: { sessionId: 'sid' } }), + }), + ); + }); + + it('routes run-steps with JSON input', async () => { + const input = JSON.stringify({ + steps: [{ tool: 'click', args: { a11yRef: 'e1' } }], + }); + await routeCommand('run-steps', [input], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/run_steps', + expect.objectContaining({ body: input }), + ); + }); + + it('exits when run-steps has no 
input', async () => { + await expect(routeCommand('run-steps', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + }); + + it('exits when run-steps has invalid JSON', async () => { + await expect( + routeCommand('run-steps', ['{bad json}'], 3000), + ).rejects.toThrowError('process.exit'); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('invalid JSON'), + ); + }); + + it('exits for unknown command', async () => { + await expect(routeCommand('unknown-cmd', [], 3000)).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining("unknown command 'unknown-cmd'"), + ); + }); +}); + +describe('discoverDaemon', () => { + it('returns existing alive daemon with matching version', async () => { + const { readDaemonState, isDaemonAlive, isDaemonVersionMatch } = + await import('../server/daemon-state.js'); + const mockState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + version: '1.0.0', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState).mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + vi.mocked(isDaemonVersionMatch).mockReturnValueOnce(true); + + const result = await discoverDaemon('/root', 'click'); + expect(result).toStrictEqual(mockState); + }); + + it('restarts daemon on version mismatch', async () => { + const { + readDaemonState, + isDaemonAlive, + isDaemonVersionMatch, + removeDaemonState, + } = await import('../server/daemon-state.js'); + + const killSpy = vi + .spyOn(process, 'kill') + .mockImplementation(vi.fn() as unknown as typeof process.kill); + + const oldState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + version: '0.0.1', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState).mockResolvedValueOnce(oldState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + 
vi.mocked(isDaemonVersionMatch).mockReturnValueOnce(false); + + await expect(discoverDaemon('/root', 'click')).rejects.toThrowError( + 'process.exit', + ); + + expect(removeDaemonState).toHaveBeenCalledWith('/root'); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('Daemon version mismatch'), + ); + + killSpy.mockRestore(); + }); + + it('auto-starts daemon for launch command when no daemon running', async () => { + const { + readDaemonState, + isDaemonAlive, + acquireStartupLock, + releaseStartupLock, + } = await import('../server/daemon-state.js'); + + vi.mocked(readDaemonState).mockResolvedValueOnce(null); + + vi.mocked(acquireStartupLock).mockResolvedValueOnce(true); + + const mockState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState).mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + + const result = await discoverDaemon('/root', 'launch'); + + expect(result).toStrictEqual(mockState); + expect(releaseStartupLock).toHaveBeenCalledWith('/root'); + }); + + it('removes stale daemon state when not alive', async () => { + const { readDaemonState, isDaemonAlive, removeDaemonState } = + await import('../server/daemon-state.js'); + const mockState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState).mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(false); + + await expect(discoverDaemon('/root', 'click')).rejects.toThrowError( + 'process.exit', + ); + + expect(removeDaemonState).toHaveBeenCalledWith('/root'); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('no daemon running'), + ); + }); + + it('exits for non-auto-start commands when no daemon', async () => { + const { readDaemonState } = await import('../server/daemon-state.js'); + 
vi.mocked(readDaemonState).mockResolvedValueOnce(null); + + await expect(discoverDaemon('/root', 'status')).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('no daemon running'), + ); + }); +}); + +describe('waitForDaemon', () => { + it('returns daemon state when daemon becomes alive', async () => { + const { readDaemonState, isDaemonAlive } = + await import('../server/daemon-state.js'); + const mockState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + + vi.useFakeTimers(); + const promise = waitForDaemon('/root'); + for (let i = 0; i < 3; i++) { + await vi.advanceTimersByTimeAsync(200); + } + const result = await promise; + vi.useRealTimers(); + + expect(result).toStrictEqual(mockState); + }); + + it('throws when daemon fails to start within timeout', async () => { + const { readDaemonState } = await import('../server/daemon-state.js'); + vi.mocked(readDaemonState).mockResolvedValue(null); + + vi.useFakeTimers(); + const promise = waitForDaemon('/root').catch((error: Error) => error); + for (let i = 0; i < 55; i++) { + await vi.advanceTimersByTimeAsync(200); + } + const result = await promise; + expect(result).toBeInstanceOf(Error); + expect((result as Error).message).toContain('Daemon failed to start'); + vi.useRealTimers(); + }); +}); + +describe('main', () => { + it('prints help when no args', async () => { + const origArgv = process.argv; + process.argv = ['node', 'mm']; + + await expect(main()).rejects.toThrowError('process.exit'); + expect(stdoutSpy).toHaveBeenCalledWith(expect.stringContaining('mm —')); + + process.argv = origArgv; + }); + + it('prints help for --help flag', async () => { + const origArgv = process.argv; + process.argv = ['node', 'mm', 
'--help']; + + await expect(main()).rejects.toThrowError('process.exit'); + expect(stdoutSpy).toHaveBeenCalledWith(expect.stringContaining('Usage:')); + + process.argv = origArgv; + }); + + it('prints help for -h flag', async () => { + const origArgv = process.argv; + process.argv = ['node', 'mm', '-h']; + + await expect(main()).rejects.toThrowError('process.exit'); + + process.argv = origArgv; + }); +}); + +describe('type command --selector/--testid text resolution', () => { + const originalFetch = globalThis.fetch; + + beforeEach(() => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ({ ok: true, result: {} }), + } as Response); + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it('routes type with --selector and text after selector', async () => { + await routeCommand('type', ['--selector', '.input', 'hello world'], 3000); + expect(globalThis.fetch).toHaveBeenCalledWith( + 'http://127.0.0.1:3000/tool/type', + expect.objectContaining({ + body: JSON.stringify({ + selector: '.input', + text: 'hello world', + }), + }), + ); + }); +}); + +describe('handleServe', () => { + it('exits when daemon is already running', async () => { + const { readDaemonState, isDaemonAlive } = + await import('../server/daemon-state.js'); + vi.mocked(readDaemonState).mockResolvedValueOnce({ + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + + await expect(handleServe('/root', false)).rejects.toThrowError( + 'process.exit', + ); + expect(stderrSpy).toHaveBeenCalledWith( + expect.stringContaining('daemon already running'), + ); + }); + + it('starts daemon in background mode', async () => { + const { readDaemonState, isDaemonAlive } = + await import('../server/daemon-state.js'); + const { spawn } = await import('node:child_process'); + + vi.mocked(readDaemonState).mockResolvedValueOnce(null); + + 
vi.mocked(existsSync).mockReturnValue(true); + mockSearch.mockResolvedValueOnce({ + config: { daemon: './daemon.ts', runtime: 'node' }, + filepath: '/root/mm-client-cli.config.ts', + isEmpty: false, + }); + + const mockState = { + port: 4000, + pid: 456, + nonce: 'xyz', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + + vi.useFakeTimers(); + const promise = handleServe('/root', true); + for (let i = 0; i < 3; i++) { + await vi.advanceTimersByTimeAsync(200); + } + await promise; + vi.useRealTimers(); + + expect(spawn).toHaveBeenCalledWith('node', ['./daemon.ts'], { + detached: true, + stdio: ['ignore', 'ignore', 'ignore'], + cwd: '/root', + }); + expect(stdoutSpy).toHaveBeenCalledWith( + 'Daemon started on port 4000 (PID 456)\n', + ); + }); + + it('cleans stale state before starting', async () => { + const { readDaemonState, isDaemonAlive, removeDaemonState } = + await import('../server/daemon-state.js'); + const staleState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState).mockResolvedValueOnce(staleState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(false); + + vi.mocked(existsSync).mockReturnValue(true); + mockSearch.mockResolvedValueOnce({ + config: { daemon: './d.ts', runtime: 'node' }, + filepath: '/root/mm-client-cli.config.ts', + isEmpty: false, + }); + + const { spawn } = await import('node:child_process'); + vi.mocked(spawn).mockReturnValue({ + stdio: 'inherit', + on: vi.fn((event: string, handler: (code: number | null) => void) => { + if (event === 'exit') { + setTimeout(() => handler(0), 10); + } + }), + } as any); + + const promise = handleServe('/root', false); + await new Promise((resolve) => setTimeout(resolve, 50)); + await promise; + + 
expect(removeDaemonState).toHaveBeenCalledWith('/root'); + }); +}); + +describe('autoStartDaemon', () => { + it('returns existing daemon if one appeared after locking', async () => { + const { + acquireStartupLock, + readDaemonState, + isDaemonAlive, + releaseStartupLock, + } = await import('../server/daemon-state.js'); + + vi.mocked(acquireStartupLock).mockResolvedValueOnce(true); + + const mockState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState).mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + + const result = await autoStartDaemon('/root'); + + expect(result).toStrictEqual(mockState); + expect(releaseStartupLock).toHaveBeenCalledWith('/root'); + }); + + it('spawns daemon when no existing daemon is found', async () => { + const { + acquireStartupLock, + readDaemonState, + isDaemonAlive, + releaseStartupLock, + } = await import('../server/daemon-state.js'); + const { spawn } = await import('node:child_process'); + + vi.mocked(acquireStartupLock).mockResolvedValueOnce(true); + vi.mocked(readDaemonState).mockResolvedValueOnce(null); + + vi.mocked(existsSync).mockReturnValue(true); + mockSearch.mockResolvedValueOnce({ + config: { daemon: './daemon.ts', runtime: 'node' }, + filepath: '/root/mm-client-cli.config.ts', + isEmpty: false, + }); + + const mockState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + + vi.useFakeTimers(); + const promise = autoStartDaemon('/root'); + for (let i = 0; i < 3; i++) { + await vi.advanceTimersByTimeAsync(200); + } + const result = await promise; + vi.useRealTimers(); + + expect(spawn).toHaveBeenCalledWith('node', ['./daemon.ts'], { + 
detached: true, + stdio: ['ignore', 'ignore', 'ignore'], + cwd: '/root', + }); + expect(releaseStartupLock).toHaveBeenCalledWith('/root'); + expect(result).toStrictEqual(mockState); + }); + + it('waits when lock is held by another process', async () => { + const { acquireStartupLock, readDaemonState, isDaemonAlive } = + await import('../server/daemon-state.js'); + + vi.mocked(acquireStartupLock).mockResolvedValueOnce(false); + + const mockState = { + port: 3000, + pid: 123, + nonce: 'abc', + startedAt: '2024-01-01', + subPorts: { anvil: 8545, fixture: 8546, mock: 8547 }, + }; + vi.mocked(readDaemonState) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(mockState); + vi.mocked(isDaemonAlive).mockResolvedValueOnce(true); + + vi.useFakeTimers(); + const promise = autoStartDaemon('/root'); + for (let i = 0; i < 3; i++) { + await vi.advanceTimersByTimeAsync(200); + } + const result = await promise; + vi.useRealTimers(); + + expect(result).toStrictEqual(mockState); + }); +}); diff --git a/src/cli/mm.ts b/src/cli/mm.ts new file mode 100644 index 0000000..ed8a896 --- /dev/null +++ b/src/cli/mm.ts @@ -0,0 +1,1139 @@ +#!/usr/bin/env node +import { cosmiconfig } from 'cosmiconfig'; +import { execSync, spawn } from 'node:child_process'; +import { existsSync } from 'node:fs'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; + +import pkg from '../../package.json'; +import { + acquireStartupLock, + isDaemonAlive, + isDaemonVersionMatch, + readDaemonState, + releaseStartupLock, + removeDaemonState, +} from '../server/daemon-state.js'; +import type { DaemonState } from '../types/http.js'; + +const COMMAND_TIMEOUTS_MS: Record = { + launch: 120_000, + cleanup: 30_000, + default: 30_000, +}; + +const AUTO_START_COMMANDS = new Set(['launch', 'serve']); + +const DAEMON_POLL_INTERVAL_MS = 200; +const DAEMON_POLL_MAX_ATTEMPTS = 50; // 50 * 200ms = 10s +const SEND_MAX_RETRIES = 3; +const SEND_RETRY_BASE_DELAY_MS = 200; +const CONFIG_MODULE_NAME = 
/**
 * Shape of an mm-client-cli configuration file
 * (for example `mm-client-cli.config.ts`).
 */
export type MmClientCliConfig = {
  /** Path to the daemon entry point (TypeScript or JavaScript file). */
  daemon: string;
  /** TypeScript runner to use. Defaults to 'tsx'. */
  runtime?: string;
};

/** Daemon entry point paired with the runtime used to execute it. */
type DaemonConfig = {
  daemonPath: string;
  runtime: string;
};

/**
 * Pulls the first `--project <path>` pair out of argv.
 *
 * When the flag is absent the input array is returned as-is (same reference).
 * A flag with no value, or followed by another `--flag`, prints an error to
 * stderr and exits with status 1.
 *
 * @param argv - Raw CLI arguments (after the node/script entries).
 * @returns The arguments without the flag pair, and the project path (if any).
 */
export function extractProjectFlag(argv: string[]): {
  args: string[];
  projectPath: string | undefined;
} {
  const flagAt = argv.indexOf('--project');
  if (flagAt === -1) {
    return { args: argv, projectPath: undefined };
  }

  const projectPath = argv[flagAt + 1];
  if (!projectPath || projectPath.startsWith('--')) {
    process.stderr.write('Error: --project requires a path value\n');
    process.exit(1);
  }

  // Drop the flag and its value; every other argument keeps its position.
  const args = argv.filter(
    (_, position) => position !== flagAt && position !== flagAt + 1,
  );
  return { args, projectPath };
}

/**
 * Determines the project root the CLI should operate on.
 *
 * Precedence:
 * 1. the `--project` CLI flag value,
 * 2. the `MM_PROJECT` environment variable,
 * 3. `git rev-parse --show-toplevel` run in the current working directory.
 *
 * An explicit path (flag or env) may be relative to cwd; it is passed through
 * `fs.realpath` and must be a directory. Any failure prints an error to stderr
 * and exits with status 1.
 *
 * @param projectFlag - The value of `--project`, if provided.
 * @returns The path to the project root.
 */
export async function resolveWorktreeRoot(
  projectFlag: string | undefined,
): Promise<string> {
  const explicit = projectFlag ?? process.env.MM_PROJECT;

  if (!explicit) {
    // No explicit target: ask git. Output is captured, not inherited.
    try {
      const stdout = execSync('git rev-parse --show-toplevel', {
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      return stdout.toString().trim();
    } catch {
      process.stderr.write(
        'Error: not in a git repository. Use --project or set MM_PROJECT to target a project.\n',
      );
      return process.exit(1);
    }
  }

  const resolved = path.resolve(process.cwd(), explicit);

  let real: string;
  try {
    real = await fs.realpath(resolved);
  } catch {
    process.stderr.write(`Error: project path does not exist: ${resolved}\n`);
    process.exit(1);
  }

  try {
    const isDirectory = (await fs.stat(real)).isDirectory();
    if (!isDirectory) {
      process.stderr.write(
        `Error: project path is not a directory: ${real}\n`,
      );
      process.exit(1);
    }
  } catch {
    process.stderr.write(`Error: cannot access project path: ${real}\n`);
    process.exit(1);
  }

  return real;
}

/**
 * CLI entry point: strips `--project`, handles help, resolves the project
 * root, then dispatches to `serve`, `launch`, `cleanup`, or the generic
 * command router.
 */
export async function main(): Promise<void> {
  const { args, projectPath } = extractProjectFlag(process.argv.slice(2));

  const wantsHelp =
    args.length === 0 || args[0] === '--help' || args[0] === '-h';
  if (wantsHelp) {
    printHelp();
    process.exit(0);
  }

  const worktreeRoot = await resolveWorktreeRoot(projectPath);
  const command = args[0];

  // `serve` owns the daemon lifecycle itself, so it skips discovery.
  if (command === 'serve') {
    await handleServe(worktreeRoot, args.includes('--background'));
    return;
  }

  // Everything else needs a daemon (auto-started for eligible commands).
  const daemonState = await discoverDaemon(worktreeRoot, command);

  switch (command) {
    case 'launch': {
      const launchArgs = parseLaunchArgs(args.slice(1));
      await sendRequest(daemonState.port, 'POST', '/launch', launchArgs);
      return;
    }
    case 'cleanup': {
      const shutdown = args.includes('--shutdown');
      await sendRequest(daemonState.port, 'POST', '/cleanup', {});
      if (shutdown) {
        await shutdownDaemon(worktreeRoot, daemonState);
      }
      return;
    }
    default:
      await routeCommand(command, args.slice(1), daemonState.port);
  }
}

/**
 * Reads the optional `--within` scope from CLI arguments.
 *
 * Accepted value forms:
 * - `testid:<value>` becomes `{ testId }`
 * - `selector:<value>` becomes `{ selector }`
 * - anything matching `e<digits>` becomes `{ a11yRef }`
 * - any other value becomes `{ testId }`
 *
 * A `--within` with no value (or followed by another `--flag`) prints an
 * error to stderr and exits with status 1.
 *
 * @param args - The CLI arguments to scan.
 * @returns A within target object, or undefined if `--within` is absent.
 */
export function resolveWithinFromArgs(
  args: string[],
): { a11yRef: string } | { testId: string } | { selector: string } | undefined {
  const flagAt = args.indexOf('--within');
  if (flagAt === -1) {
    return undefined;
  }

  const scope = args[flagAt + 1];
  if (!scope || scope.startsWith('--')) {
    process.stderr.write('Error: --within requires a value\n');
    process.exit(1);
  }

  const testIdPrefix = 'testid:';
  const selectorPrefix = 'selector:';

  if (scope.startsWith(testIdPrefix)) {
    return { testId: scope.slice(testIdPrefix.length) };
  }
  if (scope.startsWith(selectorPrefix)) {
    return { selector: scope.slice(selectorPrefix.length) };
  }
  if (/^e[0-9]+$/u.test(scope)) {
    return { a11yRef: scope };
  }
  return { testId: scope };
}
/**
 * Resolves element targeting from CLI arguments. Three targeting modes are
 * supported, checked in this order:
 *
 *   --selector <css>  → CSS selector (explicit)
 *   --testid <id>     → data-testid value (explicit)
 *   positional arg    → a11yRef if it matches /^e\d+$/, otherwise testId
 *
 * The positional argument is located with {@link getPositionalTarget}, so
 * flag/value pairs that precede it (for example `--timeout 5000 e1` or
 * `--within e3 send-button`) are skipped instead of being mistaken for the
 * target.
 *
 * A flag without a value, or no target at all, prints an error to stderr and
 * exits with status 1.
 *
 * @param args - The CLI arguments after the command name.
 * @returns An object with exactly one of `a11yRef`, `testId`, or `selector`.
 */
export function resolveTargetFromArgs(
  args: string[],
): { a11yRef: string } | { testId: string } | { selector: string } {
  const selectorIdx = args.indexOf('--selector');
  if (selectorIdx >= 0) {
    const val = args[selectorIdx + 1];
    if (!val || val.startsWith('--')) {
      process.stderr.write('Error: --selector requires a value\n');
      process.exit(1);
    }
    return { selector: val };
  }

  const testIdIdx = args.indexOf('--testid');
  if (testIdIdx >= 0) {
    const val = args[testIdIdx + 1];
    if (!val || val.startsWith('--')) {
      process.stderr.write('Error: --testid requires a value\n');
      process.exit(1);
    }
    return { testId: val };
  }

  // Use the same positional lookup the command handlers validate with, so a
  // leading `--flag value` pair is never returned as the element target.
  const target = getPositionalTarget(args);
  if (!target) {
    process.stderr.write('Error: element target is required\n');
    process.exit(1);
  }
  return /^e[0-9]+$/u.test(target) ? { a11yRef: target } : { testId: target };
}

/**
 * Returns the positional target argument from a CLI args list.
 *
 * Every argument starting with `--` is treated as a flag that takes a value:
 * both the flag and the argument after it are skipped.
 *
 * @param args - The CLI arguments to scan.
 * @returns The first non-flag argument, or undefined.
 */
export function getPositionalTarget(args: string[]): string | undefined {
  let index = 0;
  while (index < args.length) {
    const arg = args[index];
    if (!arg.startsWith('--')) {
      return arg;
    }
    // Skip the flag and the value that follows it.
    index += 2;
  }
  return undefined;
}
/**
 * Reports whether the arguments identify an element, either positionally or
 * through an explicit `--selector` / `--testid` flag.
 *
 * @param args - The CLI arguments after the command name.
 * @returns True when some element target is present.
 */
function hasElementTarget(args: string[]): boolean {
  return (
    Boolean(getPositionalTarget(args)) ||
    args.includes('--selector') ||
    args.includes('--testid')
  );
}

/**
 * Picks the text argument for `mm type`.
 *
 * `--selector`, `--testid` and `--within` each consume the argument that
 * follows them; everything else counts as positional. The text is then chosen
 * so that a flag or a flag's value is never typed into the page:
 *
 * - explicit target (`--selector` / `--testid`): the argument right after the
 *   flag's value if it is positional, else `args[1]` if positional, else the
 *   first positional argument;
 * - positional target: the second positional argument.
 *
 * @param args - The CLI arguments after the command name.
 * @returns The text to type, or undefined when none was supplied.
 */
function resolveTypeText(args: string[]): string | undefined {
  const valueFlags = ['--selector', '--testid', '--within'];

  const positionalIndexes: number[] = [];
  for (let i = 0; i < args.length; i += 1) {
    if (valueFlags.includes(args[i])) {
      i += 1; // also skip the flag's value
      continue;
    }
    positionalIndexes.push(i);
  }

  let targetFlagIdx = args.indexOf('--selector');
  if (targetFlagIdx < 0) {
    targetFlagIdx = args.indexOf('--testid');
  }
  const explicitTarget = targetFlagIdx >= 0;

  // Preferred slots mirror the historical lookup order.
  const preferred = explicitTarget ? [targetFlagIdx + 2, 1] : [1];
  for (const idx of preferred) {
    if (positionalIndexes.includes(idx)) {
      return args[idx];
    }
  }

  const fallbackIdx = explicitTarget
    ? positionalIndexes[0]
    : positionalIndexes[1];
  return fallbackIdx === undefined ? undefined : args[fallbackIdx];
}

/**
 * Routes a CLI command to the appropriate daemon HTTP endpoint.
 *
 * Each command validates its own arguments; on a usage error a message is
 * written to stderr and the process exits with status 1. Valid commands are
 * forwarded through `sendRequest`, which prints the daemon's response.
 *
 * @param command - The CLI command to route.
 * @param args - Additional arguments for the command.
 * @param port - The daemon HTTP server port.
 */
export async function routeCommand(
  command: string,
  args: string[],
  port: number,
): Promise<void> {
  switch (command) {
    case 'status':
      await sendRequest(port, 'GET', '/status', null);
      break;
    case 'click': {
      if (!hasElementTarget(args)) {
        process.stderr.write(
          'Usage: mm click <target> [--selector <css>] [--testid <id>] [--within <target>]\n',
        );
        process.exit(1);
      }
      const clickWithin = resolveWithinFromArgs(args);
      await sendRequest(port, 'POST', '/tool/click', {
        ...resolveTargetFromArgs(args),
        ...(clickWithin ? { within: clickWithin } : {}),
      });
      break;
    }
    case 'type': {
      if (!hasElementTarget(args)) {
        process.stderr.write(
          'Usage: mm type <target> <text> [--selector <css>] [--testid <id>] [--within <target>]\n',
        );
        process.exit(1);
      }
      // Never fall back to a flag or a flag value as the text to type.
      const text = resolveTypeText(args);
      if (text === undefined) {
        process.stderr.write('Usage: mm type <target> <text>\n');
        process.exit(1);
      }
      const typeWithin = resolveWithinFromArgs(args);
      await sendRequest(port, 'POST', '/tool/type', {
        ...resolveTargetFromArgs(args),
        text,
        ...(typeWithin ? { within: typeWithin } : {}),
      });
      break;
    }
    case 'get-text': {
      if (!hasElementTarget(args)) {
        process.stderr.write(
          'Usage: mm get-text <target> [--selector <css>] [--testid <id>] [--within <target>]\n',
        );
        process.exit(1);
      }
      const getTextWithin = resolveWithinFromArgs(args);
      await sendRequest(port, 'POST', '/tool/get_text', {
        ...resolveTargetFromArgs(args),
        ...(getTextWithin ? { within: getTextWithin } : {}),
      });
      break;
    }
    case 'describe-screen':
      await sendRequest(port, 'POST', '/tool/describe_screen', {});
      break;
    case 'screenshot': {
      const nameIdx = args.indexOf('--name');
      const name = nameIdx >= 0 ? args[nameIdx + 1] : undefined;
      await sendRequest(port, 'POST', '/tool/screenshot', name ? { name } : {});
      break;
    }
    case 'wait-for': {
      if (!hasElementTarget(args)) {
        process.stderr.write(
          'Usage: mm wait-for <target> [--timeout <ms>] [--selector <css>] [--testid <id>] [--within <target>]\n',
        );
        process.exit(1);
      }
      const timeoutMs = parseIntFlag(args, '--timeout');
      const waitWithin = resolveWithinFromArgs(args);
      await sendRequest(port, 'POST', '/tool/wait_for', {
        ...resolveTargetFromArgs(args),
        ...(timeoutMs === undefined ? {} : { timeoutMs }),
        ...(waitWithin ? { within: waitWithin } : {}),
      });
      break;
    }
    case 'navigate':
      if (!args[0]) {
        process.stderr.write('Usage: mm navigate <url>\n');
        process.exit(1);
      }
      await sendRequest(port, 'POST', '/tool/navigate', {
        screen: 'url',
        url: args[0],
      });
      break;
    case 'navigate-home':
      await sendRequest(port, 'POST', '/tool/navigate', { screen: 'home' });
      break;
    case 'navigate-settings':
      await sendRequest(port, 'POST', '/tool/navigate', {
        screen: 'settings',
      });
      break;
    case 'get-state':
      await sendRequest(port, 'POST', '/tool/get_state', {});
      break;
    case 'get-context':
      await sendRequest(port, 'POST', '/tool/get_context', {});
      break;
    case 'set-context':
      if (!args[0] || (args[0] !== 'e2e' && args[0] !== 'prod')) {
        process.stderr.write('Usage: mm set-context <e2e|prod>\n');
        process.exit(1);
      }
      await sendRequest(port, 'POST', '/tool/set_context', {
        context: args[0],
      });
      break;
    case 'build': {
      const buildForce = args.includes('--force');
      await sendRequest(port, 'POST', '/tool/build', {
        ...(buildForce ? { force: true } : {}),
      });
      break;
    }
    case 'wait-for-notification': {
      const notifTimeout = parseIntFlag(args, '--timeout');
      await sendRequest(port, 'POST', '/tool/wait_for_notification', {
        ...(notifTimeout === undefined ? {} : { timeoutMs: notifTimeout }),
      });
      break;
    }
    case 'switch-to-tab': {
      const tabRole = parseStringFlag(args, '--role');
      const tabUrl = parseStringFlag(args, '--url');
      // Support positional arg as role: mm switch-to-tab dapp
      const positionalRole =
        !tabRole && !tabUrl ? getPositionalTarget(args) : undefined;
      const resolvedRole = tabRole ?? positionalRole;
      if (!resolvedRole && !tabUrl) {
        process.stderr.write(
          'Usage: mm switch-to-tab <role> | --role <role> | --url <url>\n',
        );
        process.exit(1);
      }
      await sendRequest(port, 'POST', '/tool/switch_to_tab', {
        ...(resolvedRole ? { role: resolvedRole } : {}),
        ...(tabUrl ? { url: tabUrl } : {}),
      });
      break;
    }
    case 'close-tab': {
      const closeRole = parseStringFlag(args, '--role');
      const closeUrl = parseStringFlag(args, '--url');
      if (!closeRole && !closeUrl) {
        process.stderr.write(
          'Usage: mm close-tab --role <role> | --url <url>\n',
        );
        process.exit(1);
      }
      await sendRequest(port, 'POST', '/tool/close_tab', {
        ...(closeRole ? { role: closeRole } : {}),
        ...(closeUrl ? { url: closeUrl } : {}),
      });
      break;
    }
    case 'clipboard': {
      const clipAction = args[0];
      if (!clipAction || (clipAction !== 'read' && clipAction !== 'write')) {
        process.stderr.write('Usage: mm clipboard <read|write> [text]\n');
        process.exit(1);
      }
      if (clipAction === 'write' && !args[1]) {
        process.stderr.write('Usage: mm clipboard write <text>\n');
        process.exit(1);
      }
      await sendRequest(port, 'POST', '/tool/clipboard', {
        action: clipAction,
        ...(clipAction === 'write' ? { text: args[1] } : {}),
      });
      break;
    }
    case 'seed-contract': {
      // Skip flag/value pairs so `--hardfork london hst` still names `hst`.
      const contractName = getPositionalTarget(args);
      if (!contractName) {
        process.stderr.write(
          'Usage: mm seed-contract <name> [--hardfork <fork>]\n',
        );
        process.exit(1);
      }
      const hardfork = parseStringFlag(args, '--hardfork');
      await sendRequest(port, 'POST', '/tool/seed_contract', {
        contractName,
        ...(hardfork ? { hardfork } : {}),
      });
      break;
    }
    case 'seed-contracts': {
      // Compare by position: looking the value up with indexOf would find the
      // first occurrence and drop a contract that shares the hardfork's name.
      const contractNames = args.filter(
        (arg, position) =>
          !arg.startsWith('--') && args[position - 1] !== '--hardfork',
      );
      if (contractNames.length === 0) {
        process.stderr.write(
          'Usage: mm seed-contracts <name> ... [--hardfork <fork>]\n',
        );
        process.exit(1);
      }
      const seedHardfork = parseStringFlag(args, '--hardfork');
      await sendRequest(port, 'POST', '/tool/seed_contracts', {
        contracts: contractNames,
        ...(seedHardfork ? { hardfork: seedHardfork } : {}),
      });
      break;
    }
    case 'get-contract-address':
      if (!args[0]) {
        process.stderr.write('Usage: mm get-contract-address <name>\n');
        process.exit(1);
      }
      await sendRequest(port, 'POST', '/tool/get_contract_address', {
        contractName: args[0],
      });
      break;
    case 'list-contracts':
      await sendRequest(port, 'POST', '/tool/list_contracts', {});
      break;
    case 'list-testids': {
      const testIdLimit = parseIntFlag(args, '--limit');
      await sendRequest(port, 'POST', '/tool/list_testids', {
        ...(testIdLimit === undefined ? {} : { limit: testIdLimit }),
      });
      break;
    }
    case 'accessibility-snapshot': {
      const rootSelector = parseStringFlag(args, '--root');
      await sendRequest(port, 'POST', '/tool/accessibility_snapshot', {
        ...(rootSelector ? { rootSelector } : {}),
      });
      break;
    }
    case 'knowledge-search':
      if (!args[0]) {
        process.stderr.write('Usage: mm knowledge-search <query>\n');
        process.exit(1);
      }
      await sendRequest(port, 'POST', '/tool/knowledge_search', {
        query: args[0],
      });
      break;
    case 'knowledge-last':
      await sendRequest(port, 'POST', '/tool/knowledge_last', {});
      break;
    case 'knowledge-sessions':
      await sendRequest(port, 'POST', '/tool/knowledge_sessions', {});
      break;
    case 'knowledge-summarize': {
      const summarizeSession = parseStringFlag(args, '--session');
      await sendRequest(port, 'POST', '/tool/knowledge_summarize', {
        ...(summarizeSession ? { scope: { sessionId: summarizeSession } } : {}),
      });
      break;
    }
    case 'run-steps': {
      if (!args[0]) {
        process.stderr.write(
          'Usage: mm run-steps \'{"steps":[{"tool":"click","args":{"a11yRef":"e1"}}]}\'\n',
        );
        process.exit(1);
      }
      // Parse on its own so only a malformed CLI argument is reported as
      // invalid JSON; failures inside sendRequest keep their own message.
      let steps: Record<string, unknown>;
      try {
        steps = JSON.parse(args[0]) as Record<string, unknown>;
      } catch (error) {
        if (error instanceof SyntaxError) {
          process.stderr.write(`Error: invalid JSON — ${error.message}\n`);
          process.exit(1);
        }
        /* istanbul ignore next -- JSON.parse only throws SyntaxError */
        throw error;
      }
      await sendRequest(port, 'POST', '/tool/run_steps', steps);
      break;
    }
    default:
      process.stderr.write(
        `Error: unknown command '${command}'. Run 'mm --help' for usage.\n`,
      );
      process.exit(1);
  }
}
/**
 * Decides whether a failed fetch attempt is worth retrying.
 *
 * The error is stringified and searched for a fixed set of network-level
 * markers. HTTP responses never reach this check: `sendRequest` handles them
 * before anything is thrown.
 *
 * @param error - The caught error from a fetch attempt.
 * @returns Whether the error is transient.
 */
export function isTransientError(error: unknown): boolean {
  const text = String(error);
  const transientMarkers = [
    'ECONNREFUSED',
    'ECONNRESET',
    'EPIPE',
    'UND_ERR_SOCKET',
    'fetch failed',
  ];
  return transientMarkers.some((marker) => text.includes(marker));
}

/**
 * Sends one HTTP request to the daemon on 127.0.0.1 and prints the outcome.
 *
 * - Success: the `result` field (or the whole payload when absent), merged
 *   with `observations` if present, is written to stdout. Strings are written
 *   raw and everything else as indented JSON.
 * - Failure response (`!response.ok` or `ok === false`): the error message is
 *   written to stderr and the process exits with status 1.
 * - Transient network errors are retried up to SEND_MAX_RETRIES times, waiting
 *   `SEND_RETRY_BASE_DELAY_MS * attempt` before each retry.
 * - Each attempt is aborted after the per-command timeout.
 *
 * NOTE(review): retries apply to POST requests too. Whether the daemon can
 * have processed a request whose connection was then reset is not visible
 * from here; confirm that tool calls are safe to repeat.
 *
 * @param port - The daemon HTTP server port.
 * @param method - The HTTP method to use.
 * @param requestPath - The URL path for the request.
 * @param body - The request body payload, or null for no body.
 */
export async function sendRequest(
  port: number,
  method: string,
  requestPath: string,
  body: unknown,
): Promise<void> {
  // The last path segment doubles as the timeout lookup key.
  const timeoutKey = requestPath.split('/').pop() ?? '';
  const timeout =
    COMMAND_TIMEOUTS_MS[timeoutKey] ?? COMMAND_TIMEOUTS_MS.default;
  const hasBody = body !== null;

  let lastError: unknown;

  for (let attempt = 0; attempt <= SEND_MAX_RETRIES; attempt += 1) {
    if (attempt > 0) {
      await sleep(SEND_RETRY_BASE_DELAY_MS * attempt);
    }

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeout);

    try {
      const headers: Record<string, string> = hasBody
        ? { 'Content-Type': 'application/json' }
        : {};
      const options: RequestInit = {
        method,
        signal: controller.signal,
        headers,
        ...(hasBody ? { body: JSON.stringify(body) } : {}),
      };

      const response = await fetch(
        `http://127.0.0.1:${port}${requestPath}`,
        options,
      );
      const data = (await response.json()) as Record<string, unknown>;

      const failed = !response.ok || data.ok === false;
      if (failed) {
        const errorData = data.error as { message?: string } | undefined;
        process.stderr.write(
          `Error: ${errorData?.message ?? 'Request failed'}\n`,
        );
        process.exit(1);
      }

      const payload = data.result ?? data;
      const observations = data.observations as
        | Record<string, unknown>
        | undefined;

      let rendered: unknown = payload;
      if (observations) {
        // Non-object results are wrapped so observations can sit beside them.
        const isObject = typeof payload === 'object' && payload !== null;
        const base = isObject
          ? (payload as Record<string, unknown>)
          : { result: payload };
        rendered = { ...base, observations };
      }

      const line =
        typeof rendered === 'string'
          ? rendered
          : JSON.stringify(rendered, null, 2);
      process.stdout.write(`${line}\n`);
      return;
    } catch (error) {
      if ((error as Error).name === 'AbortError') {
        process.stderr.write(`Error: request timed out after ${timeout}ms\n`);
        process.exit(1);
      }

      const canRetry = isTransientError(error) && attempt < SEND_MAX_RETRIES;
      if (canRetry) {
        lastError = error;
        continue;
      }

      process.stderr.write(`Error: ${String(error)}\n`);
      process.exit(1);
    } finally {
      clearTimeout(timer);
    }
  }

  process.stderr.write(
    `Error: request failed after ${SEND_MAX_RETRIES + 1} attempts: ${String(lastError)}\n`,
  );
  process.exit(1);
}

/**
 * Finds a usable daemon for the given project, or starts one.
 *
 * - A recorded daemon that is alive and version-matched is returned as-is.
 * - A live daemon with a different version is shut down.
 * - A recorded daemon that is not alive has its state file removed.
 *
 * After that, commands in AUTO_START_COMMANDS trigger `autoStartDaemon`; any
 * other command prints "no daemon running" to stderr and exits with status 1.
 *
 * @param worktreeRoot - The git worktree root directory.
 * @param command - The CLI command being executed.
 * @returns The daemon state with connection details.
 */
export async function discoverDaemon(
  worktreeRoot: string,
  command: string,
): Promise<DaemonState> {
  const recorded = await readDaemonState(worktreeRoot);

  if (recorded) {
    if (!(await isDaemonAlive(recorded))) {
      // Stale state file left behind by a dead daemon.
      await removeDaemonState(worktreeRoot);
    } else if (isDaemonVersionMatch(recorded)) {
      return recorded;
    } else {
      process.stderr.write(
        `Daemon version mismatch (running: ${recorded.version ?? 'unknown'}, cli: ${pkg.version}). Restarting...\n`,
      );
      await shutdownDaemon(worktreeRoot, recorded);
    }
  }

  if (!AUTO_START_COMMANDS.has(command)) {
    process.stderr.write(
      'Error: no daemon running. Run `mm launch` to start.\n',
    );
    process.exit(1);
  }

  return autoStartDaemon(worktreeRoot);
}

/**
 * Spawns a new daemon process and waits for it to become ready.
 *
 * If the startup lock cannot be acquired, this only waits for a daemon to
 * appear. With the lock held, the daemon state is re-checked before spawning
 * a detached child; the lock is released in all cases.
 *
 * @param worktreeRoot - The git worktree root directory.
 * @returns The daemon state once it is alive.
 */
export async function autoStartDaemon(
  worktreeRoot: string,
): Promise<DaemonState> {
  const haveLock = await acquireStartupLock(worktreeRoot);
  if (!haveLock) {
    return waitForDaemon(worktreeRoot);
  }

  try {
    // A daemon may have come up between discovery and taking the lock.
    const current = await readDaemonState(worktreeRoot);
    if (current && (await isDaemonAlive(current))) {
      return current;
    }

    const { runtime, daemonPath } = await readDaemonConfig(worktreeRoot);
    const runtimeBin = resolveRuntime(worktreeRoot, runtime);

    const daemonProcess = spawn(runtimeBin, [daemonPath], {
      detached: true,
      stdio: ['ignore', 'ignore', 'ignore'],
      cwd: worktreeRoot,
    });
    daemonProcess.unref();

    return await waitForDaemon(worktreeRoot);
  } finally {
    await releaseStartupLock(worktreeRoot);
  }
}
+ */ +export async function handleServe( + worktreeRoot: string, + background: boolean, +): Promise { + const existing = await readDaemonState(worktreeRoot); + if (existing && (await isDaemonAlive(existing))) { + process.stderr.write( + `Error: daemon already running on port ${existing.port} (PID ${existing.pid})\n`, + ); + process.exit(1); + } + + if (existing) { + await removeDaemonState(worktreeRoot); + } + + const config = await readDaemonConfig(worktreeRoot); + const runtimeBin = resolveRuntime(worktreeRoot, config.runtime); + + if (background) { + const child = spawn(runtimeBin, [config.daemonPath], { + detached: true, + stdio: ['ignore', 'ignore', 'ignore'], + cwd: worktreeRoot, + }); + child.unref(); + + const state = await waitForDaemon(worktreeRoot); + process.stdout.write( + `Daemon started on port ${state.port} (PID ${state.pid})\n`, + ); + return; + } + + const child = spawn(runtimeBin, [config.daemonPath], { + stdio: 'inherit', + cwd: worktreeRoot, + }); + + await new Promise((resolve) => { + child.on('exit', (code) => { + process.exitCode = code ?? 0; + resolve(); + }); + }); +} + +/** + * Reads the daemon configuration using cosmiconfig file discovery. + * + * Searches for configuration files (e.g., mm-client-cli.config.ts) + * starting from the worktree root directory. + * + * @param worktreeRoot - The git worktree root directory. + * @returns The daemon path and runtime configuration. 
+ */ +export async function readDaemonConfig( + worktreeRoot: string, +): Promise { + const explorer = cosmiconfig(CONFIG_MODULE_NAME, { + searchPlaces: [ + `${CONFIG_MODULE_NAME}.config.ts`, + `${CONFIG_MODULE_NAME}.config.js`, + `${CONFIG_MODULE_NAME}.config.cjs`, + `${CONFIG_MODULE_NAME}.config.mjs`, + `.${CONFIG_MODULE_NAME}rc`, + `.${CONFIG_MODULE_NAME}rc.json`, + `.${CONFIG_MODULE_NAME}rc.yaml`, + `.${CONFIG_MODULE_NAME}rc.yml`, + `.${CONFIG_MODULE_NAME}rc.js`, + `.${CONFIG_MODULE_NAME}rc.ts`, + `.${CONFIG_MODULE_NAME}rc.cjs`, + ], + stopDir: worktreeRoot, + }); + + const result = await explorer.search(worktreeRoot); + + if (!result || result.isEmpty) { + process.stderr.write( + `Error: No mm-client-cli config found. Create ${CONFIG_MODULE_NAME}.config.ts in your project root.\n`, + ); + process.exit(1); + } + + const config = result.config as MmClientCliConfig; + if (!config.daemon) { + process.stderr.write( + `Error: No daemon entry point configured. Add 'daemon' to ${result.filepath}.\n`, + ); + process.exit(1); + } + + return { + daemonPath: config.daemon, + runtime: config.runtime ?? 'tsx', + }; +} + +/** + * Resolves the runtime binary path for spawning the daemon. + * + * @param worktreeRoot - The git worktree root directory. + * @param runtime - The runtime name from configuration. + * @returns The absolute path to the runtime binary. + */ +export function resolveRuntime(worktreeRoot: string, runtime: string): string { + if (runtime === 'node') { + return 'node'; + } + + const binPath = path.join(worktreeRoot, 'node_modules', '.bin', runtime); + if (!existsSync(binPath)) { + process.stderr.write( + `Error: Runtime '${runtime}' not found at ${binPath}. Install it or set "mm.runtime" in package.json.\n`, + ); + process.exit(1); + } + return binPath; +} + +/** + * Polls for daemon state until the daemon is alive or times out. + * + * @param worktreeRoot - The git worktree root directory. + * @returns The daemon state once the daemon is responsive. 
+ */ +export async function waitForDaemon( + worktreeRoot: string, +): Promise { + for (let i = 0; i < DAEMON_POLL_MAX_ATTEMPTS; i++) { + await sleep(DAEMON_POLL_INTERVAL_MS); + const state = await readDaemonState(worktreeRoot); + if (state && (await isDaemonAlive(state))) { + return state; + } + } + throw new Error('Daemon failed to start within 10 seconds'); +} + +/** + * Terminates the daemon process and removes its state file. + * + * @param worktreeRoot - The git worktree root directory. + * @param state - The current daemon state containing the PID. + */ +export async function shutdownDaemon( + worktreeRoot: string, + state: DaemonState, +): Promise { + if (state.pid) { + try { + process.kill(state.pid, 'SIGTERM'); + } catch { + /* already dead */ + } + } + await removeDaemonState(worktreeRoot); +} + +/** + * Delays execution for the specified duration. + * + * @param ms - The number of milliseconds to wait. + * @returns A promise that resolves after the delay. + */ +export async function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +/** + * Parses a numeric flag value from a CLI argument list. + * + * @param args - The raw CLI arguments to search. + * @param flag - The flag name to look for (e.g., '--timeout'). + * @returns The parsed integer value, or undefined if the flag is absent or invalid. + */ +export function parseIntFlag(args: string[], flag: string): number | undefined { + const idx = args.indexOf(flag); + if (idx < 0) { + return undefined; + } + const parsed = parseInt(args[idx + 1], 10); + return isNaN(parsed) ? undefined : parsed; +} + +/** + * Parses a string flag value from a CLI argument list. + * + * @param args - The raw CLI arguments to search. + * @param flag - The flag name to look for (e.g., '--role'). + * @returns The string value, or undefined if the flag is absent. 
+ */ +export function parseStringFlag( + args: string[], + flag: string, +): string | undefined { + const idx = args.indexOf(flag); + if (idx < 0 || !args[idx + 1] || args[idx + 1].startsWith('--')) { + return undefined; + } + return args[idx + 1]; +} + +/** + * Parses launch command arguments into a key-value object. + * + * @param args - The raw CLI arguments after the command name. + * @returns The parsed launch options. + */ +export function parseLaunchArgs(args: string[]): Record { + const result: Record = {}; + const knownFlags = new Set([ + '--context', + '--state', + '--extension-path', + '--goal', + '--force', + '--flow-tags', + ]); + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + if (arg === '--force') { + result.force = true; + } else if (arg === '--context') { + i += 1; + if (!args[i] || args[i].startsWith('--')) { + process.stderr.write('Error: --context requires a value (e2e|prod)\n'); + process.exit(1); + } + result.context = args[i]; + } else if (arg === '--state') { + i += 1; + if (!args[i] || args[i].startsWith('--')) { + process.stderr.write( + 'Error: --state requires a value (default|onboarding|custom)\n', + ); + process.exit(1); + } + result.stateMode = args[i]; + } else if (arg === '--extension-path') { + i += 1; + if (!args[i] || args[i].startsWith('--')) { + process.stderr.write('Error: --extension-path requires a value\n'); + process.exit(1); + } + result.extensionPath = args[i]; + } else if (arg === '--goal') { + i += 1; + if (!args[i] || args[i].startsWith('--')) { + process.stderr.write('Error: --goal requires a value\n'); + process.exit(1); + } + result.goal = args[i]; + } else if (arg === '--flow-tags') { + i += 1; + if (!args[i] || args[i].startsWith('--')) { + process.stderr.write( + 'Error: --flow-tags requires a comma-separated value\n', + ); + process.exit(1); + } + result.flowTags = args[i].split(',').map((tag) => tag.trim()); + } else if (arg.startsWith('--') && !knownFlags.has(arg)) { + 
process.stderr.write(`Warning: unknown launch flag '${arg}'\n`); + } + } + return result; +} + +/** + * Prints CLI usage information to stdout. + */ +export function printHelp(): void { + process.stdout.write(`mm — MetaMask CLI + +Usage: mm [--project ] [options] + +Global Options: + --project Target a specific project directory (absolute or relative). + Overrides MM_PROJECT and git-based discovery. + +Environment Variables: + MM_PROJECT Default project directory when --project is not provided. + Falls back to the current git worktree root. + +Lifecycle: + mm launch [--context e2e|prod] [--state default|onboarding|custom] [--extension-path ] [--goal ] [--force] [--flow-tags ] + mm cleanup [--shutdown] + mm status + mm serve [--background] + mm build [--force] + +Interaction: + mm click [--selector ] [--testid ] [--within ] + mm type [--selector ] [--testid ] [--within ] + mm get-text [--selector ] [--testid ] [--within ] + mm describe-screen + mm screenshot [--name ] + mm wait-for [--timeout ] [--selector ] [--testid ] [--within ] + mm wait-for-notification [--timeout ] + mm clipboard [text] + +Navigation: + mm navigate + mm navigate-home + mm navigate-settings + mm switch-to-tab | --role | --url + mm close-tab --role | --url + +Discovery: + mm list-testids [--limit ] + mm accessibility-snapshot [--root ] + +State & Context: + mm get-state + mm get-context + mm set-context + +Knowledge: + mm knowledge-search + mm knowledge-last + mm knowledge-sessions + mm knowledge-summarize [--session ] + +Contracts (E2E only): + mm seed-contract [--hardfork ] + mm seed-contracts ... 
[--hardfork ] + mm get-contract-address + mm list-contracts + +Batching: + mm run-steps + +Examples: + mm launch (from inside project) + mm --project ../metamask-extension launch (from parent folder) + MM_PROJECT=/path/to/extension mm describe-screen (via env var) +`); +} + +/* istanbul ignore next -- CLI entry point, tested via exported functions */ +/* istanbul ignore next -- top-level fatal handler is not exercised in tests */ +const handleFatalCliError = (error: unknown): void => { + process.stderr.write(`Fatal: ${String(error)}\n`); + process.exit(1); +}; + +/* istanbul ignore next -- CLI entry point, tested via exported functions */ +if (process.env.VITEST === undefined) { + main().catch(handleFatalCliError); +} diff --git a/src/index.ts b/src/index.ts index eaefc45..2275fbe 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,26 +2,36 @@ export type * from './capabilities/types.js'; export * from './capabilities/context.js'; -// MCP Server - Session Manager Interface -export * from './mcp-server/session-manager.js'; - -// MCP Server - Server -export * from './mcp-server/server.js'; - -// MCP Server - Core Components -export * from './mcp-server/knowledge-store.js'; -export * from './mcp-server/discovery.js'; -export * from './mcp-server/schemas.js'; -export * from './mcp-server/tools/definitions.js'; -export * from './mcp-server/tokenization.js'; - -// MCP Server - Types -export * from './mcp-server/types'; - -// MCP Server - Utils -export * from './mcp-server/utils'; - -// Shared utilities +// Session Manager Interface (transport-agnostic) +export type { + ISessionManager, + TrackedPage, + SessionLaunchInput, + SessionLaunchResult, + SessionScreenshotOptions, +} from './server/session-manager.js'; + +// Core Components +export * from './knowledge-store/knowledge-store.js'; +export * from './tools/utils/discovery.js'; +export * from './validation/schemas.js'; +export * from './knowledge-store/tokenization.js'; + +// Types +export * from './tools/types'; + +// 
HTTP Server Types +export type * from './types/http.js'; +export type { MmClientCliConfig } from './cli/mm.js'; +export * from './tools/registry.js'; + +// Server utilities +export * from './server/request-queue.js'; +export * from './server/port-allocator.js'; +export * from './server/daemon-state.js'; +export * from './server/create-server.js'; + +// Utils export * from './utils'; // Launcher utilities @@ -30,26 +40,5 @@ export * from './launcher/extension-readiness.js'; export * from './launcher/console-error-buffer.js'; export * from './launcher/retry.js'; -// MCP Server - Tool Handlers -export * from './mcp-server/tools/build.js'; -export * from './mcp-server/tools/launch.js'; -export * from './mcp-server/tools/cleanup.js'; -export * from './mcp-server/tools/state.js'; -export * from './mcp-server/tools/seeding.js'; -export * from './mcp-server/tools/interaction.js'; -export * from './mcp-server/tools/navigation.js'; -export * from './mcp-server/tools/discovery-tools.js'; -export * from './mcp-server/tools/screenshot.js'; -export * from './mcp-server/tools/knowledge.js'; -export * from './mcp-server/tools/batch.js'; -export * from './mcp-server/tools/context.js'; -export * from './mcp-server/tools/clipboard.js'; - -// Run tool utility -export * from './mcp-server/tools/run-tool.js'; - // Error classification -export * from './mcp-server/tools/error-classification.js'; - -// Helpers -export * from './mcp-server/tools/helpers.js'; +export * from './tools/error-classification.js'; diff --git a/src/mcp-server/knowledge-store.test.ts b/src/knowledge-store/knowledge-store.test.ts similarity index 91% rename from src/mcp-server/knowledge-store.test.ts rename to src/knowledge-store/knowledge-store.test.ts index ea984f6..40e75b3 100644 --- a/src/mcp-server/knowledge-store.test.ts +++ b/src/knowledge-store/knowledge-store.test.ts @@ -16,12 +16,12 @@ import { knowledgeStore, } from './knowledge-store.js'; import type { KnowledgeStoreConfig } from './knowledge-store.js'; 
+import type { ExtensionState } from '../capabilities/types.js'; import type { SessionMetadata, StepRecordOutcome, StepRecordObservation, -} from './types'; -import type { ExtensionState } from '../capabilities/types.js'; +} from '../tools/types'; vi.mock('fs', () => ({ existsSync: vi.fn(), @@ -101,21 +101,10 @@ describe('core', () => { expect(store).toBeDefined(); }); - it('accepts custom toolPrefix configuration', () => { - const config: KnowledgeStoreConfig = { - toolPrefix: 'custom', - }; - - const store = new KnowledgeStore(config); - - expect(store).toBeDefined(); - }); - it('accepts full configuration object', () => { const config: KnowledgeStoreConfig = { rootDir: '/custom/root', sessionIdPrefix: 'test-', - toolPrefix: 'test', }; const store = new KnowledgeStore(config); @@ -126,7 +115,9 @@ describe('core', () => { describe('writeSessionMetadata', () => { it('creates session directory and writes metadata file', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata: SessionMetadata = { schemaVersion: 1, sessionId: 'session-001', @@ -152,7 +143,9 @@ describe('core', () => { }); it('includes optional goal in metadata', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata: SessionMetadata = { schemaVersion: 1, sessionId: 'session-003', @@ -244,13 +237,15 @@ describe('core', () => { describe('recordStep', () => { it('creates steps directory and writes step file', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation(); const result = await store.recordStep({ sessionId: 'session-step-001', - toolName: 'mm_click', + toolName: 'click', input: { testId: 
'send-button' }, outcome, observation, @@ -264,17 +259,19 @@ describe('core', () => { expect(fs.writeFile).toHaveBeenCalled(); expect(result).toContain('session-step-001'); expect(result).toContain('steps'); - expect(result).toContain('mm_click.json'); + expect(result).toContain('click.json'); }); it('records step with screenshot artifact', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation({ currentScreen: 'send' }); await store.recordStep({ sessionId: 'session-step-002', - toolName: 'mm_screenshot', + toolName: 'screenshot', outcome, observation, screenshotPath: '/test/screenshots/screenshot-001.png', @@ -294,7 +291,9 @@ describe('core', () => { }); it('sanitizes sensitive input fields', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation({ currentScreen: 'unlock', @@ -303,7 +302,7 @@ describe('core', () => { await store.recordStep({ sessionId: 'session-step-003', - toolName: 'mm_type', + toolName: 'type', input: { testId: 'password-input', text: 'my-secret-password' }, outcome, observation, @@ -318,13 +317,15 @@ describe('core', () => { }); it('records step with target information', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation(); await store.recordStep({ sessionId: 'session-step-004', - toolName: 'mm_click', + toolName: 'click', input: { testId: 'confirm-btn' }, target: { testId: 'confirm-btn', @@ -344,13 +345,15 @@ describe('core', () => { }); it('computes discovery label for discovery tools', 
async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation(); await store.recordStep({ sessionId: 'session-step-005', - toolName: 'mm_describe_screen', + toolName: 'describe_screen', outcome, observation, }); @@ -362,13 +365,15 @@ describe('core', () => { }); it('computes navigation label for navigation tools', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation({ currentScreen: 'settings' }); await store.recordStep({ sessionId: 'session-step-006', - toolName: 'mm_navigate', + toolName: 'navigate', outcome, observation, }); @@ -380,13 +385,15 @@ describe('core', () => { }); it('computes interaction label for interaction tools', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation(); await store.recordStep({ sessionId: 'session-step-007', - toolName: 'mm_click', + toolName: 'click', input: { testId: 'send-button' }, outcome, observation, @@ -399,7 +406,9 @@ describe('core', () => { }); it('computes confirmation label for confirmation-related targets', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation({ currentScreen: 'confirm-transaction', @@ -407,7 +416,7 @@ describe('core', () => { await store.recordStep({ sessionId: 'session-step-008', - toolName: 'mm_click', + toolName: 'click', target: { testId: 'confirm-transaction-btn' 
}, outcome, observation, @@ -420,7 +429,9 @@ describe('core', () => { }); it('computes error-recovery label for failed outcomes', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: false, error: { code: 'MM_TARGET_NOT_FOUND', message: 'Target not found' }, @@ -429,7 +440,7 @@ describe('core', () => { await store.recordStep({ sessionId: 'session-step-009', - toolName: 'mm_click', + toolName: 'click', input: { testId: 'nonexistent-btn' }, outcome, observation, @@ -442,13 +453,15 @@ describe('core', () => { }); it('records step with e2e context', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation(); await store.recordStep({ sessionId: 'session-step-011', - toolName: 'mm_click', + toolName: 'click', outcome, observation, context: 'e2e', @@ -461,43 +474,24 @@ describe('core', () => { }); it('records step with prod context', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); - const outcome: StepRecordOutcome = { ok: true }; - const observation = createObservation(); - - await store.recordStep({ - sessionId: 'session-step-012', - toolName: 'mm_click', - outcome, - observation, - context: 'prod', - }); - - const writeCall = vi.mocked(fs.writeFile).mock.calls[0]; - const writtenData = JSON.parse(writeCall[1] as string); - - expect(writtenData.context).toBe('prod'); - }); - - it('uses custom tool prefix for label computation', async () => { const store = new KnowledgeStore({ rootDir: '/test/knowledge', - toolPrefix: 'custom', }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation(); await store.recordStep({ - sessionId: 'session-step-013', - toolName: 'custom_describe_screen', + 
sessionId: 'session-step-012', + toolName: 'click', outcome, observation, + context: 'prod', }); const writeCall = vi.mocked(fs.writeFile).mock.calls[0]; const writtenData = JSON.parse(writeCall[1] as string); - expect(writtenData.labels).toContain('discovery'); + expect(writtenData.context).toBe('prod'); }); }); @@ -507,7 +501,9 @@ describe('core', () => { } it('returns empty array when no sessions exist', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([]); const result = await store.listSessions(10); @@ -516,7 +512,9 @@ describe('core', () => { }); it('returns sessions sorted by createdAt descending', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const oldMetadata: SessionMetadata = { schemaVersion: 1, sessionId: 'mm-session-old', @@ -549,7 +547,9 @@ describe('core', () => { }); it('limits results to specified count', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-1'), @@ -583,7 +583,9 @@ describe('core', () => { }); it('filters by flowTag', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const sendMetadata: SessionMetadata = { schemaVersion: 1, sessionId: 'mm-session-send', @@ -616,7 +618,9 @@ describe('core', () => { }); it('filters by tag', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const testMetadata: SessionMetadata = { schemaVersion: 1, sessionId: 'mm-session-test', @@ -649,7 +653,9 @@ describe('core', () => { 
}); it('filters by sinceHours', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const now = new Date(); const recentDate = new Date(now.getTime() - 12 * 60 * 60 * 1000); const oldDate = new Date(now.getTime() - 72 * 60 * 60 * 1000); @@ -692,7 +698,9 @@ describe('core', () => { } it('returns current session ID for scope "current"', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const result = await store.resolveSessionIds( 'current', @@ -703,7 +711,9 @@ describe('core', () => { }); it('returns empty array for scope "current" without current session', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const result = await store.resolveSessionIds('current', undefined); @@ -711,7 +721,9 @@ describe('core', () => { }); it('returns specific session ID for scope object', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const result = await store.resolveSessionIds( { sessionId: 'specific-session-001' }, @@ -722,7 +734,9 @@ describe('core', () => { }); it('returns all session IDs for scope "all"', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-1'), createDirent('mm-session-2'), @@ -739,7 +753,9 @@ describe('core', () => { }); it('filters session IDs by filters for scope "all"', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const sendMetadata: SessionMetadata = { 
schemaVersion: 1, sessionId: 'mm-session-send', @@ -773,7 +789,9 @@ describe('core', () => { }); it('includes sessions without metadata when filtering', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-with-metadata'), @@ -803,7 +821,9 @@ describe('core', () => { describe('extractPathTokens', () => { it('extracts tokens from URL hash fragment', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const outcome: StepRecordOutcome = { ok: true }; const observation = createObservation({ currentScreen: 'confirm-transaction', @@ -812,7 +832,7 @@ describe('core', () => { await store.recordStep({ sessionId: 'session-path-001', - toolName: 'mm_click', + toolName: 'click', outcome, observation, }); @@ -855,7 +875,7 @@ describe('similarity', () => { } = {}, ) { const baseTool = { - name: 'mm_click', + name: 'click', input: { testId: 'test-btn' }, target: { testId: 'test-btn' }, }; @@ -904,9 +924,11 @@ describe('similarity', () => { describe('searchSteps scoring', () => { it('scores steps matching tool name in query', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ - tool: { name: 'mm_click', input: {} }, + tool: { name: 'click', input: {} }, }); vi.mocked(fs.readdir).mockResolvedValueOnce([ @@ -928,11 +950,13 @@ describe('similarity', () => { const results = await store.searchSteps('click', 10, 'all', undefined); expect(results.length).toBeGreaterThan(0); - expect(results[0].tool).toBe('mm_click'); + expect(results[0].tool).toBe('click'); }); it('scores steps matching screen name in query', async () => { - const store = new KnowledgeStore({ rootDir: 
'/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ observation: { state: { currentScreen: 'send' }, @@ -964,10 +988,12 @@ describe('similarity', () => { }); it('scores steps matching target testId in query', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ tool: { - name: 'mm_click', + name: 'click', input: { testId: 'confirm-button' }, target: { testId: 'confirm-button' }, }, @@ -995,7 +1021,9 @@ describe('similarity', () => { }); it('scores steps matching labels in query', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ labels: ['navigation', 'confirmation'], }); @@ -1027,7 +1055,9 @@ describe('similarity', () => { }); it('scores steps matching observed testIds in query', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ observation: { state: { currentScreen: 'home' }, @@ -1061,7 +1091,9 @@ describe('similarity', () => { }); it('scores steps matching a11y node names in query', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ observation: { state: { currentScreen: 'home' }, @@ -1097,7 +1129,9 @@ describe('similarity', () => { }); it('scores steps matching a11y node roles in query', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ observation: { state: { currentScreen: 'home' }, 
@@ -1130,7 +1164,9 @@ describe('similarity', () => { }); it('returns empty results for empty query', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const results = await store.searchSteps('', 10, 'all', undefined); @@ -1138,9 +1174,11 @@ describe('similarity', () => { }); it('calculates token coverage ratio bonus', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const stepRecord = createStepRecord({ - tool: { name: 'mm_click', input: {} }, + tool: { name: 'click', input: {} }, observation: { state: { currentScreen: 'send' }, testIds: [], @@ -1177,7 +1215,9 @@ describe('similarity', () => { describe('session scoring', () => { it('scores sessions with matching flowTags higher', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const sendMetadata = { schemaVersion: 1, sessionId: 'mm-session-send', @@ -1219,7 +1259,9 @@ describe('similarity', () => { }); it('scores sessions with matching goal tokens', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1245,7 +1287,9 @@ describe('similarity', () => { }); it('scores sessions with matching tags', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1275,7 +1319,9 @@ describe('similarity', () => { }); it('gives recency bonus to recent sessions (< 24 hours)', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new 
KnowledgeStore({ + rootDir: '/test/knowledge', + }); const recentDate = new Date( Date.now() - 12 * 60 * 60 * 1000, ).toISOString(); @@ -1296,7 +1342,7 @@ describe('similarity', () => { vi.mocked(fs.readFile).mockResolvedValueOnce( JSON.stringify( createStepRecord({ - tool: { name: 'mm_click', input: {} }, + tool: { name: 'click', input: {} }, }), ), ); @@ -1307,7 +1353,9 @@ describe('similarity', () => { }); it('gives smaller recency bonus to moderately recent sessions (24-72 hours)', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const moderateDate = new Date( Date.now() - 48 * 60 * 60 * 1000, ).toISOString(); @@ -1328,7 +1376,7 @@ describe('similarity', () => { vi.mocked(fs.readFile).mockResolvedValueOnce( JSON.stringify( createStepRecord({ - tool: { name: 'mm_click', input: {} }, + tool: { name: 'click', input: {} }, }), ), ); @@ -1339,7 +1387,9 @@ describe('similarity', () => { }); it('sorts sessions by score then by createdAt', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata1 = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1372,7 +1422,7 @@ describe('similarity', () => { JSON.stringify( createStepRecord({ sessionId: 'mm-session-1', - tool: { name: 'mm_click', input: {} }, + tool: { name: 'click', input: {} }, }), ), ) @@ -1380,7 +1430,7 @@ describe('similarity', () => { JSON.stringify( createStepRecord({ sessionId: 'mm-session-2', - tool: { name: 'mm_click', input: {} }, + tool: { name: 'click', input: {} }, }), ), ); @@ -1393,7 +1443,9 @@ describe('similarity', () => { describe('generatePriorKnowledge similarity scoring', () => { it('scores steps with same screen higher', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); 
const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1433,7 +1485,9 @@ describe('similarity', () => { }); it('scores steps with URL path overlap', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1474,7 +1528,9 @@ describe('similarity', () => { }); it('scores steps with testId overlap', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1518,7 +1574,9 @@ describe('similarity', () => { }); it('scores steps with a11y node overlap', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1563,7 +1621,9 @@ describe('similarity', () => { }); it('scores actionable tools higher than discovery tools', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1573,7 +1633,7 @@ describe('similarity', () => { launch: { stateMode: 'default' }, }; const clickStep = createStepRecord({ - tool: { name: 'mm_click', input: { testId: 'send-btn' } }, + tool: { name: 'click', input: { testId: 'send-btn' } }, observation: { state: { currentScreen: 'home' }, testIds: [{ testId: 'send-btn', tag: 'button', visible: true }], @@ -1601,12 +1661,14 @@ describe('similarity', () => { expect(result).toBeDefined(); if (result?.similarSteps.length) { - expect(result.similarSteps[0].tool).toBe('mm_click'); + expect(result.similarSteps[0].tool).toBe('click'); } }); it('excludes discovery tools from similarity scoring', 
async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1616,7 +1678,7 @@ describe('similarity', () => { launch: { stateMode: 'default' }, }; const discoveryStep = createStepRecord({ - tool: { name: 'mm_describe_screen', input: {} }, + tool: { name: 'describe_screen', input: {} }, observation: { state: { currentScreen: 'home' }, testIds: [{ testId: 'send-btn', tag: 'button', visible: true }], @@ -1644,14 +1706,16 @@ describe('similarity', () => { if (result?.similarSteps.length) { const hasDiscoveryTool = result.similarSteps.some( - (s) => s.tool === 'mm_describe_screen', + (s) => s.tool === 'describe_screen', ); expect(hasDiscoveryTool).toBe(false); } }); it('returns undefined when no candidate sessions exist', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([] as any); @@ -1668,7 +1732,9 @@ describe('similarity', () => { }); it('excludes current session from candidate sessions', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-current', @@ -1696,7 +1762,9 @@ describe('similarity', () => { }); it('caps testId overlap scoring at 3 items', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1745,7 +1813,9 @@ describe('similarity', () => { }); it('caps a11y overlap scoring at 2 items', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: 
'/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1794,7 +1864,9 @@ describe('similarity', () => { }); it('computes confidence as ratio of score to max score', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1804,7 +1876,7 @@ describe('similarity', () => { launch: { stateMode: 'default' }, }; const stepRecord = createStepRecord({ - tool: { name: 'mm_click', input: { testId: 'send-btn' } }, + tool: { name: 'click', input: { testId: 'send-btn' } }, observation: { state: { currentScreen: 'send' }, testIds: [{ testId: 'send-btn', tag: 'button', visible: true }], @@ -1842,7 +1914,9 @@ describe('similarity', () => { }); it('filters steps using flowTag from context', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1875,7 +1949,9 @@ describe('similarity', () => { }); it('does not award sameScreen bonus for unknown screens', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1915,7 +1991,9 @@ describe('similarity', () => { }); it('builds avoid list only for targets meeting failure threshold', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -1931,7 +2009,7 @@ describe('similarity', () => { }) => ({ ...createStepRecord({ tool: { - name: 'mm_click', + name: 'click', input: { testId: target.testId ?? 
'unknown-btn' }, target, }, @@ -1955,7 +2033,7 @@ describe('similarity', () => { const failedSelector = makeFailedStep({ selector: '.unstable-target' }); const successfulStep = createStepRecord({ tool: { - name: 'mm_click', + name: 'click', input: { testId: 'confirm-btn' }, target: { testId: 'confirm-btn' }, }, @@ -2007,7 +2085,9 @@ describe('similarity', () => { }); it('skips suggested action when tool is not in action map', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -2018,7 +2098,7 @@ describe('similarity', () => { }; const unknownToolStep = createStepRecord({ tool: { - name: 'mm_unknown_tool', + name: 'unknown_tool', input: { testId: 'send-btn' }, target: { testId: 'send-btn' }, }, @@ -2056,7 +2136,9 @@ describe('similarity', () => { }); it('includes a11y fallback target when testId text matches visible a11y name', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const metadata = { schemaVersion: 1, sessionId: 'mm-session-1', @@ -2067,7 +2149,7 @@ describe('similarity', () => { }; const actionableStep = createStepRecord({ tool: { - name: 'mm_click', + name: 'click', input: { testId: 'send-button' }, target: { testId: 'send-button' }, }, @@ -2151,7 +2233,7 @@ describe('session', () => { schemaVersion: 1, sessionId, timestamp, - tool: { name: 'mm_click', input: { testId: 'test-btn' } }, + tool: { name: 'click', input: { testId: 'test-btn' } }, observation: { state: { isLoaded: true, @@ -2176,7 +2258,9 @@ describe('session', () => { describe('getAllSessionIds', () => { it('returns session IDs from directories starting with mm-', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); 
vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-1'), @@ -2208,7 +2292,9 @@ describe('session', () => { }); it('returns empty array when directory read fails', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockRejectedValueOnce(new Error('ENOENT')); @@ -2218,7 +2304,9 @@ describe('session', () => { }); it('returns empty array for empty directory', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([] as any); @@ -2230,7 +2318,9 @@ describe('session', () => { describe('session scanning limits', () => { it('limits sessions scanned to maxSessionsToScan (20)', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); // Create 25 sessions const sessions = Array.from({ length: 25 }, (_, i) => @@ -2267,7 +2357,9 @@ describe('session', () => { }); it('limits steps per session to maxStepsPerSession (500)', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); // Create 600 steps for one session const stepFiles = Array.from({ length: 600 }, (_, i) => `step-${i}.json`); @@ -2300,7 +2392,9 @@ describe('session', () => { }); it('stops scanning when maxTotalSteps (2000) is reached', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); // Create 5 sessions with 500 steps each = 2500 total const sessions = Array.from({ length: 5 }, (_, i) => @@ -2353,7 +2447,9 @@ describe('session', () => { describe('filter parameters', () => { it('filters sessions by flowTag', async () => { - const store = 
new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-send'), @@ -2385,7 +2481,9 @@ describe('session', () => { }); it('filters sessions by tag', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-e2e'), @@ -2417,7 +2515,9 @@ describe('session', () => { }); it('filters sessions by sinceHours', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const now = new Date(); const recentDate = new Date(now.getTime() - 6 * 60 * 60 * 1000); const oldDate = new Date(now.getTime() - 48 * 60 * 60 * 1000); @@ -2452,7 +2552,9 @@ describe('session', () => { }); it('combines multiple filters', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const recentDate = new Date(Date.now() - 6 * 60 * 60 * 1000); const oldDate = new Date(Date.now() - 48 * 60 * 60 * 1000); @@ -2503,7 +2605,9 @@ describe('session', () => { describe('corrupted session file handling', () => { it('skips corrupted session metadata files', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-valid'), @@ -2527,7 +2631,9 @@ describe('session', () => { }); it('skips corrupted step files during search', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir) .mockResolvedValueOnce([createDirent('mm-session-1')] as 
any) @@ -2559,7 +2665,9 @@ describe('session', () => { }); it('handles missing step files gracefully', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir) .mockResolvedValueOnce([createDirent('mm-session-1')] as any) @@ -2582,7 +2690,9 @@ describe('session', () => { }); it('handles steps directory not existing', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir) .mockResolvedValueOnce([createDirent('mm-session-1')] as any) @@ -2605,7 +2715,9 @@ describe('session', () => { describe('empty session directory', () => { it('returns empty results for empty knowledge root', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([] as any); @@ -2615,7 +2727,9 @@ describe('session', () => { }); it('returns empty search results for empty knowledge root', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([] as any); @@ -2625,7 +2739,9 @@ describe('session', () => { }); it('returns empty getLastSteps for session with no steps', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir) .mockResolvedValueOnce([createDirent('mm-session-1')] as any) @@ -2641,7 +2757,9 @@ describe('session', () => { }); it('returns empty summarizeSession for session with no steps', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); 
vi.mocked(fs.readdir).mockResolvedValueOnce([] as any); @@ -2654,7 +2772,9 @@ describe('session', () => { describe('resolveSessionIds with filters', () => { it('includes sessions without metadata when filtering', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); vi.mocked(fs.readdir).mockResolvedValueOnce([ createDirent('mm-session-with-metadata'), @@ -2682,7 +2802,9 @@ describe('session', () => { }); it('returns empty array for scope current without sessionId', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const result = await store.resolveSessionIds('current', undefined); @@ -2690,7 +2812,9 @@ describe('session', () => { }); it('returns specific sessionId for scope object', async () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); const result = await store.resolveSessionIds( { sessionId: 'specific-session' }, @@ -2715,7 +2839,9 @@ describe('session', () => { }); it('returns true when knowledge store is initialized', () => { - const store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + const store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); setKnowledgeStore(store); const result = hasKnowledgeStore(); @@ -2728,7 +2854,9 @@ describe('session', () => { let store: KnowledgeStore; beforeEach(() => { - store = new KnowledgeStore({ rootDir: '/test/knowledge' }); + store = new KnowledgeStore({ + rootDir: '/test/knowledge', + }); setKnowledgeStore(store); }); @@ -2739,7 +2867,7 @@ describe('session', () => { it('recordStep delegates to underlying KnowledgeStore instance', async () => { const params = { sessionId: 'test-session', - toolName: 'mm_click', + toolName: 'click', observation: createObservation(), outcome: { ok: true } as 
StepRecordOutcome, }; @@ -2757,7 +2885,7 @@ describe('session', () => { const params = { sessionId: 'test-session', - toolName: 'mm_click', + toolName: 'click', observation: createObservation(), outcome: { ok: true } as StepRecordOutcome, }; @@ -2771,7 +2899,7 @@ describe('session', () => { const mockSteps = [ { timestamp: '2024-01-15T10:30:00.000Z', - tool: 'mm_click', + tool: 'click', screen: 'home' as const, snippet: 'Clicked button', }, @@ -2809,7 +2937,7 @@ describe('session', () => { const mockResults = [ { timestamp: '2024-01-15T10:30:00.000Z', - tool: 'mm_click', + tool: 'click', screen: 'home' as const, snippet: 'Clicked send button', }, @@ -2845,7 +2973,7 @@ describe('session', () => { const mockSummary = { sessionId: 'test-session', stepCount: 5, - recipe: [{ stepNumber: 1, tool: 'mm_click', notes: 'Clicked send' }], + recipe: [{ stepNumber: 1, tool: 'click', notes: 'Clicked send' }], }; vi.spyOn(store, 'summarizeSession').mockResolvedValueOnce(mockSummary); diff --git a/src/mcp-server/knowledge-store.ts b/src/knowledge-store/knowledge-store.ts similarity index 97% rename from src/mcp-server/knowledge-store.ts rename to src/knowledge-store/knowledge-store.ts index fbf1ec5..fecbd8c 100644 --- a/src/mcp-server/knowledge-store.ts +++ b/src/knowledge-store/knowledge-store.ts @@ -8,6 +8,7 @@ import { tokenize, tokenizeIdentifier, } from './tokenization.js'; +import type { ExtensionState } from '../capabilities/types.js'; import type { StepRecord, StepRecordTool, @@ -28,14 +29,12 @@ import type { PriorKnowledgeAvoid, PriorKnowledgeRelatedSession, PriorKnowledgeTarget, -} from './types'; +} from '../tools/types'; +import { generateFilesafeTimestamp, debugWarn } from '../utils'; import { - generateFilesafeTimestamp, isSensitiveField, SENSITIVE_FIELD_PATTERNS, - debugWarn, -} from './utils'; -import type { ExtensionState } from '../capabilities/types.js'; +} from './utils/redaction.js'; const KNOWLEDGE_ROOT = 'test-artifacts/llm-knowledge'; const SCHEMA_VERSION = 
1; @@ -96,10 +95,6 @@ export type KnowledgeStoreConfig = { * Prefix for session IDs (default: 'mm-') */ sessionIdPrefix?: string; - /** - * Prefix for tool names (default: 'mm') - */ - toolPrefix?: string; }; /** @@ -128,8 +123,6 @@ export class KnowledgeStore { readonly #sessionIdPrefix: string; - readonly #toolPrefix: string; - readonly #sessionMetadataCache: Map = new Map(); @@ -151,30 +144,28 @@ export class KnowledgeStore { this.#knowledgeRoot = config.rootDir ?? path.join(process.cwd(), KNOWLEDGE_ROOT); this.#sessionIdPrefix = config.sessionIdPrefix ?? 'mm-'; - this.#toolPrefix = config.toolPrefix ?? 'mm'; - const prefix = this.#toolPrefix; this.#actionableTools = [ - `${prefix}_click`, - `${prefix}_type`, - `${prefix}_wait_for`, - `${prefix}_navigate`, - `${prefix}_wait_for_notification`, + 'click', + 'type', + 'wait_for', + 'navigate', + 'wait_for_notification', ]; this.#toolActionMap = { - [`${prefix}_click`]: 'click', - [`${prefix}_type`]: 'type', - [`${prefix}_wait_for`]: 'wait_for', - [`${prefix}_navigate`]: 'navigate', - [`${prefix}_wait_for_notification`]: 'wait_for_notification', + click: 'click', + type: 'type', + wait_for: 'wait_for', + navigate: 'navigate', + wait_for_notification: 'wait_for_notification', }; this.#discoveryTools = [ - `${prefix}_describe_screen`, - `${prefix}_list_testids`, - `${prefix}_accessibility_snapshot`, - `${prefix}_get_state`, + 'describe_screen', + 'list_testids', + 'accessibility_snapshot', + 'get_state', ]; } @@ -448,15 +439,8 @@ export class KnowledgeStore { ): string[] { const labels: string[] = []; - const navigationTools = [ - `${this.#toolPrefix}_navigate`, - `${this.#toolPrefix}_wait_for_notification`, - ]; - const interactionTools = [ - `${this.#toolPrefix}_click`, - `${this.#toolPrefix}_type`, - `${this.#toolPrefix}_wait_for`, - ]; + const navigationTools = ['navigate', 'wait_for_notification']; + const interactionTools = ['click', 'type', 'wait_for']; if (this.#discoveryTools.includes(toolName)) { 
labels.push('discovery'); @@ -825,7 +809,7 @@ export class KnowledgeStore { let textRedacted = false; let textLength: number | undefined; - const typeToolName = `${this.#toolPrefix}_type`; + const typeToolName = 'type'; for (const [key, value] of Object.entries(input)) { if (toolName === typeToolName && key === 'text') { diff --git a/src/mcp-server/tokenization.test.ts b/src/knowledge-store/tokenization.test.ts similarity index 99% rename from src/mcp-server/tokenization.test.ts rename to src/knowledge-store/tokenization.test.ts index 64c33d5..939ac1c 100644 --- a/src/mcp-server/tokenization.test.ts +++ b/src/knowledge-store/tokenization.test.ts @@ -56,7 +56,7 @@ describe('tokenization', () => { expect(buttonCount).toBe(1); }); - it('handles special MCP/extension stopwords', () => { + it('handles special tool/extension stopwords', () => { const tokens = tokenize('mm mcp lw test flow'); expect(tokens).not.toContain('mm'); expect(tokens).not.toContain('mcp'); diff --git a/src/mcp-server/tokenization.ts b/src/knowledge-store/tokenization.ts similarity index 100% rename from src/mcp-server/tokenization.ts rename to src/knowledge-store/tokenization.ts diff --git a/src/mcp-server/utils/redaction.test.ts b/src/knowledge-store/utils/redaction.test.ts similarity index 100% rename from src/mcp-server/utils/redaction.test.ts rename to src/knowledge-store/utils/redaction.test.ts diff --git a/src/mcp-server/utils/redaction.ts b/src/knowledge-store/utils/redaction.ts similarity index 100% rename from src/mcp-server/utils/redaction.ts rename to src/knowledge-store/utils/redaction.ts diff --git a/src/launcher/console-error-buffer.test.ts b/src/launcher/console-error-buffer.test.ts index c34747b..b824862 100644 --- a/src/launcher/console-error-buffer.test.ts +++ b/src/launcher/console-error-buffer.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect } from 'vitest'; -import { ConsoleErrorBuffer } from './console-error-buffer'; -import type { ConsoleErrorEntry } from 
'./console-error-buffer'; +import { ConsoleErrorBuffer } from './console-error-buffer.js'; +import type { ConsoleErrorEntry } from './console-error-buffer.js'; describe('ConsoleErrorBuffer', () => { describe('constructor', () => { diff --git a/src/launcher/retry.test.ts b/src/launcher/retry.test.ts index 06f7ef6..4cf4dbf 100644 --- a/src/launcher/retry.test.ts +++ b/src/launcher/retry.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { delay, retryUntil } from './retry'; +import { delay, retryUntil } from './retry.js'; describe('retry', () => { beforeEach(() => { diff --git a/src/mcp-server/server.test.ts b/src/mcp-server/server.test.ts deleted file mode 100644 index f6ff8fa..0000000 --- a/src/mcp-server/server.test.ts +++ /dev/null @@ -1,677 +0,0 @@ -/* eslint-disable @typescript-eslint/naming-convention */ -import { Server } from '@modelcontextprotocol/sdk/server/index.js'; -import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import type { MockInstance } from 'vitest'; - -import { createMcpServer } from './server.js'; -import type { McpServerConfig } from './server.js'; -import * as sessionManagerModule from './session-manager.js'; -import { flushPromises } from './test-utils'; -import * as batchModule from './tools/batch.js'; -import * as definitionsModule from './tools/definitions.js'; -import { ErrorCodes } from './types'; - -vi.mock('@modelcontextprotocol/sdk/server/index.js'); -vi.mock('@modelcontextprotocol/sdk/server/stdio.js'); -vi.mock('./session-manager.js'); -vi.mock('./tools/definitions.js'); -vi.mock('./tools/batch.js'); - -describe('createMcpServer', () => { - let processExitSpy: MockInstance; - let processOnSpy: MockInstance; - let consoleErrorSpy: MockInstance; - let signalHandlers: Map void>; - let mockSetRequestHandler: ReturnType; - let mockConnect: ReturnType; - let mockClose: 
ReturnType; - - const mockToolDefinitions = [ - { name: 'mm_click', description: 'Click element', inputSchema: {} }, - { name: 'mm_type', description: 'Type text', inputSchema: {} }, - ]; - - const mockToolHandlers = { - mm_click: vi - .fn() - .mockResolvedValue({ ok: true, result: { clicked: true } }), - mm_type: vi.fn().mockResolvedValue({ ok: true, result: { typed: true } }), - }; - - beforeEach(() => { - vi.clearAllMocks(); - - mockSetRequestHandler = vi.fn(); - mockConnect = vi.fn().mockResolvedValue(undefined); - mockClose = vi.fn().mockResolvedValue(undefined); - - vi.mocked(Server).mockImplementation( - () => - ({ - setRequestHandler: mockSetRequestHandler, - connect: mockConnect, - close: mockClose, - }) as unknown as InstanceType, - ); - - vi.mocked(StdioServerTransport).mockImplementation( - () => - ({ - type: 'stdio', - }) as unknown as InstanceType, - ); - - vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({ - getSessionId: vi.fn().mockReturnValue('test-session-123'), - cleanup: vi.fn().mockResolvedValue(true), - } as unknown as ReturnType); - vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true); - - vi.mocked(definitionsModule.getToolDefinitions).mockReturnValue( - mockToolDefinitions, - ); - vi.mocked(definitionsModule.buildToolHandlersRecord).mockReturnValue( - mockToolHandlers, - ); - vi.mocked(definitionsModule.getToolHandler).mockReturnValue( - vi.fn().mockResolvedValue({ ok: true, result: {} }), - ); - vi.mocked(definitionsModule.safeValidateToolInput).mockReturnValue({ - success: true, - data: {}, - }); - (definitionsModule as { TOOL_PREFIX: string }).TOOL_PREFIX = 'mm'; - - vi.mocked(batchModule.setToolRegistry).mockImplementation(() => {}); - - signalHandlers = new Map(); - processOnSpy = vi - .spyOn(process, 'on') - .mockImplementation( - (event: string | symbol, handler: (...args: unknown[]) => void) => { - signalHandlers.set(String(event), handler as () => void); - return process; - }, - ); - - 
processExitSpy = vi - .spyOn(process, 'exit') - .mockImplementation( - (_code?: string | number | null | undefined): never => { - throw new Error(`process.exit(${_code})`); - }, - ); - - consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('factory function', () => { - it('creates server with required config', () => { - const config: McpServerConfig = { - name: 'test-server', - version: '1.0.0', - }; - - const server = createMcpServer(config); - - expect(server).toBeDefined(); - expect(server.start).toBeInstanceOf(Function); - expect(server.stop).toBeInstanceOf(Function); - expect(server.getServer).toBeInstanceOf(Function); - expect(server.getToolDefinitions).toBeInstanceOf(Function); - expect(server.getToolPrefix).toBeInstanceOf(Function); - }); - - it('creates Server with name and version', () => { - const config: McpServerConfig = { - name: 'my-extension', - version: '2.0.0', - }; - - createMcpServer(config); - - expect(Server).toHaveBeenCalledWith( - { name: 'my-extension', version: '2.0.0' }, - { capabilities: { tools: {} } }, - ); - }); - - it('registers ListTools and CallTool request handlers', () => { - createMcpServer({ - name: 'test-server', - version: '1.0.0', - }); - - expect(mockSetRequestHandler).toHaveBeenCalledTimes(2); - }); - - it('registers signal handlers for SIGINT and SIGTERM', () => { - createMcpServer({ - name: 'test-server', - version: '1.0.0', - }); - - expect(processOnSpy).toHaveBeenCalledWith('SIGINT', expect.any(Function)); - expect(processOnSpy).toHaveBeenCalledWith( - 'SIGTERM', - expect.any(Function), - ); - }); - }); - - describe('getServer()', () => { - it('returns the underlying MCP Server instance', () => { - const server = createMcpServer({ name: 'test', version: '1.0.0' }); - - const mcpServer = server.getServer(); - - expect(mcpServer).toBeDefined(); - expect(mcpServer.setRequestHandler).toBeInstanceOf(Function); - 
expect(mcpServer.connect).toBeInstanceOf(Function); - expect(mcpServer.close).toBeInstanceOf(Function); - }); - }); - - describe('getToolDefinitions()', () => { - it('returns all tool definitions', () => { - const server = createMcpServer({ name: 'test', version: '1.0.0' }); - - const toolDefs = server.getToolDefinitions(); - - expect(toolDefs).toStrictEqual(mockToolDefinitions); - }); - }); - - describe('getToolPrefix()', () => { - it('returns the tool prefix', () => { - const server = createMcpServer({ name: 'test', version: '1.0.0' }); - - const prefix = server.getToolPrefix(); - - expect(prefix).toBe('mm'); - }); - }); - - describe('start()', () => { - it('creates StdioServerTransport and connects', async () => { - const server = createMcpServer({ name: 'test', version: '1.0.0' }); - - await server.start(); - - expect(StdioServerTransport).toHaveBeenCalled(); - expect(mockConnect).toHaveBeenCalled(); - }); - - it('logs server startup message', async () => { - const customLogger = vi.fn(); - const server = createMcpServer({ - name: 'my-server', - version: '2.0.0', - logger: customLogger, - }); - - await server.start(); - - expect(customLogger).toHaveBeenCalledWith( - 'my-server MCP Server v2.0.0 running on stdio', - ); - }); - - it('uses console.error as default logger', async () => { - const server = createMcpServer({ - name: 'test-server', - version: '1.0.0', - }); - - await server.start(); - - expect(consoleErrorSpy).toHaveBeenCalledWith( - 'test-server MCP Server v1.0.0 running on stdio', - ); - }); - }); - - describe('stop()', () => { - it('closes server when transport exists', async () => { - const server = createMcpServer({ name: 'test', version: '1.0.0' }); - await server.start(); - - await server.stop(); - - expect(mockClose).toHaveBeenCalled(); - }); - - it('does nothing when transport does not exist', async () => { - const server = createMcpServer({ name: 'test', version: '1.0.0' }); - - await server.stop(); - - 
expect(mockClose).not.toHaveBeenCalled(); - }); - }); - - describe('ListToolsRequestSchema handler', () => { - it('returns tool definitions', async () => { - createMcpServer({ name: 'test', version: '1.0.0' }); - - const listToolsHandler = mockSetRequestHandler.mock.calls[0][1]; - - const result = await listToolsHandler(); - - expect(result).toStrictEqual({ - tools: mockToolDefinitions, - }); - }); - }); - - describe('CallToolRequestSchema handler', () => { - let callToolHandler: ( - request: { - params: { name: string; arguments?: Record }; - }, - extra?: { signal?: AbortSignal }, - ) => Promise; - - beforeEach(() => { - createMcpServer({ name: 'test', version: '1.0.0' }); - callToolHandler = mockSetRequestHandler.mock.calls[1][1]; - }); - - it('returns error for unknown tool', async () => { - const result = await callToolHandler({ - params: { name: 'mm_unknown', arguments: {} }, - }); - - expect(result).toMatchObject({ - content: [{ type: 'text' }], - isError: true, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(responseText.error.message).toContain('Unknown tool: mm_unknown'); - }); - - it('returns error for invalid input', async () => { - vi.mocked(definitionsModule.safeValidateToolInput).mockReturnValueOnce({ - success: false, - error: 'name: Required', - }); - - const result = await callToolHandler({ - params: { name: 'mm_click', arguments: {} }, - }); - - expect(result).toMatchObject({ - content: [{ type: 'text' }], - isError: true, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(responseText.error.message).toContain( - 'Invalid input: name: Required', - ); - }); - - it('returns error when no handler registered', async () => { - 
vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce( - undefined, - ); - - const result = await callToolHandler({ - params: { name: 'mm_click', arguments: {} }, - }); - - expect(result).toMatchObject({ - content: [{ type: 'text' }], - isError: true, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(responseText.error.message).toContain( - 'No handler registered for tool: mm_click', - ); - }); - - it('executes handler and returns success response', async () => { - const mockHandler = vi - .fn() - .mockResolvedValue({ ok: true, result: { clicked: true } }); - vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce( - mockHandler, - ); - - const result = await callToolHandler({ - params: { name: 'mm_click', arguments: { testId: 'btn' } }, - }); - - expect(result).toMatchObject({ - content: [{ type: 'text' }], - isError: false, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.ok).toBe(true); - expect(responseText.result.clicked).toBe(true); - }); - - it('passes signal to handler', async () => { - const mockHandler = vi.fn().mockResolvedValue({ ok: true, result: {} }); - vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce( - mockHandler, - ); - const mockSignal = new AbortController().signal; - - await callToolHandler( - { params: { name: 'mm_click', arguments: {} } }, - { signal: mockSignal }, - ); - - expect(mockHandler).toHaveBeenCalledWith( - expect.any(Object), - expect.objectContaining({ signal: mockSignal }), - ); - }); - - it('returns isError: true when handler returns ok: false', async () => { - const mockHandler = vi.fn().mockResolvedValue({ - ok: false, - error: { code: 'MM_CLICK_FAILED', message: 'Click failed' }, - }); - vi.mocked(definitionsModule.getToolHandler).mockReturnValueOnce( - mockHandler, - ); - 
- const result = await callToolHandler({ - params: { name: 'mm_click', arguments: {} }, - }); - - expect(result).toMatchObject({ - isError: true, - }); - }); - - it('includes sessionId in error response when session manager available', async () => { - vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true); - vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({ - getSessionId: vi.fn().mockReturnValue('session-abc'), - cleanup: vi.fn(), - } as unknown as ReturnType< - typeof sessionManagerModule.getSessionManager - >); - - const result = await callToolHandler({ - params: { name: 'mm_unknown', arguments: {} }, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.meta.sessionId).toBe('session-abc'); - }); - - it('does not include sessionId when no session manager', async () => { - vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(false); - - const result = await callToolHandler({ - params: { name: 'mm_unknown', arguments: {} }, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.meta.sessionId).toBeUndefined(); - }); - }); - - describe('signal handlers', () => { - it('calls cleanup on SIGINT', async () => { - const onCleanup = vi.fn().mockResolvedValue(undefined); - createMcpServer({ - name: 'test', - version: '1.0.0', - onCleanup, - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - expect(sigintHandler).toBeDefined(); - - try { - sigintHandler?.(); - await flushPromises(); - } catch (e) { - expect((e as Error).message).toBe('process.exit(0)'); - } - - expect(onCleanup).toHaveBeenCalled(); - }); - - it('calls cleanup on SIGTERM', async () => { - const onCleanup = vi.fn().mockResolvedValue(undefined); - createMcpServer({ - name: 'test', - version: '1.0.0', - onCleanup, - }); - - const sigtermHandler = signalHandlers.get('SIGTERM'); - 
expect(sigtermHandler).toBeDefined(); - - try { - sigtermHandler?.(); - await flushPromises(); - } catch (e) { - expect((e as Error).message).toBe('process.exit(0)'); - } - - expect(onCleanup).toHaveBeenCalled(); - }); - - it('cleans up session manager if available', async () => { - const mockCleanup = vi.fn().mockResolvedValue(true); - vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true); - vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({ - getSessionId: vi.fn().mockReturnValue('session-abc'), - cleanup: mockCleanup, - } as unknown as ReturnType< - typeof sessionManagerModule.getSessionManager - >); - - createMcpServer({ - name: 'test', - version: '1.0.0', - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - - sigintHandler?.(); - await flushPromises(); - - expect(mockCleanup).toHaveBeenCalled(); - }); - - it('does not call session cleanup when no session manager', async () => { - const mockCleanup = vi.fn(); - vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(false); - - createMcpServer({ - name: 'test', - version: '1.0.0', - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - - sigintHandler?.(); - await flushPromises(); - - expect(mockCleanup).not.toHaveBeenCalled(); - }); - - it('prevents duplicate cleanup calls', async () => { - const onCleanup = vi.fn().mockResolvedValue(undefined); - createMcpServer({ - name: 'test', - version: '1.0.0', - onCleanup, - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - - sigintHandler?.(); - sigintHandler?.(); - await flushPromises(); - - expect(onCleanup).toHaveBeenCalledTimes(1); - }); - - it('logs cleanup message', async () => { - const customLogger = vi.fn(); - createMcpServer({ - name: 'test', - version: '1.0.0', - logger: customLogger, - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - - sigintHandler?.(); - await flushPromises(); - - expect(customLogger).toHaveBeenCalledWith( - 'Received SIGINT, cleaning up...', - ); - 
}); - - it('logs cleanup errors', async () => { - const customLogger = vi.fn(); - const onCleanup = vi.fn().mockRejectedValue(new Error('Cleanup failed')); - createMcpServer({ - name: 'test', - version: '1.0.0', - onCleanup, - logger: customLogger, - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - - sigintHandler?.(); - await flushPromises(); - - expect(customLogger).toHaveBeenCalledWith( - expect.stringContaining('Cleanup error:'), - ); - }); - - it('exits with code 0 after cleanup', async () => { - createMcpServer({ - name: 'test', - version: '1.0.0', - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - - try { - sigintHandler?.(); - await flushPromises(); - } catch (e) { - expect((e as Error).message).toBe('process.exit(0)'); - } - - expect(processExitSpy).toHaveBeenCalledWith(0); - }); - - it('handles signal error gracefully', async () => { - const customLogger = vi.fn(); - const onCleanup = vi.fn().mockImplementation(() => { - throw new Error('Sync error'); - }); - createMcpServer({ - name: 'test', - version: '1.0.0', - onCleanup, - logger: customLogger, - }); - - const sigintHandler = signalHandlers.get('SIGINT'); - - sigintHandler?.(); - await flushPromises(); - - expect(customLogger).toHaveBeenCalledWith( - expect.stringContaining('Cleanup error:'), - ); - }); - }); - - describe('tool registry', () => { - it('sets tool registry with handlers', () => { - createMcpServer({ name: 'test', version: '1.0.0' }); - - expect(batchModule.setToolRegistry).toHaveBeenCalledWith( - mockToolHandlers, - ); - }); - }); - - describe('createToolErrorResponse helper', () => { - it('formats error with sessionId from session manager', async () => { - vi.mocked(sessionManagerModule.hasSessionManager).mockReturnValue(true); - vi.mocked(sessionManagerModule.getSessionManager).mockReturnValue({ - getSessionId: vi.fn().mockReturnValue('my-session'), - cleanup: vi.fn(), - } as unknown as ReturnType< - typeof sessionManagerModule.getSessionManager - >); - - 
createMcpServer({ name: 'test', version: '1.0.0' }); - const callToolHandler = mockSetRequestHandler.mock.calls[1][1]; - - const result = await callToolHandler({ - params: { name: 'mm_invalid', arguments: {} }, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.meta.sessionId).toBe('my-session'); - expect(responseText.meta.timestamp).toBeDefined(); - expect(responseText.meta.durationMs).toBeGreaterThanOrEqual(0); - }); - - it('includes error details when provided', async () => { - vi.mocked(definitionsModule.safeValidateToolInput).mockReturnValueOnce({ - success: false, - error: 'validation error', - }); - - createMcpServer({ name: 'test', version: '1.0.0' }); - const callToolHandler = mockSetRequestHandler.mock.calls[1][1]; - - const result = await callToolHandler({ - params: { name: 'mm_click', arguments: { invalid: 'arg' } }, - }); - - const responseText = JSON.parse( - (result as { content: [{ text: string }] }).content[0].text, - ); - expect(responseText.error.details).toStrictEqual({ - providedArgs: { invalid: 'arg' }, - }); - }); - }); -}); diff --git a/src/mcp-server/server.ts b/src/mcp-server/server.ts deleted file mode 100644 index 1c3411c..0000000 --- a/src/mcp-server/server.ts +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env node -/* eslint-disable @typescript-eslint/explicit-function-return-type */ -import { Server } from '@modelcontextprotocol/sdk/server/index.js'; -import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { - CallToolRequestSchema, - ListToolsRequestSchema, -} from '@modelcontextprotocol/sdk/types.js'; - -import { getSessionManager, hasSessionManager } from './session-manager.js'; -import { setToolRegistry } from './tools/batch.js'; -import { - getToolDefinitions, - getToolHandler, - safeValidateToolInput, - buildToolHandlersRecord, - TOOL_PREFIX, -} from './tools/definitions.js'; -import type { ToolDefinition } from 
'./tools/definitions.js'; -import { ErrorCodes } from './types'; -import { createErrorResponse } from './utils'; - -export type McpServerConfig = { - name: string; - version: string; - onCleanup?: () => Promise; - logger?: (message: string) => void; -}; - -/** - * Create a standardized error response for tool execution failures. - * - * @param code The error code from ErrorCodes enum - * @param message Human-readable error message - * @param details Optional error details object - * @param startTime Timestamp when the operation started - * @returns MCP-formatted error response object - */ -function createToolErrorResponse( - code: (typeof ErrorCodes)[keyof typeof ErrorCodes], - message: string, - details: Record | undefined, - startTime: number, -) { - const sessionId = hasSessionManager() - ? getSessionManager().getSessionId() - : undefined; - - const response = createErrorResponse( - code, - message, - details, - sessionId, - startTime, - ); - - return { - content: [ - { - type: 'text' as const, - text: JSON.stringify(response), - }, - ], - isError: true, - }; -} - -export type McpServer = { - start(): Promise; - stop(): Promise; - getServer(): Server; - getToolDefinitions(): ToolDefinition[]; - getToolPrefix(): string; -}; - -/** - * Create and configure an MCP server instance. 
- * - * @param config Server configuration including name, version, and optional cleanup handler - * @returns McpServer instance with start/stop methods and tool definitions - */ -export function createMcpServer(config: McpServerConfig): McpServer { - const { name, version, onCleanup, logger = console.error } = config; - - const toolDefinitions = getToolDefinitions(); - const toolHandlers = buildToolHandlersRecord(); - - setToolRegistry(toolHandlers); - - const validToolNames = new Set(toolDefinitions.map((tool) => tool.name)); - - const server = new Server({ name, version }, { capabilities: { tools: {} } }); - - let isCleaningUp = false; - - server.setRequestHandler(ListToolsRequestSchema, async () => ({ - tools: toolDefinitions, - })); - - server.setRequestHandler(CallToolRequestSchema, async (request, extra) => { - const { name: toolName, arguments: args } = request.params; - const startTime = Date.now(); - const signal = extra?.signal; - - if (!validToolNames.has(toolName)) { - return createToolErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - `Unknown tool: ${toolName}`, - undefined, - startTime, - ); - } - - const validation = safeValidateToolInput(toolName, args); - if (!validation.success) { - return createToolErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - `Invalid input: ${validation.error}`, - { providedArgs: args }, - startTime, - ); - } - - const handler = getToolHandler(toolName); - - if (!handler) { - return createToolErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - `No handler registered for tool: ${toolName}`, - undefined, - startTime, - ); - } - - const response = await handler(validation.data as Record, { - signal, - }); - - return { - content: [ - { - type: 'text' as const, - text: JSON.stringify(response), - }, - ], - isError: !response.ok, - }; - }); - - /** - * Handle process signals (SIGINT, SIGTERM) and perform cleanup. 
- * - * @param signal The signal name received (e.g., 'SIGINT', 'SIGTERM') - */ - const handleSignal = async (signal: string) => { - if (isCleaningUp) { - return; - } - isCleaningUp = true; - - logger(`Received ${signal}, cleaning up...`); - - try { - if (onCleanup) { - await onCleanup(); - } - - if (hasSessionManager()) { - await getSessionManager().cleanup(); - } - } catch (error) { - logger(`Cleanup error: ${JSON.stringify(error)}`); - } - - process.exit(0); - }; - - process.on('SIGINT', () => { - handleSignal('SIGINT').catch((error) => logger(`SIGINT error: ${error}`)); - }); - process.on('SIGTERM', () => { - handleSignal('SIGTERM').catch((error) => logger(`SIGTERM error: ${error}`)); - }); - - let transport: StdioServerTransport | undefined; - - return { - /** - * Start the MCP server and connect to stdio transport. - * - * @returns Promise that resolves when server is running - */ - async start() { - transport = new StdioServerTransport(); - await server.connect(transport); - logger(`${name} MCP Server v${version} running on stdio`); - }, - - /** - * Stop the MCP server and close the transport. - * - * @returns Promise that resolves when server is stopped - */ - async stop() { - if (transport) { - await server.close(); - } - }, - - /** - * Get the underlying MCP Server instance. - * - * @returns The MCP Server instance - */ - getServer() { - return server; - }, - - /** - * Get all available tool definitions. - * - * @returns Array of tool definitions - */ - getToolDefinitions() { - return toolDefinitions; - }, - - /** - * Get the tool name prefix (e.g., 'mm_'). 
- * - * @returns The tool prefix string - */ - getToolPrefix() { - return TOOL_PREFIX; - }, - }; -} diff --git a/src/mcp-server/session-manager.test.ts b/src/mcp-server/session-manager.test.ts deleted file mode 100644 index b41b7ca..0000000 --- a/src/mcp-server/session-manager.test.ts +++ /dev/null @@ -1,105 +0,0 @@ -import { describe, it, expect, beforeEach } from 'vitest'; - -import { - setSessionManager, - getSessionManager, - hasSessionManager, -} from './session-manager.js'; -import type { ISessionManager } from './session-manager.js'; -import { createMockSessionManager } from './test-utils/mock-factories.js'; - -describe('session-manager', () => { - beforeEach(() => { - setSessionManager(undefined as unknown as ISessionManager); - }); - - describe('setSessionManager', () => { - it('sets the session manager instance', () => { - const mockManager = createMockSessionManager(); - setSessionManager(mockManager); - - expect(hasSessionManager()).toBe(true); - }); - - it('replaces the existing session manager', () => { - const mockManager1 = createMockSessionManager(); - const mockManager2 = createMockSessionManager(); - - setSessionManager(mockManager1); - setSessionManager(mockManager2); - - expect(getSessionManager()).toBe(mockManager2); - }); - }); - - describe('getSessionManager', () => { - it('returns the session manager when set', () => { - const mockManager = createMockSessionManager(); - setSessionManager(mockManager); - - expect(getSessionManager()).toBe(mockManager); - }); - - it('throws error when session manager is not set', () => { - expect(() => getSessionManager()).toThrowError( - 'Session manager not initialized. 
Call setSessionManager() first.', - ); - }); - }); - - describe('hasSessionManager', () => { - it('returns false when no session manager is set', () => { - expect(hasSessionManager()).toBe(false); - }); - - it('returns true when session manager is set', () => { - const mockManager = createMockSessionManager(); - setSessionManager(mockManager); - - expect(hasSessionManager()).toBe(true); - }); - }); - - describe('ISessionManager interface compliance', () => { - let manager: ISessionManager; - - beforeEach(() => { - manager = createMockSessionManager(); - setSessionManager(manager); - }); - - it('can call hasActiveSession', () => { - const result = getSessionManager().hasActiveSession(); - expect(typeof result).toBe('boolean'); - }); - - it('can call getSessionId', () => { - const result = getSessionManager().getSessionId(); - expect(result).toBeUndefined(); - }); - - it('can call launch', async () => { - const result = await getSessionManager().launch({}); - expect(result.sessionId).toBe('test-session-123'); - }); - - it('can call cleanup', async () => { - const result = await getSessionManager().cleanup(); - expect(result).toBe(true); - }); - - it('can call screenshot', async () => { - const result = await getSessionManager().screenshot({ name: 'test' }); - expect(result.path).toBeDefined(); - }); - - it('can access capability methods', () => { - expect(getSessionManager().getBuildCapability()).toBeUndefined(); - expect(getSessionManager().getFixtureCapability()).toBeUndefined(); - expect(getSessionManager().getChainCapability()).toBeUndefined(); - expect( - getSessionManager().getContractSeedingCapability(), - ).toBeUndefined(); - }); - }); -}); diff --git a/src/mcp-server/test-utils/flush-promises.ts b/src/mcp-server/test-utils/flush-promises.ts deleted file mode 100644 index eb3403f..0000000 --- a/src/mcp-server/test-utils/flush-promises.ts +++ /dev/null @@ -1,8 +0,0 @@ -const scheduler = - typeof setImmediate === 'function' ? 
setImmediate : setTimeout; - -export async function flushPromises() { - return new Promise((resolve) => { - scheduler(resolve, 0); - }); -} diff --git a/src/mcp-server/tools/batch.test.ts b/src/mcp-server/tools/batch.test.ts deleted file mode 100644 index 2a84d2b..0000000 --- a/src/mcp-server/tools/batch.test.ts +++ /dev/null @@ -1,428 +0,0 @@ -import { describe, it, expect, beforeEach, vi } from 'vitest'; - -import { - setToolRegistry, - getToolRegistry, - hasToolRegistry, - setToolValidator, - getToolValidator, - handleRunSteps, -} from './batch.js'; -import type { ToolRegistry, ToolHandler, ToolValidator } from './batch.js'; -import { setSessionManager } from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils/mock-factories.js'; - -/** - * Clears the tool validator by resetting it to undefined. - */ -function clearToolValidator(): void { - setToolValidator((() => ({ success: true })) as ToolValidator); - setToolValidator(undefined as unknown as ToolValidator); -} - -describe('batch', () => { - beforeEach(() => { - setToolRegistry({}); - clearToolValidator(); - }); - - describe('setToolRegistry / getToolRegistry', () => { - it('sets and gets tool registry', () => { - const mockHandler: ToolHandler = vi.fn().mockResolvedValue({ ok: true }); - const registry: ToolRegistry = { - mm_click: mockHandler, - }; - - setToolRegistry(registry); - - expect(getToolRegistry()).toBe(registry); - expect(getToolRegistry().mm_click).toBe(mockHandler); - }); - - it('replaces existing registry', () => { - const registry1: ToolRegistry = { tool1: vi.fn() }; - const registry2: ToolRegistry = { tool2: vi.fn() }; - - setToolRegistry(registry1); - setToolRegistry(registry2); - - expect(getToolRegistry()).toBe(registry2); - expect(getToolRegistry().tool1).toBeUndefined(); - expect(getToolRegistry().tool2).toBeDefined(); - }); - }); - - describe('hasToolRegistry', () => { - it('returns false for empty registry', () => { - setToolRegistry({}); - 
expect(hasToolRegistry()).toBe(false); - }); - - it('returns true when registry has handlers', () => { - setToolRegistry({ mm_click: vi.fn() }); - expect(hasToolRegistry()).toBe(true); - }); - }); - - describe('setToolValidator / getToolValidator', () => { - it('sets and gets tool validator', () => { - const validator: ToolValidator = vi - .fn() - .mockReturnValue({ success: true }); - setToolValidator(validator); - - expect(getToolValidator()).toBe(validator); - }); - - it('returns undefined when not set', () => { - expect(getToolValidator()).toBeUndefined(); - }); - }); - - describe('handleRunSteps', () => { - beforeEach(() => { - setSessionManager(createMockSessionManager({ hasActive: true })); - }); - - it('returns error when no active session', async () => { - setSessionManager(createMockSessionManager({ hasActive: false })); - - const result = await handleRunSteps({ - steps: [{ tool: 'mm_click', args: { testId: 'button' } }], - }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error?.code).toBe('MM_NO_ACTIVE_SESSION'); - } - }); - - it('executes steps in sequence', async () => { - const executionOrder: string[] = []; - const clickHandler = vi.fn().mockImplementation(async () => { - executionOrder.push('click'); - return { ok: true, result: 'clicked' }; - }); - const typeHandler = vi.fn().mockImplementation(async () => { - executionOrder.push('type'); - return { ok: true, result: 'typed' }; - }); - - setToolRegistry({ - mm_click: clickHandler, - mm_type: typeHandler, - }); - - const result = await handleRunSteps({ - steps: [ - { tool: 'mm_click', args: { testId: 'button' } }, - { tool: 'mm_type', args: { testId: 'input', text: 'hello' } }, - ], - }); - - expect(result.ok).toBe(true); - expect(executionOrder).toStrictEqual(['click', 'type']); - if (result.ok) { - expect(result.result?.summary.total).toBe(2); - expect(result.result?.summary.succeeded).toBe(2); - expect(result.result?.summary.failed).toBe(0); - } - }); - - it('returns 
error for unknown tool', async () => { - setToolRegistry({}); - - const result = await handleRunSteps({ - steps: [{ tool: 'unknown_tool', args: {} }], - }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result?.steps[0].ok).toBe(false); - expect(result.result?.steps[0].error?.code).toBe('MM_UNKNOWN_TOOL'); - expect(result.result?.summary.failed).toBe(1); - } - }); - - it('stops on error when stopOnError is true', async () => { - const clickHandler = vi.fn().mockResolvedValue({ - ok: false, - error: { code: 'ERR', message: 'fail' }, - }); - const typeHandler = vi.fn().mockResolvedValue({ ok: true }); - - setToolRegistry({ - mm_click: clickHandler, - mm_type: typeHandler, - }); - - const result = await handleRunSteps({ - steps: [ - { tool: 'mm_click', args: {} }, - { tool: 'mm_type', args: { text: 'hello' } }, - ], - stopOnError: true, - }); - - expect(clickHandler).toHaveBeenCalledTimes(1); - expect(typeHandler).not.toHaveBeenCalled(); - if (result.ok) { - expect(result.result?.steps.length).toBe(1); - } - }); - - it('continues on error when stopOnError is false', async () => { - const clickHandler = vi.fn().mockResolvedValue({ - ok: false, - error: { code: 'ERR', message: 'fail' }, - }); - const typeHandler = vi - .fn() - .mockResolvedValue({ ok: true, result: 'typed' }); - - setToolRegistry({ - mm_click: clickHandler, - mm_type: typeHandler, - }); - - const result = await handleRunSteps({ - steps: [ - { tool: 'mm_click', args: {} }, - { tool: 'mm_type', args: { text: 'hello' } }, - ], - stopOnError: false, - }); - - expect(clickHandler).toHaveBeenCalledTimes(1); - expect(typeHandler).toHaveBeenCalledTimes(1); - if (result.ok) { - expect(result.result?.steps.length).toBe(2); - expect(result.result?.summary.failed).toBe(1); - expect(result.result?.summary.succeeded).toBe(1); - } - }); - - it('uses tool validator when set', async () => { - const clickHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ mm_click: clickHandler 
}); - - const validator: ToolValidator = vi.fn().mockReturnValue({ - success: false, - error: { message: 'Invalid testId' }, - }); - setToolValidator(validator); - - const result = await handleRunSteps({ - steps: [{ tool: 'mm_click', args: { testId: '' } }], - }); - - expect(validator).toHaveBeenCalledWith('mm_click', { testId: '' }); - expect(clickHandler).not.toHaveBeenCalled(); - if (result.ok) { - expect(result.result?.steps[0].ok).toBe(false); - expect(result.result?.steps[0].error?.code).toBe('MM_INVALID_INPUT'); - } - }); - - it('passes validation when validator returns success', async () => { - const clickHandler = vi - .fn() - .mockResolvedValue({ ok: true, result: 'clicked' }); - setToolRegistry({ mm_click: clickHandler }); - - const validator: ToolValidator = vi - .fn() - .mockReturnValue({ success: true }); - setToolValidator(validator); - - const result = await handleRunSteps({ - steps: [{ tool: 'mm_click', args: { testId: 'btn' } }], - }); - - expect(clickHandler).toHaveBeenCalled(); - if (result.ok) { - expect(result.result?.steps[0].ok).toBe(true); - } - }); - - it('handles exceptions from tool handlers', async () => { - const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout')); - setToolRegistry({ mm_click: clickHandler }); - - const result = await handleRunSteps({ - steps: [{ tool: 'mm_click', args: {} }], - }); - - if (result.ok) { - expect(result.result?.steps[0].ok).toBe(false); - expect(result.result?.steps[0].error?.code).toBe('MM_INTERNAL_ERROR'); - expect(result.result?.steps[0].error?.message).toContain('Timeout'); - } - }); - - it('includes duration in step results', async () => { - vi.useFakeTimers(); - const clickHandler = vi.fn().mockImplementation(async () => { - await new Promise((resolve) => setTimeout(resolve, 100)); - return { ok: true }; - }); - setToolRegistry({ mm_click: clickHandler }); - - const resultPromise = handleRunSteps({ - steps: [{ tool: 'mm_click', args: {} }], - }); - - await 
vi.advanceTimersByTimeAsync(100); - const result = await resultPromise; - - if (result.ok) { - expect(result.result?.steps[0].meta?.durationMs).toBe(100); - } - - vi.useRealTimers(); - }); - - it('includes total duration in summary', async () => { - const clickHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ mm_click: clickHandler }); - - const result = await handleRunSteps({ - steps: [ - { tool: 'mm_click', args: {} }, - { tool: 'mm_click', args: {} }, - ], - }); - - if (result.ok) { - expect(result.result?.summary.durationMs).toBeGreaterThanOrEqual(0); - } - }); - - it('defaults args to empty object when not provided', async () => { - const clickHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ mm_click: clickHandler }); - - await handleRunSteps({ - steps: [{ tool: 'mm_click' }], - }); - - expect(clickHandler).toHaveBeenCalledWith({}, expect.any(Object)); - }); - - it('maps includeObservations "none" to observation policy', async () => { - const clickHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ mm_click: clickHandler }); - - const result = await handleRunSteps({ - steps: [{ tool: 'mm_click', args: {} }], - includeObservations: 'none', - }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result?.steps[0].ok).toBe(true); - } - expect(clickHandler).toHaveBeenCalledWith( - {}, - expect.objectContaining({ observationPolicy: 'none' }), - ); - }); - - it('maps includeObservations "failures" to observation policy', async () => { - const clickHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ mm_click: clickHandler }); - - const result = await handleRunSteps({ - steps: [{ tool: 'mm_click', args: {} }], - includeObservations: 'failures', - }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result?.steps[0].ok).toBe(true); - } - expect(clickHandler).toHaveBeenCalledWith( - {}, - expect.objectContaining({ observationPolicy: 'failures' }), - ); 
- }); - - it('stops execution when stopOnError=true and handler not found', async () => { - const typeHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ mm_type: typeHandler }); - - const result = await handleRunSteps({ - steps: [ - { tool: 'unknown_tool', args: {} }, - { tool: 'mm_type', args: { text: 'hello' } }, - ], - stopOnError: true, - }); - - expect(typeHandler).not.toHaveBeenCalled(); - if (result.ok) { - expect(result.result?.steps.length).toBe(1); - expect(result.result?.steps[0].ok).toBe(false); - expect(result.result?.steps[0].error?.code).toBe('MM_UNKNOWN_TOOL'); - } - }); - - it('stops execution when stopOnError=true and validation fails', async () => { - const clickHandler = vi.fn().mockResolvedValue({ ok: true }); - const typeHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ - mm_click: clickHandler, - mm_type: typeHandler, - }); - - const validator: ToolValidator = vi.fn().mockImplementation((tool) => { - if (tool === 'mm_click') { - return { success: false, error: { message: 'Invalid testId' } }; - } - return { success: true }; - }); - setToolValidator(validator); - - const result = await handleRunSteps({ - steps: [ - { tool: 'mm_click', args: { testId: '' } }, - { tool: 'mm_type', args: { text: 'hello' } }, - ], - stopOnError: true, - }); - - expect(clickHandler).not.toHaveBeenCalled(); - expect(typeHandler).not.toHaveBeenCalled(); - if (result.ok) { - expect(result.result?.steps.length).toBe(1); - expect(result.result?.steps[0].ok).toBe(false); - expect(result.result?.steps[0].error?.code).toBe('MM_INVALID_INPUT'); - } - }); - - it('stops execution when stopOnError=true and handler throws error', async () => { - const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout')); - const typeHandler = vi.fn().mockResolvedValue({ ok: true }); - setToolRegistry({ - mm_click: clickHandler, - mm_type: typeHandler, - }); - - const result = await handleRunSteps({ - steps: [ - { tool: 'mm_click', args: {} }, - 
{ tool: 'mm_type', args: { text: 'hello' } }, - ], - stopOnError: true, - }); - - expect(clickHandler).toHaveBeenCalledTimes(1); - expect(typeHandler).not.toHaveBeenCalled(); - if (result.ok) { - expect(result.result?.steps.length).toBe(1); - expect(result.result?.steps[0].ok).toBe(false); - expect(result.result?.steps[0].error?.code).toBe('MM_INTERNAL_ERROR'); - } - }); - }); -}); diff --git a/src/mcp-server/tools/batch.ts b/src/mcp-server/tools/batch.ts deleted file mode 100644 index b723a69..0000000 --- a/src/mcp-server/tools/batch.ts +++ /dev/null @@ -1,286 +0,0 @@ -import { getSessionManager } from '../session-manager.js'; -import type { - McpResponse, - HandlerOptions, - RunStepsInput, - RunStepsResult, - StepResult, - ObservationPolicyOverride, -} from '../types'; -import { ErrorCodes } from '../types'; -import { - createSuccessResponse, - createErrorResponse, - extractErrorMessage, -} from '../utils'; - -/** - * Maps includeObservations string to observation policy override. - * - * @param value The observation policy string ('none', 'failures', 'all', or undefined) - * @returns The mapped observation policy override - */ -function mapIncludeObservationsToPolicy( - value: 'none' | 'failures' | 'all' | undefined, -): ObservationPolicyOverride { - switch (value) { - case 'none': - return 'none'; - case 'failures': - return 'failures'; - case 'all': - default: - return 'default'; - } -} - -/** - * Handler function type for executing MCP tools. - * - * @param input Tool arguments as key-value pairs - * @param options Optional handler configuration - * @returns Promise resolving to MCP response with tool result - */ -export type ToolHandler = ( - input: Record, - options?: HandlerOptions, -) => Promise>; - -/** - * Registry mapping tool names to their handler functions. - * - * @returns Record of tool name to handler function mappings - */ -export type ToolRegistry = Record; - -/** - * Validator function type for validating tool arguments before execution. 
- * - * @param tool Tool name being validated - * @param args Tool arguments to validate - * @returns Validation result with success status and optional error details - */ -export type ToolValidator = ( - tool: string, - args: Record, -) => - | { - /** - * Validation succeeded - */ - success: true; - } - | { - /** - * Validation failed - */ - success: false; - /** - * Error details when validation fails - */ - error: { - /** - * Error message describing validation failure - */ - message: string; - }; - }; - -let _toolRegistry: ToolRegistry = {}; -let _toolValidator: ToolValidator | undefined; - -/** - * Sets the global tool registry for batch execution. - * - * @param registry Tool registry mapping names to handlers - */ -export function setToolRegistry(registry: ToolRegistry): void { - _toolRegistry = registry; -} - -/** - * Gets the current global tool registry. - * - * @returns The current tool registry - */ -export function getToolRegistry(): ToolRegistry { - return _toolRegistry; -} - -/** - * Checks if the tool registry has any registered handlers. - * - * @returns True if registry contains handlers, false otherwise - */ -export function hasToolRegistry(): boolean { - return Object.keys(_toolRegistry).length > 0; -} - -/** - * Sets the global tool validator for batch execution. - * - * @param validator Validator function to validate tool arguments - */ -export function setToolValidator(validator: ToolValidator): void { - _toolValidator = validator; -} - -/** - * Gets the current global tool validator. - * - * @returns The current tool validator or undefined if not set - */ -export function getToolValidator(): ToolValidator | undefined { - return _toolValidator; -} - -/** - * Executes multiple tool steps in sequence with optional validation and error handling. 
- * - * @param input Steps to execute with optional stop-on-error and observation policy - * @param options Optional handler configuration and observation policy override - * @returns Promise resolving to MCP response with step results and summary - */ -export async function handleRunSteps( - input: RunStepsInput, - options?: HandlerOptions, -): Promise> { - const batchStartTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - - if (!sessionManager.hasActiveSession()) { - return createErrorResponse( - ErrorCodes.MM_NO_ACTIVE_SESSION, - 'No active session. Call launch first.', - { input }, - undefined, - batchStartTime, - ); - } - - const { steps: stepInputs, stopOnError = false, includeObservations } = input; - const observationPolicy = mapIncludeObservationsToPolicy(includeObservations); - const stepResults: StepResult[] = []; - let succeeded = 0; - let failed = 0; - - const toolHandlers = getToolRegistry(); - const toolValidator = getToolValidator(); - - for (const stepInput of stepInputs) { - const stepStartTime = Date.now(); - const { tool, args = {} } = stepInput; - - const handler = toolHandlers[tool]; - if (!handler) { - const result: StepResult = { - tool, - ok: false, - error: { - code: ErrorCodes.MM_UNKNOWN_TOOL, - message: `Unknown tool: ${tool}`, - }, - meta: { - durationMs: Date.now() - stepStartTime, - timestamp: new Date().toISOString(), - }, - }; - stepResults.push(result); - failed += 1; - - if (stopOnError) { - break; - } - continue; - } - - if (toolValidator) { - const validation = toolValidator(tool, args); - if (!validation.success) { - const result: StepResult = { - tool, - ok: false, - error: { - code: ErrorCodes.MM_INVALID_INPUT, - message: `Invalid input: ${validation.error.message}`, - }, - meta: { - durationMs: Date.now() - stepStartTime, - timestamp: new Date().toISOString(), - }, - }; - stepResults.push(result); - failed += 1; - - if (stopOnError) { - break; - } - continue; 
- } - } - - try { - const stepOptions: HandlerOptions = { - ...options, - observationPolicy, - }; - const response = await handler(args, stepOptions); - - const result: StepResult = { - tool, - ok: response.ok, - result: response.ok ? response.result : undefined, - error: response.ok ? undefined : response.error, - meta: { - durationMs: Date.now() - stepStartTime, - timestamp: new Date().toISOString(), - }, - }; - - stepResults.push(result); - - if (response.ok) { - succeeded += 1; - } else { - failed += 1; - if (stopOnError) { - break; - } - } - } catch (error) { - const message = extractErrorMessage(error); - const result: StepResult = { - tool, - ok: false, - error: { - code: ErrorCodes.MM_INTERNAL_ERROR, - message: `Unexpected error: ${message}`, - }, - meta: { - durationMs: Date.now() - stepStartTime, - timestamp: new Date().toISOString(), - }, - }; - stepResults.push(result); - failed += 1; - - if (stopOnError) { - break; - } - } - } - - const batchResult: RunStepsResult = { - steps: stepResults, - summary: { - ok: failed === 0, - total: stepResults.length, - succeeded, - failed, - durationMs: Date.now() - batchStartTime, - }, - }; - - return createSuccessResponse(batchResult, sessionId, batchStartTime); -} diff --git a/src/mcp-server/tools/build.test.ts b/src/mcp-server/tools/build.test.ts deleted file mode 100644 index 4e3721c..0000000 --- a/src/mcp-server/tools/build.test.ts +++ /dev/null @@ -1,211 +0,0 @@ -/** - * Unit tests for build tool handler. - * - * Tests the build handler with BuildCapability and legacy build paths, - * including success/failure scenarios and build options handling. 
- */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { handleBuild } from './build.js'; -import type { BuildCapability } from '../../capabilities/types.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils'; -import { ErrorCodes } from '../types/errors.js'; - -describe('build', () => { - let mockSessionManager: ReturnType; - let mockBuildCapability: BuildCapability; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - sessionMetadata: { - schemaVersion: 1, - sessionId: 'test-session-123', - createdAt: new Date().toISOString(), - flowTags: [], - tags: [], - launch: { stateMode: 'default' }, - }, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - // Mock knowledge store to prevent "not initialized" errors - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - - mockBuildCapability = { - build: vi.fn(), - getExtensionPath: vi.fn(), - isBuilt: vi.fn(), - }; - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('handleBuild with capability', () => { - it('builds extension successfully with default buildType', async () => { - // Arrange - const mockedBuild = vi - .spyOn(mockBuildCapability, 'build') - .mockResolvedValue({ - success: true, - extensionPath: '/path/to/dist/chrome', - durationMs: 5000, - }); 
- - // Act - const result = await handleBuild( - {}, - { buildCapability: mockBuildCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.buildType).toBe('build:test'); - expect(result.result.extensionPathResolved).toBe( - '/path/to/dist/chrome', - ); - } - expect(mockedBuild).toHaveBeenCalledWith({ - buildType: undefined, - force: undefined, - }); - }); - - it('builds extension with explicit buildType', async () => { - // Arrange - const mockedBuild = vi - .spyOn(mockBuildCapability, 'build') - .mockResolvedValue({ - success: true, - extensionPath: '/path/to/dist/chrome', - durationMs: 5000, - }); - - // Act - const result = await handleBuild( - { buildType: 'build:test' }, - { buildCapability: mockBuildCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.buildType).toBe('build:test'); - expect(result.result.extensionPathResolved).toBe( - '/path/to/dist/chrome', - ); - } - expect(mockedBuild).toHaveBeenCalledWith({ - buildType: 'build:test', - force: undefined, - }); - }); - - it('builds extension with force flag', async () => { - // Arrange - const mockedBuild = vi - .spyOn(mockBuildCapability, 'build') - .mockResolvedValue({ - success: true, - extensionPath: '/path/to/dist/chrome', - durationMs: 5000, - }); - - // Act - const result = await handleBuild( - { force: true }, - { buildCapability: mockBuildCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - expect(mockedBuild).toHaveBeenCalledWith({ - buildType: undefined, - force: true, - }); - }); - - it('returns error when build fails with error message', async () => { - // Arrange - vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({ - success: false, - extensionPath: '', - durationMs: 1000, - error: 'Compilation error', - }); - - // Act - const result = await handleBuild( - {}, - { buildCapability: mockBuildCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if 
(!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED); - expect(result.error.message).toContain('Compilation error'); - } - }); - - it('returns error when build fails without error message', async () => { - // Arrange - vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({ - success: false, - extensionPath: '', - durationMs: 1000, - }); - - // Act - const result = await handleBuild( - {}, - { buildCapability: mockBuildCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED); - expect(result.error.message).toContain('Unknown error'); - } - }); - - it('returns error when build throws exception', async () => { - // Arrange - vi.spyOn(mockBuildCapability, 'build').mockRejectedValue( - new Error('Build process crashed'), - ); - - // Act - const result = await handleBuild( - {}, - { buildCapability: mockBuildCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED); - expect(result.error.message).toContain('Build process crashed'); - } - }); - }); -}); diff --git a/src/mcp-server/tools/build.ts b/src/mcp-server/tools/build.ts deleted file mode 100644 index 7d422cf..0000000 --- a/src/mcp-server/tools/build.ts +++ /dev/null @@ -1,100 +0,0 @@ -import type { BuildCapability } from '../../capabilities/types.js'; -import type { - BuildInput, - BuildToolResult, - McpResponse, - HandlerOptions, -} from '../types'; -import { ErrorCodes } from '../types'; -import { - createSuccessResponse, - createErrorResponse, - extractErrorMessage, -} from '../utils'; - -/** - * Options for the build tool handler. 
- * - * @returns Handler options with optional build capability - */ -export type BuildToolOptions = HandlerOptions & { - /** - * Optional build capability for extension building - */ - buildCapability?: BuildCapability; -}; - -/** - * Handles the build tool request to build the extension. - * - * @param input Build configuration with optional buildType and force flag - * @param options Optional handler options with build capability - * @returns Promise resolving to MCP response with build result - */ -export async function handleBuild( - input: BuildInput, - options?: BuildToolOptions, -): Promise> { - const startTime = Date.now(); - - if (options?.buildCapability) { - return handleBuildWithCapability(input, options.buildCapability, startTime); - } - - return createErrorResponse( - ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE, - 'BuildCapability not available. The mm_build tool requires either: (1) running in e2e mode with the MetaMask extension wrapper, or (2) running directly in the metamask-extension repository with dependencies installed.', - { capability: 'BuildCapability' }, - undefined, - startTime, - ); -} - -/** - * Handles build using the provided build capability. - * - * @param input Build configuration with optional buildType and force flag - * @param buildCapability Build capability instance for executing the build - * @param startTime Timestamp when the operation started - * @returns Promise resolving to MCP response with build result - */ -async function handleBuildWithCapability( - input: BuildInput, - buildCapability: BuildCapability, - startTime: number, -): Promise> { - try { - const result = await buildCapability.build({ - buildType: input.buildType, - force: input.force, - }); - - if (!result.success) { - return createErrorResponse( - ErrorCodes.MM_BUILD_FAILED, - `Build failed: ${result.error ?? 'Unknown error'}`, - { buildType: input.buildType ?? 
'build:test' }, - undefined, - startTime, - ); - } - - return createSuccessResponse( - { - buildType: input.buildType ?? 'build:test', - extensionPathResolved: result.extensionPath, - }, - undefined, - startTime, - ); - } catch (error) { - const message = extractErrorMessage(error); - return createErrorResponse( - ErrorCodes.MM_BUILD_FAILED, - `Build failed: ${message}`, - { buildType: input.buildType ?? 'build:test' }, - undefined, - startTime, - ); - } -} diff --git a/src/mcp-server/tools/cleanup.test.ts b/src/mcp-server/tools/cleanup.test.ts deleted file mode 100644 index 7a8193f..0000000 --- a/src/mcp-server/tools/cleanup.test.ts +++ /dev/null @@ -1,161 +0,0 @@ -/** - * Unit tests for cleanup tool handler. - * - * Tests session cleanup with various session states. - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -import { handleCleanup } from './cleanup.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils/mock-factories.js'; - -describe('handleCleanup', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - it('cleans up active session successfully', async () => { - const mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - }); - vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleCleanup({}); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.cleanedUp).toBe(true); - expect(result.meta.sessionId).toBe('test-session-123'); - } - expect(mockSessionManager.cleanup).toHaveBeenCalled(); - }); - - it('returns false when no session to clean up', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(false); - vi.spyOn(sessionManagerModule, 
'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleCleanup({}); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.cleanedUp).toBe(false); - } - }); - - it('uses provided sessionId in input', async () => { - const mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'current-session', - }); - vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleCleanup({ sessionId: 'custom-session-456' }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.meta.sessionId).toBe('custom-session-456'); - } - }); - - it('falls back to current sessionId when input sessionId is undefined', async () => { - const mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-789', - }); - vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleCleanup({}); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.meta.sessionId).toBe('test-session-789'); - } - }); - - it('handles cleanup when sessionId is undefined', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(false); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleCleanup({}); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.cleanedUp).toBe(false); - } - }); - - it('includes timestamp in response', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: true }); - vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true); - vi.spyOn(sessionManagerModule, 
'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleCleanup({}); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.meta.timestamp).toBeDefined(); - expect(typeof result.meta.timestamp).toBe('string'); - expect(new Date(result.meta.timestamp).getTime()).toBeGreaterThan(0); - } - }); - - it('includes durationMs in response', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: true }); - vi.spyOn(mockSessionManager, 'cleanup').mockResolvedValue(true); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleCleanup({}); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.meta.durationMs).toBeGreaterThanOrEqual(0); - expect(typeof result.meta.durationMs).toBe('number'); - } - }); - - it('cleans up multiple times without error', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: true }); - vi.spyOn(mockSessionManager, 'cleanup') - .mockResolvedValueOnce(true) - .mockResolvedValueOnce(false); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result1 = await handleCleanup({}); - const result2 = await handleCleanup({}); - - expect(result1.ok).toBe(true); - if (result1.ok) { - expect(result1.result.cleanedUp).toBe(true); - } - - expect(result2.ok).toBe(true); - if (result2.ok) { - expect(result2.result.cleanedUp).toBe(false); - } - - expect(mockSessionManager.cleanup).toHaveBeenCalledTimes(2); - }); -}); diff --git a/src/mcp-server/tools/cleanup.ts b/src/mcp-server/tools/cleanup.ts deleted file mode 100644 index 9b6f266..0000000 --- a/src/mcp-server/tools/cleanup.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { getSessionManager } from '../session-manager.js'; -import type { - CleanupInput, - CleanupResult, - McpResponse, - HandlerOptions, -} from '../types'; -import { createSuccessResponse } from '../utils'; - 
-/** - * Handles the cleanup tool request to stop browser and services. - * - * @param input - The cleanup input parameters. - * @param _options - Handler options (unused). - * @returns Response indicating if cleanup was performed. - */ -export async function handleCleanup( - input: CleanupInput, - _options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = input.sessionId ?? sessionManager.getSessionId(); - - const cleanedUp = await sessionManager.cleanup(); - - return createSuccessResponse( - { cleanedUp }, - sessionId, - startTime, - ); -} diff --git a/src/mcp-server/tools/clipboard.test.ts b/src/mcp-server/tools/clipboard.test.ts deleted file mode 100644 index d52c3f6..0000000 --- a/src/mcp-server/tools/clipboard.test.ts +++ /dev/null @@ -1,325 +0,0 @@ -/** - * Unit tests for clipboard tool handler. - * - * Tests CDP-based clipboard operations (read/write) with proper mocking. - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -import { handleClipboard } from './clipboard.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils/mock-factories.js'; -import { ErrorCodes } from '../types/errors.js'; - -describe('handleClipboard', () => { - const mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - sessionMetadata: { - schemaVersion: 1, - sessionId: 'test-session-123', - createdAt: new Date().toISOString(), - flowTags: [], - tags: [], - launch: { stateMode: 'default' }, - }, - }); - - beforeEach(() => { - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: 
vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - }); - - describe('write action', () => { - it('writes text to clipboard via CDP', async () => { - const mockCdpSession = { - send: vi.fn().mockResolvedValue(undefined), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ - action: 'write', - text: 'test content', - }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.action).toBe('write'); - expect(result.result.success).toBe(true); - expect(result.result.text).toBe('test content'); - } - expect(mockCdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', { - expression: 'navigator.clipboard.writeText("test content")', - awaitPromise: true, - userGesture: true, - }); - expect(mockCdpSession.detach).toHaveBeenCalled(); - }); - - it('detaches CDP session even if write fails', async () => { - const mockCdpSession = { - send: vi.fn().mockRejectedValue(new Error('Write failed')), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ action: 'write', text: 'test' }); - - expect(result.ok).toBe(false); - expect(mockCdpSession.detach).toHaveBeenCalled(); - }); - }); - - describe('read action', () => { - it('reads text from clipboard via CDP', async () => { - const mockCdpSession = { - send: 
vi.fn().mockResolvedValue({ - result: { value: 'clipboard content' }, - }), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ action: 'read' }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.action).toBe('read'); - expect(result.result.success).toBe(true); - expect(result.result.text).toBe('clipboard content'); - } - expect(mockCdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', { - expression: 'navigator.clipboard.readText()', - awaitPromise: true, - userGesture: true, - }); - }); - - it('uses description when value is missing', async () => { - const mockCdpSession = { - send: vi.fn().mockResolvedValue({ - result: { description: 'fallback content' }, - }), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ action: 'read' }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.text).toBe('fallback content'); - } - }); - - it('returns empty string when result is missing', async () => { - const mockCdpSession = { - send: vi.fn().mockResolvedValue({ result: {} }), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ action: 'read' }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.text).toBe(''); - } - }); - }); - - describe('error classification', () => { - 
it('classifies permission denied errors', async () => { - const mockCdpSession = { - send: vi.fn().mockRejectedValue(new Error('permissions denied')), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ action: 'read' }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe('MM_CLIPBOARD_PERMISSION_DENIED'); - expect(result.error.message).toContain('Clipboard permission denied'); - } - }); - - it('classifies LavaMoat blocked errors', async () => { - const mockCdpSession = { - send: vi.fn().mockRejectedValue(new Error('LavaMoat policy violation')), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ action: 'write', text: 'test' }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe('MM_CLIPBOARD_LAVAMOAT_BLOCKED'); - expect(result.error.message).toContain( - 'Clipboard blocked by LavaMoat policy', - ); - } - }); - - it('classifies generic clipboard errors', async () => { - const mockCdpSession = { - send: vi.fn().mockRejectedValue(new Error('Unknown error')), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - - const result = await handleClipboard({ action: 'read' }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe('MM_CLIPBOARD_FAILED'); - 
expect(result.error.message).toContain('Clipboard operation failed'); - } - }); - }); - - describe('input sanitization', () => { - it('sanitizes write input for recording', async () => { - const mockCdpSession = { - send: vi.fn().mockResolvedValue(undefined), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - const recordStepSpy = vi.fn().mockResolvedValue(undefined); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: recordStepSpy, - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - - await handleClipboard({ action: 'write', text: 'sensitive password' }); - - expect(recordStepSpy).toHaveBeenCalled(); - const recordedInput = recordStepSpy.mock.calls[0][0].input; - expect(recordedInput).toStrictEqual({ - action: 'write', - textLength: 18, - }); - expect(recordedInput).not.toHaveProperty('text'); - }); - - it('sanitizes read input for recording', async () => { - const mockCdpSession = { - send: vi - .fn() - .mockResolvedValue({ result: { value: 'clipboard content' } }), - detach: vi.fn().mockResolvedValue(undefined), - }; - const mockPage = { - context: vi.fn().mockReturnValue({ - newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), - }), - }; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - const recordStepSpy = vi.fn().mockResolvedValue(undefined); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: recordStepSpy, - getLastSteps: 
vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - - await handleClipboard({ action: 'read' }); - - expect(recordStepSpy).toHaveBeenCalled(); - const recordedInput = recordStepSpy.mock.calls[0][0].input; - expect(recordedInput).toStrictEqual({ - action: 'read', - textLength: 0, - }); - }); - }); - - describe('session validation', () => { - it('returns error when no active session', async () => { - const noSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - noSessionManager, - ); - - const result = await handleClipboard({ action: 'read' }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); -}); diff --git a/src/mcp-server/tools/clipboard.ts b/src/mcp-server/tools/clipboard.ts deleted file mode 100644 index a9e4fd6..0000000 --- a/src/mcp-server/tools/clipboard.ts +++ /dev/null @@ -1,117 +0,0 @@ -import { runTool } from './run-tool.js'; -import type { - ClipboardInput, - ClipboardResult, - McpResponse, - HandlerOptions, -} from '../types'; - -/** - * Clipboard handler using CDP (Chrome DevTools Protocol) to bypass LavaMoat restrictions. - * - * Why CDP instead of page.evaluate()? 
- * - page.evaluate() runs JavaScript inside the page context, which is wrapped by LavaMoat - * - LavaMoat restricts access to navigator.clipboard in the page context - * - CDP's Runtime.evaluate runs at the browser/DevTools level, bypassing LavaMoat - * - userGesture: true simulates a user gesture to satisfy clipboard security requirements - * - * @param input Clipboard action ('read' or 'write') with optional text content - * @param options Optional handler configuration - * @returns Promise resolving to MCP response with clipboard operation result - */ -export async function handleClipboard( - input: ClipboardInput, - options?: HandlerOptions, -): Promise> { - return runTool({ - toolName: 'mm_clipboard', - input, - options, - - /** - * Executes the clipboard operation using CDP. - * - * @param context Tool execution context with page and session info - * @returns Promise resolving to clipboard operation result - */ - execute: async (context) => { - const { page } = context; - const cdpSession = await page.context().newCDPSession(page); - - try { - if (input.action === 'write') { - await cdpSession.send('Runtime.evaluate', { - expression: `navigator.clipboard.writeText(${JSON.stringify(input.text)})`, - awaitPromise: true, - userGesture: true, - }); - - return { - action: 'write', - success: true, - text: input.text, - }; - } - - const result = await cdpSession.send('Runtime.evaluate', { - expression: `navigator.clipboard.readText()`, - awaitPromise: true, - userGesture: true, - }); - - const clipboardText = - result.result?.value ?? result.result?.description ?? ''; - - return { - action: 'read', - success: true, - text: clipboardText as string, - }; - } finally { - // eslint-disable-next-line no-empty-function - await cdpSession.detach().catch(() => {}); - } - }, - - /** - * Classifies clipboard errors into specific error codes. 
- * - * @param error The error to classify - * @returns Error classification with code and message - */ - classifyError: (error) => { - const message = error instanceof Error ? error.message : String(error); - - if (message.includes('permissions') || message.includes('denied')) { - return { - code: 'MM_CLIPBOARD_PERMISSION_DENIED', - message: `Clipboard permission denied: ${message}`, - }; - } - - if (message.includes('LavaMoat') || message.includes('policy')) { - return { - code: 'MM_CLIPBOARD_LAVAMOAT_BLOCKED', - message: `Clipboard blocked by LavaMoat policy: ${message}`, - }; - } - - return { - code: 'MM_CLIPBOARD_FAILED', - message: `Clipboard operation failed: ${message}`, - }; - }, - - /** - * Sanitizes clipboard input for recording (removes sensitive text). - * - * @param inp The clipboard input to sanitize - * @returns Sanitized input with text length instead of actual text - */ - sanitizeInputForRecording: (inp) => ({ - action: inp.action, - // Don't record the actual text content for privacy (could be SRP, passwords, etc.) - textLength: inp.text?.length ?? 0, - }), - }); -} diff --git a/src/mcp-server/tools/context.test.ts b/src/mcp-server/tools/context.test.ts deleted file mode 100644 index d4c384c..0000000 --- a/src/mcp-server/tools/context.test.ts +++ /dev/null @@ -1,221 +0,0 @@ -/** - * Unit tests for context tool handlers. - * - * Tests context switching (e2e/prod) and context info retrieval. 
- */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -import { handleSetContext, handleGetContext } from './context.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils/mock-factories.js'; -import { ErrorCodes } from '../types/errors.js'; - -describe('handleSetContext', () => { - beforeEach(() => { - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - }); - - it('switches context from e2e to prod', async () => { - const mockSessionManager = createMockSessionManager({ - environmentMode: 'e2e', - }); - vi.spyOn(mockSessionManager, 'setContext'); - // eslint-disable-next-line vitest/prefer-spy-on - mockSessionManager.getContextInfo = vi.fn().mockReturnValue({ - currentContext: 'prod', - hasActiveSession: false, - sessionId: null, - capabilities: { available: ['build', 'fixture'] }, - canSwitchContext: true, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleSetContext({ context: 'prod' }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.previousContext).toBe('e2e'); - expect(result.result.newContext).toBe('prod'); - expect(result.result.availableCapabilities).toStrictEqual([ - 'build', - 'fixture', - ]); - } - expect(mockSessionManager.setContext).toHaveBeenCalledWith( - 'prod', - undefined, - ); - }); - - it('forwards context options to session 
manager', async () => { - const mockSessionManager = createMockSessionManager({ - environmentMode: 'e2e', - }); - vi.spyOn(mockSessionManager, 'setContext'); - // eslint-disable-next-line vitest/prefer-spy-on - mockSessionManager.getContextInfo = vi.fn().mockReturnValue({ - currentContext: 'e2e', - hasActiveSession: false, - sessionId: null, - capabilities: { available: ['build', 'fixture', 'chain'] }, - canSwitchContext: true, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const contextOptions = { - mockServer: { - enabled: true, - port: 18000, - }, - }; - - const result = await handleSetContext({ - context: 'e2e', - options: contextOptions, - }); - - expect(result.ok).toBe(true); - expect(mockSessionManager.setContext).toHaveBeenCalledWith( - 'e2e', - contextOptions, - ); - }); - - it('switches context from prod to e2e', async () => { - const mockSessionManager = createMockSessionManager({ - environmentMode: 'prod', - }); - vi.spyOn(mockSessionManager, 'setContext'); - // eslint-disable-next-line vitest/prefer-spy-on - mockSessionManager.getContextInfo = vi.fn().mockReturnValue({ - currentContext: 'e2e', - hasActiveSession: false, - sessionId: null, - capabilities: { available: ['build', 'fixture', 'chain', 'seeding'] }, - canSwitchContext: true, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleSetContext({ context: 'e2e' }); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.previousContext).toBe('prod'); - expect(result.result.newContext).toBe('e2e'); - expect(result.result.availableCapabilities).toStrictEqual([ - 'build', - 'fixture', - 'chain', - 'seeding', - ]); - } - }); - - it('classifies context switch blocked errors', async () => { - const mockSessionManager = createMockSessionManager({ - environmentMode: 'e2e', - }); - vi.spyOn(mockSessionManager, 'setContext').mockImplementation(() 
=> { - throw new Error(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED); - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleSetContext({ context: 'prod' }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED); - expect(result.error.message).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED); - } - }); - - it('classifies generic context errors', async () => { - const mockSessionManager = createMockSessionManager({ - environmentMode: 'e2e', - }); - vi.spyOn(mockSessionManager, 'setContext').mockImplementation(() => { - throw new Error('Unknown error'); - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleSetContext({ context: 'prod' }); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_SET_CONTEXT_FAILED); - expect(result.error.message).toContain('Context switch failed'); - } - }); -}); - -describe('handleGetContext', () => { - beforeEach(() => { - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - }); - - it('returns context info when getContextInfo is available', async () => { - const mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - environmentMode: 'e2e', - }); - // eslint-disable-next-line vitest/prefer-spy-on - mockSessionManager.getContextInfo = vi.fn().mockReturnValue({ - 
currentContext: 'e2e', - hasActiveSession: true, - sessionId: 'test-session-123', - capabilities: { available: ['build', 'fixture', 'chain'] }, - canSwitchContext: false, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const result = await handleGetContext({}); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.currentContext).toBe('e2e'); - expect(result.result.hasActiveSession).toBe(true); - expect(result.result.sessionId).toBe('test-session-123'); - expect(result.result.capabilities.available).toStrictEqual([ - 'build', - 'fixture', - 'chain', - ]); - expect(result.result.canSwitchContext).toBe(false); - } - }); -}); diff --git a/src/mcp-server/tools/context.ts b/src/mcp-server/tools/context.ts deleted file mode 100644 index cbf2b52..0000000 --- a/src/mcp-server/tools/context.ts +++ /dev/null @@ -1,94 +0,0 @@ -import { runTool } from './run-tool.js'; -import { getSessionManager } from '../session-manager.js'; -import { classifyContextError } from './error-classification.js'; -import type { McpResponse, HandlerOptions } from '../types'; - -export type SetContextInput = { - context: 'e2e' | 'prod'; - options?: Record; -}; -export type SetContextResult = { - previousContext: 'e2e' | 'prod'; - newContext: 'e2e' | 'prod'; - availableCapabilities: string[]; -}; - -/** - * Handle setting the workflow context (e2e or prod). - * - * @param input The context input containing the desired context mode - * @param options Optional handler options for the operation - * @returns Promise resolving to the context change result with previous and new context - */ -export async function handleSetContext( - input: SetContextInput, - options?: HandlerOptions, -): Promise> { - return runTool({ - toolName: 'mm_set_context', - input, - options, - requiresSession: false, - observationPolicy: 'none', - - /** - * Execute the context switch operation. 
- * - * @returns The result containing previous context, new context, and available capabilities - */ - execute: async () => { - const sessionManager = getSessionManager(); - const previousContext = sessionManager.getEnvironmentMode(); - sessionManager.setContext(input.context, input.options); - const info = sessionManager.getContextInfo(); - - return { - previousContext, - newContext: input.context, - availableCapabilities: info.capabilities.available, - }; - }, - - classifyError: classifyContextError, - }); -} - -export type GetContextResult = { - currentContext: 'e2e' | 'prod'; - hasActiveSession: boolean; - sessionId: string | null; - capabilities: { - available: string[]; - }; - canSwitchContext: boolean; -}; - -/** - * Handle getting the current workflow context and capabilities. - * - * @param input Empty input object for this operation - * @param options Optional handler options for the operation - * @returns Promise resolving to the current context, session state, and available capabilities - */ -export async function handleGetContext( - input: Record, - options?: HandlerOptions, -): Promise> { - return runTool, GetContextResult>({ - toolName: 'mm_get_context', - input, - options, - requiresSession: false, - observationPolicy: 'none', - - /** - * Execute the get context operation. 
- * - * @returns The result containing current context, session state, and capabilities - */ - execute: async () => { - const sessionManager = getSessionManager(); - return sessionManager.getContextInfo(); - }, - }); -} diff --git a/src/mcp-server/tools/definitions.test.ts b/src/mcp-server/tools/definitions.test.ts deleted file mode 100644 index 2d1634f..0000000 --- a/src/mcp-server/tools/definitions.test.ts +++ /dev/null @@ -1,759 +0,0 @@ -/* eslint-disable vitest/require-to-throw-message */ -import { describe, it, expect, beforeAll } from 'vitest'; - -import { - getToolDefinitions, - TOOL_PREFIX, - extractBaseName, - validateToolInput, - safeValidateToolInput, - getToolNames, - getPrefixedToolNames, - buildToolHandlersRecord, - getToolHandler, - hasToolHandler, -} from './definitions.js'; -import type { ToolDefinition } from './definitions.js'; - -describe('tool-definitions', () => { - describe('getToolDefinitions', () => { - it('creates tool definitions with mm_ prefix', () => { - const definitions = getToolDefinitions(); - - for (const def of definitions) { - expect(def.name.startsWith(`${TOOL_PREFIX}_`)).toBe(true); - } - }); - - it('creates 27 tool definitions', () => { - const definitions = getToolDefinitions(); - expect(definitions).toHaveLength(27); - }); - - it('includes all expected tools', () => { - const definitions = getToolDefinitions(); - const toolNames = definitions.map((d) => d.name); - - const expectedTools = [ - 'mm_build', - 'mm_launch', - 'mm_cleanup', - 'mm_get_state', - 'mm_navigate', - 'mm_wait_for_notification', - 'mm_switch_to_tab', - 'mm_close_tab', - 'mm_list_testids', - 'mm_accessibility_snapshot', - 'mm_describe_screen', - 'mm_screenshot', - 'mm_click', - 'mm_type', - 'mm_wait_for', - 'mm_knowledge_last', - 'mm_knowledge_search', - 'mm_knowledge_summarize', - 'mm_knowledge_sessions', - 'mm_seed_contract', - 'mm_seed_contracts', - 'mm_get_contract_address', - 'mm_list_contracts', - 'mm_run_steps', - 'mm_set_context', - 
'mm_get_context', - ]; - - for (const expected of expectedTools) { - expect(toolNames).toContain(expected); - } - }); - - it('all tools have valid input schema', () => { - const definitions = getToolDefinitions(); - - for (const def of definitions) { - expect(def.inputSchema).toBeDefined(); - const hasObjectType = def.inputSchema.type === 'object'; - const hasAllOf = Array.isArray(def.inputSchema.allOf); - expect(hasObjectType || hasAllOf).toBe(true); - } - }); - - it('all tools have descriptions', () => { - const definitions = getToolDefinitions(); - - for (const def of definitions) { - expect(def.description).toBeDefined(); - expect(typeof def.description).toBe('string'); - expect(def.description.length).toBeGreaterThan(10); - } - }); - - describe('specific tool schemas', () => { - let definitions: ToolDefinition[]; - - beforeAll(() => { - definitions = getToolDefinitions(); - }); - - /** - * Find a tool definition by its name. - * - * @param name The tool name to search for - * @returns The matching tool definition or undefined if not found - */ - const findTool = (name: string): ToolDefinition | undefined => - definitions.find((d) => d.name === name); - - /** - * Schema object structure for testing. - */ - type SchemaObj = { - /** - * Object properties mapping - */ - properties?: Record; - /** - * Required property names - */ - required?: string[]; - /** - * Array of schemas to combine - */ - allOf?: SchemaObj[]; - }; - - /** - * Get all properties from a schema, including those in allOf. - * - * @param schema The schema object to extract properties from - * @returns Combined properties from schema and allOf items - */ - const getAllProperties = (schema: SchemaObj): Record => { - if (schema.properties) { - return schema.properties; - } - if (schema.allOf) { - return schema.allOf.reduce( - (acc, item) => ({ ...acc, ...getAllProperties(item) }), - {}, - ); - } - return {}; - }; - - /** - * Get all required properties from a schema, including those in allOf. 
- * - * @param schema The schema object to extract required properties from - * @returns Combined required property names from schema and allOf items - */ - const getAllRequired = (schema: SchemaObj): string[] => { - const required: string[] = []; - if (schema.required) { - required.push(...schema.required); - } - if (schema.allOf) { - for (const item of schema.allOf) { - required.push(...getAllRequired(item)); - } - } - return required; - }; - - it('mm_click has correct schema', () => { - const tool = findTool('mm_click'); - expect(tool).toBeDefined(); - - const props = getAllProperties(tool?.inputSchema as SchemaObj); - expect(props.a11yRef).toBeDefined(); - expect(props.testId).toBeDefined(); - expect(props.selector).toBeDefined(); - expect(props.timeoutMs).toBeDefined(); - }); - - it('mm_type has required text property', () => { - const tool = findTool('mm_type'); - expect(tool).toBeDefined(); - - const required = getAllRequired(tool?.inputSchema as SchemaObj); - expect(required).toContain('text'); - }); - - it('mm_navigate has required screen property', () => { - const tool = findTool('mm_navigate'); - expect(tool).toBeDefined(); - - const required = getAllRequired(tool?.inputSchema as SchemaObj); - expect(required).toContain('screen'); - - const props = getAllProperties( - tool?.inputSchema as SchemaObj, - ) as Record< - string, - { - /** - * - */ - enum?: string[]; - } - >; - expect(props.screen?.enum).toStrictEqual([ - 'home', - 'settings', - 'notification', - 'url', - ]); - }); - - it('mm_screenshot has required name property', () => { - const tool = findTool('mm_screenshot'); - expect(tool).toBeDefined(); - - const required = getAllRequired(tool?.inputSchema as SchemaObj); - expect(required).toContain('name'); - }); - - it('mm_run_steps has required steps property', () => { - const tool = findTool('mm_run_steps'); - expect(tool).toBeDefined(); - - const required = getAllRequired(tool?.inputSchema as SchemaObj); - expect(required).toContain('steps'); - - 
const props = getAllProperties( - tool?.inputSchema as SchemaObj, - ) as Record< - string, - { - /** - * The JSON schema type - */ - type?: string; - /** - * Array item schema definition - */ - items?: { - /** - * The item type - */ - type: string; - }; - } - >; - expect(props.steps?.type).toBe('array'); - }); - - it('mm_seed_contract has required contractName property', () => { - const tool = findTool('mm_seed_contract'); - expect(tool).toBeDefined(); - - const required = getAllRequired(tool?.inputSchema as SchemaObj); - expect(required).toContain('contractName'); - - const props = getAllProperties( - tool?.inputSchema as SchemaObj, - ) as Record< - string, - { - /** - * - */ - enum?: string[]; - } - >; - expect(props.contractName?.enum).toContain('hst'); - expect(props.contractName?.enum).toContain('nfts'); - }); - - it('mm_launch has stateMode enum', () => { - const tool = findTool('mm_launch'); - expect(tool).toBeDefined(); - - const props = getAllProperties( - tool?.inputSchema as SchemaObj, - ) as Record< - string, - { - /** - * - */ - enum?: string[]; - } - >; - expect(props.stateMode?.enum).toStrictEqual([ - 'default', - 'onboarding', - 'custom', - ]); - }); - - it('mm_switch_to_tab has role enum', () => { - const tool = findTool('mm_switch_to_tab'); - expect(tool).toBeDefined(); - - const props = getAllProperties( - tool?.inputSchema as SchemaObj, - ) as Record< - string, - { - /** - * - */ - enum?: string[]; - } - >; - expect(props.role?.enum).toStrictEqual([ - 'extension', - 'notification', - 'dapp', - 'other', - ]); - }); - - it('mm_knowledge_search has required query property', () => { - const tool = findTool('mm_knowledge_search'); - expect(tool).toBeDefined(); - - const required = getAllRequired(tool?.inputSchema as SchemaObj); - expect(required).toContain('query'); - }); - }); - - it('uses mm_ prefix in descriptions', () => { - const definitions = getToolDefinitions(); - - const a11yTool = definitions.find( - (d) => d.name === 
'mm_accessibility_snapshot', - ); - expect(a11yTool?.description).toContain('mm_click'); - expect(a11yTool?.description).toContain('mm_type'); - }); - - it('all schemas have additionalProperties set to false', () => { - const definitions = getToolDefinitions(); - - for (const def of definitions) { - const schema = def.inputSchema; - if (schema.type === 'object') { - expect(schema.additionalProperties).toBe(false); - } - } - }); - - it('all schemas have properties defined', () => { - const definitions = getToolDefinitions(); - - for (const def of definitions) { - const schema = def.inputSchema; - expect( - schema.properties ?? schema.allOf ?? schema.anyOf ?? schema.oneOf, - ).toBeDefined(); - } - }); - - it('all required properties are defined in properties', () => { - const definitions = getToolDefinitions(); - - for (const def of definitions) { - const schema = def.inputSchema; - if (Array.isArray(schema.required) && schema.properties) { - const props = schema.properties as Record; - for (const req of schema.required) { - expect(props[req as string]).toBeDefined(); - } - } - } - }); - - it('processes anyOf arrays in nested properties', () => { - const definitions = getToolDefinitions(); - - // Find tools with anyOf in properties (e.g., knowledge tools with scope) - // This exercises the anyOf handling in removeDefaultsFromRequired (lines 397-400) - let foundAnyOf = false; - for (const def of definitions) { - const schema = def.inputSchema; - if (schema.properties && typeof schema.properties === 'object') { - const props = schema.properties as Record; - for (const [, prop] of Object.entries(props)) { - if (prop && typeof prop === 'object') { - const propObj = prop as Record; - if ('anyOf' in propObj) { - foundAnyOf = true; - expect(Array.isArray(propObj.anyOf)).toBe(true); - // Verify anyOf items are properly processed - const anyOfArray = propObj.anyOf as unknown[]; - for (const item of anyOfArray) { - expect(item).toBeDefined(); - } - } - } - } - } - } - // 
Verify we found at least one tool with anyOf (knowledge tools) - expect(foundAnyOf).toBe(true); - }); - - it('processes nested object properties recursively', () => { - const definitions = getToolDefinitions(); - - // Verify that nested object properties are processed correctly - // This exercises the recursive property handling in removeDefaultsFromRequired (lines 418-421) - for (const def of definitions) { - const schema = def.inputSchema; - if (schema.properties && typeof schema.properties === 'object') { - const props = schema.properties as Record; - for (const [, value] of Object.entries(props)) { - if (value && typeof value === 'object') { - const propObj = value as Record; - // Nested objects should have proper structure - expect(propObj).toBeDefined(); - // If it has properties, they should be objects - if ('properties' in propObj && propObj.properties) { - expect(typeof propObj.properties).toBe('object'); - } - } - } - } - } - }); - - it('sets additionalProperties false on top-level object schemas', () => { - const definitions = getToolDefinitions(); - - // Verify that additionalProperties is set to false on top-level schemas - // This exercises the additionalProperties assignment in zodSchemaToJsonSchema (line 503) - for (const def of definitions) { - const schema = def.inputSchema; - // All tool schemas should be objects with additionalProperties: false - if (schema.type === 'object') { - expect(schema.additionalProperties).toBe(false); - } - } - }); - }); - - describe('extractBaseName', () => { - it('removes mm_ prefix from tool name', () => { - const result = extractBaseName('mm_click'); - - expect(result).toBe('click'); - }); - - it('returns original name when no prefix', () => { - const result = extractBaseName('click'); - - expect(result).toBe('click'); - }); - - it('handles multiple underscores correctly', () => { - const result = extractBaseName('mm_wait_for_notification'); - - expect(result).toBe('wait_for_notification'); - }); - - it('handles 
empty string', () => { - const result = extractBaseName(''); - - expect(result).toBe(''); - }); - - it('handles string with only prefix', () => { - const result = extractBaseName('mm_'); - - expect(result).toBe(''); - }); - - it('handles all tool names from getToolNames', () => { - const baseNames = getToolNames(); - - for (const baseName of baseNames) { - const prefixed = `${TOOL_PREFIX}_${baseName}`; - const extracted = extractBaseName(prefixed); - expect(extracted).toBe(baseName); - } - }); - }); - - describe('validateToolInput', () => { - it('parses valid input for known tool', () => { - const result = validateToolInput('mm_click', { testId: 'button' }); - - expect(result).toBeDefined(); - expect(result).toHaveProperty('testId', 'button'); - }); - - it('throws error for unknown tool', () => { - expect(() => { - validateToolInput('mm_unknown_tool', {}); - }).toThrowError('Unknown tool: mm_unknown_tool'); - }); - - it('throws error for invalid input schema', () => { - expect(() => { - validateToolInput('mm_type', { text: 123 }); - }).toThrowError(); - }); - - it('accepts input without prefix', () => { - const result = validateToolInput('click', { testId: 'button' }); - - expect(result).toBeDefined(); - expect(result).toHaveProperty('testId', 'button'); - }); - - it('parses input with multiple valid properties', () => { - const result = validateToolInput('mm_click', { - testId: 'button', - timeoutMs: 5000, - }); - - expect(result).toBeDefined(); - expect(result).toHaveProperty('testId', 'button'); - expect(result).toHaveProperty('timeoutMs', 5000); - }); - }); - - describe('safeValidateToolInput', () => { - it('returns success with data for valid input', () => { - const result = safeValidateToolInput('mm_click', { testId: 'button' }); - - expect(result.success).toBe(true); - expect(result).toHaveProperty('data'); - if (result.success) { - expect(result.data).toHaveProperty('testId', 'button'); - } - }); - - it('returns failure for unknown tool', () => { - const 
result = safeValidateToolInput('mm_unknown_tool', {}); - - expect(result.success).toBe(false); - expect(result).toHaveProperty('error'); - if (!result.success) { - expect(result.error).toContain('Unknown tool'); - } - }); - - it('returns failure for invalid input', () => { - const result = safeValidateToolInput('mm_type', { text: 123 }); - - expect(result.success).toBe(false); - expect(result).toHaveProperty('error'); - }); - - it('accepts input without prefix', () => { - const result = safeValidateToolInput('click', { testId: 'button' }); - - expect(result.success).toBe(true); - if (result.success) { - expect(result.data).toHaveProperty('testId', 'button'); - } - }); - - it('returns success with multiple valid properties', () => { - const result = safeValidateToolInput('mm_click', { - testId: 'button', - timeoutMs: 5000, - }); - - expect(result.success).toBe(true); - if (result.success) { - expect(result.data).toHaveProperty('testId', 'button'); - expect(result.data).toHaveProperty('timeoutMs', 5000); - } - }); - - it('includes error message with path for validation errors', () => { - const result = safeValidateToolInput('mm_type', { text: 123 }); - - expect(result.success).toBe(false); - if (!result.success) { - expect(result.error).toMatch(/text/u); - } - }); - }); - - describe('getToolNames', () => { - it('returns array of tool base names', () => { - const names = getToolNames(); - - expect(Array.isArray(names)).toBe(true); - expect(names.length).toBeGreaterThan(0); - }); - - it('includes expected tool names without prefix', () => { - const names = getToolNames(); - - expect(names).toContain('click'); - expect(names).toContain('type'); - expect(names).toContain('launch'); - expect(names).toContain('cleanup'); - }); - - it('does not include mm_ prefix in names', () => { - const names = getToolNames(); - - for (const name of names) { - expect(name).not.toMatch(/^mm_/u); - } - }); - - it('returns 27 tool names', () => { - const names = getToolNames(); - - 
expect(names).toHaveLength(27); - }); - - it('all names are strings', () => { - const names = getToolNames(); - - for (const name of names) { - expect(typeof name).toBe('string'); - expect(name.length).toBeGreaterThan(0); - } - }); - }); - - describe('getPrefixedToolNames', () => { - it('returns array of prefixed tool names', () => { - const names = getPrefixedToolNames(); - - expect(Array.isArray(names)).toBe(true); - expect(names.length).toBeGreaterThan(0); - }); - - it('includes mm_ prefix in all names', () => { - const names = getPrefixedToolNames(); - - for (const name of names) { - expect(name).toMatch(/^mm_/u); - } - }); - - it('includes expected prefixed tool names', () => { - const names = getPrefixedToolNames(); - - expect(names).toContain('mm_click'); - expect(names).toContain('mm_type'); - expect(names).toContain('mm_launch'); - expect(names).toContain('mm_cleanup'); - }); - - it('has same count as getToolNames', () => { - const baseNames = getToolNames(); - const prefixedNames = getPrefixedToolNames(); - - expect(prefixedNames).toHaveLength(baseNames.length); - }); - }); - - describe('buildToolHandlersRecord', () => { - it('returns record mapping prefixed names to handlers', () => { - const handlers = buildToolHandlersRecord(); - - expect(typeof handlers).toBe('object'); - expect(handlers).not.toBeNull(); - }); - - it('includes all prefixed tool names as keys', () => { - const handlers = buildToolHandlersRecord(); - const prefixedNames = getPrefixedToolNames(); - - for (const name of prefixedNames) { - expect(handlers).toHaveProperty(name); - } - }); - - it('all values are functions', () => { - const handlers = buildToolHandlersRecord(); - - for (const [, handler] of Object.entries(handlers)) { - expect(typeof handler).toBe('function'); - } - }); - - it('has same count as getPrefixedToolNames', () => { - const handlers = buildToolHandlersRecord(); - const prefixedNames = getPrefixedToolNames(); - - 
expect(Object.keys(handlers)).toHaveLength(prefixedNames.length); - }); - - it('does not include base names without prefix', () => { - const handlers = buildToolHandlersRecord(); - const baseNames = getToolNames(); - - for (const baseName of baseNames) { - expect(handlers).not.toHaveProperty(baseName); - } - }); - }); - - describe('getToolHandler', () => { - it('returns handler for prefixed tool name', () => { - const handler = getToolHandler('mm_click'); - - expect(handler).toBeDefined(); - expect(typeof handler).toBe('function'); - }); - - it('returns handler for base tool name', () => { - const handler = getToolHandler('click'); - - expect(handler).toBeDefined(); - expect(typeof handler).toBe('function'); - }); - - it('returns undefined for unknown tool', () => { - const handler = getToolHandler('mm_unknown_tool'); - - expect(handler).toBeUndefined(); - }); - - it('returns same handler for prefixed and base names', () => { - const prefixedHandler = getToolHandler('mm_click'); - const baseHandler = getToolHandler('click'); - - expect(prefixedHandler).toBe(baseHandler); - }); - }); - - describe('hasToolHandler', () => { - it('returns true for existing prefixed tool', () => { - const exists = hasToolHandler('mm_click'); - - expect(exists).toBe(true); - }); - - it('returns true for existing base tool name', () => { - const exists = hasToolHandler('click'); - - expect(exists).toBe(true); - }); - - it('returns false for unknown tool', () => { - const exists = hasToolHandler('mm_unknown_tool'); - - expect(exists).toBe(false); - }); - - it('returns true for all prefixed tool names', () => { - const prefixedNames = getPrefixedToolNames(); - - for (const name of prefixedNames) { - expect(hasToolHandler(name)).toBe(true); - } - }); - - it('returns true for all base tool names', () => { - const baseNames = getToolNames(); - - for (const name of baseNames) { - expect(hasToolHandler(name)).toBe(true); - } - }); - }); -}); diff --git a/src/mcp-server/tools/definitions.ts 
b/src/mcp-server/tools/definitions.ts deleted file mode 100644 index f6e7fdc..0000000 --- a/src/mcp-server/tools/definitions.ts +++ /dev/null @@ -1,638 +0,0 @@ -import type { ZodType } from 'zod'; - -import { - buildInputSchema, - launchInputSchema, - cleanupInputSchema, - getStateInputSchema, - navigateInputSchema, - waitForNotificationInputSchema, - switchToTabInputSchema, - closeTabInputSchema, - listTestIdsInputSchema, - accessibilitySnapshotInputSchema, - describeScreenInputSchema, - screenshotInputSchema, - clickInputSchema, - typeInputSchema, - waitForInputSchema, - knowledgeLastInputSchema, - knowledgeSearchInputSchema, - knowledgeSummarizeInputSchema, - knowledgeSessionsInputSchema, - seedContractInputSchema, - seedContractsInputSchema, - getContractAddressInputSchema, - listDeployedContractsInputSchema, - runStepsInputSchema, - setContextInputSchema, - getContextInputSchema, - clipboardInputSchema, -} from '../schemas.js'; -import { getSessionManager } from '../session-manager.js'; -import { handleRunSteps } from './batch.js'; -import type { ToolHandler } from './batch.js'; -import type { BuildToolOptions } from './build.js'; -import { handleBuild } from './build.js'; -import { handleCleanup } from './cleanup.js'; -import { handleClipboard } from './clipboard.js'; -import { handleSetContext, handleGetContext } from './context.js'; -import { - handleListTestIds, - handleAccessibilitySnapshot, - handleDescribeScreen, -} from './discovery-tools.js'; -import { handleClick, handleType, handleWaitFor } from './interaction.js'; -import { - handleKnowledgeLast, - handleKnowledgeSearch, - handleKnowledgeSummarize, - handleKnowledgeSessions, -} from './knowledge.js'; -import { handleLaunch } from './launch.js'; -import { - handleNavigate, - handleWaitForNotification, - handleSwitchToTab, - handleCloseTab, -} from './navigation.js'; -import { handleScreenshot } from './screenshot.js'; -import { - handleSeedContract, - handleSeedContracts, - handleGetContractAddress, 
- handleListDeployedContracts, -} from './seeding.js'; -import type { SeedingToolOptions } from './seeding.js'; -import { handleGetState } from './state.js'; -import type { StateToolOptions } from './state.js'; -import type { - SeedContractInput, - SeedContractsInput, - GetContractAddressInput, - ListDeployedContractsInput, -} from '../types'; - -export const TOOL_PREFIX = 'mm'; - -export type ToolDefinition = { - name: string; - description: string; - inputSchema: Record; -}; - -type ZodSchema = ZodType & { toJSONSchema(): Record }; - -type ToolEntry = { - schema: ZodSchema; - description: string; - handler: ToolHandler; -}; - -/** - * Create a handler for the build tool that injects build capability. - * - * @returns A tool handler function for building the extension - */ -function createBuildHandler(): ToolHandler { - return async (input, options) => { - const sessionManager = getSessionManager(); - const buildOptions: BuildToolOptions = { - ...options, - buildCapability: sessionManager.getBuildCapability?.(), - }; - return handleBuild(input, buildOptions); - }; -} - -/** - * Create a handler for the state tool that injects state snapshot capability. - * - * @returns A tool handler function for getting extension state - */ -function createStateHandler(): ToolHandler { - return async (_, options) => { - const sessionManager = getSessionManager(); - const stateOptions: StateToolOptions = { - ...options, - stateSnapshotCapability: sessionManager.getStateSnapshotCapability?.(), - }; - return handleGetState(stateOptions); - }; -} - -/** - * Create a handler for the seed contract tool that injects seeding capability. 
- * - * @returns A tool handler function for deploying a single contract - */ -function createSeedContractHandler(): ToolHandler { - return async (input, options) => { - const sessionManager = getSessionManager(); - const seedingOptions: SeedingToolOptions = { - ...options, - seedingCapability: sessionManager.getContractSeedingCapability?.(), - }; - return handleSeedContract(input as SeedContractInput, seedingOptions); - }; -} - -/** - * Create a handler for the seed contracts tool that injects seeding capability. - * - * @returns A tool handler function for deploying multiple contracts - */ -function createSeedContractsHandler(): ToolHandler { - return async (input, options) => { - const sessionManager = getSessionManager(); - const seedingOptions: SeedingToolOptions = { - ...options, - seedingCapability: sessionManager.getContractSeedingCapability?.(), - }; - return handleSeedContracts(input as SeedContractsInput, seedingOptions); - }; -} - -/** - * Create a handler for the get contract address tool that injects seeding capability. - * - * @returns A tool handler function for retrieving a deployed contract address - */ -function createGetContractAddressHandler(): ToolHandler { - return async (input, options) => { - const sessionManager = getSessionManager(); - const seedingOptions: SeedingToolOptions = { - ...options, - seedingCapability: sessionManager.getContractSeedingCapability?.(), - }; - return handleGetContractAddress( - input as GetContractAddressInput, - seedingOptions, - ); - }; -} - -/** - * Create a handler for the list contracts tool that injects seeding capability. 
- * - * @returns A tool handler function for listing deployed contracts - */ -function createListDeployedContractsHandler(): ToolHandler { - return async (input, options) => { - const sessionManager = getSessionManager(); - const seedingOptions: SeedingToolOptions = { - ...options, - seedingCapability: sessionManager.getContractSeedingCapability?.(), - }; - return handleListDeployedContracts( - input as ListDeployedContractsInput, - seedingOptions, - ); - }; -} - -const tools: Record = { - build: { - schema: buildInputSchema, - description: `Build the extension using yarn build:test. Call before ${TOOL_PREFIX}_launch if extension is not built.`, - handler: createBuildHandler(), - }, - launch: { - schema: launchInputSchema, - description: - 'Launch extension in a headed Chrome browser with Playwright. Returns session info and initial state.', - handler: handleLaunch as ToolHandler, - }, - cleanup: { - schema: cleanupInputSchema, - description: - 'Stop the browser, Anvil, and all services. Always call when done.', - handler: handleCleanup as ToolHandler, - }, - get_state: { - schema: getStateInputSchema, - description: - 'Get current extension state including screen, URL, balance, network, and account address.', - handler: createStateHandler(), - }, - navigate: { - schema: navigateInputSchema, - description: 'Navigate to a specific screen in the extension.', - handler: handleNavigate as ToolHandler, - }, - wait_for_notification: { - schema: waitForNotificationInputSchema, - description: - 'Wait for notification popup to appear (e.g., after dapp interaction). Sets the notification page as the active page for subsequent interactions.', - handler: handleWaitForNotification as ToolHandler, - }, - switch_to_tab: { - schema: switchToTabInputSchema, - description: `Switch the active page to a different tracked tab. 
Use this to direct ${TOOL_PREFIX}_click, ${TOOL_PREFIX}_type, and other interaction tools to a specific page.`, - handler: handleSwitchToTab as ToolHandler, - }, - close_tab: { - schema: closeTabInputSchema, - description: - 'Close a specific tab by role or URL. Cannot close the extension home page. If closing the active tab, automatically switches to extension home.', - handler: handleCloseTab as ToolHandler, - }, - list_testids: { - schema: listTestIdsInputSchema, - description: - 'List all visible data-testid attributes on the current page. Use to discover available interaction targets.', - handler: handleListTestIds as ToolHandler, - }, - accessibility_snapshot: { - schema: accessibilitySnapshotInputSchema, - description: `Get trimmed accessibility tree with deterministic refs (e1, e2, ...). Use refs with ${TOOL_PREFIX}_click/${TOOL_PREFIX}_type.`, - handler: handleAccessibilitySnapshot as ToolHandler, - }, - describe_screen: { - schema: describeScreenInputSchema, - description: - 'Get comprehensive screen state: extension state + testIds + accessibility snapshot. Optional screenshot.', - handler: handleDescribeScreen as ToolHandler, - }, - screenshot: { - schema: screenshotInputSchema, - description: 'Take a screenshot and save to test-artifacts/screenshots/', - handler: handleScreenshot as ToolHandler, - }, - click: { - schema: clickInputSchema, - description: - 'Click an element. Specify exactly one of: a11yRef, testId, or selector.', - handler: handleClick as ToolHandler, - }, - type: { - schema: typeInputSchema, - description: - 'Type text into an element. Specify exactly one of: a11yRef, testId, or selector.', - handler: handleType as ToolHandler, - }, - wait_for: { - schema: waitForInputSchema, - description: - 'Wait for an element to become visible. 
Specify exactly one of: a11yRef, testId, or selector.', - handler: handleWaitFor as ToolHandler, - }, - knowledge_last: { - schema: knowledgeLastInputSchema, - description: - 'Get the last N step records from the knowledge store for the current session.', - handler: handleKnowledgeLast as ToolHandler, - }, - knowledge_search: { - schema: knowledgeSearchInputSchema, - description: - 'Search step records by tool name, screen, testId, or accessibility names. Default searches all sessions.', - handler: handleKnowledgeSearch as ToolHandler, - }, - knowledge_summarize: { - schema: knowledgeSummarizeInputSchema, - description: 'Generate a recipe-like summary of steps taken in a session.', - handler: handleKnowledgeSummarize as ToolHandler, - }, - knowledge_sessions: { - schema: knowledgeSessionsInputSchema, - description: - 'List recent sessions with metadata for cross-session knowledge retrieval.', - handler: handleKnowledgeSessions as ToolHandler, - }, - seed_contract: { - schema: seedContractInputSchema, - description: - 'Deploy a smart contract to the local Anvil node. Available: hst (ERC20 TST token), nfts (ERC721), erc1155, piggybank, failing (reverts), multisig, entrypoint (ERC-4337), simpleAccountFactory, verifyingPaymaster.', - handler: createSeedContractHandler(), - }, - seed_contracts: { - schema: seedContractsInputSchema, - description: 'Deploy multiple smart contracts in sequence.', - handler: createSeedContractsHandler(), - }, - get_contract_address: { - schema: getContractAddressInputSchema, - description: 'Get the deployed address of a smart contract.', - handler: createGetContractAddressHandler(), - }, - list_contracts: { - schema: listDeployedContractsInputSchema, - description: 'List all smart contracts deployed in this session.', - handler: createListDeployedContractsHandler(), - }, - run_steps: { - schema: runStepsInputSchema, - description: - 'Execute multiple tools in sequence. 
Reduces round trips for multi-step flows.', - handler: handleRunSteps as ToolHandler, - }, - set_context: { - schema: setContextInputSchema, - description: - 'Switch workflow context (e2e or prod). Cannot switch during active session.', - handler: handleSetContext as ToolHandler, - }, - get_context: { - schema: getContextInputSchema, - description: - 'Get current context, available capabilities, and whether context can be switched.', - handler: handleGetContext as ToolHandler, - }, - clipboard: { - schema: clipboardInputSchema, - description: - "Write text to or read text from the browser clipboard. Use action='write' with text parameter to write, or action='read' to read current clipboard content. Useful for pasting SRP or other data into components that have paste handlers.", - handler: handleClipboard as ToolHandler, - }, -}; - -/** - * Zod v4's toJSONSchema() marks properties with defaults as required. - * This is incorrect for MCP tool input schemas where LLM clients shouldn't - * be required to provide values that have defaults. This function recursively - * removes those properties from the required array. 
- * - * @param schema The JSON schema to process - * @returns The modified schema with defaults removed from required array - */ -function removeDefaultsFromRequired( - schema: Record, -): Record { - const result = { ...schema }; - - if (Array.isArray(result.allOf)) { - result.allOf = result.allOf.map((item: Record) => - removeDefaultsFromRequired(item), - ); - } - - if (Array.isArray(result.anyOf)) { - result.anyOf = result.anyOf.map((item: Record) => - removeDefaultsFromRequired(item), - ); - } - - if (Array.isArray(result.oneOf)) { - result.oneOf = result.oneOf.map((item: Record) => - removeDefaultsFromRequired(item), - ); - } - - if ( - result.properties && - typeof result.properties === 'object' && - result.properties !== null - ) { - const newProperties: Record = {}; - for (const [key, value] of Object.entries( - result.properties as Record, - )) { - if (value && typeof value === 'object') { - newProperties[key] = removeDefaultsFromRequired( - value as Record, - ); - } else { - newProperties[key] = value; - } - } - result.properties = newProperties; - } - - if ( - Array.isArray(result.required) && - result.properties && - typeof result.properties === 'object' - ) { - const properties = result.properties as Record< - string, - Record - >; - result.required = result.required.filter((propName: string) => { - const prop = properties[propName]; - return prop && !('default' in prop); - }); - - if ((result.required as string[]).length === 0) { - delete result.required; - } - } - - return result; -} - -/** - * MCP protocol doesn't support allOf/oneOf/anyOf at the top level of input schemas. - * This flattens allOf into a single merged object schema. 
- * - * @param schema The JSON schema to flatten - * @returns The flattened schema with allOf merged into properties - */ -function flattenTopLevelAllOf( - schema: Record, -): Record { - if (!Array.isArray(schema.allOf)) { - return schema; - } - - const mergedProperties: Record = {}; - const mergedRequired: string[] = []; - - for (const subSchema of schema.allOf as Record[]) { - if (subSchema.properties && typeof subSchema.properties === 'object') { - Object.assign(mergedProperties, subSchema.properties); - } - if (Array.isArray(subSchema.required)) { - mergedRequired.push(...subSchema.required); - } - } - - const result: Record = { - type: 'object', - properties: mergedProperties, - additionalProperties: false, - }; - - if (mergedRequired.length > 0) { - result.required = [...new Set(mergedRequired)]; - } - - return result; -} - -/** - * Convert a Zod schema to a JSON schema suitable for MCP tool definitions. - * - * @param schema The Zod schema to convert - * @returns The converted JSON schema with defaults removed and allOf flattened - */ -function zodSchemaToJsonSchema(schema: ZodSchema): Record { - const jsonSchema = schema.toJSONSchema(); - const { $schema: _, ...rest } = jsonSchema; - - const flattened = flattenTopLevelAllOf(rest); - - if (flattened.type === 'object' && !('additionalProperties' in flattened)) { - flattened.additionalProperties = false; - } - - return removeDefaultsFromRequired(flattened); -} - -/** - * Get all tool definitions with their schemas and descriptions. - * - * @returns Array of tool definitions for all available MCP tools - */ -export function getToolDefinitions(): ToolDefinition[] { - return Object.entries(tools).map(([baseName, tool]) => ({ - name: `${TOOL_PREFIX}_${baseName}`, - description: tool.description, - inputSchema: zodSchemaToJsonSchema(tool.schema), - })); -} - -/** - * Get the handler function for a specific tool by name. 
- * - * @param name The tool name (with or without mm_ prefix) - * @returns The tool handler function or undefined if tool not found - */ -export function getToolHandler(name: string): ToolHandler | undefined { - const prefixedMatch = Object.entries(tools).find( - ([baseName]) => `${TOOL_PREFIX}_${baseName}` === name, - ); - if (prefixedMatch) { - return prefixedMatch[1].handler; - } - - const tool = tools[name]; - return tool?.handler; -} - -/** - * Check if a tool handler exists for the given tool name. - * - * @param name The tool name to check - * @returns True if a handler exists for the tool, false otherwise - */ -export function hasToolHandler(name: string): boolean { - return getToolHandler(name) !== undefined; -} - -/** - * Extract the base name from a tool name by removing the mm_ prefix. - * - * @param toolName The tool name (with or without mm_ prefix) - * @returns The base name without the prefix - */ -export function extractBaseName(toolName: string): string { - const prefixWithUnderscore = `${TOOL_PREFIX}_`; - if (toolName.startsWith(prefixWithUnderscore)) { - return toolName.slice(prefixWithUnderscore.length); - } - return toolName; -} - -/** - * Validate tool input against the tool's schema and return parsed data. - * - * @param toolName The tool name to validate input for - * @param input The input data to validate - * @returns The validated and parsed input data - */ -export function validateToolInput( - toolName: string, - input: unknown, -): Type { - const baseName = extractBaseName(toolName); - const tool = tools[baseName]; - - if (!tool) { - throw new Error(`Unknown tool: ${toolName}`); - } - - return tool.schema.parse(input ?? {}) as Type; -} - -/** - * Safely validate tool input without throwing errors. 
- * - * @param toolName The tool name to validate input for - * @param input The input data to validate - * @returns Object with success flag and either parsed data or error message - */ -export function safeValidateToolInput( - toolName: string, - input: unknown, -): - | { - /** - * Indicates validation succeeded - */ - success: true; - /** - * The validated and parsed input data - */ - data: unknown; - } - | { - /** - * Indicates validation failed - */ - success: false; - /** - * Error message describing validation failure - */ - error: string; - } { - const baseName = extractBaseName(toolName); - const tool = tools[baseName]; - - if (!tool) { - return { success: false, error: `Unknown tool: ${toolName}` }; - } - - const result = tool.schema.safeParse(input ?? {}); - if (!result.success) { - const errorMessage = result.error.issues - .map((issue) => `${issue.path.join('.')}: ${issue.message}`) - .join('; '); - return { success: false, error: errorMessage }; - } - - return { success: true, data: result.data }; -} - -/** - * Get all available tool base names (without mm_ prefix). - * - * @returns Array of tool base names - */ -export function getToolNames(): string[] { - return Object.keys(tools); -} - -/** - * Get all available tool names with mm_ prefix. - * - * @returns Array of prefixed tool names - */ -export function getPrefixedToolNames(): string[] { - return Object.keys(tools).map((name) => `${TOOL_PREFIX}_${name}`); -} - -/** - * Build a record mapping prefixed tool names to their handler functions. 
- * - * @returns Record of tool name to handler function mappings - */ -export function buildToolHandlersRecord(): Record { - const handlers: Record = {}; - for (const [baseName, tool] of Object.entries(tools)) { - handlers[`${TOOL_PREFIX}_${baseName}`] = tool.handler; - } - return handlers; -} - -export type { ToolEntry }; diff --git a/src/mcp-server/tools/discovery-tools.ts b/src/mcp-server/tools/discovery-tools.ts deleted file mode 100644 index be5dae6..0000000 --- a/src/mcp-server/tools/discovery-tools.ts +++ /dev/null @@ -1,223 +0,0 @@ -import { - DEFAULT_TESTID_LIMIT, - OBSERVATION_TESTID_LIMIT, -} from '../constants.js'; -import { collectTestIds, collectTrimmedA11ySnapshot } from '../discovery.js'; -import { - knowledgeStore, - createDefaultObservation, -} from '../knowledge-store.js'; -import { getSessionManager } from '../session-manager.js'; -import { classifyDiscoveryError } from './error-classification.js'; -import { runTool } from './run-tool.js'; -import type { - ListTestIdsInput, - ListTestIdsResult, - AccessibilitySnapshotInput, - AccessibilitySnapshotResult, - DescribeScreenInput, - DescribeScreenResult, - McpResponse, - PriorKnowledgeContext, - HandlerOptions, -} from '../types'; - -/** - * Handle listing all visible data-testid attributes on the current page. - * - * @param input The input containing optional limit for number of items - * @param options Optional handler options for the operation - * @returns Promise resolving to list of visible test IDs with metadata - */ -export async function handleListTestIds( - input: ListTestIdsInput, - options?: HandlerOptions, -): Promise> { - const limit = input.limit ?? DEFAULT_TESTID_LIMIT; - - return runTool({ - toolName: 'mm_list_testids', - input, - options, - observationPolicy: 'custom', - - /** - * Execute the list test IDs operation. 
- * - * @param context The workflow context containing the page - * @returns The result with test ID items and observation data - */ - execute: async (context) => { - const items = await collectTestIds(context.page, limit); - const state = await getSessionManager().getExtensionState(); - const { nodes, refMap } = await collectTrimmedA11ySnapshot(context.page); - - getSessionManager().setRefMap(refMap); - - return { - result: { items }, - observation: createDefaultObservation(state, items, nodes), - }; - }, - - classifyError: classifyDiscoveryError, - - /** - * Sanitizes input for recording by extracting only the limit parameter. - * - * @returns Sanitized input with limit value - */ - sanitizeInputForRecording: () => ({ limit }), - }); -} - -/** - * Handle getting a trimmed accessibility tree with deterministic refs. - * - * @param input The input containing optional root selector for scoping - * @param options Optional handler options for the operation - * @returns Promise resolving to accessibility nodes with deterministic refs - */ -export async function handleAccessibilitySnapshot( - input: AccessibilitySnapshotInput, - options?: HandlerOptions, -): Promise> { - return runTool({ - toolName: 'mm_accessibility_snapshot', - input, - options, - observationPolicy: 'custom', - - /** - * Execute the accessibility snapshot operation. 
- * - * @param context The workflow context containing the page - * @returns The result with accessibility nodes and observation data - */ - execute: async (context) => { - const { nodes, refMap } = await collectTrimmedA11ySnapshot( - context.page, - input.rootSelector, - ); - - getSessionManager().setRefMap(refMap); - - const state = await getSessionManager().getExtensionState(); - const testIds = await collectTestIds( - context.page, - OBSERVATION_TESTID_LIMIT, - ); - - return { - result: { nodes }, - observation: createDefaultObservation(state, testIds, nodes), - }; - }, - - classifyError: classifyDiscoveryError, - - /** - * Sanitizes input for recording by extracting only the root selector. - * - * @returns Sanitized input with rootSelector value - */ - sanitizeInputForRecording: () => ({ rootSelector: input.rootSelector }), - }); -} - -/** - * Handle getting comprehensive screen state with state, testIds, a11y, and optional screenshot. - * - * @param input The input containing screenshot options and selector - * @param options Optional handler options for the operation - * @returns Promise resolving to comprehensive screen description with prior knowledge - */ -export async function handleDescribeScreen( - input: DescribeScreenInput, - options?: HandlerOptions, -): Promise> { - return runTool({ - toolName: 'mm_describe_screen', - input, - options, - observationPolicy: 'custom', - - /** - * Execute the describe screen operation. 
- * - * @param context The workflow context containing the page - * @returns The result with state, testIds, a11y, screenshot, and prior knowledge - */ - execute: async (context) => { - const sessionManager = getSessionManager(); - const { page } = context; - - const state = await sessionManager.getExtensionState(); - const testIds = await collectTestIds(page, DEFAULT_TESTID_LIMIT); - const { nodes, refMap } = await collectTrimmedA11ySnapshot(page); - - sessionManager.setRefMap(refMap); - - let screenshot: DescribeScreenResult['screenshot'] = null; - - if (input.includeScreenshot) { - const screenshotName = input.screenshotName ?? 'describe-screen'; - const result = await sessionManager.screenshot({ - name: screenshotName, - fullPage: true, - }); - - screenshot = { - path: result.path, - width: result.width, - height: result.height, - base64: input.includeScreenshotBase64 ? result.base64 : null, - }; - } - - const sessionMetadata = sessionManager.getSessionMetadata(); - const priorKnowledgeContext: PriorKnowledgeContext = { - currentScreen: state.currentScreen, - currentUrl: state.currentUrl, - visibleTestIds: testIds, - a11yNodes: nodes, - currentSessionFlowTags: sessionMetadata?.flowTags, - }; - - const priorKnowledge = await knowledgeStore.generatePriorKnowledge( - priorKnowledgeContext, - context.sessionId, - ); - - const observation = createDefaultObservation( - state, - testIds, - nodes, - priorKnowledge, - ); - - return { - result: { - state, - testIds: { items: testIds }, - a11y: { nodes }, - screenshot, - priorKnowledge, - }, - observation, - }; - }, - - classifyError: classifyDiscoveryError, - - /** - * Sanitizes input for recording by extracting screenshot-related parameters. 
- * - * @returns Sanitized input with screenshot options - */ - sanitizeInputForRecording: () => ({ - includeScreenshot: input.includeScreenshot, - screenshotName: input.screenshotName, - }), - }); -} diff --git a/src/mcp-server/tools/helpers.test.ts b/src/mcp-server/tools/helpers.test.ts deleted file mode 100644 index 64e463d..0000000 --- a/src/mcp-server/tools/helpers.test.ts +++ /dev/null @@ -1,745 +0,0 @@ -/** - * Unit tests for tool helper functions. - * - * Tests session validation, observation collection, error handling, and step recording. - */ - -import type { Page } from '@playwright/test'; -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { - requireActiveSession, - collectObservation, - withActiveSession, - recordToolStep, - collectObservationAndRecord, - handleToolError, -} from './helpers'; -import type { ObservationLevel, RecordStepParams } from './helpers'; -import * as discoveryModule from '../discovery.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils'; -import { ErrorCodes } from '../types'; - -describe('helpers', () => { - let mockSessionManager: ReturnType; - - beforeEach(() => { - mockSessionManager = createMockSessionManager(); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('requireActiveSession', () => { - describe('when no active session exists', () => { - it('returns error response with NO_ACTIVE_SESSION code', () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - const startTime = Date.now(); - - // Act - const result = requireActiveSession(startTime); - - // Assert - expect(result).toBeDefined(); - expect(result?.ok).toBe(false); - if (result && !result.ok) { - 
expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - expect(result.error.message).toBe( - 'No active session. Call launch first.', - ); - } - }); - - it('includes timestamp in error response', () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - const startTime = Date.now(); - - // Act - const result = requireActiveSession(startTime); - - // Assert - if (result && !result.ok) { - expect(result.meta.timestamp).toBeDefined(); - } - }); - }); - - describe('when active session exists', () => { - it('returns undefined', () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true); - const startTime = Date.now(); - - // Act - const result = requireActiveSession(startTime); - - // Assert - expect(result).toBeUndefined(); - }); - }); - }); - - describe('collectObservation', () => { - describe('when level is "none"', () => { - it('returns default observation with empty arrays', async () => { - // Arrange - const mockPage = {} as Page; - const level: ObservationLevel = 'none'; - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - - // Act - const result = await collectObservation(mockPage, level); - - // Assert - expect(result.testIds).toStrictEqual([]); - expect(result.a11y.nodes).toStrictEqual([]); - }); - - it('does not query extension state', async () => { - // Arrange - const mockPage = {} as Page; - const level: ObservationLevel = 'none'; - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - - // Act - await collectObservation(mockPage, level); - - // Assert - expect(mockSessionManager.getExtensionState).not.toHaveBeenCalled(); - }); - }); - - describe('when level is "minimal"', () => { - it('returns observation with state only', async () => { - // Arrange - const mockPage = {} as 
Page; - const level: ObservationLevel = 'minimal'; - const mockState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home' as const, - accountAddress: '0x123', - networkName: 'Ethereum Mainnet', - chainId: 1, - balance: '1.5 ETH', - }; - vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue( - mockState, - ); - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue({ - state: mockState, - testIds: [], - a11y: { nodes: [] }, - }); - - // Act - const result = await collectObservation(mockPage, level); - - // Assert - expect(result.state).toStrictEqual(mockState); - expect(result.testIds).toStrictEqual([]); - expect(result.a11y.nodes).toStrictEqual([]); - }); - - it('uses preset state when provided', async () => { - // Arrange - const mockPage = {} as Page; - const level: ObservationLevel = 'minimal'; - const presetState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-456/home.html', - extensionId: 'ext-456', - isUnlocked: false, - currentScreen: 'unlock' as const, - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue({ - state: presetState, - testIds: [], - a11y: { nodes: [] }, - }); - - // Act - const result = await collectObservation(mockPage, level, presetState); - - // Assert - expect(mockSessionManager.getExtensionState).not.toHaveBeenCalled(); - expect(result.state).toStrictEqual(presetState); - }); - }); - - describe('when level is "full"', () => { - it('collects state, testIds, and a11y tree', async () => { - // Arrange - const mockPage = { locator: vi.fn() } as unknown as Page; - const level: ObservationLevel = 'full'; - const mockState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home' as const, - 
accountAddress: '0x123', - networkName: 'Ethereum Mainnet', - chainId: 1, - balance: '1.5 ETH', - }; - const mockTestIds = [ - { testId: 'send-button', tag: 'button', text: 'Send', visible: true }, - ]; - const mockA11yNodes = [ - { ref: 'e1', role: 'button', name: 'Send', path: [] }, - ]; - const mockRefMap = new Map([['e1', '[data-testid="send-button"]']]); - - vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue( - mockState, - ); - vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue( - mockTestIds, - ); - vi.spyOn( - discoveryModule, - 'collectTrimmedA11ySnapshot', - ).mockResolvedValue({ - nodes: mockA11yNodes, - refMap: mockRefMap, - }); - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue({ - state: mockState, - testIds: mockTestIds, - a11y: { nodes: mockA11yNodes }, - }); - - // Act - const result = await collectObservation(mockPage, level); - - // Assert - expect(result.state).toStrictEqual(mockState); - expect(result.testIds).toStrictEqual(mockTestIds); - expect(result.a11y.nodes).toStrictEqual(mockA11yNodes); - expect(mockSessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap); - }); - - it('returns default observation when page is undefined', async () => { - // Arrange - const level: ObservationLevel = 'full'; - const mockState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home' as const, - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue( - mockState, - ); - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue({ - state: mockState, - testIds: [], - a11y: { nodes: [] }, - }); - - // Act - const result = await collectObservation(undefined, level); - - // Assert - expect(result.testIds).toStrictEqual([]); - expect(result.a11y.nodes).toStrictEqual([]); - }); - - 
it('returns default observation when discovery throws error', async () => { - // Arrange - const mockPage = { locator: vi.fn() } as unknown as Page; - const level: ObservationLevel = 'full'; - const mockState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home' as const, - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue( - mockState, - ); - vi.spyOn(discoveryModule, 'collectTestIds').mockRejectedValue( - new Error('Page closed'), - ); - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue({ - state: mockState, - testIds: [], - a11y: { nodes: [] }, - }); - - // Act - const result = await collectObservation(mockPage, level); - - // Assert - expect(result.testIds).toStrictEqual([]); - expect(result.a11y.nodes).toStrictEqual([]); - }); - }); - }); - - describe('withActiveSession', () => { - describe('when no active session exists', () => { - it('returns error response without calling handler', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - const handler = vi.fn(); - const wrappedHandler = withActiveSession(handler); - - // Act - const result = await wrappedHandler({ test: 'input' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - expect(handler).not.toHaveBeenCalled(); - }); - }); - - describe('when session ID is missing', () => { - it('returns error response', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true); - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined); - const handler = vi.fn(); - const wrappedHandler = withActiveSession(handler); - - // Act - const result = await wrappedHandler({ test: 'input' }); - - // Assert - 
expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - expect(result.error.message).toBe('Session ID not found'); - } - expect(handler).not.toHaveBeenCalled(); - }); - }); - - describe('when active session exists', () => { - it('calls handler with input, context, and startTime', async () => { - // Arrange - const mockPage = { url: () => 'test-url' } as unknown as Page; - const mockRefMap = new Map([['e1', '[data-testid="test"]']]); - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true); - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue( - 'session-123', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(mockRefMap); - - const handler = vi.fn().mockResolvedValue({ - ok: true, - ts: Date.now(), - durationMs: 100, - result: { success: true }, - }); - const wrappedHandler = withActiveSession(handler); - const input = { test: 'input' }; - - // Act - const result = await wrappedHandler(input); - - // Assert - expect(handler).toHaveBeenCalledWith( - input, - { - sessionId: 'session-123', - page: mockPage, - refMap: mockRefMap, - }, - expect.any(Number), - ); - expect(result.ok).toBe(true); - }); - - it('passes through handler result', async () => { - // Arrange - const mockPage = { url: () => 'test-url' } as unknown as Page; - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true); - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue( - 'session-123', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - const expectedResult = { - ok: true, - ts: Date.now(), - durationMs: 100, - result: { data: 'test-data' }, - }; - const handler = vi.fn().mockResolvedValue(expectedResult); - const wrappedHandler = withActiveSession(handler); - - // Act - const result = await wrappedHandler({ 
test: 'input' }); - - // Assert - expect(result).toStrictEqual(expectedResult); - }); - }); - }); - - describe('recordToolStep', () => { - it('records step with all parameters', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue( - 'session-123', - ); - const mockRecordStep = vi.fn().mockResolvedValue(undefined); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: mockRecordStep, - } as any); - - const params: RecordStepParams = { - toolName: 'mm_click', - input: { testId: 'send-button' }, - startTime: Date.now() - 100, - observation: { - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }, - target: { testId: 'send-button' }, - screenshotPath: '/path/to/screenshot.png', - screenshotDimensions: { width: 1280, height: 720 }, - }; - - // Act - await recordToolStep(params); - - // Assert - expect(mockRecordStep).toHaveBeenCalledWith({ - sessionId: 'session-123', - toolName: 'mm_click', - input: { testId: 'send-button' }, - target: { testId: 'send-button' }, - outcome: { ok: true }, - observation: params.observation, - durationMs: expect.any(Number), - screenshotPath: '/path/to/screenshot.png', - screenshotDimensions: { width: 1280, height: 720 }, - }); - }); - - it('uses empty string when session ID is undefined', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined); - const mockRecordStep = vi.fn().mockResolvedValue(undefined); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: mockRecordStep, - } as any); - - const params: RecordStepParams = { - toolName: 'mm_click', - input: { testId: 'send-button' }, - startTime: Date.now(), - observation: { - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }, - }; - - // Act - await recordToolStep(params); - - // Assert - expect(mockRecordStep).toHaveBeenCalledWith( - expect.objectContaining({ - sessionId: '', - }), - ); - }); - }); - - 
describe('collectObservationAndRecord', () => { - it('collects observation and records step', async () => { - // Arrange - const mockPage = { locator: vi.fn() } as unknown as Page; - const mockObservation = { - state: {} as any, - testIds: [ - { testId: 'send-button', tag: 'button', text: 'Send', visible: true }, - ], - a11y: { - nodes: [{ ref: 'e1', role: 'button', name: 'Send', path: [] }], - }, - }; - const mockRecordStep = vi.fn().mockResolvedValue(undefined); - - vi.spyOn( - knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue(mockObservation); - vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue( - mockObservation.testIds, - ); - vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( - { - nodes: mockObservation.a11y.nodes, - refMap: new Map(), - }, - ); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: mockRecordStep, - } as any); - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue( - 'session-123', - ); - - // Act - const result = await collectObservationAndRecord( - mockPage, - 'mm_click', - { testId: 'send-button' }, - Date.now(), - { - target: { testId: 'send-button' }, - screenshotPath: '/path/to/screenshot.png', - screenshotDimensions: { width: 1280, height: 720 }, - }, - ); - - // Assert - expect(result).toStrictEqual(mockObservation); - expect(mockRecordStep).toHaveBeenCalledWith( - expect.objectContaining({ - toolName: 'mm_click', - input: { testId: 'send-button' }, - observation: mockObservation, - target: { testId: 'send-button' }, - screenshotPath: '/path/to/screenshot.png', - screenshotDimensions: { width: 1280, height: 720 }, - }), - ); - }); - - it('works without optional parameters', async () => { - // Arrange - const mockPage = { locator: vi.fn() } as unknown as Page; - const mockObservation = { - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }; - const mockRecordStep = vi.fn().mockResolvedValue(undefined); - - vi.spyOn( - 
knowledgeStoreModule, - 'createDefaultObservation', - ).mockReturnValue(mockObservation); - vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); - vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( - { - nodes: [], - refMap: new Map(), - }, - ); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: mockRecordStep, - } as any); - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue( - 'session-123', - ); - - // Act - const result = await collectObservationAndRecord( - mockPage, - 'mm_get_state', - {}, - Date.now(), - ); - - // Assert - expect(result).toStrictEqual(mockObservation); - expect(mockRecordStep).toHaveBeenCalledWith( - expect.objectContaining({ - toolName: 'mm_get_state', - input: {}, - observation: mockObservation, - target: undefined, - screenshotPath: undefined, - screenshotDimensions: undefined, - }), - ); - }); - }); - - describe('handleToolError', () => { - describe('when error contains "Unknown a11yRef"', () => { - it('returns TARGET_NOT_FOUND error code', () => { - // Arrange - const error = new Error('Unknown a11yRef: e99'); - const startTime = Date.now(); - - // Act - const result = handleToolError( - error, - ErrorCodes.MM_CLICK_FAILED, - 'Click failed', - { a11yRef: 'e99' }, - 'session-123', - startTime, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_TARGET_NOT_FOUND); - expect(result.error.message).toContain('Unknown a11yRef: e99'); - } - }); - }); - - describe('when error contains "not found"', () => { - it('returns TARGET_NOT_FOUND error code', () => { - // Arrange - const error = new Error('Element not found'); - const startTime = Date.now(); - - // Act - const result = handleToolError( - error, - ErrorCodes.MM_TYPE_FAILED, - 'Type failed', - { testId: 'missing-input' }, - 'session-123', - startTime, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - 
expect(result.error.code).toBe(ErrorCodes.MM_TARGET_NOT_FOUND); - expect(result.error.message).toContain('not found'); - } - }); - }); - - describe('when error does not match special patterns', () => { - it('returns default error code with combined message', () => { - // Arrange - const error = new Error('Timeout exceeded'); - const startTime = Date.now(); - - // Act - const result = handleToolError( - error, - ErrorCodes.MM_CLICK_FAILED, - 'Click failed', - { testId: 'slow-button' }, - 'session-123', - startTime, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CLICK_FAILED); - expect(result.error.message).toBe('Click failed: Timeout exceeded'); - } - }); - - it('includes input in error details', () => { - // Arrange - const error = new Error('Generic error'); - const input = { testId: 'test-button', timeoutMs: 5000 }; - const startTime = Date.now(); - - // Act - const result = handleToolError( - error, - ErrorCodes.MM_CLICK_FAILED, - 'Click failed', - input, - 'session-123', - startTime, - ); - - // Assert - if (!result.ok) { - expect(result.error.details).toStrictEqual({ input }); - } - }); - - it('includes session ID in response', () => { - // Arrange - const error = new Error('Generic error'); - const startTime = Date.now(); - - // Act - const result = handleToolError( - error, - ErrorCodes.MM_CLICK_FAILED, - 'Click failed', - {}, - 'session-456', - startTime, - ); - - // Assert - if (!result.ok) { - expect(result.meta.sessionId).toBe('session-456'); - } - }); - }); - }); -}); diff --git a/src/mcp-server/tools/helpers.ts b/src/mcp-server/tools/helpers.ts deleted file mode 100644 index cf94f48..0000000 --- a/src/mcp-server/tools/helpers.ts +++ /dev/null @@ -1,313 +0,0 @@ -import type { Page } from '@playwright/test'; - -import type { ExtensionState } from '../../capabilities/types.js'; -import { OBSERVATION_TESTID_LIMIT } from '../constants.js'; -import { collectTestIds, 
collectTrimmedA11ySnapshot } from '../discovery.js'; -import { - knowledgeStore, - createDefaultObservation, -} from '../knowledge-store.js'; -import { getSessionManager } from '../session-manager.js'; -import type { - McpResponse, - ErrorCode, - TestIdItem, - StepRecordObservation, -} from '../types'; -import { ErrorCodes } from '../types'; -import { createErrorResponse, extractErrorMessage, debugWarn } from '../utils'; - -/** - * Level of detail to collect for observation data. - * - "full": Collect state, testIds, and a11y tree - * - "minimal": Collect state only (no testIds or a11y) - * - "none": Return empty observation - */ -export type ObservationLevel = 'full' | 'minimal' | 'none'; - -/** - * Parameters for recording a tool step in the knowledge store. - */ -export type RecordStepParams = { - /** - * Name of the tool that was executed - */ - toolName: string; - /** - * Input parameters passed to the tool - */ - input: Record; - /** - * Timestamp when the tool execution started - */ - startTime: number; - /** - * Observation data collected after tool execution - */ - observation: StepRecordObservation; - /** - * Target element information (selector, testId, etc.) - */ - target?: Record; - /** - * Path to screenshot file if captured - */ - screenshotPath?: string; - /** - * Screenshot dimensions if captured - */ - screenshotDimensions?: { - /** - * Screenshot width in pixels - */ - width: number; - /** - * Screenshot height in pixels - */ - height: number; - }; -}; - -/** - * Context information for an active session. - */ -export type ActiveSessionContext = { - /** - * Unique session identifier - */ - sessionId: string; - /** - * Current active page instance - */ - page: Page; - /** - * Map of accessibility references to selectors - */ - refMap: Map; -}; - -/** - * Check if an active session exists and return error if not. 
- * - * @param startTime - Timestamp when the operation started - * @returns Error response if no active session, undefined otherwise - */ -export function requireActiveSession( - startTime: number, -): McpResponse | undefined { - const sessionManager = getSessionManager(); - if (!sessionManager.hasActiveSession()) { - return createErrorResponse( - ErrorCodes.MM_NO_ACTIVE_SESSION, - 'No active session. Call launch first.', - undefined, - undefined, - startTime, - ) as McpResponse; - } - return undefined; -} - -/** - * Collect observation data from the current page state. - * - * @param page - The page to collect observation from - * @param level - Level of detail to collect (full, minimal, or none) - * @param presetState - Optional pre-fetched extension state to use instead of querying - * @returns Observation data with state, testIds, and accessibility tree - */ -export async function collectObservation( - page: Page | undefined, - level: ObservationLevel, - presetState?: ExtensionState, -): Promise { - const sessionManager = getSessionManager(); - - if (level === 'none') { - return createDefaultObservation({} as ExtensionState, [], []); - } - - const state = presetState ?? (await sessionManager.getExtensionState()); - - if (level === 'minimal') { - return createDefaultObservation(state, [], []); - } - - if (!page) { - debugWarn('collectObservation', 'Page not provided for full observation'); - return createDefaultObservation(state, [], []); - } - - try { - const testIds: TestIdItem[] = await collectTestIds( - page, - OBSERVATION_TESTID_LIMIT, - ); - const { nodes, refMap } = await collectTrimmedA11ySnapshot(page); - sessionManager.setRefMap(refMap); - return createDefaultObservation(state, testIds, nodes); - } catch (error) { - debugWarn('collectObservation', error); - return createDefaultObservation(state, [], []); - } -} - -/** - * Wrapper that ensures an active session exists before executing a handler. 
- * - * @param handler - Function to execute with active session context - * @returns Wrapped function that validates session before calling handler - */ -export function withActiveSession( - handler: ( - input: TInput, - ctx: ActiveSessionContext, - startTime: number, - ) => Promise>, -): (input: TInput) => Promise> { - return async (input: TInput): Promise> => { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - - const sessionError = requireActiveSession(startTime); - if (sessionError) { - return sessionError; - } - - const sessionId = sessionManager.getSessionId(); - if (!sessionId) { - return createErrorResponse( - ErrorCodes.MM_NO_ACTIVE_SESSION, - 'Session ID not found', - undefined, - undefined, - startTime, - ) as McpResponse; - } - const page = sessionManager.getPage(); - const refMap = sessionManager.getRefMap(); - - return handler(input, { sessionId, page, refMap }, startTime); - }; -} - -/** - * Record a tool execution step in the knowledge store. - * - * @param params - Parameters containing tool name, input, observation, and metadata - */ -export async function recordToolStep(params: RecordStepParams): Promise { - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId() ?? ''; - - await knowledgeStore.recordStep({ - sessionId, - toolName: params.toolName, - input: params.input, - target: params.target, - outcome: { ok: true }, - observation: params.observation, - durationMs: Date.now() - params.startTime, - screenshotPath: params.screenshotPath, - screenshotDimensions: params.screenshotDimensions, - }); -} - -/** - * Collect observation data and record the tool step in the knowledge store. 
- * - * @param page - The page to collect observation from - * @param toolName - Name of the tool that was executed - * @param input - Input parameters passed to the tool - * @param startTime - Timestamp when the tool execution started - * @param options - Optional metadata for the step record - * @param options.target - Target element information - * @param options.screenshotPath - Path to screenshot file if captured - * @param options.screenshotDimensions - Screenshot dimensions - * @param options.screenshotDimensions.width - Screenshot width in pixels - * @param options.screenshotDimensions.height - Screenshot height in pixels - * @returns Observation data collected after tool execution - */ -export async function collectObservationAndRecord( - page: Page, - toolName: string, - input: Record, - startTime: number, - options: { - /** - * Target element information (selector, testId, etc.) - */ - target?: Record; - /** - * Path to screenshot file if captured - */ - screenshotPath?: string; - /** - * Screenshot dimensions if captured - */ - screenshotDimensions?: { - /** - * Screenshot width in pixels - */ - width: number; - /** - * Screenshot height in pixels - */ - height: number; - }; - } = {}, -): Promise { - const observation = await collectObservation(page, 'full'); - - await recordToolStep({ - toolName, - input, - startTime, - observation, - target: options.target, - screenshotPath: options.screenshotPath, - screenshotDimensions: options.screenshotDimensions, - }); - - return observation; -} - -/** - * Handle tool execution errors and return appropriate error response. 
- * - * @param error - The error that occurred during tool execution - * @param defaultCode - Default error code to use if no specific match found - * @param defaultMessage - Default error message to use - * @param input - Input parameters that were passed to the tool - * @param sessionId - Current session ID for error context - * @param startTime - Timestamp when the tool execution started - * @returns Error response with appropriate code and message - */ -export function handleToolError( - error: unknown, - defaultCode: ErrorCode, - defaultMessage: string, - input: unknown, - sessionId: string | undefined, - startTime: number, -): McpResponse { - const message = extractErrorMessage(error); - - if (message.includes('Unknown a11yRef') || message.includes('not found')) { - return createErrorResponse( - ErrorCodes.MM_TARGET_NOT_FOUND, - message, - { input }, - sessionId, - startTime, - ) as McpResponse; - } - - return createErrorResponse( - defaultCode, - `${defaultMessage}: ${message}`, - { input }, - sessionId, - startTime, - ) as McpResponse; -} diff --git a/src/mcp-server/tools/index.ts b/src/mcp-server/tools/index.ts deleted file mode 100644 index 2621238..0000000 --- a/src/mcp-server/tools/index.ts +++ /dev/null @@ -1,10 +0,0 @@ -export * from './error-classification.js'; -export * from './run-tool.js'; -export * from './helpers.js'; -export * from './interaction.js'; -export * from './navigation.js'; -export * from './discovery-tools.js'; -export * from './screenshot.js'; -export * from './knowledge.js'; -export * from './batch.js'; -export * from './clipboard.js'; diff --git a/src/mcp-server/tools/interaction.test.ts b/src/mcp-server/tools/interaction.test.ts deleted file mode 100644 index cd4fea7..0000000 --- a/src/mcp-server/tools/interaction.test.ts +++ /dev/null @@ -1,822 +0,0 @@ -/** - * Unit tests for interaction tool handlers. 
- * - * Tests handleClick, handleType, and handleWaitFor with various target types, - * error scenarios, and page closure detection. - */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { handleClick, handleType, handleWaitFor } from './interaction'; -import * as discoveryModule from '../discovery.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { - createMockSessionManager, - createMockPage, - createMockLocator, -} from '../test-utils'; -import { ErrorCodes } from '../types'; -import * as utilsModule from '../utils'; - -describe('interaction', () => { - let mockSessionManager: ReturnType; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - // Mock knowledge store to prevent "not initialized" errors - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('handleClick', () => { - describe('with testId target', () => { - it('clicks element by testId', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockPage, 'locator').mockReturnValue(mockLocator); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 
'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleClick({ testId: 'my-button' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.clicked).toBe(true); - expect(result.result.target).toBe('testId:my-button'); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'testId', - 'my-button', - expect.any(Map), - 15000, - ); - expect(mockLocator.click).toHaveBeenCalled(); - }); - - it('uses custom timeout when provided', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - await handleClick({ testId: 'my-button', timeoutMs: 5000 }); - - // Assert - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'testId', - 'my-button', - expect.any(Map), - 5000, - ); - }); - }); - - describe('with selector target', () => { - it('clicks element by CSS selector', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleClick({ selector: 'button.primary' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.clicked).toBe(true); - expect(result.result.target).toBe('selector:button.primary'); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'selector', - 'button.primary', - expect.any(Map), - 15000, 
- ); - }); - }); - - describe('with a11yRef target', () => { - it('clicks element by accessibility reference', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - const refMap = new Map([['e5', 'button[aria-label="Submit"]']]); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(refMap); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleClick({ a11yRef: 'e5' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.clicked).toBe(true); - expect(result.result.target).toBe('a11yRef:e5'); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'a11yRef', - 'e5', - refMap, - 15000, - ); - }); - }); - - describe('with invalid target selection', () => { - it('returns error when no target specified', async () => { - // Act - const result = await handleClick({} as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('Exactly one'); - } - }); - - it('returns error when multiple targets specified', async () => { - // Act - const result = await handleClick({ - testId: 'button', - selector: '.button', - } as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('Exactly one'); - } - }); - - it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => { - // Arrange - vi.spyOn(utilsModule, 'validateTargetSelection').mockReturnValue({ - valid: true, - // Missing type and value properties - will fail isValidTargetSelection - } as any); - - // Act - const result = await handleClick({ testId: 'button' }); - - // Assert - 
expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toBe('Invalid target selection'); - } - }); - }); - - describe('with page closure after click', () => { - it('handles page closure gracefully', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockLocator, 'click').mockRejectedValue( - new Error('Target page, context or browser has been closed'), - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleClick({ testId: 'close-btn' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.clicked).toBe(true); - expect(result.result.pageClosedAfterClick).toBe(true); - expect(result.result.target).toBe('testId:close-btn'); - } - }); - - it('handles browser closed error gracefully', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockLocator, 'click').mockRejectedValue( - new Error('browser has been closed'), - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleClick({ testId: 'close-btn' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.pageClosedAfterClick).toBe(true); - } - }); - }); - - describe('with click errors', () => { - it('returns error when click fails with non-closure error', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockLocator, 
'click').mockRejectedValue( - new Error('Element is not clickable'), - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleClick({ testId: 'my-button' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CLICK_FAILED); - } - }); - - it('returns error when element not found', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( - new Error('Timeout waiting for element'), - ); - - // Act - const result = await handleClick({ testId: 'nonexistent' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); - } - }); - }); - - describe('without active session', () => { - it('returns error when no session active', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleClick({ testId: 'my-button' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - }); - - describe('handleType', () => { - describe('with testId target', () => { - it('types text into element by testId', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockPage, 'locator').mockReturnValue(mockLocator); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 
'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleType({ - testId: 'amount-input', - text: '0.5', - }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.typed).toBe(true); - expect(result.result.target).toBe('testId:amount-input'); - expect(result.result.textLength).toBe(3); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'testId', - 'amount-input', - expect.any(Map), - 15000, - ); - expect(mockLocator.fill).toHaveBeenCalledWith('0.5'); - }); - - it('uses custom timeout when provided', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - await handleType({ testId: 'input', text: 'test', timeoutMs: 3000 }); - - // Assert - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'testId', - 'input', - expect.any(Map), - 3000, - ); - }); - }); - - describe('with selector target', () => { - it('types text into element by CSS selector', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleType({ - selector: 'input[name="email"]', - text: 'test@example.com', - }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.typed).toBe(true); - expect(result.result.target).toBe('selector:input[name="email"]'); - expect(result.result.textLength).toBe(16); - } - 
expect(mockLocator.fill).toHaveBeenCalledWith('test@example.com'); - }); - }); - - describe('with a11yRef target', () => { - it('types text into element by accessibility reference', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - const refMap = new Map([['e3', 'input[aria-label="Amount"]']]); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(refMap); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleType({ a11yRef: 'e3', text: '100' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.typed).toBe(true); - expect(result.result.target).toBe('a11yRef:e3'); - expect(result.result.textLength).toBe(3); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'a11yRef', - 'e3', - refMap, - 15000, - ); - }); - }); - - describe('with empty text', () => { - it('types empty string and reports zero length', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleType({ testId: 'input', text: '' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.typed).toBe(true); - expect(result.result.textLength).toBe(0); - } - expect(mockLocator.fill).toHaveBeenCalledWith(''); - }); - }); - - describe('with invalid target selection', () => { - it('returns error when no target specified', async () => { - // Act - const result = await handleType({ text: 'test' } as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - 
expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('Exactly one'); - } - }); - - it('returns error when multiple targets specified', async () => { - // Act - const result = await handleType({ - testId: 'input', - selector: 'input', - text: 'test', - } as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('Exactly one'); - } - }); - - it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => { - // Arrange - vi.spyOn(utilsModule, 'validateTargetSelection').mockReturnValue({ - valid: true, - // Missing type and value properties - will fail isValidTargetSelection - } as any); - - // Act - const result = await handleType({ testId: 'input', text: 'test' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toBe('Invalid target selection'); - } - }); - }); - - describe('with type errors', () => { - it('returns error when fill fails', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockLocator, 'fill').mockRejectedValue( - new Error('Element is not editable'), - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleType({ testId: 'input', text: 'test' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_TYPE_FAILED); - } - }); - - it('returns error when element not found', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockSessionManager, 
'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( - new Error('Timeout waiting for element'), - ); - - // Act - const result = await handleType({ - testId: 'nonexistent', - text: 'test', - }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); - } - }); - }); - - describe('without active session', () => { - it('returns error when no session active', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleType({ testId: 'input', text: 'test' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - }); - - describe('handleWaitFor', () => { - describe('with testId target', () => { - it('waits for element by testId', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockPage, 'locator').mockReturnValue(mockLocator); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleWaitFor({ testId: 'loading-spinner' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.found).toBe(true); - expect(result.result.target).toBe('testId:loading-spinner'); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'testId', - 'loading-spinner', - expect.any(Map), - 15000, - ); - }); - - it('uses custom timeout when provided', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockSessionManager, 
'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - await handleWaitFor({ testId: 'element', timeoutMs: 30000 }); - - // Assert - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'testId', - 'element', - expect.any(Map), - 30000, - ); - }); - }); - - describe('with selector target', () => { - it('waits for element by CSS selector', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleWaitFor({ selector: '.success-message' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.found).toBe(true); - expect(result.result.target).toBe('selector:.success-message'); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'selector', - '.success-message', - expect.any(Map), - 15000, - ); - }); - }); - - describe('with a11yRef target', () => { - it('waits for element by accessibility reference', async () => { - // Arrange - const mockPage = createMockPage(); - const mockLocator = createMockLocator(); - const refMap = new Map([['e10', 'button[aria-label="Confirm"]']]); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(refMap); - - vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( - mockLocator as any, - ); - - // Act - const result = await handleWaitFor({ a11yRef: 'e10' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.found).toBe(true); - 
expect(result.result.target).toBe('a11yRef:e10'); - } - expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( - mockPage, - 'a11yRef', - 'e10', - refMap, - 15000, - ); - }); - }); - - describe('with invalid target selection', () => { - it('returns error when no target specified', async () => { - // Act - const result = await handleWaitFor({} as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('Exactly one'); - } - }); - - it('returns error when multiple targets specified', async () => { - // Act - const result = await handleWaitFor({ - testId: 'element', - selector: '.element', - } as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('Exactly one'); - } - }); - - it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => { - // Arrange - vi.spyOn(utilsModule, 'validateTargetSelection').mockReturnValue({ - valid: true, - // Missing type and value properties - will fail isValidTargetSelection - } as any); - - // Act - const result = await handleWaitFor({ testId: 'element' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toBe('Invalid target selection'); - } - }); - }); - - describe('with timeout errors', () => { - it('returns error when element not found within timeout', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( - new Error('Timeout 15000ms exceeded'), - ); - - // Act - const result = await handleWaitFor({ testId: 
'nonexistent' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); - } - }); - - it('returns error when page closed during wait', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( - new Error('Target page has been closed'), - ); - - // Act - const result = await handleWaitFor({ testId: 'element' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); - } - }); - }); - - describe('without active session', () => { - it('returns error when no session active', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleWaitFor({ testId: 'element' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - }); -}); diff --git a/src/mcp-server/tools/interaction.ts b/src/mcp-server/tools/interaction.ts deleted file mode 100644 index 80c02e1..0000000 --- a/src/mcp-server/tools/interaction.ts +++ /dev/null @@ -1,296 +0,0 @@ -import { DEFAULT_INTERACTION_TIMEOUT_MS } from '../constants.js'; -import { waitForTarget } from '../discovery.js'; -import { getSessionManager } from '../session-manager.js'; -import { - classifyClickError, - classifyTypeError, - classifyWaitError, - isPageClosedError, -} from './error-classification.js'; -import { runTool } from './run-tool.js'; -import type { - ClickInput, - ClickResult, - TypeInput, - TypeResult, - WaitForInput, - WaitForResult, - McpResponse, - HandlerOptions, -} from '../types'; -import { ErrorCodes } from '../types'; -import { - createErrorResponse, - validateTargetSelection, - 
isValidTargetSelection, - isInvalidTargetSelection, -} from '../utils'; - -/** - * Handles clicking on an element specified by testId, selector, or accessibility reference. - * - * @param input The click input containing target selection and timeout options - * @param options Optional handler configuration - * @returns Promise resolving to click result with target information - */ -export async function handleClick( - input: ClickInput, - options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS; - - const validation = validateTargetSelection(input); - if (isInvalidTargetSelection(validation)) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - validation.error, - { input }, - sessionId, - startTime, - ); - } - - if (!isValidTargetSelection(validation)) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'Invalid target selection', - { input }, - sessionId, - startTime, - ); - } - - const { type: targetType, value: targetValue } = validation; - - return runTool({ - toolName: 'mm_click', - input, - options, - - /** - * Executes the click action on the target element. - * - * @param context The tool execution context containing page and reference map - * @returns Promise resolving to click result with success status and target info - */ - execute: async (context) => { - const locator = await waitForTarget( - context.page, - targetType, - targetValue, - context.refMap, - timeoutMs, - ); - - try { - await locator.click(); - return { - clicked: true, - target: `${targetType}:${targetValue}`, - }; - } catch (clickError) { - if (isPageClosedError(clickError)) { - return { - clicked: true, - target: `${targetType}:${targetValue}`, - pageClosedAfterClick: true, - }; - } - throw clickError; - } - }, - - /** - * Returns the target element information for recording. 
- * - * @returns Object containing the target type and value - */ - getTarget: () => ({ [targetType]: targetValue }), - - classifyError: classifyClickError, - - /** - * Sanitizes input for knowledge store recording. - * - * @returns Sanitized input object with timeout information - */ - sanitizeInputForRecording: () => ({ timeoutMs }), - }); -} - -/** - * Handles typing text into an element specified by testId, selector, or accessibility reference. - * - * @param input The type input containing target selection, text, and timeout options - * @param options Optional handler configuration - * @returns Promise resolving to type result with target and text length information - */ -export async function handleType( - input: TypeInput, - options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS; - - const validation = validateTargetSelection(input); - if (isInvalidTargetSelection(validation)) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - validation.error, - { input }, - sessionId, - startTime, - ); - } - - if (!isValidTargetSelection(validation)) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'Invalid target selection', - { input }, - sessionId, - startTime, - ); - } - - const { type: targetType, value: targetValue } = validation; - - return runTool({ - toolName: 'mm_type', - input, - options, - - /** - * Executes the type action on the target element. 
- * - * @param context The tool execution context containing page and reference map - * @returns Promise resolving to type result with success status and text length - */ - execute: async (context) => { - const locator = await waitForTarget( - context.page, - targetType, - targetValue, - context.refMap, - timeoutMs, - ); - await locator.fill(input.text); - - return { - typed: true, - target: `${targetType}:${targetValue}`, - textLength: input.text.length, - }; - }, - - /** - * Returns the target element information for recording. - * - * @returns Object containing the target type and value - */ - getTarget: () => ({ [targetType]: targetValue }), - - classifyError: classifyTypeError, - - /** - * Sanitizes input for knowledge store recording. - * - * @returns Sanitized input object with timeout and text information - */ - sanitizeInputForRecording: () => ({ - timeoutMs, - text: input.text, - testId: input.testId, - selector: input.selector, - a11yRef: input.a11yRef, - }), - }); -} - -/** - * Handles waiting for an element to become visible. - * - * @param input The wait input containing target selection and timeout options - * @param options Optional handler configuration - * @returns Promise resolving to wait result with target information - */ -export async function handleWaitFor( - input: WaitForInput, - options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - const timeoutMs = input.timeoutMs ?? 
DEFAULT_INTERACTION_TIMEOUT_MS; - - const validation = validateTargetSelection(input); - if (isInvalidTargetSelection(validation)) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - validation.error, - { input }, - sessionId, - startTime, - ); - } - - if (!isValidTargetSelection(validation)) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'Invalid target selection', - { input }, - sessionId, - startTime, - ); - } - - const { type: targetType, value: targetValue } = validation; - - return runTool({ - toolName: 'mm_wait_for', - input, - options, - - /** - * Executes the wait action for the target element. - * - * @param context The tool execution context containing page and reference map - * @returns Promise resolving to wait result with success status and target info - */ - execute: async (context) => { - await waitForTarget( - context.page, - targetType, - targetValue, - context.refMap, - timeoutMs, - ); - - return { - found: true, - target: `${targetType}:${targetValue}`, - }; - }, - - /** - * Returns the target element information for recording. - * - * @returns Object containing the target type and value - */ - getTarget: () => ({ [targetType]: targetValue }), - - classifyError: classifyWaitError, - - /** - * Sanitizes input for knowledge store recording. 
- * - * @returns Sanitized input object with timeout information - */ - sanitizeInputForRecording: () => ({ timeoutMs }), - }); -} diff --git a/src/mcp-server/tools/knowledge.ts b/src/mcp-server/tools/knowledge.ts deleted file mode 100644 index eddbfff..0000000 --- a/src/mcp-server/tools/knowledge.ts +++ /dev/null @@ -1,212 +0,0 @@ -import { knowledgeStore } from '../knowledge-store.js'; -import { getSessionManager } from '../session-manager.js'; -import type { - KnowledgeLastInput, - KnowledgeLastResult, - KnowledgeSearchInput, - KnowledgeSearchResult, - KnowledgeSummarizeInput, - KnowledgeSummarizeResult, - KnowledgeSessionsInput, - KnowledgeSessionsResult, - KnowledgeScope, - McpResponse, - HandlerOptions, -} from '../types'; -import { ErrorCodes } from '../types'; -import { - createSuccessResponse, - createErrorResponse, - extractErrorMessage, -} from '../utils'; - -/** - * Handles retrieving the last N steps from knowledge store. - * - * @param input - Input with number of steps and scope. - * @param _options - Handler options (unused). - * @returns Response with step records. - */ -export async function handleKnowledgeLast( - input: KnowledgeLastInput, - _options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - const nSteps = input.n ?? 20; - const scope: KnowledgeScope = input.scope ?? 'current'; - - try { - const steps = await knowledgeStore.getLastSteps( - nSteps, - scope, - sessionId, - input.filters, - ); - - return createSuccessResponse( - { steps }, - sessionId, - startTime, - ); - } catch (error) { - const message = extractErrorMessage(error); - return createErrorResponse( - ErrorCodes.MM_KNOWLEDGE_ERROR, - `Failed to retrieve steps: ${message}`, - { nSteps, scope }, - sessionId, - startTime, - ); - } -} - -/** - * Handles searching step records in knowledge store. - * - * @param input - Input with search query and filters. 
- * @param _options - Handler options (unused). - * @returns Response with matching steps. - */ -export async function handleKnowledgeSearch( - input: KnowledgeSearchInput, - _options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - const limit = input.limit ?? 20; - const scope: KnowledgeScope = input.scope ?? 'all'; - - try { - const matches = await knowledgeStore.searchSteps( - input.query, - limit, - scope, - sessionId, - input.filters, - ); - - return createSuccessResponse( - { - matches, - query: input.query, - }, - sessionId, - startTime, - ); - } catch (error) { - const message = extractErrorMessage(error); - return createErrorResponse( - ErrorCodes.MM_KNOWLEDGE_ERROR, - `Search failed: ${message}`, - { query: input.query, limit, scope }, - sessionId, - startTime, - ); - } -} - -/** - * Handles summarizing a session's steps as a recipe. - * - * @param input - Input with session scope or ID. - * @param _options - Handler options (unused). - * @returns Response with session summary. - */ -export async function handleKnowledgeSummarize( - input: KnowledgeSummarizeInput, - _options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const currentSessionId = sessionManager.getSessionId(); - - let targetSessionId: string | undefined; - - if (input.sessionId) { - targetSessionId = input.sessionId; - } else if (input.scope) { - if (input.scope === 'all') { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'Cannot summarize all sessions. 
Use scope="current" or provide a specific sessionId.', - { input }, - currentSessionId, - startTime, - ); - } else if (input.scope === 'current') { - targetSessionId = currentSessionId; - } else if (typeof input.scope === 'object' && 'sessionId' in input.scope) { - targetSessionId = input.scope.sessionId; - } - } else { - targetSessionId = currentSessionId; - } - - if (!targetSessionId) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'No sessionId provided and no active session', - { input }, - undefined, - startTime, - ); - } - - try { - const summary = await knowledgeStore.summarizeSession(targetSessionId); - - return createSuccessResponse( - summary, - targetSessionId, - startTime, - ); - } catch (error) { - const message = extractErrorMessage(error); - return createErrorResponse( - ErrorCodes.MM_KNOWLEDGE_ERROR, - `Summarize failed: ${message}`, - { sessionId: targetSessionId }, - targetSessionId, - startTime, - ); - } -} - -/** - * Handles listing recent sessions with metadata. - * - * @param input - Input with limit and filters. - * @param _options - Handler options (unused). - * @returns Response with session list. - */ -export async function handleKnowledgeSessions( - input: KnowledgeSessionsInput, - _options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - const limit = input.limit ?? 
10; - - try { - const sessions = await knowledgeStore.listSessions(limit, input.filters); - - return createSuccessResponse( - { sessions }, - sessionId, - startTime, - ); - } catch (error) { - const message = extractErrorMessage(error); - return createErrorResponse( - ErrorCodes.MM_KNOWLEDGE_ERROR, - `Failed to list sessions: ${message}`, - { limit, filters: input.filters }, - sessionId, - startTime, - ); - } -} diff --git a/src/mcp-server/tools/launch.test.ts b/src/mcp-server/tools/launch.test.ts deleted file mode 100644 index 81cab1b..0000000 --- a/src/mcp-server/tools/launch.test.ts +++ /dev/null @@ -1,384 +0,0 @@ -/** - * Unit tests for launch tool handler. - * - * Tests session launch with various states and error scenarios. - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -import { handleLaunch } from './launch.js'; -import type { ExtensionState } from '../../capabilities/types.js'; -import * as sessionManagerModule from '../session-manager.js'; -import type { SessionLaunchResult } from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils/mock-factories.js'; -import { ErrorCodes } from '../types'; -import type { LaunchInput } from '../types'; - -describe('handleLaunch', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - describe('successful launch', () => { - it('returns session info on successful launch', async () => { - const mockState: ExtensionState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: false, - currentScreen: 'home', - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - - const mockLaunchResult: SessionLaunchResult = { - sessionId: 'test-session-123', - extensionId: 'ext-123', - state: mockState, - }; - - const mockSessionManager = createMockSessionManager({ - hasActive: false, - launchResult: mockLaunchResult, - }); - vi.spyOn(sessionManagerModule, 
'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.sessionId).toBe('test-session-123'); - expect(result.result.extensionId).toBe('ext-123'); - expect(result.result.state).toStrictEqual(mockState); - expect(result.meta.sessionId).toBe('test-session-123'); - } - expect(mockSessionManager.launch).toHaveBeenCalledWith(input); - }); - - it('includes prerequisites in prod mode', async () => { - const mockState: ExtensionState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-456/home.html', - extensionId: 'ext-456', - isUnlocked: true, - currentScreen: 'home', - accountAddress: '0x1234', - networkName: 'Ethereum Mainnet', - chainId: 1, - balance: '10 ETH', - }; - - const mockLaunchResult: SessionLaunchResult = { - sessionId: 'prod-session-456', - extensionId: 'ext-456', - state: mockState, - }; - - const mockSessionManager = createMockSessionManager({ - hasActive: false, - launchResult: mockLaunchResult, - environmentMode: 'prod', - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.prerequisites).toBeDefined(); - expect(result.result.prerequisites).toHaveLength(3); - expect(result.result.prerequisites?.[0].step).toBe('Unlock Wallet'); - expect(result.result.prerequisites?.[1].step).toBe('Configure Network'); - expect(result.result.prerequisites?.[2].step).toBe('Set Up Accounts'); - } - }); - - it('does not include prerequisites in e2e mode', async () => { - const mockState: ExtensionState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: false, - currentScreen: 'home', - 
accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - - const mockLaunchResult: SessionLaunchResult = { - sessionId: 'e2e-session-789', - extensionId: 'ext-123', - state: mockState, - }; - - const mockSessionManager = createMockSessionManager({ - hasActive: false, - launchResult: mockLaunchResult, - environmentMode: 'e2e', - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.prerequisites).toBeUndefined(); - } - }); - - it('passes through all launch input parameters', async () => { - const mockState: ExtensionState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: false, - currentScreen: 'home', - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - - const mockLaunchResult: SessionLaunchResult = { - sessionId: 'custom-session', - extensionId: 'ext-123', - state: mockState, - }; - - const mockSessionManager = createMockSessionManager({ - hasActive: false, - launchResult: mockLaunchResult, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { - stateMode: 'custom', - fixturePreset: 'test-preset', - autoBuild: false, - slowMo: 100, - goal: 'Test send flow', - flowTags: ['send', 'transaction'], - tags: ['smoke-test'], - seedContracts: ['hst', 'nfts'], - ports: { - anvil: 8546, - fixtureServer: 12346, - }, - }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(true); - expect(mockSessionManager.launch).toHaveBeenCalledWith(input); - }); - }); - - describe('session already running', () => { - it('returns error when session already active', async () => { - const mockSessionManager = createMockSessionManager({ - 
hasActive: true, - sessionId: 'existing-session-999', - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_SESSION_ALREADY_RUNNING); - expect(result.error.message).toBe( - 'A session is already running. Call mm_cleanup first.', - ); - expect(result.error.details).toStrictEqual({ - currentSessionId: 'existing-session-999', - }); - expect(result.meta.sessionId).toBe('existing-session-999'); - } - expect(mockSessionManager.launch).not.toHaveBeenCalled(); - }); - }); - - describe('launch failures', () => { - it('returns port conflict error for EADDRINUSE', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(mockSessionManager, 'launch').mockRejectedValue( - new Error('listen EADDRINUSE: address already in use :::8545'), - ); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE); - expect(result.error.message).toContain('Port conflict'); - expect(result.error.message).toContain('EADDRINUSE'); - expect(result.error.details).toStrictEqual({ input }); - } - }); - - it('returns port conflict error for port keyword in message', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(mockSessionManager, 'launch').mockRejectedValue( - new Error('port 8545 is already in use'), - ); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await 
handleLaunch(input); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE); - expect(result.error.message).toContain('Port conflict'); - } - }); - - it('returns generic launch failed error for other errors', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(mockSessionManager, 'launch').mockRejectedValue( - new Error('Browser failed to start'), - ); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED); - expect(result.error.message).toContain('Launch failed'); - expect(result.error.message).toContain('Browser failed to start'); - expect(result.error.details).toStrictEqual({ input }); - } - }); - - it('handles non-Error exceptions', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(mockSessionManager, 'launch').mockRejectedValue('string error'); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED); - expect(result.error.message).toContain('Launch failed'); - } - }); - }); - - describe('response metadata', () => { - it('includes timestamp in response', async () => { - const mockState: ExtensionState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: false, - currentScreen: 'home', - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - - const mockLaunchResult: SessionLaunchResult = { - 
sessionId: 'test-session-123', - extensionId: 'ext-123', - state: mockState, - }; - - const mockSessionManager = createMockSessionManager({ - hasActive: false, - launchResult: mockLaunchResult, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.meta.timestamp).toBeDefined(); - expect(typeof result.meta.timestamp).toBe('string'); - expect(new Date(result.meta.timestamp).getTime()).toBeGreaterThan(0); - } - }); - - it('includes durationMs in response', async () => { - const mockState: ExtensionState = { - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: false, - currentScreen: 'home', - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }; - - const mockLaunchResult: SessionLaunchResult = { - sessionId: 'test-session-123', - extensionId: 'ext-123', - state: mockState, - }; - - const mockSessionManager = createMockSessionManager({ - hasActive: false, - launchResult: mockLaunchResult, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - const input: LaunchInput = { stateMode: 'default' }; - - const result = await handleLaunch(input); - - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.meta.durationMs).toBeGreaterThanOrEqual(0); - expect(typeof result.meta.durationMs).toBe('number'); - } - }); - }); -}); diff --git a/src/mcp-server/tools/launch.ts b/src/mcp-server/tools/launch.ts deleted file mode 100644 index 11b5d3c..0000000 --- a/src/mcp-server/tools/launch.ts +++ /dev/null @@ -1,93 +0,0 @@ -import { getSessionManager } from '../session-manager.js'; -import type { - LaunchInput, - LaunchResult, - LaunchPrerequisite, - McpResponse, - HandlerOptions, -} from '../types'; -import { ErrorCodes 
} from '../types'; -import { - createSuccessResponse, - createErrorResponse, - extractErrorMessage, -} from '../utils'; - -const PROD_MODE_PREREQUISITES: LaunchPrerequisite[] = [ - { - step: 'Unlock Wallet', - description: - 'The wallet must be unlocked before interacting with it. Use the extension UI to enter your password.', - }, - { - step: 'Configure Network', - description: - 'Ensure the correct network is selected (e.g., Ethereum Mainnet, Sepolia, or custom network).', - }, - { - step: 'Set Up Accounts', - description: - 'Import or create accounts as needed. Ensure the active account has sufficient funds for transactions.', - }, -]; - -/** - * Handles the launch tool request to start a browser session. - * - * @param input - The launch configuration parameters. - * @param _options - Handler options (unused). - * @returns Response with session info or error. - */ -export async function handleLaunch( - input: LaunchInput, - _options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - - try { - if (sessionManager.hasActiveSession()) { - return createErrorResponse( - ErrorCodes.MM_SESSION_ALREADY_RUNNING, - 'A session is already running. 
Call mm_cleanup first.', - { currentSessionId: sessionManager.getSessionId() }, - sessionManager.getSessionId(), - startTime, - ); - } - - const result = await sessionManager.launch(input); - - const isProdMode = sessionManager.getEnvironmentMode() === 'prod'; - const launchResult: LaunchResult = { - ...result, - ...(isProdMode && { prerequisites: PROD_MODE_PREREQUISITES }), - }; - - return createSuccessResponse( - launchResult, - result.sessionId, - startTime, - ); - } catch (error) { - const message = extractErrorMessage(error); - - if (message.includes('EADDRINUSE') || message.includes('port')) { - return createErrorResponse( - ErrorCodes.MM_PORT_IN_USE, - `Port conflict: ${message}`, - { input }, - undefined, - startTime, - ); - } - - return createErrorResponse( - ErrorCodes.MM_LAUNCH_FAILED, - `Launch failed: ${message}`, - { input }, - undefined, - startTime, - ); - } -} diff --git a/src/mcp-server/tools/navigation.test.ts b/src/mcp-server/tools/navigation.test.ts deleted file mode 100644 index 84cbdca..0000000 --- a/src/mcp-server/tools/navigation.test.ts +++ /dev/null @@ -1,787 +0,0 @@ -/** - * Unit tests for navigation tool handlers. - * - * Tests handleNavigate, handleWaitForNotification, handleSwitchToTab, and handleCloseTab - * with various navigation targets, tab operations, and error scenarios. 
- */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { - handleNavigate, - handleWaitForNotification, - handleSwitchToTab, - handleCloseTab, -} from './navigation'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager, createMockPage } from '../test-utils'; -import { ErrorCodes } from '../types'; - -describe('navigation', () => { - let mockSessionManager: ReturnType; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - // Mock knowledge store to prevent "not initialized" errors - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('handleNavigate', () => { - describe('with home screen', () => { - it('navigates to home screen', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'navigateToHome').mockResolvedValue( - undefined, - ); - - // Act - const result = await handleNavigate({ screen: 'home' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.navigated).toBe(true); - 
expect(result.result.currentUrl).toBe( - 'chrome-extension://ext-123/home.html', - ); - } - expect(mockSessionManager.navigateToHome).toHaveBeenCalled(); - }); - }); - - describe('with settings screen', () => { - it('navigates to settings screen', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/settings.html', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'navigateToSettings').mockResolvedValue( - undefined, - ); - - // Act - const result = await handleNavigate({ screen: 'settings' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.navigated).toBe(true); - expect(result.result.currentUrl).toBe( - 'chrome-extension://ext-123/settings.html', - ); - } - expect(mockSessionManager.navigateToSettings).toHaveBeenCalled(); - }); - }); - - describe('with notification screen', () => { - it('navigates to notification screen', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/notification.html', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn( - mockSessionManager, - 'navigateToNotification', - ).mockResolvedValue(undefined); - - // Act - const result = await handleNavigate({ screen: 'notification' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.navigated).toBe(true); - expect(result.result.currentUrl).toBe( - 'chrome-extension://ext-123/notification.html', - ); - } - expect(mockSessionManager.navigateToNotification).toHaveBeenCalled(); - }); - }); - - describe('with URL screen', () => { - it('navigates to custom URL', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue('https://app.uniswap.org'); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - 
vi.spyOn(mockSessionManager, 'navigateToUrl').mockResolvedValue( - mockPage, - ); - - // Act - const result = await handleNavigate({ - screen: 'url', - url: 'https://app.uniswap.org', - }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.navigated).toBe(true); - expect(result.result.currentUrl).toBe('https://app.uniswap.org'); - } - expect(mockSessionManager.navigateToUrl).toHaveBeenCalledWith( - 'https://app.uniswap.org', - ); - }); - - it('returns error when URL is missing', async () => { - // Act - const result = await handleNavigate({ screen: 'url' } as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('url is required'); - } - }); - }); - - describe('with invalid screen', () => { - it('returns error for unknown screen', async () => { - // Act - const result = await handleNavigate({ screen: 'invalid' } as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain('Unknown screen'); - } - }); - }); - - describe('with navigation errors', () => { - it('returns error when navigation fails', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'navigateToHome').mockRejectedValue( - new Error('Navigation failed'), - ); - - // Act - const result = await handleNavigate({ screen: 'home' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NAVIGATION_FAILED); - } - }); - - it('returns error when page closed during navigation', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'navigateToSettings').mockRejectedValue( - new Error('Target page, context or browser has been closed'), - ); - - // Act - const result = await handleNavigate({ screen: 'settings' }); - - // Assert - expect(result.ok).toBe(false); - if 
(!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NAVIGATION_FAILED); - expect(result.error.message).toContain( - 'Page closed during navigation', - ); - } - }); - }); - - describe('without active session', () => { - it('returns error when no session active', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleNavigate({ screen: 'home' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - }); - - describe('handleWaitForNotification', () => { - describe('with default timeout', () => { - it('waits for notification popup', async () => { - // Arrange - const mockNotificationPage = createMockPage(); - vi.spyOn(mockNotificationPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/notification.html', - ); - vi.spyOn( - mockSessionManager, - 'waitForNotificationPage', - ).mockResolvedValue(mockNotificationPage); - - // Act - const result = await handleWaitForNotification({}); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.found).toBe(true); - expect(result.result.pageUrl).toBe( - 'chrome-extension://ext-123/notification.html', - ); - } - expect(mockSessionManager.waitForNotificationPage).toHaveBeenCalledWith( - 15000, - ); - }); - }); - - describe('with custom timeout', () => { - it('uses custom timeout value', async () => { - // Arrange - const mockNotificationPage = createMockPage(); - vi.spyOn(mockNotificationPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/notification.html', - ); - vi.spyOn( - mockSessionManager, - 'waitForNotificationPage', - ).mockResolvedValue(mockNotificationPage); - - // Act - const result = await handleWaitForNotification({ timeoutMs: 30000 }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.found).toBe(true); - } - 
expect(mockSessionManager.waitForNotificationPage).toHaveBeenCalledWith( - 30000, - ); - }); - }); - - describe('with timeout errors', () => { - it('returns error when notification not found within timeout', async () => { - // Arrange - vi.spyOn( - mockSessionManager, - 'waitForNotificationPage', - ).mockRejectedValue(new Error('Timeout 15000ms exceeded')); - - // Act - const result = await handleWaitForNotification({}); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NOTIFICATION_TIMEOUT); - } - }); - - it('returns error when browser closed during wait', async () => { - // Arrange - vi.spyOn( - mockSessionManager, - 'waitForNotificationPage', - ).mockRejectedValue(new Error('browser has been closed')); - - // Act - const result = await handleWaitForNotification({}); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NOTIFICATION_TIMEOUT); - expect(result.error.message).toContain( - 'Browser closed while waiting for notification', - ); - } - }); - }); - - describe('without active session', () => { - it('returns error when no session active', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleWaitForNotification({}); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - }); - - describe('handleSwitchToTab', () => { - describe('with role matching', () => { - it('switches to tab by role', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockExtensionPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue( - undefined, - ); - - const mockDappPage = createMockPage(); - vi.spyOn(mockDappPage, 'url').mockReturnValue( - 
'https://app.uniswap.org', - ); - vi.spyOn(mockDappPage, 'bringToFront').mockResolvedValue(undefined); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockDappPage); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - { - page: mockDappPage, - role: 'dapp', - url: 'https://app.uniswap.org', - }, - ]); - vi.spyOn(mockSessionManager, 'setActivePage'); - - // Act - const result = await handleSwitchToTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.switched).toBe(true); - expect(result.result.activeTab.role).toBe('dapp'); - expect(result.result.activeTab.url).toBe('https://app.uniswap.org'); - } - expect(mockDappPage.bringToFront).toHaveBeenCalled(); - expect(mockSessionManager.setActivePage).toHaveBeenCalledWith( - mockDappPage, - ); - }); - }); - - describe('with URL matching', () => { - it('switches to tab by URL prefix', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockExtensionPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue( - undefined, - ); - - const mockDappPage = createMockPage(); - vi.spyOn(mockDappPage, 'url').mockReturnValue( - 'https://app.uniswap.org/swap', - ); - vi.spyOn(mockDappPage, 'bringToFront').mockResolvedValue(undefined); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockDappPage); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - { - page: mockDappPage, - role: 'dapp', - url: 'https://app.uniswap.org/swap', - }, - ]); - vi.spyOn(mockSessionManager, 'setActivePage'); - - // Act - const result = await handleSwitchToTab({ - url: 'https://app.uniswap.org', - }); - - // Assert - 
expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.switched).toBe(true); - expect(result.result.activeTab.url).toBe( - 'https://app.uniswap.org/swap', - ); - } - expect(mockDappPage.bringToFront).toHaveBeenCalled(); - }); - }); - - describe('with invalid input', () => { - it('returns error when neither role nor url provided', async () => { - // Act - const result = await handleSwitchToTab({} as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain( - 'Either role or url must be provided', - ); - } - }); - }); - - describe('with tab not found', () => { - it('returns error when no matching tab found by role', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ]); - - // Act - const result = await handleSwitchToTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND); - expect(result.error.message).toContain('No tab found matching: dapp'); - } - }); - - it('returns error when no matching tab found by URL', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ]); - - // Act - const result = await handleSwitchToTab({ - url: 'https://app.uniswap.org', - }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND); - } - }); - }); - - describe('without active session', () => { - it('returns error when no session active', async () => { - // Arrange - vi.spyOn(mockSessionManager, 
'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleSwitchToTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - }); - - describe('handleCloseTab', () => { - describe('with role matching', () => { - it('closes tab by role', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockExtensionPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - - const mockDappPage = createMockPage(); - vi.spyOn(mockDappPage, 'url').mockReturnValue( - 'https://app.uniswap.org', - ); - vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue( - mockExtensionPage, - ); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - { - page: mockDappPage, - role: 'dapp', - url: 'https://app.uniswap.org', - }, - ]); - - // Act - const result = await handleCloseTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.closed).toBe(true); - expect(result.result.closedUrl).toBe('https://app.uniswap.org'); - } - expect(mockDappPage.close).toHaveBeenCalled(); - }); - }); - - describe('with URL matching', () => { - it('closes tab by URL prefix', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockExtensionPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - - const mockDappPage = createMockPage(); - vi.spyOn(mockDappPage, 'url').mockReturnValue( - 'https://app.uniswap.org/swap', - ); - vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue( - mockExtensionPage, - ); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { 
- page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - { - page: mockDappPage, - role: 'dapp', - url: 'https://app.uniswap.org/swap', - }, - ]); - - // Act - const result = await handleCloseTab({ url: 'https://app.uniswap.org' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.closed).toBe(true); - expect(result.result.closedUrl).toBe('https://app.uniswap.org/swap'); - } - expect(mockDappPage.close).toHaveBeenCalled(); - }); - }); - - describe('with active tab closure', () => { - it('switches to extension tab when closing active tab', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockExtensionPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue( - undefined, - ); - - const mockDappPage = createMockPage(); - vi.spyOn(mockDappPage, 'url').mockReturnValue( - 'https://app.uniswap.org', - ); - vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockDappPage); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - { - page: mockDappPage, - role: 'dapp', - url: 'https://app.uniswap.org', - }, - ]); - vi.spyOn(mockSessionManager, 'setActivePage'); - - // Act - const result = await handleCloseTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.closed).toBe(true); - } - expect(mockExtensionPage.bringToFront).toHaveBeenCalled(); - expect(mockSessionManager.setActivePage).toHaveBeenCalledWith( - mockExtensionPage, - ); - expect(mockDappPage.close).toHaveBeenCalled(); - }); - - it('does not switch when closing non-active tab', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - 
vi.spyOn(mockExtensionPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockExtensionPage, 'bringToFront').mockResolvedValue( - undefined, - ); - - const mockDappPage = createMockPage(); - vi.spyOn(mockDappPage, 'url').mockReturnValue( - 'https://app.uniswap.org', - ); - vi.spyOn(mockDappPage, 'close').mockResolvedValue(undefined); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue( - mockExtensionPage, - ); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - { - page: mockDappPage, - role: 'dapp', - url: 'https://app.uniswap.org', - }, - ]); - vi.spyOn(mockSessionManager, 'setActivePage'); - - // Act - const result = await handleCloseTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(true); - expect(mockExtensionPage.bringToFront).not.toHaveBeenCalled(); - expect(mockSessionManager.setActivePage).not.toHaveBeenCalled(); - expect(mockDappPage.close).toHaveBeenCalled(); - }); - }); - - describe('with invalid input', () => { - it('returns error when neither role nor url provided', async () => { - // Act - const result = await handleCloseTab({} as any); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); - expect(result.error.message).toContain( - 'Either role or url must be provided', - ); - } - }); - }); - - describe('with tab not found', () => { - it('returns error when no matching tab found by role', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ]); - - // Act - const result = await handleCloseTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - 
expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND); - expect(result.error.message).toContain('No tab found matching: dapp'); - } - }); - - it('returns error when no matching tab found by URL', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ]); - - // Act - const result = await handleCloseTab({ url: 'https://app.uniswap.org' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND); - } - }); - }); - - describe('without active session', () => { - it('returns error when no session active', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleCloseTab({ role: 'dapp' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - }); -}); diff --git a/src/mcp-server/tools/navigation.ts b/src/mcp-server/tools/navigation.ts deleted file mode 100644 index 83a59a2..0000000 --- a/src/mcp-server/tools/navigation.ts +++ /dev/null @@ -1,329 +0,0 @@ -import { DEFAULT_INTERACTION_TIMEOUT_MS } from '../constants.js'; -import { getSessionManager } from '../session-manager.js'; -import { - classifyNavigationError, - classifyTabError, - classifyNotificationError, -} from './error-classification.js'; -import { runTool } from './run-tool.js'; -import type { - NavigateInput, - NavigateResult, - WaitForNotificationInput, - WaitForNotificationResult, - SwitchToTabInput, - SwitchToTabResult, - CloseTabInput, - CloseTabResult, - McpResponse, - HandlerOptions, -} from '../types'; -import { ErrorCodes } from '../types'; -import { createErrorResponse } from '../utils'; - -/** - * Handles navigation to a specific screen or URL. 
- * - * @param input The navigate input containing target screen and optional URL - * @param options Optional handler configuration - * @returns Promise resolving to navigate result with current URL information - */ -export async function handleNavigate( - input: NavigateInput, - options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - - if (input.screen === 'url' && !input.url) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'url is required when screen is "url"', - { input }, - sessionId, - startTime, - ); - } - - const validScreens = ['home', 'settings', 'url', 'notification']; - if (!validScreens.includes(input.screen)) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - `Unknown screen: ${String(input.screen)}`, - { input }, - sessionId, - startTime, - ); - } - - return runTool({ - toolName: 'mm_navigate', - input, - options, - - /** - * Executes the navigation action to the target screen. - * - * @param context The tool execution context containing page and reference map - * @returns Promise resolving to navigate result with success status and URL - */ - execute: async (context) => { - switch (input.screen) { - case 'home': - await sessionManager.navigateToHome(); - break; - case 'settings': - await sessionManager.navigateToSettings(); - break; - case 'url': - await sessionManager.navigateToUrl(input.url as string); - break; - case 'notification': - await sessionManager.navigateToNotification(); - break; - default: - throw new Error(`Unsupported screen: ${String(input.screen)}`); - } - - return { - navigated: true, - currentUrl: context.page.url(), - }; - }, - - classifyError: classifyNavigationError, - - /** - * Sanitizes input for knowledge store recording. 
- * - * @returns Sanitized input object with screen and URL information - */ - sanitizeInputForRecording: () => ({ - screen: input.screen, - url: input.url, - }), - }); -} - -/** - * Handles waiting for a notification popup to appear. - * - * @param input The wait input containing timeout options - * @param options Optional handler configuration - * @returns Promise resolving to wait result with notification page URL - */ -export async function handleWaitForNotification( - input: WaitForNotificationInput, - options?: HandlerOptions, -): Promise> { - const sessionManager = getSessionManager(); - const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS; - - return runTool({ - toolName: 'mm_wait_for_notification', - input, - options, - - /** - * Executes the wait action for notification popup. - * - * @returns Promise resolving to wait result with notification page URL - */ - execute: async () => { - const notificationPage = - await sessionManager.waitForNotificationPage(timeoutMs); - const pageUrl = notificationPage.url(); - - return { - found: true, - pageUrl, - }; - }, - - classifyError: classifyNotificationError, - - /** - * Sanitizes input for knowledge store recording. - * - * @returns Sanitized input object with timeout information - */ - sanitizeInputForRecording: () => ({ timeoutMs }), - }); -} - -/** - * Handles switching to a different tab by role or URL. 
- * - * @param input The switch input containing tab role or URL to match - * @param options Optional handler configuration - * @returns Promise resolving to switch result with active tab information - */ -export async function handleSwitchToTab( - input: SwitchToTabInput, - options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - - if (!input.role && !input.url) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'Either role or url must be provided', - { input }, - sessionId, - startTime, - ); - } - - return runTool({ - toolName: 'mm_switch_to_tab', - input, - options, - - /** - * Executes the tab switch action. - * - * @param _context The tool execution context containing page and reference map - * @returns Promise resolving to switch result with active tab information - */ - execute: async (_context) => { - const trackedPages = sessionManager.getTrackedPages(); - const targetPage = trackedPages.find((trackedPage) => { - if (input.role) { - return trackedPage.role === input.role; - } - if (input.url) { - return trackedPage.url.startsWith(input.url); - } - return false; - }); - - if (!targetPage) { - const availableTabs = trackedPages.map((trackedPage) => ({ - role: trackedPage.role, - url: trackedPage.url, - })); - throw new Error( - `No tab found matching: ${input.role ?? input.url}. Available tabs: ${JSON.stringify(availableTabs)}`, - ); - } - - await targetPage.page.bringToFront(); - sessionManager.setActivePage(targetPage.page); - - const updatedTrackedPages = sessionManager.getTrackedPages(); - const activeTabInfo = updatedTrackedPages.find( - (trackedPage) => trackedPage.page === targetPage.page, - ); - - return { - switched: true, - activeTab: { - role: activeTabInfo?.role ?? 'other', - url: targetPage.page.url(), - }, - }; - }, - - classifyError: classifyTabError, - - /** - * Sanitizes input for knowledge store recording. 
- * - * @returns Sanitized input object with role and URL information - */ - sanitizeInputForRecording: () => ({ - role: input.role, - url: input.url, - }), - }); -} - -/** - * Handles closing a tab by role or URL. - * - * @param input The close input containing tab role or URL to match - * @param options Optional handler configuration - * @returns Promise resolving to close result with closed tab URL - */ -export async function handleCloseTab( - input: CloseTabInput, - options?: HandlerOptions, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - - if (!input.role && !input.url) { - return createErrorResponse( - ErrorCodes.MM_INVALID_INPUT, - 'Either role or url must be provided', - { input }, - sessionId, - startTime, - ); - } - - return runTool({ - toolName: 'mm_close_tab', - input, - options, - - /** - * Executes the tab close action. - * - * @param context The tool execution context containing page and reference map - * @returns Promise resolving to close result with closed tab URL - */ - execute: async (context) => { - const trackedPages = sessionManager.getTrackedPages(); - const targetPage = trackedPages.find((trackedPage) => { - if (input.role) { - return trackedPage.role === input.role; - } - if (input.url) { - return trackedPage.url.startsWith(input.url); - } - return false; - }); - - if (!targetPage) { - throw new Error(`No tab found matching: ${input.role ?? 
input.url}`); - } - - const closedUrl = targetPage.url; - - const currentActivePage = context.page; - if (targetPage.page === currentActivePage) { - const extensionPage = trackedPages.find( - (trackedPage) => trackedPage.role === 'extension', - ); - if (extensionPage) { - await extensionPage.page.bringToFront(); - sessionManager.setActivePage(extensionPage.page); - } - } - - await targetPage.page.close(); - - return { - closed: true, - closedUrl, - }; - }, - - classifyError: classifyTabError, - - /** - * Sanitizes input for knowledge store recording. - * - * @returns Sanitized input object with role and URL information - */ - sanitizeInputForRecording: () => ({ - role: input.role, - url: input.url, - }), - }); -} diff --git a/src/mcp-server/tools/registry.test.ts b/src/mcp-server/tools/registry.test.ts deleted file mode 100644 index 084c489..0000000 --- a/src/mcp-server/tools/registry.test.ts +++ /dev/null @@ -1,156 +0,0 @@ -import { describe, it, expect } from 'vitest'; - -import { - getToolHandler, - hasToolHandler, - buildToolHandlersRecord, - toolHandlers, -} from './registry.js'; - -describe('tool registry', () => { - describe('getToolHandler', () => { - it('returns handler for prefixed tool name', () => { - const handler = getToolHandler('mm_launch'); - - expect(handler).toBeDefined(); - expect(typeof handler).toBe('function'); - }); - - it('returns handler for base tool name', () => { - const handler = getToolHandler('launch'); - - expect(handler).toBeDefined(); - expect(typeof handler).toBe('function'); - }); - - it('returns undefined for unknown tool', () => { - const handler = getToolHandler('mm_unknown_tool'); - - expect(handler).toBeUndefined(); - }); - - it('returns undefined for empty string', () => { - const handler = getToolHandler(''); - - expect(handler).toBeUndefined(); - }); - - it('returns different handlers for different tools', () => { - const launchHandler = getToolHandler('mm_launch'); - const cleanupHandler = getToolHandler('mm_cleanup'); 
- - expect(launchHandler).not.toBe(cleanupHandler); - }); - }); - - describe('hasToolHandler', () => { - it('returns true for existing prefixed tool', () => { - const result = hasToolHandler('mm_click'); - - expect(result).toBe(true); - }); - - it('returns true for existing base tool', () => { - const result = hasToolHandler('click'); - - expect(result).toBe(true); - }); - - it('returns false for non-existent tool', () => { - const result = hasToolHandler('mm_nonexistent'); - - expect(result).toBe(false); - }); - - it('returns false for empty string', () => { - const result = hasToolHandler(''); - - expect(result).toBe(false); - }); - }); - - describe('buildToolHandlersRecord', () => { - it('returns record with prefixed tool names', () => { - const handlers = buildToolHandlersRecord(); - - expect(handlers.mm_launch).toBeDefined(); - expect(handlers.mm_cleanup).toBeDefined(); - expect(handlers.mm_click).toBeDefined(); - expect(handlers.mm_type).toBeDefined(); - }); - - it('returns fresh record on each call', () => { - const handlers1 = buildToolHandlersRecord(); - const handlers2 = buildToolHandlersRecord(); - - expect(handlers1).not.toBe(handlers2); - expect(handlers1).toStrictEqual(handlers2); - }); - - it('includes all 27 tools', () => { - const handlers = buildToolHandlersRecord(); - - expect(Object.keys(handlers)).toHaveLength(27); - }); - - it('all handlers are functions', () => { - const handlers = buildToolHandlersRecord(); - - for (const handler of Object.values(handlers)) { - expect(typeof handler).toBe('function'); - } - }); - }); - - describe('toolHandlers export', () => { - it('exports pre-built handlers record', () => { - expect(toolHandlers).toBeDefined(); - expect(typeof toolHandlers).toBe('object'); - }); - - it('contains all expected tools', () => { - const expectedTools = [ - 'mm_build', - 'mm_launch', - 'mm_cleanup', - 'mm_get_state', - 'mm_navigate', - 'mm_wait_for_notification', - 'mm_switch_to_tab', - 'mm_close_tab', - 'mm_list_testids', - 
'mm_accessibility_snapshot', - 'mm_describe_screen', - 'mm_screenshot', - 'mm_click', - 'mm_type', - 'mm_wait_for', - 'mm_knowledge_last', - 'mm_knowledge_search', - 'mm_knowledge_summarize', - 'mm_knowledge_sessions', - 'mm_seed_contract', - 'mm_seed_contracts', - 'mm_get_contract_address', - 'mm_list_contracts', - 'mm_run_steps', - 'mm_set_context', - 'mm_get_context', - 'mm_clipboard', - ]; - - for (const tool of expectedTools) { - expect(toolHandlers[tool]).toBeDefined(); - expect(typeof toolHandlers[tool]).toBe('function'); - } - }); - - it('matches buildToolHandlersRecord output', () => { - const freshHandlers = buildToolHandlersRecord(); - - expect(Object.keys(toolHandlers)).toStrictEqual( - Object.keys(freshHandlers), - ); - }); - }); -}); diff --git a/src/mcp-server/tools/registry.ts b/src/mcp-server/tools/registry.ts deleted file mode 100644 index 3b67886..0000000 --- a/src/mcp-server/tools/registry.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { buildToolHandlersRecord } from './definitions.js'; - -export { - getToolHandler, - hasToolHandler, - buildToolHandlersRecord, -} from './definitions.js'; -export type { ToolHandler } from './batch.js'; - -export const toolHandlers = buildToolHandlersRecord(); diff --git a/src/mcp-server/tools/run-tool.test.ts b/src/mcp-server/tools/run-tool.test.ts deleted file mode 100644 index 3592062..0000000 --- a/src/mcp-server/tools/run-tool.test.ts +++ /dev/null @@ -1,958 +0,0 @@ -/** - * Unit tests for the generic tool execution wrapper (runTool). - * - * Tests execution flow, observation collection policies, knowledge store recording, - * error classification, timeout handling, and page closure detection. 
- */ - -import type { Page } from '@playwright/test'; -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { runTool } from './run-tool'; -import type { ToolExecutionConfig } from './run-tool'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils'; -import { ErrorCodes } from '../types'; -import * as helpersModule from './helpers.js'; - -describe('runTool', () => { - let mockSessionManager: ReturnType; - let mockKnowledgeStore: { - recordStep: ReturnType; - getLastSteps: ReturnType; - searchSteps: ReturnType; - summarizeSession: ReturnType; - listSessions: ReturnType; - generatePriorKnowledge: ReturnType; - writeSessionMetadata: ReturnType; - }; - let mockPage: Page; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - environmentMode: 'e2e', - }); - mockPage = { - url: () => 'chrome-extension://test/home.html', - isClosed: () => false, - } as unknown as Page; - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getRefMap').mockReturnValue(new Map()); - - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - mockKnowledgeStore = { - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - }; - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue( - mockKnowledgeStore as any, - ); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('basic 
execution', () => { - it('executes tool and returns success response', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig<{ value: string }, string> = { - toolName: 'mm_test_tool', - input: { value: 'test-input' }, - execute: vi.fn().mockResolvedValue('success'), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result).toBe('success'); - expect(result.meta.sessionId).toBe('test-session-123'); - expect(result.meta.durationMs).toBeGreaterThanOrEqual(0); - } - }); - - it('passes context to execute function', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const executeFn = vi.fn().mockResolvedValue({ result: 'ok' }); - const config: ToolExecutionConfig<{ value: string }, { result: string }> = - { - toolName: 'mm_test_tool', - input: { value: 'test' }, - execute: executeFn, - }; - - // Act - await runTool(config); - - // Assert - expect(executeFn).toHaveBeenCalledWith({ - sessionId: 'test-session-123', - page: mockPage, - refMap: expect.any(Map), - startTime: expect.any(Number), - }); - }); - - it('handles ToolExecuteResult with custom observation', async () => { - // Arrange - const customObservation = { - state: { isLoaded: true } as any, - testIds: [{ testId: 'custom', tag: 'div', text: '', visible: true }], - a11y: { nodes: [] }, - }; - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - observationPolicy: 'custom', - execute: vi.fn().mockResolvedValue({ - result: { data: 'test' }, - observation: customObservation, - }), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result).toStrictEqual({ data: 'test' }); - 
} - expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith( - expect.objectContaining({ - observation: customObservation, - }), - ); - }); - }); - - describe('session validation', () => { - it('returns error when no active session and requiresSession is true', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - requiresSession: true, - execute: vi.fn(), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - expect(result.error.message).toBe( - 'No active session. Call launch first.', - ); - } - expect(config.execute).not.toHaveBeenCalled(); - }); - - it('executes tool when no active session but requiresSession is false', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - const executeFn = vi.fn().mockResolvedValue({ done: true }); - const config: ToolExecutionConfig = { - toolName: 'mm_build', - input: {}, - requiresSession: false, - execute: executeFn, - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(true); - expect(executeFn).toHaveBeenCalled(); - }); - - it('defaults requiresSession to true when not specified', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - const config: ToolExecutionConfig = { - toolName: 'mm_click', - input: {}, - execute: vi.fn(), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - }); - - describe('observation policies', () => { - describe('policy: none', () => { - it('collects minimal observation on success', async () => { - // Arrange - const collectObservationSpy = vi - 
.spyOn(helpersModule, 'collectObservation') - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - observationPolicy: 'none', - execute: vi.fn().mockResolvedValue({}), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'minimal'); - }); - }); - - describe('policy: default', () => { - it('collects full observation on success', async () => { - // Arrange - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - observationPolicy: 'default', - execute: vi.fn().mockResolvedValue({}), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'full'); - }); - }); - - describe('policy: failures', () => { - it('collects minimal observation on success', async () => { - // Arrange - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - observationPolicy: 'failures', - execute: vi.fn().mockResolvedValue({}), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'minimal'); - }); - - it('collects full observation on failure', async () => { - // Arrange - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - observationPolicy: 'failures', - execute: vi.fn().mockRejectedValue(new Error('Test failure')), - 
}; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'full'); - }); - }); - - describe('policy: custom', () => { - it('uses observation from execute result', async () => { - // Arrange - const customObservation = { - state: { isLoaded: true } as any, - testIds: [], - a11y: { - nodes: [{ ref: 'e1', role: 'button', name: 'Test', path: [] }], - }, - }; - const collectObservationSpy = vi.spyOn( - helpersModule, - 'collectObservation', - ); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - observationPolicy: 'custom', - execute: vi.fn().mockResolvedValue({ - result: { data: 'test' }, - observation: customObservation, - }), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).not.toHaveBeenCalled(); - expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith( - expect.objectContaining({ - observation: customObservation, - }), - ); - }); - }); - - it('uses options.observationPolicy over config.observationPolicy', async () => { - // Arrange - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - observationPolicy: 'default', - options: { observationPolicy: 'none' }, - execute: vi.fn().mockResolvedValue({}), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'minimal'); - }); - - it('skips observation collection when requiresSession is false', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - const collectObservationSpy = vi.spyOn( - helpersModule, - 'collectObservation', - ); - const config: ToolExecutionConfig = { - toolName: 'mm_build', - input: {}, - requiresSession: false, - observationPolicy: 'default', - execute: 
vi.fn().mockResolvedValue({}), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).not.toHaveBeenCalled(); - }); - }); - - describe('knowledge store recording', () => { - it('records successful step with all parameters', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig< - { testId: string }, - { clicked: boolean } - > = { - toolName: 'mm_click', - input: { testId: 'send-button' }, - execute: vi.fn().mockResolvedValue({ clicked: true }), - getTarget: (input) => ({ testId: input.testId }), - }; - - // Act - await runTool(config); - - // Assert - expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith({ - sessionId: 'test-session-123', - toolName: 'mm_click', - input: { testId: 'send-button' }, - target: { testId: 'send-button' }, - outcome: { ok: true }, - observation: expect.any(Object), - durationMs: expect.any(Number), - context: 'e2e', - }); - }); - - it('records failed step with error details', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig<{ testId: string }, object> = { - toolName: 'mm_click', - input: { testId: 'missing-button' }, - execute: vi.fn().mockRejectedValue(new Error('Element not found')), - getTarget: (input) => ({ testId: input.testId }), - classifyError: () => ({ - code: 'MM_TARGET_NOT_FOUND', - message: 'Element not found', - }), - }; - - // Act - await runTool(config); - - // Assert - expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith({ - sessionId: 'test-session-123', - toolName: 'mm_click', - input: { testId: 'missing-button' }, - target: { testId: 'missing-button' }, - outcome: { - ok: false, - error: { code: 'MM_TARGET_NOT_FOUND', message: 'Element not found' }, - }, - observation: expect.any(Object), - 
durationMs: expect.any(Number), - context: 'e2e', - }); - }); - - it('uses sanitizeInputForRecording when provided', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig< - { action: string; text: string }, - { success: boolean } - > = { - toolName: 'mm_clipboard', - input: { action: 'write', text: 'secret-srp-phrase' }, - execute: vi.fn().mockResolvedValue({ success: true }), - sanitizeInputForRecording: (input) => ({ - action: input.action, - textLength: input.text.length, - }), - }; - - // Act - await runTool(config); - - // Assert - expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith( - expect.objectContaining({ - input: { action: 'write', textLength: 17 }, - }), - ); - }); - - it('skips recording when sessionId is undefined', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined); - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true); - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - execute: vi.fn().mockResolvedValue({}), - }; - - // Act - await runTool(config); - - // Assert - expect(mockKnowledgeStore.recordStep).not.toHaveBeenCalled(); - }); - }); - - describe('error classification', () => { - it('uses classifyError when provided', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_click', - input: {}, - execute: vi - .fn() - .mockRejectedValue(new Error('Timeout waiting for selector')), - classifyError: () => ({ - code: 'MM_WAIT_TIMEOUT', - message: 'Element wait timeout', - }), - }; - - // Act - const result = await 
runTool(config); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe('MM_WAIT_TIMEOUT'); - expect(result.error.message).toBe('Element wait timeout'); - } - }); - - it('generates default error code when classifyError not provided', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_my_tool', - input: {}, - execute: vi.fn().mockRejectedValue(new Error('Something went wrong')), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe('MM_MY_TOOL_FAILED'); - expect(result.error.message).toBe('Something went wrong'); - } - }); - - it('removes MM_ prefix when generating default error code', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_click', - input: {}, - execute: vi.fn().mockRejectedValue(new Error('Click failed')), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe('MM_CLICK_FAILED'); - } - }); - }); - - describe('error handling', () => { - it('returns error response when execute throws', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig<{ testId: string }, object> = { - toolName: 'mm_click', - input: { testId: 'test-button' }, - execute: vi.fn().mockRejectedValue(new Error('Execution failed')), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - 
expect(result.error.message).toBe('Execution failed'); - expect(result.error.details).toStrictEqual({ - input: { testId: 'test-button' }, - }); - } - }); - - it('collects full observation on failure with default policy', async () => { - // Arrange - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_click', - input: {}, - observationPolicy: 'default', - execute: vi.fn().mockRejectedValue(new Error('Failed')), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledWith(mockPage, 'full'); - }); - - it('collects minimal observation on failure with none policy', async () => { - // Arrange - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_click', - input: {}, - observationPolicy: 'none', - execute: vi.fn().mockRejectedValue(new Error('Failed')), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledWith(undefined, 'minimal'); - }); - - it('handles observation collection failure gracefully', async () => { - // Arrange - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockRejectedValueOnce(new Error('Page closed')) - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_click', - input: {}, - observationPolicy: 'failures', - execute: vi.fn().mockRejectedValue(new Error('Execution failed')), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.message).toBe('Execution failed'); - } - 
expect(collectObservationSpy).toHaveBeenCalled(); - }); - }); - - describe('page closure detection', () => { - it('creates empty observation when page is closed during failure handling', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(true); - const collectObservationSpy = vi - .spyOn(helpersModule, 'collectObservation') - .mockRejectedValueOnce( - new Error('Target page, context or browser has been closed'), - ) - .mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_click', - input: {}, - observationPolicy: 'default', - execute: vi.fn().mockRejectedValue(new Error('Click failed')), - }; - - // Act - await runTool(config); - - // Assert - expect(collectObservationSpy).toHaveBeenCalledTimes(2); - expect(collectObservationSpy).toHaveBeenLastCalledWith( - undefined, - 'minimal', - ); - }); - }); - - describe('timeout handling', () => { - it('includes duration in response even on timeout error', async () => { - // Arrange - vi.useFakeTimers(); - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_wait_for', - input: {}, - execute: vi.fn().mockImplementation(async () => { - await new Promise((resolve) => setTimeout(resolve, 100)); - throw new Error('Timeout waiting for element'); - }), - classifyError: () => ({ - code: 'MM_WAIT_TIMEOUT', - message: 'Wait timeout', - }), - }; - - // Act - const resultPromise = runTool(config); - await vi.advanceTimersByTimeAsync(100); - const result = await resultPromise; - - // Assert - expect(result.ok).toBe(false); - expect(result.meta.durationMs).toBe(100); - - // Cleanup - vi.useRealTimers(); - }); - }); - - describe('getTarget function', () => { - it('extracts target from input when getTarget provided', async () => { - // Arrange - vi.spyOn(helpersModule, 
'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig< - { testId?: string; selector?: string; a11yRef?: string }, - object - > = { - toolName: 'mm_click', - input: { testId: 'send-button', selector: '.btn' }, - execute: vi.fn().mockResolvedValue({}), - getTarget: (input) => ({ - testId: input.testId, - selector: input.selector, - a11yRef: input.a11yRef, - }), - }; - - // Act - await runTool(config); - - // Assert - expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith( - expect.objectContaining({ - target: { - testId: 'send-button', - selector: '.btn', - a11yRef: undefined, - }, - }), - ); - }); - - it('records undefined target when getTarget not provided', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig<{ testId: string }, object> = { - toolName: 'mm_click', - input: { testId: 'send-button' }, - execute: vi.fn().mockResolvedValue({}), - }; - - // Act - await runTool(config); - - // Assert - expect(mockKnowledgeStore.recordStep).toHaveBeenCalledWith( - expect.objectContaining({ - target: undefined, - }), - ); - }); - }); - - describe('isToolExecuteResult type guard', () => { - it('handles plain result (not ToolExecuteResult)', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - execute: vi.fn().mockResolvedValue({ simple: 'value' }), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result).toStrictEqual({ simple: 'value' }); - } - }); - - it('handles ToolExecuteResult wrapper', async () => { - // Arrange - vi.spyOn(helpersModule, 
'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - execute: vi.fn().mockResolvedValue({ - result: { wrapped: 'value' }, - }), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result).toStrictEqual({ wrapped: 'value' }); - } - }); - - it('handles null result', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - execute: vi.fn().mockResolvedValue(null), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result).toBeNull(); - } - }); - - it('handles primitive result', async () => { - // Arrange - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - execute: vi.fn().mockResolvedValue('string-result'), - }; - - // Act - const result = await runTool(config); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result).toBe('string-result'); - } - }); - }); - - describe('createEmptyObservation', () => { - it('creates empty observation when session has no ID on failure', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined); - vi.spyOn(helpersModule, 'collectObservation').mockResolvedValue({ - state: {} as any, - testIds: [], - a11y: { nodes: [] }, - }); - const config: ToolExecutionConfig = { - toolName: 'mm_test_tool', - input: {}, - execute: vi.fn().mockRejectedValue(new Error('Failed')), - }; - - // Act - const result = await runTool(config); - - 
// Assert - expect(result.ok).toBe(false); - expect(mockKnowledgeStore.recordStep).not.toHaveBeenCalled(); - }); - }); -}); diff --git a/src/mcp-server/tools/run-tool.ts b/src/mcp-server/tools/run-tool.ts deleted file mode 100644 index d74c206..0000000 --- a/src/mcp-server/tools/run-tool.ts +++ /dev/null @@ -1,220 +0,0 @@ -import type { Page } from '@playwright/test'; - -import type { ExtensionState } from '../../capabilities/types.js'; -import { knowledgeStore } from '../knowledge-store.js'; -import { getSessionManager } from '../session-manager.js'; -import { collectObservation } from './helpers.js'; -import type { - McpResponse, - HandlerOptions, - StepRecordObservation, - ErrorCode, -} from '../types'; -import { ErrorCodes } from '../types'; -import { - createSuccessResponse, - createErrorResponse, - extractErrorMessage, - debugWarn, -} from '../utils'; - -/** - * Creates an empty observation object for step recording. - * - * @returns Empty observation with default state, testIds, and a11y nodes - */ -function createEmptyObservation(): StepRecordObservation { - return { - state: {} as ExtensionState, - testIds: [], - a11y: { nodes: [] }, - }; -} - -export type ObservationPolicy = 'none' | 'default' | 'custom' | 'failures'; - -export type ToolExecutionContext = { - sessionId: string | undefined; - page: Page; - refMap: Map; - startTime: number; -}; - -export type ToolExecuteResult = { - result: TResult; - observation?: StepRecordObservation; -}; - -export type ToolExecutionConfig = { - toolName: string; - input: TInput; - options?: HandlerOptions; - requiresSession?: boolean; - observationPolicy?: ObservationPolicy; - execute: ( - context: ToolExecutionContext, - ) => Promise>; - classifyError?: (error: unknown) => { - code: string; - message: string; - }; - getTarget?: (input: TInput) => - | { - testId?: string; - selector?: string; - a11yRef?: string; - } - | undefined; - sanitizeInputForRecording?: (input: TInput) => Record; -}; - -/** - * Type guard to 
check if result is a ToolExecuteResult with observation. - * - * @param result The result to check - * @returns True if result is a ToolExecuteResult with observation property - */ -function isToolExecuteResult( - result: TResult | ToolExecuteResult, -): result is ToolExecuteResult { - return ( - typeof result === 'object' && - result !== null && - 'result' in result && - Object.prototype.hasOwnProperty.call(result, 'result') - ); -} - -/** - * Executes a tool with error handling, observation collection, and knowledge store recording. - * - * @param config The tool execution configuration with input, execute function, and error handling - * @returns Promise resolving to MCP response with tool result or error information - */ -export async function runTool( - config: ToolExecutionConfig, -): Promise> { - const startTime = Date.now(); - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - const requiresSession = config.requiresSession ?? true; - - const effectivePolicy = - config.options?.observationPolicy ?? config.observationPolicy ?? 'default'; - - try { - if (requiresSession && !sessionManager.hasActiveSession()) { - return createErrorResponse( - ErrorCodes.MM_NO_ACTIVE_SESSION, - 'No active session. Call launch first.', - { input: config.input }, - undefined, - startTime, - ); - } - - const context: ToolExecutionContext = { - sessionId, - page: requiresSession ? sessionManager.getPage() : (undefined as never), - refMap: requiresSession ? 
sessionManager.getRefMap() : new Map(), - startTime, - }; - - const executeResult = await config.execute(context); - - let result: TResult; - let customObservation: StepRecordObservation | undefined; - - if (isToolExecuteResult(executeResult)) { - result = executeResult.result; - customObservation = executeResult.observation; - } else { - result = executeResult; - } - - let observation: StepRecordObservation | undefined; - - if (effectivePolicy === 'custom' && customObservation) { - observation = customObservation; - } else if (effectivePolicy === 'default' && requiresSession) { - observation = await collectObservation(context.page, 'full'); - } else if ( - (effectivePolicy === 'none' || effectivePolicy === 'failures') && - requiresSession - ) { - observation = await collectObservation(context.page, 'minimal'); - } - - if (sessionId) { - const recordInput = config.sanitizeInputForRecording - ? config.sanitizeInputForRecording(config.input) - : (config.input as Record); - - await knowledgeStore.recordStep({ - sessionId, - toolName: config.toolName, - input: recordInput, - target: config.getTarget?.(config.input), - outcome: { ok: true }, - observation: observation ?? createEmptyObservation(), - durationMs: Date.now() - startTime, - context: sessionManager.getEnvironmentMode(), - }); - } - - return createSuccessResponse(result, sessionId, startTime); - } catch (error) { - const errorInfo = config.classifyError?.(error) ?? 
{ - code: `MM_${config.toolName.toUpperCase().replace(/^MM_/u, '')}_FAILED`, - message: extractErrorMessage(error), - }; - - let failureObservation: StepRecordObservation = createEmptyObservation(); - - if (requiresSession && sessionManager.hasActiveSession()) { - if (effectivePolicy === 'failures' || effectivePolicy === 'default') { - try { - const page = sessionManager.getPage(); - failureObservation = await collectObservation(page, 'full'); - } catch (collectError) { - debugWarn('run-tool.collectObservation', collectError); - failureObservation = await collectObservation(undefined, 'minimal'); - } - } else if (effectivePolicy === 'none') { - try { - failureObservation = await collectObservation(undefined, 'minimal'); - } catch (collectError) { - debugWarn('run-tool.collectObservation', collectError); - } - } - } - - if (sessionId) { - const recordInput = config.sanitizeInputForRecording - ? config.sanitizeInputForRecording(config.input) - : (config.input as Record); - - await knowledgeStore.recordStep({ - sessionId, - toolName: config.toolName, - input: recordInput, - target: config.getTarget?.(config.input), - outcome: { - ok: false, - error: { code: errorInfo.code, message: errorInfo.message }, - }, - observation: failureObservation, - durationMs: Date.now() - startTime, - context: sessionManager.getEnvironmentMode(), - }); - } - - return createErrorResponse( - errorInfo.code as ErrorCode, - errorInfo.message, - { input: config.input }, - sessionId, - startTime, - ); - } -} diff --git a/src/mcp-server/tools/screenshot.test.ts b/src/mcp-server/tools/screenshot.test.ts deleted file mode 100644 index b21fda8..0000000 --- a/src/mcp-server/tools/screenshot.test.ts +++ /dev/null @@ -1,307 +0,0 @@ -/** - * Unit tests for screenshot tool handler. - * - * Tests handleScreenshot with various options including base64 encoding, - * selector scoping, and error handling. 
- */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { handleScreenshot } from './screenshot.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils'; -import { ErrorCodes } from '../types/errors.js'; - -describe('screenshot', () => { - let mockSessionManager: ReturnType; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - sessionMetadata: { - schemaVersion: 1, - sessionId: 'test-session-123', - createdAt: new Date().toISOString(), - flowTags: [], - tags: [], - launch: { stateMode: 'default' }, - }, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - // Mock knowledge store to prevent "not initialized" errors - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('handleScreenshot', () => { - describe('basic screenshot', () => { - it('captures full page screenshot by default', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({ - path: '/path/to/screenshot.png', - width: 1280, - height: 720, - base64: undefined, - }); - - // Act - const result = await handleScreenshot({ name: 'test-screenshot' }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - 
expect(result.result.path).toBe('/path/to/screenshot.png'); - expect(result.result.width).toBe(1280); - expect(result.result.height).toBe(720); - expect(result.result.base64).toBeUndefined(); - } - expect(mockSessionManager.screenshot).toHaveBeenCalledWith({ - name: 'test-screenshot', - fullPage: true, - selector: undefined, - }); - }); - - it('captures viewport-only screenshot when fullPage is false', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({ - path: '/path/to/screenshot.png', - width: 1280, - height: 720, - base64: undefined, - }); - - // Act - const result = await handleScreenshot({ - name: 'viewport-screenshot', - fullPage: false, - }); - - // Assert - expect(result.ok).toBe(true); - expect(mockSessionManager.screenshot).toHaveBeenCalledWith({ - name: 'viewport-screenshot', - fullPage: false, - selector: undefined, - }); - }); - }); - - describe('with base64 encoding', () => { - it('includes base64 when includeBase64 is true', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({ - path: '/path/to/screenshot.png', - width: 1280, - height: 720, - base64: - 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', - }); - - // Act - const result = await handleScreenshot({ - name: 'base64-screenshot', - includeBase64: true, - }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.base64).toBe( - 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', - ); - } - }); - - it('excludes base64 when includeBase64 is false', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({ - path: '/path/to/screenshot.png', - width: 1280, - height: 720, - base64: - 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', - }); - - // Act - const result = await handleScreenshot({ - name: 
'no-base64-screenshot', - includeBase64: false, - }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.base64).toBeUndefined(); - } - }); - }); - - describe('with selector scoping', () => { - it('captures screenshot of specific element', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({ - path: '/path/to/element-screenshot.png', - width: 400, - height: 200, - base64: undefined, - }); - - // Act - const result = await handleScreenshot({ - name: 'element-screenshot', - selector: '[data-testid="account-menu"]', - }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.width).toBe(400); - expect(result.result.height).toBe(200); - } - expect(mockSessionManager.screenshot).toHaveBeenCalledWith({ - name: 'element-screenshot', - fullPage: true, - selector: '[data-testid="account-menu"]', - }); - }); - - it('combines selector with fullPage false', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({ - path: '/path/to/element-screenshot.png', - width: 400, - height: 200, - base64: undefined, - }); - - // Act - const result = await handleScreenshot({ - name: 'element-viewport-screenshot', - selector: '.modal-content', - fullPage: false, - }); - - // Assert - expect(result.ok).toBe(true); - expect(mockSessionManager.screenshot).toHaveBeenCalledWith({ - name: 'element-viewport-screenshot', - fullPage: false, - selector: '.modal-content', - }); - }); - }); - - describe('error handling', () => { - it('returns error when no active session', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleScreenshot({ name: 'test-screenshot' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - - it('returns error when screenshot fails', async () => { - // 
Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockRejectedValue( - new Error('Screenshot failed'), - ); - - // Act - const result = await handleScreenshot({ name: 'test-screenshot' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_SCREENSHOT_FAILED); - expect(result.error.message).toContain('Screenshot failed'); - } - }); - - it('returns error when page is closed', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockRejectedValue( - new Error('Target page, context or browser has been closed'), - ); - - // Act - const result = await handleScreenshot({ name: 'test-screenshot' }); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED); - } - }); - }); - - describe('input sanitization', () => { - it('sanitizes input for knowledge store recording', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'screenshot').mockResolvedValue({ - path: '/path/to/screenshot.png', - width: 1280, - height: 720, - base64: 'very-long-base64-string-that-should-not-be-recorded', - }); - - const recordStepMock = vi.fn().mockResolvedValue(undefined); - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue( - { - recordStep: recordStepMock, - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi.fn().mockResolvedValue({ - sessionId: 'test', - stepCount: 0, - recipe: [], - }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any, - ); - - // Act - await handleScreenshot({ - name: 'test-screenshot', - includeBase64: true, - selector: '[data-testid="test"]', - }); - - // Assert - expect(recordStepMock).toHaveBeenCalled(); - const recordedInput = recordStepMock.mock.calls[0][0].input; - 
expect(recordedInput).toStrictEqual({ - name: 'test-screenshot', - fullPage: undefined, - selector: '[data-testid="test"]', - }); - expect(recordedInput.includeBase64).toBeUndefined(); - }); - }); - }); -}); diff --git a/src/mcp-server/tools/screenshot.ts b/src/mcp-server/tools/screenshot.ts deleted file mode 100644 index d6696ad..0000000 --- a/src/mcp-server/tools/screenshot.ts +++ /dev/null @@ -1,67 +0,0 @@ -import { getSessionManager } from '../session-manager.js'; -import { classifyScreenshotError } from './error-classification.js'; -import { runTool } from './run-tool.js'; -import type { - ScreenshotInput, - ScreenshotToolResult, - McpResponse, - HandlerOptions, -} from '../types'; - -/** - * Handles the screenshot tool request. - * - * @param input - The screenshot input parameters. - * @param options - Handler options including abort signal. - * @returns Response with screenshot path and dimensions. - */ -export async function handleScreenshot( - input: ScreenshotInput, - options?: HandlerOptions, -): Promise> { - return runTool({ - toolName: 'mm_screenshot', - input, - options, - observationPolicy: 'none', - - /** - * Executes the screenshot capture. - * - * @returns The screenshot result. - */ - execute: async () => { - const sessionManager = getSessionManager(); - const result = await sessionManager.screenshot({ - name: input.name, - fullPage: input.fullPage ?? true, - selector: input.selector, - }); - - const response: ScreenshotToolResult = { - path: result.path, - width: result.width, - height: result.height, - }; - - if (input.includeBase64) { - response.base64 = result.base64; - } - - return response; - }, - - classifyError: classifyScreenshotError, - - /** - * Sanitizes input for knowledge store recording. - * - * @returns Sanitized input object. 
- */ - sanitizeInputForRecording: () => ({ - name: input.name, - fullPage: input.fullPage, - selector: input.selector, - }), - }); -} diff --git a/src/mcp-server/tools/seeding.test.ts b/src/mcp-server/tools/seeding.test.ts deleted file mode 100644 index e77efbb..0000000 --- a/src/mcp-server/tools/seeding.test.ts +++ /dev/null @@ -1,552 +0,0 @@ -/** - * Unit tests for seeding tool handlers. - * - * Tests contract deployment handlers including single/multiple contract deployment, - * address lookup, and contract listing with ContractSeedingCapability. - */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { - handleSeedContract, - handleSeedContracts, - handleGetContractAddress, - handleListDeployedContracts, -} from './seeding.js'; -import type { ContractSeedingCapability } from '../../capabilities/types.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils'; -import { ErrorCodes } from '../types/errors.js'; - -describe('seeding', () => { - let mockSessionManager: ReturnType; - let mockSeedingCapability: ContractSeedingCapability; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - sessionMetadata: { - schemaVersion: 1, - sessionId: 'test-session-123', - createdAt: new Date().toISOString(), - flowTags: [], - tags: [], - launch: { stateMode: 'default' }, - }, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - // Mock knowledge store to prevent "not initialized" errors - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, 
recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - - // Create fresh mock seeding capability - mockSeedingCapability = { - deployContract: vi.fn(), - deployContracts: vi.fn(), - getContractAddress: vi.fn(), - listDeployedContracts: vi.fn(), - getAvailableContracts: vi.fn(), - clearRegistry: vi.fn(), - initialize: vi.fn(), - }; - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('handleSeedContract', () => { - it('deploys a single contract successfully', async () => { - // Arrange - const deployedAt = new Date().toISOString(); - const mockedDeployContract = vi - .spyOn(mockSeedingCapability, 'deployContract') - .mockResolvedValue({ - name: 'hst', - address: '0x1234567890123456789012345678901234567890', - deployedAt, - }); - - // Act - const result = await handleSeedContract( - { contractName: 'hst' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.contractName).toBe('hst'); - expect(result.result.contractAddress).toBe( - '0x1234567890123456789012345678901234567890', - ); - expect(result.result.deployedAt).toBe(deployedAt); - } - expect(mockedDeployContract).toHaveBeenCalledWith('hst', { - hardfork: undefined, - deployerOptions: undefined, - }); - }); - - it('deploys contract with custom hardfork', async () => { - // Arrange - const deployedAt = new Date().toISOString(); - const mockedDeployContract = vi - .spyOn(mockSeedingCapability, 'deployContract') - .mockResolvedValue({ - name: 'nfts', - address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd', - deployedAt, - }); - - // Act - const result = await handleSeedContract( - { contractName: 'nfts', hardfork: 'shanghai' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - 
expect(mockedDeployContract).toHaveBeenCalledWith('nfts', { - hardfork: 'shanghai', - deployerOptions: undefined, - }); - }); - - it('deploys contract with deployer options', async () => { - // Arrange - const deployedAt = new Date().toISOString(); - const mockedDeployContract = vi - .spyOn(mockSeedingCapability, 'deployContract') - .mockResolvedValue({ - name: 'piggybank', - address: '0x9876543210987654321098765432109876543210', - deployedAt, - }); - - // Act - const result = await handleSeedContract( - { - contractName: 'piggybank', - deployerOptions: { - fromAddress: '0x1111111111111111111111111111111111111111', - }, - }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - expect(mockedDeployContract).toHaveBeenCalledWith('piggybank', { - hardfork: undefined, - deployerOptions: { - fromAddress: '0x1111111111111111111111111111111111111111', - }, - }); - }); - - it('returns error when seeding capability not available', async () => { - // Act - const result = await handleSeedContract( - { contractName: 'hst' }, - { seedingCapability: undefined }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); - expect(result.error.message).toContain( - 'ContractSeedingCapability not available', - ); - } - }); - - it('returns error when deployment fails', async () => { - // Arrange - vi.spyOn(mockSeedingCapability, 'deployContract').mockRejectedValue( - new Error('Contract not found: unknown'), - ); - - // Act - const result = await handleSeedContract( - { contractName: 'hst' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CONTRACT_NOT_FOUND); - expect(result.error.message).toContain('Contract not found'); - } - }); - - it('returns error when deployment fails with generic error', async () => { - // Arrange - 
vi.spyOn(mockSeedingCapability, 'deployContract').mockRejectedValue( - new Error('Deployment failed'), - ); - - // Act - const result = await handleSeedContract( - { contractName: 'hst' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED); - expect(result.error.message).toContain('Deployment failed'); - } - }); - }); - - describe('handleSeedContracts', () => { - it('deploys multiple contracts successfully', async () => { - // Arrange - const deployedAt1 = new Date().toISOString(); - const deployedAt2 = new Date(Date.now() + 1000).toISOString(); - const mockedDeployContracts = vi - .spyOn(mockSeedingCapability, 'deployContracts') - .mockResolvedValue({ - deployed: [ - { - name: 'hst', - address: '0x1234567890123456789012345678901234567890', - deployedAt: deployedAt1, - }, - { - name: 'nfts', - address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd', - deployedAt: deployedAt2, - }, - ], - failed: [], - }); - - // Act - const result = await handleSeedContracts( - { contracts: ['hst', 'nfts'] }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.deployed).toHaveLength(2); - expect(result.result.deployed[0].contractName).toBe('hst'); - expect(result.result.deployed[1].contractName).toBe('nfts'); - expect(result.result.failed).toHaveLength(0); - } - expect(mockedDeployContracts).toHaveBeenCalledWith(['hst', 'nfts'], { - hardfork: undefined, - }); - }); - - it('deploys contracts with custom hardfork', async () => { - // Arrange - const deployedAt = new Date().toISOString(); - const mockedDeployContracts = vi - .spyOn(mockSeedingCapability, 'deployContracts') - .mockResolvedValue({ - deployed: [ - { - name: 'hst', - address: '0x1234567890123456789012345678901234567890', - deployedAt, - }, - ], - failed: [], - }); - - // Act - const result = await 
handleSeedContracts( - { contracts: ['hst'], hardfork: 'shanghai' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - expect(mockedDeployContracts).toHaveBeenCalledWith(['hst'], { - hardfork: 'shanghai', - }); - }); - - it('handles partial deployment failures', async () => { - // Arrange - const deployedAt = new Date().toISOString(); - vi.spyOn(mockSeedingCapability, 'deployContracts').mockResolvedValue({ - deployed: [ - { - name: 'hst', - address: '0x1234567890123456789012345678901234567890', - deployedAt, - }, - ], - failed: [ - { - name: 'nfts', - error: 'Contract deployment failed', - }, - ], - }); - - // Act - const result = await handleSeedContracts( - { contracts: ['hst', 'nfts'] }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.deployed).toHaveLength(1); - expect(result.result.failed).toHaveLength(1); - expect(result.result.failed[0].contractName).toBe('nfts'); - expect(result.result.failed[0].error).toBe( - 'Contract deployment failed', - ); - } - }); - - it('returns error when seeding capability not available', async () => { - // Act - const result = await handleSeedContracts( - { contracts: ['hst'] }, - { seedingCapability: undefined }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); - expect(result.error.message).toContain( - 'ContractSeedingCapability not available', - ); - } - }); - - it('returns error when deployment fails completely', async () => { - // Arrange - vi.spyOn(mockSeedingCapability, 'deployContracts').mockRejectedValue( - new Error('Anvil not running'), - ); - - // Act - const result = await handleSeedContracts( - { contracts: ['hst'] }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - 
expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED); - expect(result.error.message).toContain('Anvil not running'); - } - }); - }); - - describe('handleGetContractAddress', () => { - it('returns contract address when found', async () => { - // Arrange - const mockedGetContractAddress = vi - .spyOn(mockSeedingCapability, 'getContractAddress') - .mockReturnValue('0x1234567890123456789012345678901234567890'); - - // Act - const result = await handleGetContractAddress( - { contractName: 'hst' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.contractName).toBe('hst'); - expect(result.result.contractAddress).toBe( - '0x1234567890123456789012345678901234567890', - ); - } - expect(mockedGetContractAddress).toHaveBeenCalledWith('hst'); - }); - - it('returns null when contract not found', async () => { - // Arrange - vi.spyOn(mockSeedingCapability, 'getContractAddress').mockReturnValue( - null, - ); - - // Act - const result = await handleGetContractAddress( - { contractName: 'nfts' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.contractName).toBe('nfts'); - expect(result.result.contractAddress).toBeNull(); - } - }); - - it('returns error when seeding capability not available', async () => { - // Act - const result = await handleGetContractAddress( - { contractName: 'hst' }, - { seedingCapability: undefined }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); - expect(result.error.message).toContain( - 'ContractSeedingCapability not available', - ); - } - }); - - it('returns error when lookup fails', async () => { - // Arrange - vi.spyOn(mockSeedingCapability, 'getContractAddress').mockImplementation( - () => { - throw new Error('Registry error'); - }, - ); - - // Act - const result = 
await handleGetContractAddress( - { contractName: 'hst' }, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED); - expect(result.error.message).toContain('Registry error'); - } - }); - }); - - describe('handleListDeployedContracts', () => { - it('returns list of deployed contracts', async () => { - // Arrange - const deployedAt1 = new Date().toISOString(); - const deployedAt2 = new Date(Date.now() + 1000).toISOString(); - const mockedListDeployedContracts = vi - .spyOn(mockSeedingCapability, 'listDeployedContracts') - .mockReturnValue([ - { - name: 'hst', - address: '0x1234567890123456789012345678901234567890', - deployedAt: deployedAt1, - }, - { - name: 'nfts', - address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd', - deployedAt: deployedAt2, - }, - ]); - - // Act - const result = await handleListDeployedContracts( - {}, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.contracts).toHaveLength(2); - expect(result.result.contracts[0].contractName).toBe('hst'); - expect(result.result.contracts[0].contractAddress).toBe( - '0x1234567890123456789012345678901234567890', - ); - expect(result.result.contracts[0].deployedAt).toBe(deployedAt1); - expect(result.result.contracts[1].contractName).toBe('nfts'); - expect(result.result.contracts[1].contractAddress).toBe( - '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd', - ); - expect(result.result.contracts[1].deployedAt).toBe(deployedAt2); - } - expect(mockedListDeployedContracts).toHaveBeenCalled(); - }); - - it('returns empty list when no contracts deployed', async () => { - // Arrange - vi.spyOn(mockSeedingCapability, 'listDeployedContracts').mockReturnValue( - [], - ); - - // Act - const result = await handleListDeployedContracts( - {}, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - 
expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.contracts).toHaveLength(0); - } - }); - - it('returns error when seeding capability not available', async () => { - // Act - const result = await handleListDeployedContracts( - {}, - { seedingCapability: undefined }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); - expect(result.error.message).toContain( - 'ContractSeedingCapability not available', - ); - } - }); - - it('returns error when listing fails', async () => { - // Arrange - vi.spyOn( - mockSeedingCapability, - 'listDeployedContracts', - ).mockImplementation(() => { - throw new Error('Registry error'); - }); - - // Act - const result = await handleListDeployedContracts( - {}, - { seedingCapability: mockSeedingCapability }, - ); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED); - expect(result.error.message).toContain('Registry error'); - } - }); - }); -}); diff --git a/src/mcp-server/tools/seeding.ts b/src/mcp-server/tools/seeding.ts deleted file mode 100644 index 0fd578c..0000000 --- a/src/mcp-server/tools/seeding.ts +++ /dev/null @@ -1,327 +0,0 @@ -import type { ContractSeedingCapability } from '../../capabilities/types.js'; -import { getSessionManager } from '../session-manager.js'; -import { classifySeedingError } from './error-classification.js'; -import { runTool } from './run-tool.js'; -import type { - SeedContractInput, - SeedContractsInput, - GetContractAddressInput, - ListDeployedContractsInput, - SeedContractResult, - SeedContractsResult, - GetContractAddressResult, - ListDeployedContractsResult, - McpResponse, - HandlerOptions, -} from '../types'; -import { ErrorCodes } from '../types'; -import { createErrorResponse } from '../utils'; - -export type SeedingToolOptions = HandlerOptions & { - seedingCapability?: ContractSeedingCapability; -}; - -/** - 
* Validates that the seeding capability is available, returning either the capability or an error response. - * - * @param toolName The name of the tool requesting the capability - * @param input The input provided to the tool - * @param options Tool options containing the seeding capability - * @param startTime Timestamp when the tool execution started - * @returns The seeding capability if available, or an error response if not - */ -function checkSeedingCapability( - toolName: string, - input: unknown, - options: SeedingToolOptions | undefined, - startTime: number, -): McpResponse | ContractSeedingCapability { - const sessionManager = getSessionManager(); - const sessionId = sessionManager.getSessionId(); - - if (!options?.seedingCapability) { - return createErrorResponse( - ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE, - `ContractSeedingCapability not available. The ${toolName} tool requires running in e2e mode with the MetaMask extension wrapper, which provides Anvil chain and contract deployment support.`, - { capability: 'ContractSeedingCapability', input }, - sessionId, - startTime, - ) as McpResponse; - } - - return options.seedingCapability; -} - -/** - * Type guard to check if a result is a ContractSeedingCapability. - * - * @param result The value to check - * @returns True if result is a ContractSeedingCapability, false if it's an error response - */ -function isCapability( - result: McpResponse | ContractSeedingCapability, -): result is ContractSeedingCapability { - return ( - typeof result === 'object' && result !== null && 'deployContract' in result - ); -} - -/** - * Handles the mm_seed_contract tool to deploy a single smart contract. 
- * - * @param input The contract name and deployment options - * @param options Tool options including seeding capability - * @returns Promise resolving to the deployment result with contract address - */ -export async function handleSeedContract( - input: SeedContractInput, - options?: SeedingToolOptions, -): Promise> { - const startTime = Date.now(); - const capabilityOrError = checkSeedingCapability( - 'mm_seed_contract', - input, - options, - startTime, - ); - - if (!isCapability(capabilityOrError)) { - return capabilityOrError; - } - - const seedingCapability = capabilityOrError; - - return runTool({ - toolName: 'mm_seed_contract', - input, - options, - observationPolicy: 'none', - - /** - * Executes the contract deployment using the seeding capability. - * - * @returns The deployed contract details including name, address, and timestamp - */ - execute: async () => { - const deployed = await seedingCapability.deployContract( - input.contractName, - { - hardfork: input.hardfork, - deployerOptions: input.deployerOptions, - }, - ); - - return { - contractName: deployed.name, - contractAddress: deployed.address, - deployedAt: deployed.deployedAt, - }; - }, - - classifyError: classifySeedingError, - - /** - * Sanitizes the input for recording in the knowledge store. - * - * @returns The sanitized input containing contract name and hardfork - */ - sanitizeInputForRecording: () => ({ - contractName: input.contractName, - hardfork: input.hardfork ?? 'prague', - }), - }); -} - -/** - * Handles the mm_seed_contracts tool to deploy multiple smart contracts. 
- * - * @param input The list of contract names and deployment options - * @param options Tool options including seeding capability - * @returns Promise resolving to deployment results with deployed and failed contracts - */ -export async function handleSeedContracts( - input: SeedContractsInput, - options?: SeedingToolOptions, -): Promise> { - const startTime = Date.now(); - const capabilityOrError = checkSeedingCapability( - 'mm_seed_contracts', - input, - options, - startTime, - ); - - if (!isCapability(capabilityOrError)) { - return capabilityOrError; - } - - const seedingCapability = capabilityOrError; - - return runTool({ - toolName: 'mm_seed_contracts', - input, - options, - observationPolicy: 'none', - - /** - * Executes the multi-contract deployment using the seeding capability. - * - * @returns The deployment results with deployed and failed contract lists - */ - execute: async () => { - const seedResult = await seedingCapability.deployContracts( - input.contracts, - { hardfork: input.hardfork }, - ); - - return { - deployed: seedResult.deployed.map((deployedContract) => ({ - contractName: deployedContract.name, - contractAddress: deployedContract.address, - deployedAt: deployedContract.deployedAt, - })), - failed: seedResult.failed.map((failedDeployment) => ({ - contractName: failedDeployment.name, - error: failedDeployment.error, - })), - }; - }, - - classifyError: classifySeedingError, - - /** - * Sanitizes the input for recording in the knowledge store. - * - * @returns The sanitized input containing contracts list and hardfork - */ - sanitizeInputForRecording: () => ({ - contracts: input.contracts, - hardfork: input.hardfork ?? 'prague', - }), - }); -} - -/** - * Handles the mm_get_contract_address tool to retrieve a deployed contract's address. 
- * - * @param input The contract name to look up - * @param options Tool options including seeding capability - * @returns Promise resolving to the contract address or null if not found - */ -export async function handleGetContractAddress( - input: GetContractAddressInput, - options?: SeedingToolOptions, -): Promise> { - const startTime = Date.now(); - const capabilityOrError = checkSeedingCapability( - 'mm_get_contract_address', - input, - options, - startTime, - ); - - if (!isCapability(capabilityOrError)) { - return capabilityOrError; - } - - const seedingCapability = capabilityOrError; - - return runTool({ - toolName: 'mm_get_contract_address', - input, - options, - observationPolicy: 'none', - - /** - * Executes the contract address lookup using the seeding capability. - * - * @returns The contract name and its deployed address - */ - execute: async () => { - const address = seedingCapability.getContractAddress(input.contractName); - - return { - contractName: input.contractName, - contractAddress: address, - }; - }, - - classifyError: classifySeedingError, - - /** - * Sanitizes the input for recording in the knowledge store. - * - * @returns The sanitized input containing the contract name - */ - sanitizeInputForRecording: () => ({ - contractName: input.contractName, - }), - }); -} - -/** - * Handles the mm_list_contracts tool to list all deployed contracts in the session. 
- * - * @param _input Unused input parameter (no input required for this tool) - * @param options Tool options including seeding capability - * @returns Promise resolving to a list of all deployed contracts with their addresses - */ -export async function handleListDeployedContracts( - _input: ListDeployedContractsInput, - options?: SeedingToolOptions, -): Promise> { - const startTime = Date.now(); - const capabilityOrError = checkSeedingCapability( - 'mm_list_contracts', - _input, - options, - startTime, - ); - - if (!isCapability(capabilityOrError)) { - return capabilityOrError; - } - - const seedingCapability = capabilityOrError; - - return runTool({ - toolName: 'mm_list_contracts', - input: _input, - options, - observationPolicy: 'none', - - /** - * Executes the contract listing using the seeding capability. - * - * @returns The list of all deployed contracts with their details - */ - execute: async () => { - const deployed = seedingCapability.listDeployedContracts(); - - return { - contracts: deployed.map( - (deployedContract: { - /** - * The contract name - */ - name: string; - /** - * The contract's deployed address - */ - address: string; - /** - * The deployment timestamp - */ - deployedAt: string; - }) => ({ - contractName: deployedContract.name, - contractAddress: deployedContract.address, - deployedAt: deployedContract.deployedAt, - }), - ), - }; - }, - - classifyError: classifySeedingError, - }); -} diff --git a/src/mcp-server/tools/state.test.ts b/src/mcp-server/tools/state.test.ts deleted file mode 100644 index 902e230..0000000 --- a/src/mcp-server/tools/state.test.ts +++ /dev/null @@ -1,358 +0,0 @@ -/** - * Unit tests for state tool handler. - * - * Tests handleGetState with various scenarios including state snapshot capability, - * tab tracking, and error handling. 
- */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; - -import { handleGetState } from './state.js'; -import type { StateSnapshotCapability } from '../../capabilities/types.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager, createMockPage } from '../test-utils'; -import { ErrorCodes } from '../types/errors.js'; - -describe('state', () => { - let mockSessionManager: ReturnType; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ - hasActive: true, - sessionId: 'test-session-123', - sessionMetadata: { - schemaVersion: 1, - sessionId: 'test-session-123', - createdAt: new Date().toISOString(), - flowTags: [], - tags: [], - launch: { stateMode: 'default' }, - }, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - // Mock knowledge store to prevent "not initialized" errors - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi - .fn() - .mockResolvedValue({ sessionId: 'test', stepCount: 0, recipe: [] }), - listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - } as any); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - describe('handleGetState', () => { - describe('without state snapshot capability', () => { - it('returns extension state from session manager', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 
'getExtensionState').mockResolvedValue({ - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home', - accountAddress: '0x1234567890123456789012345678901234567890', - networkName: 'Ethereum Mainnet', - chainId: 1, - balance: '1.5 ETH', - }); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ]); - - // Act - const result = await handleGetState(); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.state).toStrictEqual({ - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home', - accountAddress: '0x1234567890123456789012345678901234567890', - networkName: 'Ethereum Mainnet', - chainId: 1, - balance: '1.5 ETH', - }); - expect(result.result.tabs).toStrictEqual({ - active: { - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - tracked: [ - { - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ], - }); - } - expect(mockSessionManager.getExtensionState).toHaveBeenCalled(); - }); - - it('includes multiple tracked pages in tabs', async () => { - // Arrange - const mockExtensionPage = createMockPage(); - vi.spyOn(mockExtensionPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - const mockDappPage = createMockPage(); - vi.spyOn(mockDappPage, 'url').mockReturnValue( - 'https://app.uniswap.org', - ); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue( - mockExtensionPage, - ); - vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue({ - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home', - accountAddress: '0x1234567890123456789012345678901234567890', - networkName: 'Ethereum Mainnet', - 
chainId: 1, - balance: '1.5 ETH', - }); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockExtensionPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - { - page: mockDappPage, - role: 'dapp', - url: 'https://app.uniswap.org', - }, - ]); - - // Act - const result = await handleGetState(); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.tabs).toBeDefined(); - expect(result.result.tabs?.tracked).toHaveLength(2); - expect(result.result.tabs?.tracked).toStrictEqual([ - { role: 'extension', url: 'chrome-extension://ext-123/home.html' }, - { role: 'dapp', url: 'https://app.uniswap.org' }, - ]); - } - }); - - it('handles active page without tracked page info', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getExtensionState').mockResolvedValue({ - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: false, - currentScreen: 'home', - accountAddress: null, - networkName: null, - chainId: null, - balance: null, - }); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([]); - - // Act - const result = await handleGetState(); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.tabs).toBeDefined(); - expect(result.result.tabs?.active.role).toBe('other'); - expect(result.result.tabs?.active.url).toBe( - 'chrome-extension://ext-123/home.html', - ); - } - }); - }); - - describe('with state snapshot capability', () => { - it('uses state snapshot capability when provided', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockSessionManager, 
'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getSessionState').mockReturnValue({ - extensionId: 'ext-123', - ports: { anvil: 8545 }, - }); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ]); - - const mockStateSnapshot: StateSnapshotCapability = { - getState: vi.fn().mockResolvedValue({ - isLoaded: true, - currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home', - accountAddress: '0x1234567890123456789012345678901234567890', - networkName: 'Localhost 8545', - chainId: 1337, - balance: '25 ETH', - }), - detectCurrentScreen: vi.fn().mockResolvedValue('home'), - }; - - // Act - const result = await handleGetState({ - stateSnapshotCapability: mockStateSnapshot, - }); - - // Assert - expect(result.ok).toBe(true); - if (result.ok) { - expect(result.result.state.chainId).toBe(1337); - expect(result.result.state.networkName).toBe('Localhost 8545'); - expect(result.result.state.balance).toBe('25 ETH'); - } - expect(mockStateSnapshot.getState).toHaveBeenCalledWith(mockPage, { - extensionId: 'ext-123', - chainId: 1337, - }); - expect(mockSessionManager.getExtensionState).not.toHaveBeenCalled(); - }); - - it('uses chainId 1 when anvil port not present', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockPage, 'url').mockReturnValue( - 'chrome-extension://ext-123/home.html', - ); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getSessionState').mockReturnValue({ - extensionId: 'ext-123', - ports: {}, - }); - vi.spyOn(mockSessionManager, 'getTrackedPages').mockReturnValue([ - { - page: mockPage, - role: 'extension', - url: 'chrome-extension://ext-123/home.html', - }, - ]); - - const mockStateSnapshot: StateSnapshotCapability = { - getState: vi.fn().mockResolvedValue({ - isLoaded: true, - 
currentUrl: 'chrome-extension://ext-123/home.html', - extensionId: 'ext-123', - isUnlocked: true, - currentScreen: 'home', - accountAddress: '0x1234567890123456789012345678901234567890', - networkName: 'Ethereum Mainnet', - chainId: 1, - balance: '1.5 ETH', - }), - detectCurrentScreen: vi.fn().mockResolvedValue('home'), - }; - - // Act - const result = await handleGetState({ - stateSnapshotCapability: mockStateSnapshot, - }); - - // Assert - expect(result.ok).toBe(true); - expect(mockStateSnapshot.getState).toHaveBeenCalledWith(mockPage, { - extensionId: 'ext-123', - chainId: 1, - }); - }); - }); - - describe('error handling', () => { - it('returns error when no active session', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'hasActiveSession').mockReturnValue(false); - - // Act - const result = await handleGetState(); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); - } - }); - - it('returns error when getExtensionState fails', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getExtensionState').mockRejectedValue( - new Error('Failed to get state'), - ); - - // Act - const result = await handleGetState(); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe(ErrorCodes.MM_STATE_FAILED); - expect(result.error.message).toContain('Failed to get state'); - } - }); - - it('returns error when page is closed', async () => { - // Arrange - const mockPage = createMockPage(); - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(mockPage); - vi.spyOn(mockSessionManager, 'getExtensionState').mockRejectedValue( - new Error('Target page, context or browser has been closed'), - ); - - // Act - const result = await handleGetState(); - - // Assert - expect(result.ok).toBe(false); - if (!result.ok) { - 
expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED); - } - }); - }); - }); -}); diff --git a/src/mcp-server/tools/state.ts b/src/mcp-server/tools/state.ts deleted file mode 100644 index 8d1f71c..0000000 --- a/src/mcp-server/tools/state.ts +++ /dev/null @@ -1,102 +0,0 @@ -import type { Page } from 'playwright'; - -import { classifyStateError } from './error-classification.js'; -import { collectObservation } from './helpers.js'; -import { runTool } from './run-tool.js'; -import type { - StateSnapshotCapability, - ExtensionState, -} from '../../capabilities/types.js'; -import { getSessionManager } from '../session-manager.js'; -import type { GetStateResult, McpResponse, HandlerOptions } from '../types'; - -/** - * Tool options for state-related operations. - */ -export type StateToolOptions = HandlerOptions & { - /** - * Optional capability for taking state snapshots - */ - stateSnapshotCapability?: StateSnapshotCapability; -}; - -/** - * Retrieves the current extension state, using the snapshot capability if available. - * - * @param page The Playwright page object to query - * @param sessionManager The session manager instance - * @param stateSnapshotCapability Optional capability for detailed state snapshots - * @returns Promise resolving to the current extension state - */ -async function getState( - page: Page, - sessionManager: ReturnType, - stateSnapshotCapability?: StateSnapshotCapability, -): Promise { - if (stateSnapshotCapability) { - const extensionId = sessionManager.getSessionState()?.extensionId; - return stateSnapshotCapability.getState(page, { - extensionId, - chainId: sessionManager.getSessionState()?.ports?.anvil ? 1337 : 1, - }); - } - return sessionManager.getExtensionState(); -} - -/** - * Handles the mm_get_state tool to retrieve the current extension state. 
- * - * @param options Tool options including optional state snapshot capability - * @returns Promise resolving to the current extension state and tab information - */ -export async function handleGetState( - options?: StateToolOptions, -): Promise> { - return runTool, GetStateResult>({ - toolName: 'mm_get_state', - input: {}, - options, - observationPolicy: 'custom', - - /** - * Executes the state retrieval with tab and observation information. - * - * @param context The tool execution context containing the page - * @returns The extension state, tab information, and observation data - */ - execute: async (context) => { - const sessionManager = getSessionManager(); - const state = await getState( - context.page, - sessionManager, - options?.stateSnapshotCapability, - ); - - const trackedPages = sessionManager.getTrackedPages(); - const activePage = sessionManager.getPage(); - const activeTabInfo = trackedPages.find( - (trackedPage) => trackedPage.page === activePage, - ); - - const tabs = { - active: { - role: activeTabInfo?.role ?? 
'other', - url: activePage.url(), - }, - tracked: trackedPages.map((trackedPage) => ({ - role: trackedPage.role, - url: trackedPage.url, - })), - }; - - const observation = await collectObservation(context.page, 'full', state); - - return { - result: { state, tabs }, - observation, - }; - }, - - classifyError: classifyStateError, - }); -} diff --git a/src/mcp-server/types/responses.ts b/src/mcp-server/types/responses.ts deleted file mode 100644 index edb96c4..0000000 --- a/src/mcp-server/types/responses.ts +++ /dev/null @@ -1,27 +0,0 @@ -export type ResponseMeta = { - timestamp: string; - sessionId?: string; - durationMs: number; -}; - -export type SuccessResponse = { - meta: ResponseMeta; - ok: true; - result: Result; -}; - -export type ErrorDetails = { - code: string; - message: string; - details?: Record; -}; - -export type ErrorResponse = { - error: ErrorDetails; - meta: ResponseMeta; - ok: false; -}; - -export type McpResponse = - | SuccessResponse - | ErrorResponse; diff --git a/src/mcp-server/utils/index.ts b/src/mcp-server/utils/index.ts deleted file mode 100644 index f4af15f..0000000 --- a/src/mcp-server/utils/index.ts +++ /dev/null @@ -1,14 +0,0 @@ -export { SENSITIVE_FIELD_PATTERNS, isSensitiveField } from './redaction.js'; -export { generateFilesafeTimestamp, generateSessionId } from './time.js'; -export { createSuccessResponse, createErrorResponse } from './response.js'; -export { - validateTargetSelection, - type TargetValidationResult, -} from './targets.js'; -export { extractErrorMessage } from './errors.js'; -export { debugWarn } from './logger.js'; -export { - isValidTargetSelection, - isInvalidTargetSelection, - type TargetType, -} from './type-guards.js'; diff --git a/src/mcp-server/utils/response.ts b/src/mcp-server/utils/response.ts deleted file mode 100644 index 91d968e..0000000 --- a/src/mcp-server/utils/response.ts +++ /dev/null @@ -1,57 +0,0 @@ -import type { SuccessResponse, ErrorResponse, ErrorCode } from '../types'; - -/** - * Creates a 
standardized success response. - * - * @param result - The result data to include in the response. - * @param sessionId - Optional session identifier. - * @param startTime - Optional start time for duration calculation. - * @returns A success response object. - */ -export function createSuccessResponse( - result: Result, - sessionId?: string, - startTime?: number, -): SuccessResponse { - return { - meta: { - timestamp: new Date().toISOString(), - sessionId, - durationMs: startTime ? Date.now() - startTime : 0, - }, - ok: true, - result, - }; -} - -/** - * Creates a standardized error response. - * - * @param code - The error code identifying the error type. - * @param message - Human-readable error message. - * @param details - Optional additional error details. - * @param sessionId - Optional session identifier. - * @param startTime - Optional start time for duration calculation. - * @returns An error response object. - */ -export function createErrorResponse( - code: ErrorCode, - message: string, - details?: Record, - sessionId?: string, - startTime?: number, -): ErrorResponse { - return { - error: { - code, - message, - details, - }, - meta: { - timestamp: new Date().toISOString(), - sessionId, - durationMs: startTime ? 
Date.now() - startTime : 0, - }, - ok: false, - }; -} diff --git a/src/server/create-server.test.ts b/src/server/create-server.test.ts new file mode 100644 index 0000000..58d3884 --- /dev/null +++ b/src/server/create-server.test.ts @@ -0,0 +1,1730 @@ +import * as fs from 'node:fs/promises'; +import * as http from 'node:http'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import type { MockInstance } from 'vitest'; + +import type { ServerInstance } from './create-server.js'; +import { + createServer, + extractTargetFromInput, + extractScreenshotInfo, + extractToolOutcome, + buildResponseBody, + shouldCollectObservations, + shouldIncludeObservationsInResponse, +} from './create-server.js'; +import { readDaemonState } from './daemon-state.js'; +import pkg from '../../package.json'; +import type { PortMap, WorkflowContext } from '../capabilities/context.js'; +import type { DaemonState, ServerConfig, ToolResponse } from '../types/http.js'; + +const tmpDir = path.join(os.tmpdir(), `mm-create-server-test-${Date.now()}`); + +vi.mock('node:child_process', () => ({ + execSync: () => Buffer.from(`${tmpDir}\n`), +})); + +vi.mock('../tools/utils/discovery.js', () => ({ + collectTestIds: vi.fn().mockResolvedValue([]), + collectTrimmedA11ySnapshot: vi.fn().mockResolvedValue({ + nodes: [], + refMap: new Map(), + }), + waitForTarget: vi.fn().mockResolvedValue({ + click: vi.fn().mockResolvedValue(undefined), + fill: vi.fn().mockResolvedValue(undefined), + textContent: vi.fn().mockResolvedValue(''), + }), +})); + +vi.mock('../knowledge-store/knowledge-store.js', () => { + const mockStore = { + recordStep: vi.fn().mockResolvedValue('/mock/path'), + writeSessionMetadata: vi.fn().mockResolvedValue('/mock/path'), + getLastSteps: vi.fn().mockResolvedValue([]), + searchSteps: vi.fn().mockResolvedValue([]), + summarizeSession: vi.fn().mockResolvedValue({ stepCount: 0, recipe: [] }), + 
listSessions: vi.fn().mockResolvedValue([]), + generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), + getAllSessionIds: vi.fn().mockResolvedValue([]), + resolveSessionIds: vi.fn().mockResolvedValue([]), + }; + return { + KnowledgeStore: vi.fn(() => mockStore), + createDefaultObservation: vi.fn( + (state: unknown, testIds?: unknown[], nodes?: unknown[]) => ({ + state: state ?? {}, + testIds: testIds ?? [], + a11y: { nodes: nodes ?? [] }, + }), + ), + createKnowledgeStore: vi.fn(() => mockStore), + setKnowledgeStore: vi.fn(), + hasKnowledgeStore: vi.fn(() => false), + knowledgeStore: mockStore, + }; +}); + +function createMockSessionManager() { + return { + hasActiveSession: vi.fn(() => false), + getSessionId: vi.fn(() => 'test-session'), + getSessionState: vi.fn(() => undefined), + getSessionMetadata: vi.fn(() => undefined), + launch: vi.fn(async () => ({ + sessionId: 'test-session', + extensionId: 'test-ext', + state: {}, + })), + cleanup: vi.fn(async () => true), + getPage: vi.fn(() => ({ + waitForLoadState: vi.fn(async () => undefined), + waitForFunction: vi.fn(async () => undefined), + })), + setActivePage: vi.fn(), + getTrackedPages: vi.fn(() => []), + classifyPageRole: vi.fn(() => 'extension'), + getContext: vi.fn(() => ({})), + getExtensionId: vi.fn(() => 'test-ext'), + getExtensionState: vi.fn(async () => ({})), + takeScreenshot: vi.fn(async () => ({ path: '', base64: '' })), + getRefMap: vi.fn(() => new Map()), + setRefMap: vi.fn(), + setWorkflowContext: vi.fn(), + getEnvironmentMode: vi.fn(() => 'e2e'), + setContext: vi.fn(), + getContextInfo: vi.fn(() => ({ + currentContext: 'e2e', + hasActiveSession: false, + sessionId: null, + capabilities: { available: [] }, + canSwitchContext: true, + })), + }; +} + +let exitSpy: MockInstance; + +function buildConfig(overrides: Partial = {}): ServerConfig { + return { + sessionManager: + createMockSessionManager() as unknown as ServerConfig['sessionManager'], + contextFactory: async () => + ({ + config: { 
environment: 'prod', extensionName: 'Test Extension' }, + }) satisfies WorkflowContext, + ...overrides, + }; +} + +async function httpRequest( + url: string, + options: { + method?: string; + headers?: Record; + body?: string; + } = {}, +): Promise<{ status: number; json: () => Promise }> { + return new Promise((resolve, reject) => { + const parsedUrl = new URL(url); + const req = http.request( + { + hostname: parsedUrl.hostname, + port: parsedUrl.port, + path: parsedUrl.pathname, + method: options.method ?? 'GET', + headers: options.headers, + }, + (res) => { + let data = ''; + res.on('data', (chunk: Buffer) => { + data += chunk.toString(); + }); + res.on('end', () => { + resolve({ + status: res.statusCode ?? 0, + json: async () => JSON.parse(data) as unknown, + }); + }); + }, + ); + req.on('error', reject); + if (options.body) { + req.write(options.body); + } + req.end(); + }); +} + +describe('extractTargetFromInput', () => { + it('returns undefined for null input', () => { + expect(extractTargetFromInput(null)).toBeUndefined(); + }); + + it('returns undefined for non-object input', () => { + expect(extractTargetFromInput('string')).toBeUndefined(); + expect(extractTargetFromInput(42)).toBeUndefined(); + }); + + it('returns undefined when no target fields present', () => { + expect(extractTargetFromInput({ name: 'click' })).toBeUndefined(); + }); + + it('extracts a11yRef', () => { + expect(extractTargetFromInput({ a11yRef: 'e1' })).toStrictEqual({ + a11yRef: 'e1', + testId: undefined, + selector: undefined, + }); + }); + + it('extracts testId', () => { + expect(extractTargetFromInput({ testId: 'btn' })).toStrictEqual({ + a11yRef: undefined, + testId: 'btn', + selector: undefined, + }); + }); + + it('extracts selector', () => { + expect(extractTargetFromInput({ selector: '.my-btn' })).toStrictEqual({ + a11yRef: undefined, + testId: undefined, + selector: '.my-btn', + }); + }); + + it('extracts multiple target fields', () => { + expect( + extractTargetFromInput({ 
a11yRef: 'e1', testId: 'btn' }), + ).toStrictEqual({ + a11yRef: 'e1', + testId: 'btn', + selector: undefined, + }); + }); + + it('ignores non-string target values', () => { + expect(extractTargetFromInput({ a11yRef: 42 })).toBeUndefined(); + }); +}); + +describe('extractScreenshotInfo', () => { + it('returns undefined for non-screenshot tools', () => { + expect(extractScreenshotInfo('click', {})).toBeUndefined(); + }); + + it('returns undefined when toolResult is not an object', () => { + expect(extractScreenshotInfo('screenshot', null)).toBeUndefined(); + expect(extractScreenshotInfo('screenshot', 'string')).toBeUndefined(); + }); + + it('returns undefined when result is not ok', () => { + expect(extractScreenshotInfo('screenshot', { ok: false })).toBeUndefined(); + }); + + it('returns undefined when result has no path', () => { + expect( + extractScreenshotInfo('screenshot', { ok: true, result: {} }), + ).toBeUndefined(); + }); + + it('extracts screenshot path from result.path', () => { + expect( + extractScreenshotInfo('screenshot', { + ok: true, + result: { path: '/img.png' }, + }), + ).toStrictEqual({ path: '/img.png' }); + }); + + it('extracts screenshot path with dimensions', () => { + expect( + extractScreenshotInfo('screenshot', { + ok: true, + result: { path: '/img.png', width: 1280, height: 720 }, + }), + ).toStrictEqual({ + path: '/img.png', + dimensions: { width: 1280, height: 720 }, + }); + }); + + it('extracts screenshot from nested screenshot object', () => { + expect( + extractScreenshotInfo('describe_screen', { + ok: true, + result: { screenshot: { path: '/ss.png', width: 800, height: 600 } }, + }), + ).toStrictEqual({ + path: '/ss.png', + dimensions: { width: 800, height: 600 }, + }); + }); + + it('extracts nested screenshot without dimensions', () => { + expect( + extractScreenshotInfo('describe_screen', { + ok: true, + result: { screenshot: { path: '/ss.png' } }, + }), + ).toStrictEqual({ path: '/ss.png' }); + }); + + it('returns undefined when 
result.result is null', () => { + expect( + extractScreenshotInfo('screenshot', { ok: true, result: null }), + ).toBeUndefined(); + }); + + it('returns undefined when nested screenshot has no path', () => { + expect( + extractScreenshotInfo('describe_screen', { + ok: true, + result: { screenshot: { width: 800 } }, + }), + ).toBeUndefined(); + }); + + it('returns undefined when nested screenshot is null', () => { + expect( + extractScreenshotInfo('describe_screen', { + ok: true, + result: { screenshot: null }, + }), + ).toBeUndefined(); + }); +}); + +describe('extractToolOutcome', () => { + it('returns ok:true for non-object input', () => { + expect(extractToolOutcome(null)).toStrictEqual({ ok: true }); + expect(extractToolOutcome('string')).toStrictEqual({ ok: true }); + }); + + it('returns ok:true when ok not in result', () => { + expect(extractToolOutcome({ result: 'data' })).toStrictEqual({ ok: true }); + }); + + it('returns ok:true for successful result', () => { + expect(extractToolOutcome({ ok: true, result: 'data' })).toStrictEqual({ + ok: true, + }); + }); + + it('returns ok:false with error for failed result', () => { + expect( + extractToolOutcome({ + ok: false, + error: { code: 'ERR', message: 'fail' }, + }), + ).toStrictEqual({ + ok: false, + error: { code: 'ERR', message: 'fail' }, + }); + }); + + it('returns ok:false without error when no error field', () => { + expect(extractToolOutcome({ ok: false })).toStrictEqual({ ok: false }); + }); +}); + +describe('buildResponseBody', () => { + it('returns toolResult as-is for non-object', () => { + expect(buildResponseBody('string', undefined)).toBe('string'); + expect(buildResponseBody(null, undefined)).toBeNull(); + }); + + it('returns toolResult when no observations', () => { + const result = { ok: true, data: 'test' }; + expect(buildResponseBody(result, undefined)).toStrictEqual(result); + }); + + it('merges observations into result', () => { + const result = { ok: true }; + const obs = { state: {}, 
testIds: [], a11y: { nodes: [] } }; + expect(buildResponseBody(result, obs as any)).toStrictEqual({ + ok: true, + observations: obs, + }); + }); +}); + +describe('shouldCollectObservations', () => { + it('returns true for mutating', () => { + expect(shouldCollectObservations('mutating')).toBe(true); + }); + + it('returns true for readonly (collected for knowledge store)', () => { + expect(shouldCollectObservations('readonly')).toBe(true); + }); + + it('returns true for discovery (collected for knowledge store)', () => { + expect(shouldCollectObservations('discovery')).toBe(true); + }); + + it('returns true for batch with default policy', () => { + expect(shouldCollectObservations('batch')).toBe(true); + }); + + it("returns true for batch with 'all' policy", () => { + expect( + shouldCollectObservations('batch', { includeObservations: 'all' }), + ).toBe(true); + }); + + it("returns false for batch with 'none' policy", () => { + expect( + shouldCollectObservations('batch', { includeObservations: 'none' }), + ).toBe(false); + }); + + it("returns true for batch with 'failures' policy", () => { + expect( + shouldCollectObservations('batch', { includeObservations: 'failures' }), + ).toBe(true); + }); +}); + +describe('shouldIncludeObservationsInResponse', () => { + const okResult: ToolResponse = { ok: true, result: {} }; + const failResult: ToolResponse = { + ok: false, + error: { code: 'ERR', message: 'fail' }, + }; + const summaryFailResult: ToolResponse = { + ok: true, + result: { summary: { ok: false } }, + }; + + it('returns true for mutating', () => { + expect(shouldIncludeObservationsInResponse('mutating', okResult)).toBe( + true, + ); + }); + + it('returns false for readonly', () => { + expect(shouldIncludeObservationsInResponse('readonly', okResult)).toBe( + false, + ); + }); + + it('returns false for discovery', () => { + expect(shouldIncludeObservationsInResponse('discovery', okResult)).toBe( + false, + ); + }); + + it("returns true for batch with 'all' 
(default)", () => { + expect(shouldIncludeObservationsInResponse('batch', okResult, {})).toBe( + true, + ); + }); + + it("returns false for batch with 'none'", () => { + expect( + shouldIncludeObservationsInResponse('batch', okResult, { + includeObservations: 'none', + }), + ).toBe(false); + }); + + it("returns true for batch with 'failures' when tool failed", () => { + expect( + shouldIncludeObservationsInResponse('batch', failResult, { + includeObservations: 'failures', + }), + ).toBe(true); + }); + + it("returns true for batch with 'failures' when summary.ok is false", () => { + expect( + shouldIncludeObservationsInResponse('batch', summaryFailResult, { + includeObservations: 'failures', + }), + ).toBe(true); + }); + + it("returns false for batch with 'failures' when tool succeeded", () => { + const batchOk: ToolResponse = { + ok: true, + result: { summary: { ok: true } }, + }; + expect( + shouldIncludeObservationsInResponse('batch', batchOk, { + includeObservations: 'failures', + }), + ).toBe(false); + }); + + it("returns false for batch with 'failures' when summary is missing", () => { + expect( + shouldIncludeObservationsInResponse('batch', okResult, { + includeObservations: 'failures', + }), + ).toBe(false); + }); +}); + +describe('createServer integration', () => { + let server: ServerInstance; + let state: DaemonState; + + beforeEach(async () => { + await fs.mkdir(tmpDir, { recursive: true }); + exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never); + + server = createServer(buildConfig()); + state = await server.start(); + }); + + afterEach(async () => { + await server.stop(); + exitSpy.mockRestore(); + await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {}); + }); + + it('gET /health returns 200 with status and nonce', async () => { + const res = await httpRequest(`http://127.0.0.1:${state.port}/health`); + const body = (await res.json()) as { status: string; nonce: string }; + + expect(res.status).toBe(200); + 
expect(body.status).toBe('ok'); + expect(body.nonce).toBe(state.nonce); + }); + + it('gET /status returns daemon info', async () => { + const res = await httpRequest(`http://127.0.0.1:${state.port}/status`); + const body = (await res.json()) as { + daemon: { pid: number; port: number }; + ports: PortMap; + }; + + expect(res.status).toBe(200); + expect(body.daemon.pid).toBe(process.pid); + expect(body.daemon.port).toBe(state.port); + expect(body.ports).toStrictEqual({}); + }); + + it('pOST /launch delegates to session manager', async () => { + const res = await httpRequest(`http://127.0.0.1:${state.port}/launch`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ state: 'default' }), + }); + const body = (await res.json()) as { ok: boolean }; + + expect(res.status).toBe(200); + expect(body.ok).toBe(true); + }); + + it('pOST /cleanup delegates to session manager', async () => { + const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }); + const body = (await res.json()) as { ok: boolean }; + + expect(res.status).toBe(200); + expect(body.ok).toBe(true); + }); + + it('pOST /tool/nonexistent returns 404', async () => { + const res = await httpRequest( + `http://127.0.0.1:${state.port}/tool/nonexistent`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }, + ); + const body = (await res.json()) as { + ok: boolean; + error: { code: string }; + }; + + expect(res.status).toBe(404); + expect(body.ok).toBe(false); + expect(body.error.code).toBe('TOOL_NOT_FOUND'); + }); + + it('writes .mm-server on start', async () => { + const daemonState = await readDaemonState(tmpDir); + expect(daemonState).not.toBeNull(); + expect(daemonState?.port).toBe(state.port); + expect(daemonState?.nonce).toBe(state.nonce); + expect(daemonState?.version).toBe(pkg.version); + }); + + 
// Startup / contextFactory cases. These tests live inside describe('createServer integration').
  //
  // Every test first stops the default server created in beforeEach: the default
  // server writes its .mm-server state under tmpDir, and several assertions below
  // inspect that same location. Custom servers that start successfully are
  // stopped in a `finally` so a failing assertion cannot leak a listening daemon
  // into the following tests.

  it('passes workflow context to session manager on start', async () => {
    await server.stop();

    const workflowContext: WorkflowContext = {
      config: { environment: 'e2e', extensionName: 'Test Extension' },
    };
    const mockSM = createMockSessionManager();
    const customServer = createServer(
      buildConfig({
        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
        contextFactory: vi.fn().mockResolvedValue(workflowContext),
      }),
    );

    await customServer.start();
    try {
      expect(mockSM.setWorkflowContext).toHaveBeenCalledWith(workflowContext);
    } finally {
      await customServer.stop();
    }
  });

  it('fails startup when contextFactory rejects', async () => {
    await server.stop();

    const customServer = createServer(
      buildConfig({
        contextFactory: vi
          .fn()
          .mockRejectedValue(new Error('port allocation failed')),
      }),
    );

    await expect(customServer.start()).rejects.toThrowError(
      'contextFactory failed during server startup: port allocation failed',
    );
  });

  it('preserves original error as cause when contextFactory rejects', async () => {
    await server.stop();

    const cause = new Error('root cause');
    const customServer = createServer(
      buildConfig({
        contextFactory: vi.fn().mockRejectedValue(cause),
      }),
    );

    await expect(customServer.start()).rejects.toThrowError(
      expect.objectContaining({ cause }),
    );
  });

  it('fails startup when contextFactory resolves with null', async () => {
    await server.stop();

    const customServer = createServer(
      buildConfig({
        contextFactory: vi.fn().mockResolvedValue(null),
      }),
    );

    await expect(customServer.start()).rejects.toThrowError(
      'contextFactory must return an object with a valid config.environment field',
    );
  });

  it('fails startup when contextFactory resolves without config', async () => {
    await server.stop();

    const customServer = createServer(
      buildConfig({
        contextFactory: vi.fn().mockResolvedValue({}),
      }),
    );

    await expect(customServer.start()).rejects.toThrowError(
      'contextFactory must return an object with a valid config.environment field',
    );
  });

  it('fails startup when allocatedPorts contains non-number values', async () => {
    await server.stop();

    const customServer = createServer(
      buildConfig({
        contextFactory: vi.fn().mockResolvedValue({
          config: { environment: 'prod', extensionName: 'Test' },
          allocatedPorts: { bad: 'not-a-number' },
        }),
      }),
    );

    await expect(customServer.start()).rejects.toThrowError(
      'allocatedPorts["bad"] must be a finite number',
    );
  });

  it('does not call setWorkflowContext when contextFactory rejects', async () => {
    await server.stop();

    const mockSM = createMockSessionManager();
    const customServer = createServer(
      buildConfig({
        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
        contextFactory: vi.fn().mockRejectedValue(new Error('boom')),
      }),
    );

    // The rejection itself is covered by the tests above; here only the side
    // effect (no context handed to the session manager) is of interest.
    await customServer.start().catch(() => {});
    expect(mockSM.setWorkflowContext).not.toHaveBeenCalled();
  });

  it('does not write .mm-server when contextFactory rejects', async () => {
    await server.stop();

    const customServer = createServer(
      buildConfig({
        contextFactory: vi.fn().mockRejectedValue(new Error('boom')),
      }),
    );

    // Rejection is intentionally swallowed; only the absence of the state file matters.
    await customServer.start().catch(() => {});
    const daemonState = await readDaemonState(tmpDir);
    expect(daemonState).toBeNull();
  });

  it('cleans up session when startup fails after contextFactory succeeds', async () => {
    await server.stop();

    const mockSM = createMockSessionManager();
    const customServer = createServer(
      buildConfig({
        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
        contextFactory: vi.fn().mockResolvedValue({
          config: { environment: 'prod', extensionName: 'Test' },
        } satisfies WorkflowContext),
      }),
    );

    // Make tmpDir read-only so startup fails with EACCES after the context was built.
    // NOTE(review): this relies on the OS enforcing directory permissions; it is
    // expected not to raise EACCES when the suite runs as root — confirm CI setup.
    await fs.chmod(tmpDir, 0o444);
    try {
      await expect(customServer.start()).rejects.toThrowError(/EACCES/u);
      expect(mockSM.cleanup).toHaveBeenCalled();
    } finally {
      // Restore write access so afterEach can remove tmpDir.
      await fs.chmod(tmpDir, 0o755).catch(() => {});
    }
  });

  it('accepts a synchronous contextFactory', async () => {
    await server.stop();

    const customServer = createServer(
      buildConfig({
        contextFactory: () => ({
          config: { environment: 'prod' as const, extensionName: 'Sync' },
        }),
      }),
    );

    const customState = await customServer.start();
    try {
      expect(customState.port).toBeGreaterThan(0);
    } finally {
      await customServer.stop();
    }
  });

  it('gET /status returns empty ports when allocatedPorts is undefined', async () => {
    await server.stop();

    const customServer = createServer(
      buildConfig({
        contextFactory: vi.fn().mockResolvedValue({
          config: { environment: 'prod', extensionName: 'Test Extension' },
        } satisfies WorkflowContext),
      }),
    );

    const customState = await customServer.start();
    try {
      const res = await httpRequest(
        `http://127.0.0.1:${customState.port}/status`,
      );
      const body = (await res.json()) as { ports: PortMap };

      expect(res.status).toBe(200);
      expect(body.ports).toStrictEqual({});
    } finally {
      await customServer.stop();
    }
  });

  it('gET /status returns custom allocated ports', async () => {
    await server.stop();

    const allocatedPorts = { serviceA: 3001, serviceB: 3002 };
    const customServer = createServer(
      buildConfig({
        contextFactory: vi.fn().mockResolvedValue({
          config: { environment: 'prod', extensionName: 'Test Extension' },
          allocatedPorts,
        } satisfies WorkflowContext),
      }),
    );

    const customState = await customServer.start();
    try {
      const res = await httpRequest(
        `http://127.0.0.1:${customState.port}/status`,
      );
      const body = (await res.json()) as { ports: PortMap };

      expect(res.status).toBe(200);
      expect(body.ports).toStrictEqual(allocatedPorts);
    } finally {
      await customServer.stop();
    }
  });

  it('removes .mm-server on stop', async () => {
    await server.stop();
    const daemonState = await readDaemonState(tmpDir);
    expect(daemonState).toBeNull();
  });

// Remaining cases of describe('createServer integration'), followed by the
// 'createServer with active session' suite and the opening of
// 'createServer with logging'.

  it('serializes concurrent launch requests through the queue', async () => {
    // Two launches fired in parallel must both succeed.
    const [res1, res2] = await Promise.all([
      httpRequest(`http://127.0.0.1:${state.port}/launch`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({}),
      }),
      httpRequest(`http://127.0.0.1:${state.port}/launch`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({}),
      }),
    ]);

    expect(res1.status).toBe(200);
    expect(res2.status).toBe(200);
  });

  it('stop() is idempotent', async () => {
    await server.stop();
    expect(await server.stop()).toBeUndefined();
  });

  // Each case posts an invalid payload to a real tool route and expects the
  // request to be rejected with HTTP 400 / VALIDATION_ERROR.
  describe('POST /tool/:name input validation', () => {
    it('returns 400 for missing required field', async () => {
      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/click`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({}),
        },
      );
      const body = (await res.json()) as {
        ok: boolean;
        error: { code: string; message: string };
      };

      expect(res.status).toBe(400);
      expect(body.ok).toBe(false);
      expect(body.error.code).toBe('VALIDATION_ERROR');
    });

    it('returns 400 for invalid enum value', async () => {
      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/navigate`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ screen: 'nonexistent' }),
        },
      );
      const body = (await res.json()) as {
        ok: boolean;
        error: { code: string; message: string };
      };

      expect(res.status).toBe(400);
      expect(body.ok).toBe(false);
      expect(body.error.code).toBe('VALIDATION_ERROR');
    });

    it('returns 400 when cross-field refine fails', async () => {
      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/clipboard`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ action: 'write' }),
        },
      );
      const body = (await res.json()) as {
        ok: boolean;
        error: { code: string; message: string };
      };

      expect(res.status).toBe(400);
      expect(body.ok).toBe(false);
      expect(body.error.code).toBe('VALIDATION_ERROR');
      expect(body.error.message).toContain(
        "text is required when action is 'write'",
      );
    });

    it('returns 400 for wrong field type', async () => {
      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/wait_for_notification`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ timeoutMs: 'not-a-number' }),
        },
      );
      const body = (await res.json()) as {
        ok: boolean;
        error: { code: string; message: string };
      };

      expect(res.status).toBe(400);
      expect(body.ok).toBe(false);
      expect(body.error.code).toBe('VALIDATION_ERROR');
    });

    it('passes validation for valid input (empty schema)', async () => {
      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/get_state`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({}),
        },
      );

      // Only asserts that validation did not reject the request.
      expect(res.status).not.toBe(400);
    });
  });
});

// Suite run against a session manager mock that reports an active session, so
// the observation / knowledge-recording paths of the server are exercised.
describe('createServer with active session', () => {
  let server: ServerInstance;
  let state: DaemonState;
  // Type argument restored: a bare `ReturnType` does not compile (TS2314).
  let mockSM: ReturnType<typeof createMockSessionManager>;

  beforeEach(async () => {
    await fs.mkdir(tmpDir, { recursive: true });
    // Stub process.exit so the code under test cannot terminate the test runner.
    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never);

    mockSM = createMockSessionManager();
    mockSM.hasActiveSession.mockReturnValue(true);
    mockSM.getExtensionState.mockResolvedValue({
      isLoaded: true,
      currentUrl: 'chrome-extension://test/home.html',
    });

    server = createServer(
      buildConfig({
        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
      }),
    );
    state = await server.start();
  });

  afterEach(async () => {
    await server.stop();
    exitSpy.mockRestore();
    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  });

  it('collects observations and records knowledge for tool execution', async () => {
    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    const body = (await res.json()) as { ok: boolean; observations?: unknown };

    expect(res.status).toBe(200);
    expect(body.ok).toBe(true);
    expect(body.observations).toBeDefined();
  });

  it('records error step when tool execution throws', async () => {
    mockSM.cleanup.mockRejectedValueOnce(new Error('Browser crash'));

    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    const body = (await res.json()) as {
      ok: boolean;
      error: { code: string; message: string };
    };

    expect(res.status).toBe(500);
    expect(body.ok).toBe(false);
    expect(body.error.code).toBe('TOOL_EXECUTION_FAILED');
    expect(body.error.message).toContain('Browser crash');
  });

  it('handles observation collection failure gracefully', async () => {
    // A throwing getPage must not turn an otherwise successful request into an error.
    mockSM.getPage.mockImplementation(() => {
      throw new Error('Page closed');
    });

    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    const body = (await res.json()) as { ok: boolean };

    expect(res.status).toBe(200);
    expect(body.ok).toBe(true);
  });

  it('records step with environment context', async () => {
    // NOTE(review): only the HTTP status is asserted here; the recorded step
    // itself is not inspected.
    const res = await httpRequest(
      `http://127.0.0.1:${state.port}/tool/get_state`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({}),
      },
    );

    expect(res.status).toBe(200);
  });

  it('read-only tool response omits observations', async () => {
    const res = await httpRequest(
      `http://127.0.0.1:${state.port}/tool/get_state`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({}),
      },
    );
    const body = (await res.json()) as { ok: boolean; observations?: unknown };

    expect(res.status).toBe(200);
    expect(body.observations).toBeUndefined();
  });

  it('mutating tool response includes observations with state, testIds, a11y', async () => {
    const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    const body = (await res.json()) as {
      ok: boolean;
      observations?: { state: unknown; testIds: unknown[]; a11y: unknown };
    };

    expect(res.status).toBe(200);
    expect(body.observations).toBeDefined();
    expect(body.observations?.state).toBeDefined();
    expect(body.observations?.testIds).toBeDefined();
    expect(body.observations?.a11y).toBeDefined();
  });

  it('playwright helpers called for read-only tools (knowledge store)', async () => {
    const { collectTestIds, collectTrimmedA11ySnapshot } =
      await import('../tools/utils/discovery.js');
    const collectTestIdsSpy = vi.mocked(collectTestIds);
    const collectA11ySpy = vi.mocked(collectTrimmedA11ySnapshot);

    // Discard calls made before this request so only this request is counted.
    collectTestIdsSpy.mockClear();
    collectA11ySpy.mockClear();

    await httpRequest(`http://127.0.0.1:${state.port}/tool/get_state`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });

    expect(collectTestIdsSpy).toHaveBeenCalled();
    expect(collectA11ySpy).toHaveBeenCalled();
  });

  it('observation Playwright helpers called for mutating tools', async () => {
    const { collectTestIds, collectTrimmedA11ySnapshot } =
      await import('../tools/utils/discovery.js');
    const collectTestIdsSpy = vi.mocked(collectTestIds);
    const collectA11ySpy = vi.mocked(collectTrimmedA11ySnapshot);

    collectTestIdsSpy.mockClear();
    collectA11ySpy.mockClear();

    await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });

    expect(collectTestIdsSpy).toHaveBeenCalled();
    expect(collectA11ySpy).toHaveBeenCalled();
  });

  it('recordStep is called for mutating tool routes', async () => {
    const { KnowledgeStore } =
      await import('../knowledge-store/knowledge-store.js');
    // Most recently constructed KnowledgeStore mock instance — presumably the
    // one created by the server started in beforeEach (confirm against the mock setup).
    const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as {
      recordStep: ReturnType<typeof vi.fn>;
    };
    mockStore.recordStep.mockClear();

    await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });

    expect(mockStore.recordStep).toHaveBeenCalled();
  });

  it('recordStep is called for read-only tool routes', async () => {
    const { KnowledgeStore } =
      await import('../knowledge-store/knowledge-store.js');
    const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as {
      recordStep: ReturnType<typeof vi.fn>;
    };
    mockStore.recordStep.mockClear();

    await httpRequest(`http://127.0.0.1:${state.port}/tool/get_state`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });

    expect(mockStore.recordStep).toHaveBeenCalled();
  });

  // The call counts asserted on getExtensionState below distinguish a single
  // state read from a retried one.
  describe('post-mutation state recheck', () => {
    it('resolves immediately when getExtensionState returns a known screen', async () => {
      mockSM.getExtensionState.mockReset();
      mockSM.getExtensionState.mockResolvedValue({
        isLoaded: true,
        currentScreen: 'home',
        currentUrl: 'chrome-extension://test/home.html',
      });

      const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({}),
      });
      const body = (await res.json()) as {
        ok: boolean;
        observations?: { state: { currentScreen?: string } };
      };

      expect(res.status).toBe(200);
      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
      expect(body.observations?.state.currentScreen).toBe('home');
    });

    it("retries when first call returns 'unknown', resolves on second call", async () => {
      mockSM.getExtensionState.mockReset();
      mockSM.getExtensionState
        .mockResolvedValueOnce({
          isLoaded: true,
          currentScreen: 'unknown',
          currentUrl: 'chrome-extension://test/unknown.html',
        })
        .mockResolvedValueOnce({
          isLoaded: true,
          currentScreen: 'home',
          currentUrl: 'chrome-extension://test/home.html',
        });

      const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({}),
      });
      const body = (await res.json()) as {
        ok: boolean;
        observations?: { state: { currentScreen?: string } };
      };

      expect(res.status).toBe(200);
      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(2);
      expect(body.observations?.state.currentScreen).toBe('home');
    });

    it("retries up to deadline and returns 'unknown' if all calls return 'unknown'", async () => {
      vi.useFakeTimers();
      mockSM.getExtensionState.mockReset();
      mockSM.getExtensionState.mockResolvedValue({
        isLoaded: true,
        currentScreen: 'unknown',
        currentUrl: 'chrome-extension://test/unknown.html',
      });

      const start = Date.now();
      const responsePromise = httpRequest(
        `http://127.0.0.1:${state.port}/cleanup`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({}),
        },
      );

      // Advance the fake clock by 500ms, then switch back to real timers before
      // awaiting the response.
      await vi.advanceTimersByTimeAsync(500);
      vi.useRealTimers();

      const res = await responsePromise;
      const body = (await res.json()) as {
        ok: boolean;
        observations?: { state: { currentScreen?: string } };
      };

      expect(res.status).toBe(200);
      expect(Date.now() - start).toBeLessThanOrEqual(600);
      // NOTE(review): 6 presumably equals one initial read plus the retries that
      // fit in the recheck deadline — confirm against the server's polling constants.
      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(6);
      expect(body.observations?.state.currentScreen).toBe('unknown');
    });

    it('does not recheck for readonly tool category', async () => {
      mockSM.getExtensionState.mockReset();
      mockSM.getExtensionState.mockResolvedValue({
        isLoaded: true,
        currentScreen: 'unknown',
        currentUrl: 'chrome-extension://test/unknown.html',
      });

      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/knowledge_last`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({}),
        },
      );

      expect(res.status).toBe(200);
      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
    });

    it('does not recheck for discovery tool category', async () => {
      mockSM.getExtensionState.mockReset();
      mockSM.getExtensionState.mockResolvedValue({
        isLoaded: true,
        currentScreen: 'unknown',
        currentUrl: 'chrome-extension://test/unknown.html',
      });

      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/list_testids`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({}),
        },
      );

      expect(res.status).toBe(200);
      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
    });

    it('does not recheck for batch tool category', async () => {
      mockSM.getExtensionState.mockReset();
      mockSM.getExtensionState.mockResolvedValue({
        isLoaded: true,
        currentScreen: 'unknown',
        currentUrl: 'chrome-extension://test/unknown.html',
      });

      const res = await httpRequest(
        `http://127.0.0.1:${state.port}/tool/run_steps`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            steps: [{ tool: 'knowledge_last', args: {} }],
          }),
        },
      );

      expect(res.status).toBe(200);
      expect(mockSM.getExtensionState).toHaveBeenCalledTimes(1);
    });
  });
});

// Suite run against a server configured with a log file inside tmpDir.
describe('createServer with logging', () => {
  let server: ServerInstance;
  let state: DaemonState;

  beforeEach(async () => {
    await fs.mkdir(tmpDir, { recursive: true });
    // Stub process.exit so the code under test cannot terminate the test runner.
    exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never);

    server = createServer(
      buildConfig({ logFilePath: path.join(tmpDir, 'daemon.log') }),
    );
    state = await server.start();
  });

// Remaining hooks/tests of describe('createServer with logging'), followed by
// the opening (shared state + fixtures) of the observation-compaction suite.

  afterEach(async () => {
    await server.stop();
    exitSpy.mockRestore();
    await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  });

  it('writes request logs to file', async () => {
    await httpRequest(`http://127.0.0.1:${state.port}/health`);
    // Give the server a moment to write the log entry before reading the file.
    await new Promise((resolve) => setTimeout(resolve, 100));
    const logContent = await fs
      .readFile(path.join(tmpDir, 'daemon.log'), 'utf-8')
      .catch(() => '');
    expect(logContent).toContain('/health');
  });

  it('logs fatal errors to stderr and file', async () => {
    const stderrSpy = vi
      .spyOn(process.stderr, 'write')
      .mockImplementation(() => true);

    // Restore stderr in `finally` so a failing start/stop/assertion does not
    // leave process.stderr mocked for the rest of the run.
    try {
      // Trigger a cleanup error by making sessionManager.cleanup() throw
      const mockSM = createMockSessionManager();
      mockSM.hasActiveSession.mockReturnValue(true);
      mockSM.cleanup.mockRejectedValue(new Error('Cleanup failed'));

      const testServer = createServer({
        sessionManager: mockSM as unknown as ServerConfig['sessionManager'],
        contextFactory: vi.fn().mockResolvedValue({
          config: {
            environment: 'e2e',
            extensionName: 'Test',
            defaultPassword: 'test',
            artifactsDir: tmpDir,
            defaultChainId: 1,
            ports: { anvil: 8545, fixtureServer: 12345 },
          },
        } satisfies WorkflowContext),
        logFilePath: path.join(tmpDir, 'error.log'),
      });

      await testServer.start();
      await testServer.stop();

      // Verify stderr was called with fatal error
      expect(stderrSpy).toHaveBeenCalledWith(
        expect.stringContaining('[ERROR] Cleanup failed'),
      );
    } finally {
      stderrSpy.mockRestore();
    }
  });

  it('handles log file write errors gracefully', async () => {
    const stderrSpy = vi
      .spyOn(process.stderr, 'write')
      .mockImplementation(() => true);

    // Create a read-only directory to cause write errors
    const readOnlyDir = path.join(tmpDir, 'readonly');
    await fs.mkdir(readOnlyDir, { recursive: true });
    const logPath = path.join(readOnlyDir, 'daemon.log');

    // Make directory read-only
    // NOTE(review): relies on the OS enforcing directory permissions; writes are
    // expected to succeed anyway when the suite runs as root — confirm CI setup.
    await fs.chmod(readOnlyDir, 0o444);

    try {
      const testServer = createServer(buildConfig({ logFilePath: logPath }));
      const testState = await testServer.start();

      try {
        // Make a request to trigger logging
        await httpRequest(`http://127.0.0.1:${testState.port}/health`);
        await new Promise((resolve) => setTimeout(resolve, 100));
      } finally {
        // Always stop the extra server, even if the request above throws.
        await testServer.stop();
      }

      // Verify that stderr was called with the write error message
      expect(stderrSpy).toHaveBeenCalledWith(
        expect.stringContaining('Failed to write log'),
      );
    } finally {
      stderrSpy.mockRestore();
      // Restore write permissions for cleanup
      await fs.chmod(readOnlyDir, 0o755).catch(() => {});
    }
  });

  it('handles server close timeout with force close', async () => {
    const testServer = createServer(buildConfig());
    const testState = await testServer.start();

    // Make a request to ensure server is active
    await httpRequest(`http://127.0.0.1:${testState.port}/health`);

    // Stop should complete even if server doesn't close gracefully
    expect(await testServer.stop()).toBeUndefined();
  });
});

describe('observation compaction in HTTP responses', () => {
  let server: ServerInstance;
  let state: DaemonState;
  // Type argument restored: a bare `ReturnType` does not compile (TS2314).
  let mockSM: ReturnType<typeof createMockSessionManager>;

  // 12 a11y nodes: one combobox (e1), ten options (e2..e11) and one button (e12).
  const comboboxAndOptions = [
    { ref: 'e1', role: 'combobox', name: 'Language', path: ['root'] },
    ...Array.from({ length: 10 }, (_, i) => ({
      ref: `e${i + 2}`,
      role: 'option',
      name: `Lang ${i + 1}`,
      path: ['root', 'combobox'],
    })),
    { ref: 'e12', role: 'button', name: 'Submit', path: ['root'] },
  ];

  // Baseline snapshot: two buttons (e1, e2).
  const initialButtons = [
    { ref: 'e1', role: 'button', name: 'Continue', path: ['root'] },
    { ref: 'e2', role: 'button', name: 'Cancel', path: ['root'] },
  ];

  // Follow-up snapshot: e1 kept, e2 gone, e3 new.
  const changedButtons = [
    { ref: 'e1', role: 'button', name: 'Continue', path: ['root'] },
    { ref: 'e3', role: 'button', name: 'Confirm', path: ['root'] },
  ];

  // Ten buttons with refs e10..e19, none of which appear in the snapshots above.
  const manyNewButtons = Array.from({ length: 10 }, (_, index) => ({
    ref: `e${index + 10}`,
    role: 'button',
    name: `Action ${index + 1}`,
    path: ['root'],
  }));

+ beforeEach(async () => { + await fs.mkdir(tmpDir, { recursive: true }); + exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never); + + mockSM = createMockSessionManager(); + mockSM.hasActiveSession.mockReturnValue(true); + mockSM.getExtensionState.mockResolvedValue({ + isLoaded: true, + currentUrl: 'chrome-extension://test/home.html', + }); + + const { collectTrimmedA11ySnapshot } = + await import('../tools/utils/discovery.js'); + vi.mocked(collectTrimmedA11ySnapshot).mockResolvedValue({ + nodes: comboboxAndOptions as never, + refMap: new Map(), + }); + + server = createServer( + buildConfig({ + sessionManager: mockSM as unknown as ServerConfig['sessionManager'], + }), + ); + state = await server.start(); + }); + + afterEach(async () => { + await server.stop(); + exitSpy.mockRestore(); + + const { collectTrimmedA11ySnapshot } = + await import('../tools/utils/discovery.js'); + vi.mocked(collectTrimmedA11ySnapshot).mockResolvedValue({ + nodes: [], + refMap: new Map(), + }); + + await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {}); + }); + + it('mutating tool returns compact observations in HTTP response', async () => { + const res = await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }); + const body = (await res.json()) as { + ok: boolean; + observations?: { a11y: { nodes: unknown[] } }; + }; + + expect(res.status).toBe(200); + expect(body.observations).toBeDefined(); + // 12 original nodes → compacted: combobox + summary + button = 3 + expect(body.observations?.a11y.nodes).toHaveLength(3); + }); + + it('first mutation returns a full compact observation when no baseline exists', async () => { + const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + const body = (await res.json()) 
as { + ok: boolean; + observations?: { + a11y: { + nodes: unknown[]; + diff?: unknown; + }; + }; + }; + + expect(res.status).toBe(200); + expect(body.observations).toBeDefined(); + expect(body.observations?.a11y.diff).toBeUndefined(); + expect(body.observations?.a11y.nodes).toHaveLength(3); + }); + + it('second mutation returns a diff-based observation', async () => { + const { collectTrimmedA11ySnapshot } = + await import('../tools/utils/discovery.js'); + vi.mocked(collectTrimmedA11ySnapshot) + .mockResolvedValueOnce({ + nodes: initialButtons as never, + refMap: new Map(), + }) + .mockResolvedValueOnce({ + nodes: changedButtons as never, + refMap: new Map(), + }); + + await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + + const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + const body = (await res.json()) as { + ok: boolean; + observations?: { + a11y: { + nodes: unknown[]; + diff?: { added: string[]; removed: string[]; unchanged: number }; + }; + }; + }; + + expect(res.status).toBe(200); + expect(body.observations?.a11y.diff).toStrictEqual({ + added: ['e3'], + removed: ['e2'], + unchanged: 1, + }); + expect(body.observations?.a11y.nodes).toHaveLength(1); + }); + + it('describe_screen resets the diff baseline', async () => { + const { collectTrimmedA11ySnapshot } = + await import('../tools/utils/discovery.js'); + vi.mocked(collectTrimmedA11ySnapshot) + .mockResolvedValueOnce({ + nodes: initialButtons as never, + refMap: new Map(), + }) + .mockResolvedValueOnce({ + nodes: initialButtons as never, + refMap: new Map(), + }) + .mockResolvedValueOnce({ + nodes: changedButtons as never, + refMap: new Map(), + }); + + await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + 
headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + + await httpRequest(`http://127.0.0.1:${state.port}/tool/describe_screen`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }); + + const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + const body = (await res.json()) as { + ok: boolean; + observations?: { + a11y: { + nodes: unknown[]; + diff?: unknown; + }; + }; + }; + + expect(res.status).toBe(200); + expect(body.observations).toBeDefined(); + expect(body.observations?.a11y.diff).toBeUndefined(); + expect(body.observations?.a11y.nodes.length).toBeGreaterThan(1); + }); + + it('falls back to the full observation when the diff is not smaller', async () => { + const { collectTrimmedA11ySnapshot } = + await import('../tools/utils/discovery.js'); + vi.mocked(collectTrimmedA11ySnapshot) + .mockResolvedValueOnce({ + nodes: [initialButtons[0]] as never, + refMap: new Map(), + }) + .mockResolvedValueOnce({ + nodes: manyNewButtons as never, + refMap: new Map(), + }); + + await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + + const res = await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + const body = (await res.json()) as { + ok: boolean; + observations?: { + a11y: { + nodes: unknown[]; + diff?: unknown; + }; + }; + }; + + expect(res.status).toBe(200); + expect(body.observations).toBeDefined(); + expect(body.observations?.a11y.diff).toBeUndefined(); + expect(body.observations?.a11y.nodes).toHaveLength(10); + }); + + it('knowledge store always receives the full observation 
instead of the diff', async () => { + const { collectTrimmedA11ySnapshot } = + await import('../tools/utils/discovery.js'); + vi.mocked(collectTrimmedA11ySnapshot) + .mockResolvedValueOnce({ + nodes: initialButtons as never, + refMap: new Map(), + }) + .mockResolvedValueOnce({ + nodes: changedButtons as never, + refMap: new Map(), + }); + + const { KnowledgeStore } = + await import('../knowledge-store/knowledge-store.js'); + const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as { + recordStep: ReturnType; + }; + mockStore.recordStep.mockClear(); + + await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + + await httpRequest(`http://127.0.0.1:${state.port}/tool/click`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ a11yRef: 'e1' }), + }); + + expect(mockStore.recordStep).toHaveBeenCalledTimes(2); + const recorded = mockStore.recordStep.mock.calls[1][0] as { + observation: { a11y: { nodes: unknown[]; diff?: unknown } }; + }; + + expect(recorded.observation.a11y.diff).toBeUndefined(); + expect(recorded.observation.a11y.nodes).toStrictEqual(changedButtons); + }); + + it('knowledge store receives full uncompacted observations', async () => { + const { KnowledgeStore } = + await import('../knowledge-store/knowledge-store.js'); + const mockStore = vi.mocked(KnowledgeStore).mock.results.at(-1)?.value as { + recordStep: ReturnType; + }; + mockStore.recordStep.mockClear(); + + await httpRequest(`http://127.0.0.1:${state.port}/cleanup`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }); + + expect(mockStore.recordStep).toHaveBeenCalled(); + const recorded = mockStore.recordStep.mock.calls[0][0] as { + observation: { a11y: { nodes: unknown[] } }; + }; + expect(recorded.observation.a11y.nodes).toHaveLength(12); + }); + + it('batch 
with includeObservations=all returns compact observations', async () => { + const res = await httpRequest( + `http://127.0.0.1:${state.port}/tool/run_steps`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + steps: [{ tool: 'get_state' }], + includeObservations: 'all', + }), + }, + ); + const body = (await res.json()) as { + ok: boolean; + observations?: { a11y: { nodes: unknown[] } }; + }; + + expect(res.status).toBe(200); + expect(body.observations).toBeDefined(); + expect(body.observations?.a11y.nodes).toHaveLength(3); + }); + + it('batch with includeObservations=none omits observations', async () => { + const res = await httpRequest( + `http://127.0.0.1:${state.port}/tool/run_steps`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + steps: [{ tool: 'get_state' }], + includeObservations: 'none', + }), + }, + ); + const body = (await res.json()) as { + ok: boolean; + observations?: unknown; + }; + + expect(res.status).toBe(200); + expect(body.observations).toBeUndefined(); + }); + + it('describe_screen response omits observations', async () => { + const res = await httpRequest( + `http://127.0.0.1:${state.port}/tool/describe_screen`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }, + ); + const body = (await res.json()) as { + ok: boolean; + observations?: unknown; + }; + + // Discovery tools never include observations in the HTTP response + expect(body.observations).toBeUndefined(); + }); +}); diff --git a/src/server/create-server.ts b/src/server/create-server.ts new file mode 100644 index 0000000..3c1e893 --- /dev/null +++ b/src/server/create-server.ts @@ -0,0 +1,813 @@ +import express from 'express'; +import { execSync } from 'node:child_process'; +import { randomUUID } from 'node:crypto'; +import * as fs from 'node:fs/promises'; +import * as http from 'node:http'; + +import { writeDaemonState, 
/**
 * Extracts target selection fields from a tool's validated input.
 * Interaction tools (click, type, wait_for) include a11yRef, testId, or selector.
 *
 * Only string-valued fields are kept; a field of any other type is treated as
 * absent. When none of the three fields is a non-empty string the function
 * returns undefined.
 *
 * @param input - The validated tool input.
 * @returns The target info for knowledge recording, or undefined if not applicable.
 */
export function extractTargetFromInput(
  input: unknown,
): StepRecordTool['target'] | undefined {
  if (typeof input !== 'object' || input === null) {
    return undefined;
  }

  const fields = input as Record<string, unknown>;
  const readString = (key: string): string | undefined => {
    const value = fields[key];
    return typeof value === 'string' ? value : undefined;
  };

  const a11yRef = readString('a11yRef');
  const testId = readString('testId');
  const selector = readString('selector');

  // Truthiness check: empty strings count as "no target" here.
  if (!a11yRef && !testId && !selector) {
    return undefined;
  }
  return { a11yRef, testId, selector };
}

/** Screenshot artifact metadata attached to a recorded step. */
type ScreenshotInfo = {
  path: string;
  dimensions?: { width: number; height: number };
};

/**
 * Reads `path` (required) and `width`/`height` (optional, both or neither)
 * from a single object.
 *
 * @param candidate - Object that may carry screenshot fields.
 * @returns The screenshot info, or undefined when `path` is not a string.
 */
function pickScreenshotInfo(
  candidate: Record<string, unknown>,
): ScreenshotInfo | undefined {
  const { path, width, height } = candidate;
  if (typeof path !== 'string') {
    return undefined;
  }
  return typeof width === 'number' && typeof height === 'number'
    ? { path, dimensions: { width, height } }
    : { path };
}

/**
 * Extracts screenshot artifact metadata from a successful tool result.
 * Applies to `screenshot` and `describe_screen` tools.
 *
 * The metadata is looked up first directly on `result`, then on
 * `result.screenshot`.
 *
 * @param toolName - The name of the tool that produced the result.
 * @param toolResult - The raw result from the tool execution.
 * @returns Screenshot path and dimensions, or undefined if not applicable.
 */
export function extractScreenshotInfo(
  toolName: string,
  toolResult: unknown,
):
  | { path: string; dimensions?: { width: number; height: number } }
  | undefined {
  if (toolName !== 'screenshot' && toolName !== 'describe_screen') {
    return undefined;
  }
  if (typeof toolResult !== 'object' || toolResult === null) {
    return undefined;
  }

  const envelope = toolResult as Record<string, unknown>;
  if (
    !envelope.ok ||
    typeof envelope.result !== 'object' ||
    envelope.result === null
  ) {
    return undefined;
  }

  const data = envelope.result as Record<string, unknown>;
  const direct = pickScreenshotInfo(data);
  if (direct) {
    return direct;
  }

  const nested = data.screenshot;
  if (typeof nested === 'object' && nested !== null) {
    return pickScreenshotInfo(nested as Record<string, unknown>);
  }
  return undefined;
}

/** Handle returned by `createServer` for controlling the daemon lifecycle. */
export type ServerInstance = {
  /** Starts the daemon and resolves with the persisted daemon state. */
  start(): Promise<DaemonState>;
  /** Stops the daemon and releases its resources. */
  stop(): Promise<void>;
};
/**
 * Extracts a structured outcome from a raw tool result for knowledge recording.
 *
 * A result that is not an object, or that has no `ok` property, is treated as
 * a success.
 *
 * @param toolResult - The raw result returned by a tool function.
 * @returns A normalized outcome with ok status and optional error details.
 */
export function extractToolOutcome(toolResult: unknown): {
  ok: boolean;
  error?: { code: string; message: string };
} {
  if (
    typeof toolResult !== 'object' ||
    toolResult === null ||
    !('ok' in toolResult)
  ) {
    return { ok: true };
  }

  const { ok, error } = toolResult as {
    ok: boolean;
    error?: { code: string; message: string };
  };
  if (ok) {
    return { ok: true };
  }

  // Only attach `error` when the tool actually supplied one.
  return error ? { ok: false, error } : { ok: false };
}

/**
 * Merges a tool result with observation data into the HTTP response body.
 *
 * Non-object results and calls without observations are returned unchanged
 * (same reference); otherwise a shallow copy with an added `observations`
 * property is returned.
 *
 * @param toolResult - The raw result returned by a tool function.
 * @param observations - Optional observation snapshot to attach.
 * @returns The response body suitable for res.json().
 */
export function buildResponseBody(
  toolResult: unknown,
  observations: StepRecordObservation | undefined,
): unknown {
  if (typeof toolResult !== 'object' || toolResult === null || !observations) {
    return toolResult;
  }

  return { ...(toolResult as Record<string, unknown>), observations };
}

/**
 * Whether to run Playwright observation collection for this tool invocation.
 *
 * Observations are always collected for the knowledge store, regardless of
 * whether they appear in the HTTP response. The only exception is batch
 * with `'none'` policy, which skips collection entirely for best performance.
 *
 * @param category - The tool category to check.
 * @param validatedInput - The validated input payload (checked for batch policy).
 * @returns True if observations should be collected.
 */
export function shouldCollectObservations(
  category: ToolCategory,
  validatedInput?: Record<string, unknown>,
): boolean {
  if (category !== 'batch') {
    return true;
  }
  // A missing policy defaults to 'all'.
  const policy = validatedInput?.includeObservations ?? 'all';
  return policy !== 'none';
}
/**
 * Whether to include observations in the HTTP response.
 *
 * Mutating tools always include them; batch tools follow the
 * `includeObservations` policy (`'all'` by default, `'none'`, or `'failures'`);
 * every other category omits them.
 *
 * @param category - The tool category.
 * @param toolResult - The result returned by the tool.
 * @param validatedInput - The validated input payload (used for batch policy).
 * @returns True if observations should be included in the response.
 */
export function shouldIncludeObservationsInResponse(
  category: ToolCategory,
  toolResult: ToolResponse,
  validatedInput?: Record<string, unknown>,
): boolean {
  if (category === 'mutating') {
    return true;
  }
  if (category !== 'batch') {
    return false; // readonly, discovery
  }

  const policy = validatedInput?.includeObservations ?? 'all';
  if (policy === 'none') {
    return false;
  }
  if (policy !== 'failures') {
    return true; // 'all'
  }

  if (!toolResult.ok) {
    return true;
  }
  const result = toolResult.result as Record<string, unknown> | undefined;
  const summary: unknown = result?.summary;
  // Guard against `summary: null`, which would otherwise throw on `.ok`.
  if (typeof summary !== 'object' || summary === null) {
    return false;
  }
  return !(summary as Record<string, unknown>).ok;
}

/**
 * Creates an HTTP daemon server for agent-driven browser testing.
 *
 * The returned instance is inert until `start()` is called. `start()` resolves
 * the git worktree root, builds the workflow context, binds to an OS-assigned
 * port on 127.0.0.1, writes the daemon state file and installs signal and
 * idle-shutdown handling. `stop()` reverses those steps and is a no-op after
 * its first call.
 *
 * @param config - The server configuration options.
 * @returns The server instance with start and stop methods.
 */
export function createServer(config: ServerConfig): ServerInstance {
  const app = express();
  const queue = new RequestQueue(config.requestTimeoutMs);
  const nonce = randomUUID();
  const knowledgeStore = config.knowledgeStore ?? new KnowledgeStore();

  let httpServer: http.Server | null = null;
  let worktreeRoot = '';
  let startedAt = '';
  let daemonPort = 0;
  let workflowContext: WorkflowContext | null = null;
  let subPorts: PortMap = {};
  let shuttingDown = false;
  let shutdownHandler: (() => void) | null = null;
  let lastRequestTime = Date.now();
  let idleCheckInterval: ReturnType<typeof setInterval> | null = null;
  // Baseline for diff-based response observations; null forces a full one.
  let lastObservation: StepRecordObservation | null = null;

  // eslint-disable-next-line import-x/no-named-as-default-member
  app.use(express.json({ limit: '10mb' }));

  // Request logging + idle tracking.
  app.use((req, res, next) => {
    lastRequestTime = Date.now();
    const requestStartedAt = lastRequestTime;
    res.on('finish', () => {
      // Stamp activity again on completion so a request that runs longer
      // than the idle timeout does not make the daemon look idle.
      lastRequestTime = Date.now();
      const duration = lastRequestTime - requestStartedAt;
      appendLog(
        config.logFilePath,
        `[INFO] ${req.method} ${req.path} ${res.statusCode} ${duration}ms`,
      );
    });
    next();
  });

  app.get('/health', (_req, res) => {
    res.json({ status: 'ok', nonce });
  });

  app.get('/status', (_req, res) => {
    res.json({
      daemon: {
        pid: process.pid,
        port: daemonPort,
        uptime: process.uptime(),
        startedAt,
      },
      ports: subPorts,
    });
  });

  /**
   * Builds a lazy ToolContext where `page` and `refMap` are only accessed
   * when a tool actually reads them, avoiding throws for non-session tools.
   *
   * @param wfCtx - The current workflow context to embed in the tool context.
   * @returns A ToolContext with lazy page and refMap accessors.
   */
  function buildToolContext(wfCtx: WorkflowContext): ToolContext {
    return {
      sessionManager: config.sessionManager,
      get page(): ReturnType<typeof config.sessionManager.getPage> {
        return config.sessionManager.getPage();
      },
      get refMap(): ReturnType<typeof config.sessionManager.getRefMap> {
        return config.sessionManager.hasActiveSession()
          ? config.sessionManager.getRefMap()
          : new Map();
      },
      workflowContext: wfCtx,
      knowledgeStore,
      toolRegistry,
    };
  }

  /**
   * Records a tool execution step to the knowledge store.
   * Failures are silently caught — recording must never block tool responses.
   *
   * @param toolName - The registered tool name.
   * @param validatedInput - The validated input payload.
   * @param outcome - The tool execution outcome.
   * @param observation - The post-execution observation snapshot.
   * @param toolResult - The raw tool result (for screenshot extraction).
   * @param startTime - The epoch timestamp when execution started.
   */
  async function recordToolStep(
    toolName: string,
    validatedInput: unknown,
    outcome: StepRecordOutcome,
    observation: StepRecordObservation | undefined,
    toolResult: unknown,
    startTime: number,
  ): Promise<void> {
    try {
      const sessionId = config.sessionManager.getSessionId();
      if (!sessionId) {
        return;
      }

      const target = extractTargetFromInput(validatedInput);
      const screenshotInfo = extractScreenshotInfo(toolName, toolResult);

      let executionContext: 'e2e' | 'prod' | undefined;
      try {
        executionContext = config.sessionManager.getEnvironmentMode();
      } catch {
        // session manager may not support environment mode
      }

      await knowledgeStore.recordStep({
        sessionId,
        toolName,
        input: validatedInput as Record<string, unknown>,
        target,
        outcome,
        observation:
          observation ?? createDefaultObservation({} as ExtensionState),
        durationMs: Date.now() - startTime,
        ...(screenshotInfo ? { screenshotPath: screenshotInfo.path } : {}),
        ...(screenshotInfo?.dimensions
          ? { screenshotDimensions: screenshotInfo.dimensions }
          : {}),
        context: executionContext,
      });
    } catch {
      // non-fatal: recording failure must not block tool responses
    }
  }

  /**
   * Shared tool executor — validates input, runs through the queue,
   * records knowledge steps, and collects observations.
   *
   * Responds 404 for an unknown tool, 503 before `start()` has produced a
   * workflow context, 400 on schema validation failure and 500 when the tool
   * (or the queue) throws.
   *
   * @param toolName - The registered tool name to execute.
   * @param rawInput - The unvalidated input payload from the request body.
   * @param res - The Express response object to write the result to.
   */
  async function executeTool(
    toolName: string,
    rawInput: unknown,
    res: express.Response,
  ): Promise<void> {
    const tool = toolRegistry.get(toolName);
    if (!tool) {
      res.status(404).json({
        ok: false,
        error: { code: 'TOOL_NOT_FOUND', message: `Unknown tool: ${toolName}` },
      });
      return;
    }

    if (!workflowContext) {
      res.status(503).json({
        ok: false,
        error: {
          code: 'SERVER_NOT_STARTED',
          message: 'Server has not been started yet.',
        },
      });
      return;
    }

    const schema =
      toolName in toolSchemas ? toolSchemas[toolName as ToolName] : undefined;
    let validatedInput = rawInput;

    if (schema) {
      const parsed = schema.safeParse(rawInput);
      if (!parsed.success) {
        res.status(400).json({
          ok: false,
          error: {
            code: 'VALIDATION_ERROR',
            message: parsed.error.issues
              .map((i) =>
                i.path.length > 0
                  ? `${i.path.join('.')}: ${i.message}`
                  : i.message,
              )
              .join('; '),
          },
        });
        return;
      }
      validatedInput = parsed.data;
    }

    const startTime = Date.now();
    // Capture the context now; the module-level variable may be reset later.
    const currentWorkflowContext = workflowContext;

    const category = getToolCategory(toolName);

    try {
      const { toolResult, observations } = await queue.enqueue(async () => {
        const context = buildToolContext(currentWorkflowContext);
        const result = await tool(validatedInput, context);

        let obs: StepRecordObservation | undefined;
        if (
          shouldCollectObservations(
            category,
            validatedInput as Record<string, unknown>,
          ) &&
          config.sessionManager.hasActiveSession()
        ) {
          try {
            const page = config.sessionManager.getPage();

            if (category === 'mutating') {
              // Let the page settle before snapshotting: wait for the DOM,
              // then for running animations to finish (bounded to 3s).
              await page
                .waitForLoadState('domcontentloaded')
                .catch(() => undefined);
              await page
                .waitForFunction(
                  async () =>
                    new Promise<boolean>((resolve) => {
                      requestAnimationFrame(() => {
                        const allSettled = document
                          .getAnimations()
                          .every((a: Animation) => a.playState !== 'running');
                        resolve(allSettled);
                      });
                    }),
                  { timeout: 3000 },
                )
                .catch(() => undefined);
            }
            let state = await config.sessionManager.getExtensionState();

            // Post-mutation recheck: if currentScreen is 'unknown' after a mutation,
            // the extension's internal router may not have updated yet. Poll briefly.
            if (category === 'mutating' && state.currentScreen === 'unknown') {
              const RECHECK_DEADLINE_MS = 500;
              const RECHECK_INTERVAL_MS = 100;
              const deadline = Date.now() + RECHECK_DEADLINE_MS;

              while (Date.now() < deadline) {
                await new Promise((resolve) =>
                  setTimeout(resolve, RECHECK_INTERVAL_MS),
                );
                const rechecked =
                  await config.sessionManager.getExtensionState();
                if (rechecked.currentScreen !== 'unknown') {
                  state = rechecked;
                  break;
                }
              }
            }
            const testIds = await collectTestIds(
              page,
              OBSERVATION_TESTID_LIMIT,
            );
            const { nodes, refMap: newRefMap } =
              await collectTrimmedA11ySnapshot(page);
            config.sessionManager.setRefMap(newRefMap);
            obs = createDefaultObservation(state, testIds, nodes);
          } catch {
            // non-fatal: observation failure must not block the tool response
          }
        }

        return { toolResult: result, observations: obs };
      });

      // The knowledge store always gets the full observation.
      await recordToolStep(
        toolName,
        validatedInput,
        extractToolOutcome(toolResult),
        observations,
        toolResult,
        startTime,
      );

      const includeInResponse = shouldIncludeObservationsInResponse(
        category,
        toolResult,
        validatedInput as Record<string, unknown>,
      );
      const responseObservations =
        includeInResponse && observations
          ? compactObservation(observations, lastObservation)
          : undefined;
      res.json(buildResponseBody(toolResult, responseObservations));

      // These tools reset the diff baseline so the next mutation reports a
      // full observation; any other tool's observation becomes the baseline.
      if (
        toolName === 'describe_screen' ||
        toolName === 'launch' ||
        toolName === 'cleanup'
      ) {
        lastObservation = null;
      } else if (observations) {
        lastObservation = observations;
      }
    } catch (error) {
      await recordToolStep(
        toolName,
        validatedInput,
        {
          ok: false,
          error: {
            code: 'TOOL_EXECUTION_FAILED',
            message: extractErrorMessage(error),
          },
        },
        undefined,
        undefined,
        startTime,
      );

      res.status(500).json({
        ok: false,
        error: {
          code: 'TOOL_EXECUTION_FAILED',
          message: extractErrorMessage(error),
        },
      });
    }
  }

  app.post('/launch', async (req, res) => {
    await executeTool('launch', req.body, res);
  });

  // The request body is ignored: cleanup always runs with an empty input.
  app.post('/cleanup', async (_req, res) => {
    await executeTool('cleanup', {}, res);
  });

  app.post(
    '/tool/:name',
    async (req: express.Request<{ name: string }>, res) => {
      await executeTool(req.params.name, req.body, res);
    },
  );

  // Final error middleware (four-argument signature marks it as such).
  app.use(
    (
      error: Error,
      _req: express.Request,
      res: express.Response,
      _next: express.NextFunction,
    ) => {
      appendLog(config.logFilePath, `[ERROR] ${error.message}`);
      res.status(500).json({
        ok: false,
        error: {
          code: 'INTERNAL_ERROR',
          message: error.message,
        },
      });
    },
  );

  const instance: ServerInstance = {
    async start(): Promise<DaemonState> {
      // Throws when the current directory is not inside a git work tree.
      worktreeRoot = execSync('git rev-parse --show-toplevel', {
        cwd: process.cwd(),
      })
        .toString()
        .trim();

      try {
        workflowContext = await config.contextFactory();
      } catch (error) {
        throw new Error(
          `contextFactory failed during server startup: ${error instanceof Error ? error.message : String(error)}`,
          { cause: error },
        );
      }

      if (
        !workflowContext ||
        typeof workflowContext !== 'object' ||
        !workflowContext.config ||
        typeof workflowContext.config.environment !== 'string'
      ) {
        throw new Error(
          'contextFactory must return an object with a valid config.environment field',
        );
      }

      const rawPorts = workflowContext.allocatedPorts;
      if (rawPorts !== undefined) {
        if (typeof rawPorts !== 'object' || rawPorts === null) {
          throw new Error('allocatedPorts must be a plain object');
        }
        for (const [key, val] of Object.entries(rawPorts)) {
          if (typeof val !== 'number' || !Number.isFinite(val)) {
            throw new Error(
              `allocatedPorts["${key}"] must be a finite number, got ${String(val)}`,
            );
          }
        }
      }

      subPorts = workflowContext.allocatedPorts ?? {};
      config.sessionManager.setWorkflowContext(workflowContext);
      startedAt = new Date().toISOString();

      // Everything after setWorkflowContext may have side-effects the
      // consumer expects to be cleaned up. Wrap in try/catch so a
      // listen() or writeDaemonState() failure still runs cleanup.
      try {
        // Bind daemon directly to port 0 to eliminate TOCTOU race —
        // the OS assigns the port atomically at listen time.
        httpServer = await new Promise<http.Server>((resolve, reject) => {
          const srv = http.createServer(app);
          srv.listen(0, '127.0.0.1', () => {
            const addr = srv.address();
            if (addr && typeof addr !== 'string') {
              daemonPort = addr.port;
            }
            resolve(srv);
          });
          srv.on('error', reject);
        });

        const state: DaemonState = {
          port: daemonPort,
          pid: process.pid,
          startedAt,
          nonce,
          version: pkg.version,
          subPorts,
        };

        await writeDaemonState(worktreeRoot, state);
        appendLog(
          config.logFilePath,
          `[INFO] Daemon started on port ${daemonPort} (pid ${process.pid})`,
        );

        shutdownHandler = (): void => {
          instance
            .stop()
            .then(() => process.exit(0))
            .catch((error: Error) => {
              appendLog(
                config.logFilePath,
                `[ERROR] Daemon failed to shut down: ${error.message}`,
              );
              process.exit(1);
            });
        };

        process.on('SIGTERM', shutdownHandler);
        process.on('SIGINT', shutdownHandler);

        const { idleShutdownMs } = config;
        if (idleShutdownMs && idleShutdownMs > 0) {
          // Poll at a tenth of the timeout, but at least once a minute.
          const checkMs = Math.min(idleShutdownMs / 10, 60_000);
          idleCheckInterval = setInterval(() => {
            if (Date.now() - lastRequestTime > idleShutdownMs) {
              appendLog(
                config.logFilePath,
                '[INFO] Idle timeout reached, shutting down',
              );
              if (idleCheckInterval) {
                clearInterval(idleCheckInterval);
                idleCheckInterval = null;
              }
              shutdownHandler?.();
            }
          }, checkMs);
          // Do not let the idle timer alone keep the process alive.
          idleCheckInterval.unref();
        }

        return state;
      } catch (startupError) {
        // Best-effort rollback: close the HTTP server if it was created,
        // then let the session manager clean up any resources the
        // contextFactory may have started.
        const serverToClose = httpServer;
        if (serverToClose) {
          await new Promise<void>((resolve) => {
            serverToClose.close(() => {
              httpServer = null;
              resolve();
            });
          });
        }
        try {
          await config.sessionManager.cleanup();
        } catch {
          // Swallow — we're already propagating startupError.
        }
        workflowContext = null; // eslint-disable-line require-atomic-updates
        subPorts = {};
        throw startupError;
      }
    },

    async stop(): Promise<void> {
      if (shuttingDown) {
        return;
      }
      shuttingDown = true;

      appendLog(config.logFilePath, '[INFO] Daemon shutting down');

      // 1. Remove signal handlers
      if (shutdownHandler) {
        process.removeListener('SIGTERM', shutdownHandler);
        process.removeListener('SIGINT', shutdownHandler);
        shutdownHandler = null;
      }

      // 2. Clear idle check interval
      if (idleCheckInterval) {
        clearInterval(idleCheckInterval);
        idleCheckInterval = null;
      }

      // 3. Stop accepting new connections, wait for in-flight (max 10s)
      await new Promise<void>((resolve) => {
        if (!httpServer) {
          resolve();
          return;
        }

        const forceClose = setTimeout(() => {
          httpServer?.closeAllConnections();
          resolve();
        }, 10_000);

        httpServer.close(() => {
          clearTimeout(forceClose);
          httpServer = null;
          resolve();
        });
      });

      // 4. Clean up session
      try {
        await config.sessionManager.cleanup();
      } catch (error) {
        appendLog(
          config.logFilePath,
          `[ERROR] Cleanup failed: ${extractErrorMessage(error)}`,
          true,
        );
      }

      // 5. Remove .mm-server file
      if (worktreeRoot) {
        await removeDaemonState(worktreeRoot);
      }

      appendLog(config.logFilePath, '[INFO] Daemon stopped');
    },
  };

  return instance;
}
/**
 * Appends a timestamped line to the daemon log file.
 *
 * The file write is fire-and-forget: this function returns before the append
 * completes, and a failed write is reported on stderr instead of being thrown.
 *
 * @param logFilePath - Path to the log file, or undefined to skip file logging.
 * @param message - The log message to append.
 * @param fatal - Whether to also write to stderr.
 */
function appendLog(
  logFilePath: string | undefined,
  message: string,
  fatal = false,
): void {
  const line = `[${new Date().toISOString()}] ${message}\n`;
  if (fatal) {
    process.stderr.write(line);
  }
  if (!logFilePath) {
    return;
  }
  fs.appendFile(logFilePath, line, 'utf-8').catch((error: unknown) => {
    // Narrow the rejection reason rather than assuming it is an Error.
    const reason = error instanceof Error ? error.message : String(error);
    process.stderr.write(`Failed to write log: ${reason}\n`);
  });
}
updated: DaemonState = { ...mockState, port: 99999 }; + await writeDaemonState(tmpDir, updated); + const read = await readDaemonState(tmpDir); + expect(read?.port).toBe(99999); + }); + }); + + describe('readDaemonState', () => { + it('returns null when file does not exist', async () => { + const result = await readDaemonState(tmpDir); + expect(result).toBeNull(); + }); + + it('returns null for invalid JSON', async () => { + await fs.writeFile(path.join(tmpDir, '.mm-server'), 'not-json', 'utf-8'); + const result = await readDaemonState(tmpDir); + expect(result).toBeNull(); + }); + }); + + describe('removeDaemonState', () => { + it('removes the state file', async () => { + await writeDaemonState(tmpDir, mockState); + await removeDaemonState(tmpDir); + const result = await readDaemonState(tmpDir); + expect(result).toBeNull(); + }); + + it('does not throw when file does not exist', async () => { + expect(await removeDaemonState(tmpDir)).toBeUndefined(); + }); + }); + + describe('isDaemonAlive', () => { + it('returns false for an unreachable port', async () => { + const alive = await isDaemonAlive({ ...mockState, port: 1 }); + expect(alive).toBe(false); + }); + + it('returns false when response.ok is false', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: false, + } as Response); + + const alive = await isDaemonAlive(mockState); + + expect(alive).toBe(false); + }); + + it('returns false when nonce does not match', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: vi.fn().mockResolvedValue({ nonce: 'different-nonce' }), + } as unknown as Response); + + const alive = await isDaemonAlive({ + ...mockState, + nonce: 'expected-nonce', + }); + + expect(alive).toBe(false); + }); + }); + + describe('acquireStartupLock / releaseStartupLock', () => { + it('creates the lock file and writes the current pid', async () => { + const acquired = await acquireStartupLock(tmpDir); + + expect(acquired).toBe(true); + expect( + await 
fs.readFile(path.join(tmpDir, '.mm-server.lock'), 'utf-8'), + ).toBe(`${process.pid}\n`); + }); + + it('returns false when another process holds a fresh lock', async () => { + await fs.writeFile( + path.join(tmpDir, '.mm-server.lock'), + `${process.pid}\n`, + ); + + const acquired = await acquireStartupLock(tmpDir); + + expect(acquired).toBe(false); + }); + + it('reclaims a stale lock by age', async () => { + const lockPath = path.join(tmpDir, '.mm-server.lock'); + const staleTime = new Date(Date.now() - 31_000); + + await fs.writeFile(lockPath, `${process.pid}\n`); + await fs.utimes(lockPath, staleTime, staleTime); + + const acquired = await acquireStartupLock(tmpDir); + + expect(acquired).toBe(true); + expect(await fs.readFile(lockPath, 'utf-8')).toBe(`${process.pid}\n`); + }); + + it('reclaims a stale lock for a dead pid', async () => { + const lockPath = path.join(tmpDir, '.mm-server.lock'); + + await fs.writeFile(lockPath, '999999\n'); + + const acquired = await acquireStartupLock(tmpDir); + + expect(acquired).toBe(true); + expect(await fs.readFile(lockPath, 'utf-8')).toBe(`${process.pid}\n`); + }); + + it('returns false when stale lock check errors', async () => { + await fs.writeFile(path.join(tmpDir, '.mm-server.lock'), '12345\n'); + await fs.chmod(path.join(tmpDir, '.mm-server.lock'), 0o000); + + const acquired = await acquireStartupLock(tmpDir); + + expect(acquired).toBe(false); + }); + + it('throws when lock creation fails with a non-EEXIST error', async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + + await expect(acquireStartupLock(tmpDir)).rejects.toMatchObject({ + code: 'ENOENT', + }); + }); + + it('removes the lock file', async () => { + const lockPath = path.join(tmpDir, '.mm-server.lock'); + + await fs.writeFile(lockPath, `${process.pid}\n`); + await releaseStartupLock(tmpDir); + + await expect(fs.access(lockPath)).rejects.toMatchObject({ + code: 'ENOENT', + }); + }); + + it('ignores ENOENT when releasing the lock', async () => 
{ + expect(await releaseStartupLock(tmpDir)).toBeUndefined(); + }); + + it('throws when lock release fails with a non-ENOENT error', async () => { + await fs.mkdir(path.join(tmpDir, '.mm-server.lock')); + + // Linux returns EISDIR, macOS returns EPERM for unlink on a directory + await expect(releaseStartupLock(tmpDir)).rejects.toMatchObject({ + code: expect.stringMatching(/^(EPERM|EISDIR)$/u), + }); + }); + }); + + describe('isDaemonVersionMatch', () => { + it('returns true when version matches package.json version', () => { + expect(isDaemonVersionMatch(mockState)).toBe(true); + }); + + it('returns false when version differs', () => { + expect(isDaemonVersionMatch({ ...mockState, version: '0.0.0' })).toBe( + false, + ); + }); + + it('returns false when version is absent (pre-version-tracking daemon)', () => { + const { version: _, ...stateWithoutVersion } = mockState; + expect(isDaemonVersionMatch(stateWithoutVersion as DaemonState)).toBe( + false, + ); + }); + }); + + describe('generateNonce', () => { + it('returns a non-empty string', () => { + const nonce = generateNonce(); + expect(typeof nonce).toBe('string'); + expect(nonce.length).toBeGreaterThan(0); + }); + + it('returns unique values on successive calls', () => { + const a = generateNonce(); + const b = generateNonce(); + expect(a).not.toBe(b); + }); + }); +}); diff --git a/src/server/daemon-state.ts b/src/server/daemon-state.ts new file mode 100644 index 0000000..d3cfe9a --- /dev/null +++ b/src/server/daemon-state.ts @@ -0,0 +1,209 @@ +import { randomUUID } from 'node:crypto'; +import { constants } from 'node:fs'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; + +import pkg from '../../package.json'; +import type { DaemonState } from '../types/http.js'; + +const DAEMON_STATE_FILE = '.mm-server'; +const DAEMON_STATE_TMP_FILE = '.mm-server.tmp'; +const DAEMON_LOCK_FILE = '.mm-server.lock'; +const LOCK_STALE_MS = 30_000; + +/** + * Writes daemon state atomically using rename 
pattern. + * Writes to .mm-server.tmp first, then renames to .mm-server. + * + * @param worktreeRoot - Absolute path to the git worktree root. + * @param state - The daemon state to persist. + */ +export async function writeDaemonState( + worktreeRoot: string, + state: DaemonState, +): Promise { + const tmpPath = path.join(worktreeRoot, DAEMON_STATE_TMP_FILE); + const finalPath = path.join(worktreeRoot, DAEMON_STATE_FILE); + await fs.writeFile(tmpPath, JSON.stringify(state, null, 2), 'utf-8'); + await fs.rename(tmpPath, finalPath); +} + +/** + * Reads daemon state from .mm-server file. + * Returns null if file doesn't exist, JSON is invalid, or required fields are missing. + * + * @param worktreeRoot - Absolute path to the git worktree root. + * @returns The parsed daemon state, or null if unavailable. + */ +export async function readDaemonState( + worktreeRoot: string, +): Promise { + const filePath = path.join(worktreeRoot, DAEMON_STATE_FILE); + try { + const content = await fs.readFile(filePath, 'utf-8'); + const parsed = JSON.parse(content) as Record; + if ( + typeof parsed.port !== 'number' || + typeof parsed.pid !== 'number' || + typeof parsed.nonce !== 'string' || + typeof parsed.startedAt !== 'string' + ) { + return null; + } + return parsed as DaemonState; + } catch { + return null; + } +} + +/** + * Removes the .mm-server file. + * Silently ignores if file doesn't exist. + * + * @param worktreeRoot - Absolute path to the git worktree root. + */ +export async function removeDaemonState(worktreeRoot: string): Promise { + const filePath = path.join(worktreeRoot, DAEMON_STATE_FILE); + try { + await fs.unlink(filePath); + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { + throw error; + } + } +} + +/** + * Checks if a daemon is alive by sending GET /health and verifying the nonce. + * Returns false if connection refused, timeout, or nonce mismatch. + * + * @param state - The daemon state containing port and nonce to verify. 
+ * @returns Whether the daemon is responding and matches the expected nonce. + */ +export async function isDaemonAlive(state: DaemonState): Promise { + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 2000); + try { + const response = await fetch(`http://127.0.0.1:${state.port}/health`, { + signal: controller.signal, + }); + if (!response.ok) { + return false; + } + const body = (await response.json()) as { nonce?: string }; + return body.nonce === state.nonce; + } finally { + clearTimeout(timeout); + } + } catch { + return false; + } +} + +/** + * Checks whether the daemon's package version matches the current CLI version. + * Returns false if the daemon state has no version (pre-version-tracking daemon). + * + * @param state - The daemon state to check. + * @returns Whether the versions match. + */ +export function isDaemonVersionMatch(state: DaemonState): boolean { + return state.version === pkg.version; +} + +/** + * Generates a new random nonce for daemon identification. + * + * @returns A UUID string. + */ +export function generateNonce(): string { + return randomUUID(); +} + +/** + * Acquires an exclusive startup lock for the worktree. + * Uses O_CREAT | O_EXCL to atomically create the lock file — if it already + * exists, checks whether the lock is stale (dead PID or older than 30s) + * and reclaims it if so. + * + * @param worktreeRoot - Absolute path to the git worktree root. + * @returns true if the lock was acquired, false if another process holds it. 
+ */ +export async function acquireStartupLock( + worktreeRoot: string, +): Promise { + const lockPath = path.join(worktreeRoot, DAEMON_LOCK_FILE); + try { + // eslint-disable-next-line no-bitwise + const flags = constants.O_CREAT | constants.O_EXCL | constants.O_WRONLY; + const fd = await fs.open(lockPath, flags); + await fd.write(`${process.pid}\n`); + await fd.close(); + return true; + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'EEXIST') { + if (await isLockStale(lockPath)) { + try { + await fs.unlink(lockPath); + } catch { + return false; + } + return acquireStartupLock(worktreeRoot); + } + return false; + } + throw error; + } +} + +/** + * Checks whether a lock file is stale by examining PID liveness and file age. + * + * @param lockPath - Absolute path to the lock file. + * @returns true if the lock holder is dead or the file is older than LOCK_STALE_MS. + */ +async function isLockStale(lockPath: string): Promise { + try { + const [content, stat] = await Promise.all([ + fs.readFile(lockPath, 'utf-8'), + fs.stat(lockPath), + ]); + + const ageMs = Date.now() - stat.mtimeMs; + if (ageMs > LOCK_STALE_MS) { + return true; + } + + const pid = parseInt(content.trim(), 10); + if (!isNaN(pid)) { + try { + process.kill(pid, 0); + return false; + } catch { + return true; + } + } + + return false; + } catch { + return false; + } +} + +/** + * Releases the startup lock for the worktree. + * Silently ignores if the lock file doesn't exist. + * + * @param worktreeRoot - Absolute path to the git worktree root. 
+ */ +export async function releaseStartupLock(worktreeRoot: string): Promise { + const lockPath = path.join(worktreeRoot, DAEMON_LOCK_FILE); + try { + await fs.unlink(lockPath); + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { + throw error; + } + } +} diff --git a/src/server/observation-compaction.test.ts b/src/server/observation-compaction.test.ts new file mode 100644 index 0000000..033c41f --- /dev/null +++ b/src/server/observation-compaction.test.ts @@ -0,0 +1,561 @@ +import { describe, expect, it, vi } from 'vitest'; + +import { + collapseOptionSubtrees, + compactObservation, + diffObservation, + nodeChanged, + observationCompactionDeps, +} from './observation-compaction.js'; +import type { A11yNodeTrimmed } from '../tools/types/discovery.js'; +import type { StepRecordObservation } from '../tools/types/step-record.js'; + +function createNode( + ref: string, + role: string, + overrides: Partial = {}, +): A11yNodeTrimmed { + return { + ref, + role, + name: overrides.name ?? `${role}-${ref}`, + path: overrides.path ?? ['root', ref], + ...overrides, + }; +} + +function createOptionRun(count: number, start = 1): A11yNodeTrimmed[] { + return Array.from({ length: count }, (_, index) => { + const refNumber = start + index; + return createNode(`e${refNumber}`, 'option', { + name: `Option ${refNumber}`, + path: ['root', 'combo', `option-${refNumber}`], + }); + }); +} + +function createObservation( + nodes: A11yNodeTrimmed[], + overrides: Partial = {}, +): StepRecordObservation { + return { + state: overrides.state ?? ({} as StepRecordObservation['state']), + testIds: overrides.testIds ?? [], + a11y: { + nodes, + ...(overrides.a11y?.diff ? { diff: overrides.a11y.diff } : {}), + }, + ...(overrides.priorKnowledge + ? 
{ priorKnowledge: overrides.priorKnowledge } + : {}), + } as StepRecordObservation; +} + +describe('collapseOptionSubtrees', () => { + it('collapses 55 options after a combobox into a summary node', () => { + const combobox = createNode('e1', 'combobox', { + name: 'Select network', + path: ['root', 'combobox'], + }); + const nodes = [combobox, ...createOptionRun(55, 2)]; + + const result = collapseOptionSubtrees(nodes); + + expect(result).toHaveLength(2); + expect(result[0]).toBe(combobox); + expect(result[1]).toStrictEqual({ + ref: 'e2\u2013e56', + role: 'option', + name: '55 options (refs e2\u2013e56)', + path: ['root', 'combo', 'option-2'], + }); + }); + + it('does not collapse runs below the threshold', () => { + const combobox = createNode('e1', 'combobox'); + const optionOne = createNode('e2', 'option'); + const optionTwo = createNode('e3', 'option'); + + const result = collapseOptionSubtrees([combobox, optionOne, optionTwo]); + + expect(result).toHaveLength(3); + expect(result).toStrictEqual([combobox, optionOne, optionTwo]); + }); + + it('leaves bare options unchanged when no combobox or listbox precedes them', () => { + const options = createOptionRun(4); + + const result = collapseOptionSubtrees(options); + + expect(result).toStrictEqual(options); + }); + + it('handles multiple combobox and listbox groups independently', () => { + const firstCombobox = createNode('e1', 'combobox', { + path: ['root', 'first-combobox'], + }); + const separator = createNode('e12', 'button', { + name: 'Continue', + path: ['root', 'separator'], + }); + const secondListbox = createNode('e13', 'listbox', { + path: ['root', 'second-listbox'], + }); + const nodes = [ + firstCombobox, + ...createOptionRun(10, 2), + separator, + secondListbox, + ...createOptionRun(5, 14), + ]; + + const result = collapseOptionSubtrees(nodes); + + expect(result).toStrictEqual([ + firstCombobox, + { + ref: 'e2\u2013e11', + role: 'option', + name: '10 options (refs e2\u2013e11)', + path: ['root', 
'combo', 'option-2'], + }, + separator, + secondListbox, + { + ref: 'e14\u2013e18', + role: 'option', + name: '5 options (refs e14\u2013e18)', + path: ['root', 'combo', 'option-14'], + }, + ]); + }); + + it('preserves already-collapsed option summaries as a single entry', () => { + const combobox = createNode('e1', 'combobox'); + const collapsedSummary = createNode('e2\u2013e6', 'option', { + name: '5 options (refs e2\u2013e6)', + path: ['root', 'combo', 'option-2'], + }); + + const result = collapseOptionSubtrees([combobox, collapsedSummary]); + + expect(result).toStrictEqual([combobox, collapsedSummary]); + }); + + it('does not collapse when a non-option node immediately follows the combobox', () => { + const combobox = createNode('e1', 'combobox'); + const button = createNode('e2', 'button', { name: 'Apply' }); + const options = createOptionRun(3, 3); + + const result = collapseOptionSubtrees([combobox, button, ...options]); + + expect(result).toStrictEqual([combobox, button, ...options]); + }); + + it('treats malformed option range refs as single options during compaction', () => { + const combobox = createNode('e1', 'combobox'); + const malformedSummary = createNode( + `e${'9'.repeat(400)}\u2013e2`, + 'option', + { + name: 'Malformed range', + path: ['root', 'combo', 'option-weird'], + }, + ); + const optionTwo = createNode('e3', 'option', { + name: 'Option 3', + path: ['root', 'combo', 'option-3'], + }); + const optionThree = createNode('e4', 'option', { + name: 'Option 4', + path: ['root', 'combo', 'option-4'], + }); + + const result = collapseOptionSubtrees([ + combobox, + malformedSummary, + optionTwo, + optionThree, + ]); + + expect(result).toStrictEqual([ + combobox, + { + ref: `${malformedSummary.ref}\u2013e4`, + role: 'option', + name: `3 options (refs ${malformedSummary.ref}\u2013e4)`, + path: ['root', 'combo', 'option-weird'], + }, + ]); + }); +}); + +describe('compactObservation', () => { + it('preserves non-a11y fields by reference while returning 
a new object', () => { + const state = { connected: true }; + const testIds = [{ testId: 'submit', tag: 'button', visible: true }]; + const priorKnowledge = { schemaVersion: 1, notes: ['cached'] }; + const observation = { + state, + testIds, + a11y: { + nodes: [createNode('e1', 'combobox'), ...createOptionRun(4, 2)], + }, + priorKnowledge, + } as unknown as StepRecordObservation; + + const result = compactObservation(observation); + + expect(result).not.toBe(observation); + expect(result.state).toBe(state); + expect(result.testIds).toBe(testIds); + expect(result.priorKnowledge).toBe(priorKnowledge); + expect(result.a11y).not.toBe(observation.a11y); + expect(result.a11y.nodes).toStrictEqual([ + observation.a11y.nodes[0], + { + ref: 'e2\u2013e5', + role: 'option', + name: '4 options (refs e2\u2013e5)', + path: ['root', 'combo', 'option-2'], + }, + ]); + }); + + it('is idempotent when called repeatedly on the same result', () => { + const observation = { + state: {}, + testIds: [], + a11y: { + nodes: [createNode('e1', 'listbox'), ...createOptionRun(6, 2)], + }, + } as unknown as StepRecordObservation; + + const first = compactObservation(observation); + const second = compactObservation(first); + + expect(second).toStrictEqual(first); + }); + + it('returns a diff when the previous observation produces a smaller payload', () => { + const previous = createObservation([ + createNode('e1', 'button', { name: 'Continue' }), + createNode('e2', 'button', { name: 'Cancel' }), + ]); + const current = createObservation([ + createNode('e1', 'button', { name: 'Continue' }), + createNode('e3', 'button', { name: 'Confirm' }), + ]); + + const result = compactObservation(current, previous); + + expect(result.a11y.nodes).toStrictEqual([ + createNode('e3', 'button', { name: 'Confirm' }), + ]); + expect(result.a11y.diff).toStrictEqual({ + added: ['e3'], + removed: ['e2'], + unchanged: 1, + }); + }); + + it('returns the full compacted observation when the diff is not smaller', () => { + 
const previous = createObservation([createNode('e1', 'button')]); + const current = createObservation( + Array.from({ length: 10 }, (_, index) => + createNode(`e${index + 10}`, 'button', { name: `Action ${index + 1}` }), + ), + ); + + const result = compactObservation(current, previous); + + expect(result.a11y.diff).toBeUndefined(); + expect(result.a11y.nodes).toStrictEqual(current.a11y.nodes); + }); + + it('falls back to the original observation when compaction throws', () => { + const observation = { + state: {}, + testIds: [], + a11y: { nodes: [createNode('e1', 'combobox')] }, + } as unknown as StepRecordObservation; + const collapseSpy = vi + .spyOn(observationCompactionDeps, 'collapseOptionSubtrees') + .mockImplementation(() => { + throw new Error('boom'); + }); + + const result = compactObservation(observation); + + expect(result).toBe(observation); + collapseSpy.mockRestore(); + }); + + it('handles empty node arrays gracefully', () => { + const observation = { + state: {}, + testIds: [], + a11y: { nodes: [] }, + } as unknown as StepRecordObservation; + + const result = compactObservation(observation); + + expect(result).not.toBe(observation); + expect(result.a11y.nodes).toStrictEqual([]); + }); + + describe('activeTab passthrough', () => { + it('preserves state.activeTab when present', () => { + const state = { + isLoaded: true, + currentUrl: 'chrome-extension://extension/home.html', + extensionId: 'extension-id', + isUnlocked: true, + currentScreen: 'home', + accountAddress: '0x123', + networkName: 'Ethereum Mainnet', + chainId: 1, + balance: '1 ETH', + activeTab: { + role: 'dapp', + url: 'https://app.uniswap.org/', + title: 'Uniswap', + }, + } satisfies StepRecordObservation['state']; + const observation = createObservation( + [createNode('e1', 'listbox'), ...createOptionRun(6, 2)], + { state }, + ); + + const result = compactObservation(observation); + + expect(result.state).toBe(state); + expect(result.state.activeTab).toStrictEqual(state.activeTab); + 
}); + + it('works when state.activeTab is undefined (backward compat)', () => { + const state = { + isLoaded: true, + currentUrl: 'chrome-extension://extension/home.html', + extensionId: 'extension-id', + isUnlocked: false, + currentScreen: 'unlock', + accountAddress: null, + networkName: null, + chainId: null, + balance: null, + } satisfies StepRecordObservation['state']; + const observation = createObservation( + [createNode('e1', 'listbox'), ...createOptionRun(6, 2)], + { state }, + ); + + const result = compactObservation(observation); + + expect(result.state).toBe(state); + expect(result.state.activeTab).toBeUndefined(); + }); + }); +}); + +describe('nodeChanged', () => { + it('returns true when the name changes', () => { + const previous = createNode('e1', 'button', { name: 'Continue' }); + const current = createNode('e1', 'button', { name: 'Confirm' }); + + expect(nodeChanged(current, previous)).toBe(true); + }); + + it('returns true when the role changes', () => { + const previous = createNode('e1', 'button'); + const current = createNode('e1', 'link'); + + expect(nodeChanged(current, previous)).toBe(true); + }); + + it('returns true when the path changes', () => { + const previous = createNode('e1', 'button', { path: ['root', 'page'] }); + const current = createNode('e1', 'button', { + path: ['root', 'dialog', 'page'], + }); + + expect(nodeChanged(current, previous)).toBe(true); + }); + + it('returns true when the disabled state changes', () => { + const previous = createNode('e1', 'button', { disabled: false }); + const current = createNode('e1', 'button', { disabled: true }); + + expect(nodeChanged(current, previous)).toBe(true); + }); + + it('returns false for identical nodes', () => { + const previous = createNode('e1', 'checkbox', { + checked: true, + expanded: false, + testId: 'accept', + textContent: 'Accept terms', + path: ['root', 'form', 'accept'], + }); + const current = createNode('e1', 'checkbox', { + checked: true, + expanded: false, + 
testId: 'accept', + textContent: 'Accept terms', + path: ['root', 'form', 'accept'], + }); + + expect(nodeChanged(current, previous)).toBe(false); + }); + + it('does not compare refs', () => { + const previous = createNode('e1', 'button', { + name: 'Continue', + path: ['root', 'actions'], + }); + const current = createNode('e999', 'button', { + name: 'Continue', + path: ['root', 'actions'], + }); + + expect(nodeChanged(current, previous)).toBe(false); + }); +}); + +describe('diffObservation', () => { + it('tracks added nodes and omits unchanged nodes from the diff payload', () => { + const stable = createNode('e1', 'button', { name: 'Continue' }); + const added = createNode('e2', 'button', { name: 'Cancel' }); + const previous = createObservation([stable]); + const current = createObservation([stable, added]); + + const result = diffObservation(current, previous); + + expect(result.a11y.nodes).toStrictEqual([added]); + expect(result.a11y.diff).toStrictEqual({ + added: ['e2'], + removed: [], + unchanged: 1, + }); + }); + + it('tracks removed nodes without including them in nodes', () => { + const stable = createNode('e1', 'button', { name: 'Continue' }); + const removed = createNode('e2', 'button', { name: 'Cancel' }); + const previous = createObservation([stable, removed]); + const current = createObservation([stable]); + + const result = diffObservation(current, previous); + + expect(result.a11y.nodes).toStrictEqual([]); + expect(result.a11y.diff).toStrictEqual({ + added: [], + removed: ['e2'], + unchanged: 1, + }); + }); + + it('includes changed nodes without marking them as added or removed', () => { + const previous = createObservation([ + createNode('e1', 'button', { disabled: false, name: 'Continue' }), + ]); + const changed = createNode('e1', 'button', { + disabled: true, + name: 'Continue', + }); + const current = createObservation([changed]); + + const result = diffObservation(current, previous); + + expect(result.a11y.nodes).toStrictEqual([changed]); + 
expect(result.a11y.diff).toStrictEqual({ + added: [], + removed: [], + unchanged: 0, + }); + }); + + it('returns an empty diff payload when nothing changed', () => { + const previous = createObservation([ + createNode('e1', 'button'), + createNode('e2', 'checkbox', { checked: true }), + ]); + const current = createObservation([ + createNode('e1', 'button'), + createNode('e2', 'checkbox', { checked: true }), + ]); + + const result = diffObservation(current, previous); + + expect(result.a11y.nodes).toStrictEqual([]); + expect(result.a11y.diff).toStrictEqual({ + added: [], + removed: [], + unchanged: 2, + }); + }); + + it('supports mixed added, removed, changed, and unchanged nodes', () => { + const unchangedNodes = Array.from({ length: 5 }, (_, index) => + createNode(`u${index + 1}`, 'button', { name: `Stable ${index + 1}` }), + ); + const previous = createObservation([ + ...unchangedNodes, + createNode('c1', 'button', { disabled: false, name: 'Changed' }), + createNode('r1', 'button', { name: 'Removed' }), + ]); + const changed = createNode('c1', 'button', { + disabled: true, + name: 'Changed', + }); + const addedOne = createNode('a1', 'button', { name: 'Added 1' }); + const addedTwo = createNode('a2', 'button', { name: 'Added 2' }); + const current = createObservation([ + ...unchangedNodes, + changed, + addedOne, + addedTwo, + ]); + + const result = diffObservation(current, previous); + + expect(result.a11y.nodes).toStrictEqual([changed, addedOne, addedTwo]); + expect(result.a11y.diff).toStrictEqual({ + added: ['a1', 'a2'], + removed: ['r1'], + unchanged: 5, + }); + }); + + it('preserves the current state and testIds', () => { + const state = { + mode: 'current', + } as unknown as StepRecordObservation['state']; + const testIds = [{ testId: 'submit', tag: 'button', visible: true }]; + const previous = createObservation([]); + const current = createObservation([createNode('e1', 'button')], { + state, + testIds, + }); + + const result = diffObservation(current, 
previous); + + expect(result.state).toBe(state); + expect(result.testIds).toBe(testIds); + }); + + it('preserves the current priorKnowledge', () => { + const priorKnowledge = { + schemaVersion: 1, + notes: ['cached'], + } as unknown as StepRecordObservation['priorKnowledge']; + const previous = createObservation([]); + const current = createObservation([createNode('e1', 'button')], { + priorKnowledge, + }); + + const result = diffObservation(current, previous); + + expect(result.priorKnowledge).toBe(priorKnowledge); + }); +}); diff --git a/src/server/observation-compaction.ts b/src/server/observation-compaction.ts new file mode 100644 index 0000000..e525c8d --- /dev/null +++ b/src/server/observation-compaction.ts @@ -0,0 +1,261 @@ +import type { A11yNodeTrimmed } from '../tools/types/discovery.js'; +import type { StepRecordObservation } from '../tools/types/step-record.js'; +import { OPTION_COLLAPSE_MIN_COUNT } from '../tools/utils/constants.js'; + +const OPTION_RANGE_PATTERN = + /^(?[^\d]+)(?\d+)\u2013\k(?\d+)$/u; + +type RefRange = { + firstRef: string; + lastRef: string; + count: number; +}; + +/** + * Parses a ref string into its first/last ref and total node count. + * Handles range refs like "e2–e6" from collapseIdenticalRuns, returning + * the spanning range and the count of individual nodes it represents. + * + * @param ref - A node ref string, either a simple ref (e.g. "e3") or a range (e.g. "e2–e6"). + * @returns The first ref, last ref, and total count of nodes the ref represents. 
+ */ +function parseRefRange(ref: string): RefRange { + const match = OPTION_RANGE_PATTERN.exec(ref); + if (!match?.groups) { + return { firstRef: ref, lastRef: ref, count: 1 }; + } + + const { prefix, start, end } = match.groups; + const startIndex = Number(start); + const endIndex = Number(end); + + if (!Number.isFinite(startIndex) || !Number.isFinite(endIndex)) { + return { firstRef: ref, lastRef: ref, count: 1 }; + } + + return { + firstRef: `${prefix}${start}`, + lastRef: `${prefix}${end}`, + count: Math.abs(endIndex - startIndex) + 1, + }; +} + +/** + * Builds a summary node representing a collapsed group of option nodes. + * + * @param nodes - Array of option nodes to summarize. + * @returns A single summary node representing the collapsed options. + */ +function buildOptionSummary(nodes: A11yNodeTrimmed[]): A11yNodeTrimmed { + const firstRange = parseRefRange(nodes[0].ref); + const lastRange = parseRefRange(nodes[nodes.length - 1].ref); + const optionCount = nodes.reduce( + (count, node) => count + parseRefRange(node.ref).count, + 0, + ); + const refRange = `${firstRange.firstRef}\u2013${lastRange.lastRef}`; + + return { + ref: refRange, + role: 'option', + name: `${optionCount} options (refs ${refRange})`, + path: nodes[0].path, + }; +} + +export const observationCompactionDeps = { + collapseOptionSubtrees(nodes: A11yNodeTrimmed[]): A11yNodeTrimmed[] { + const collapsed: A11yNodeTrimmed[] = []; + let cursor = 0; + + while (cursor < nodes.length) { + const current = nodes[cursor]; + + if (current.role !== 'combobox' && current.role !== 'listbox') { + collapsed.push(current); + cursor += 1; + continue; + } + + collapsed.push(current); + cursor += 1; + + const optionNodes: A11yNodeTrimmed[] = []; + while (cursor < nodes.length && nodes[cursor].role === 'option') { + optionNodes.push(nodes[cursor]); + cursor += 1; + } + + if (optionNodes.length === 0) { + continue; + } + + const optionCount = optionNodes.reduce( + (count, node) => count + 
parseRefRange(node.ref).count, + 0, + ); + + if (optionCount >= OPTION_COLLAPSE_MIN_COUNT) { + collapsed.push(buildOptionSummary(optionNodes)); + continue; + } + + collapsed.push(...optionNodes); + } + + return collapsed; + }, +}; + +/** + * Collapses consecutive option nodes immediately beneath combobox/listbox nodes. + * + * @param nodes - Flat accessibility nodes to compact. + * @returns A new node array with large option runs summarized. + */ +export function collapseOptionSubtrees( + nodes: A11yNodeTrimmed[], +): A11yNodeTrimmed[] { + return observationCompactionDeps.collapseOptionSubtrees(nodes); +} + +/** + * Creates a compacted copy of an observation while preserving non-a11y fields. + * + * @param observation - Observation to compact. + * @param previousObservation - Optional previous observation to compute diff against. + * @returns A new compacted observation, or the original observation on failure. + */ +export function compactObservation( + observation: StepRecordObservation, + previousObservation?: StepRecordObservation | null, +): StepRecordObservation { + try { + const optionFiltered: StepRecordObservation = { + ...observation, + a11y: { + ...observation.a11y, + nodes: observationCompactionDeps.collapseOptionSubtrees( + observation.a11y.nodes, + ), + }, + }; + + if (!previousObservation) { + return optionFiltered; + } + + const previousFiltered: StepRecordObservation = { + ...previousObservation, + a11y: { + ...previousObservation.a11y, + nodes: observationCompactionDeps.collapseOptionSubtrees( + previousObservation.a11y.nodes, + ), + }, + }; + + const diffResult = diffObservation(optionFiltered, previousFiltered); + + if (diffResult.a11y.nodes.length >= optionFiltered.a11y.nodes.length) { + return optionFiltered; + } + + return diffResult; + } catch { + return observation; + } +} + +/** + * Checks if two string arrays are equal. + * + * @param left - First array to compare. + * @param right - Second array to compare. 
+ * @returns True if arrays have equal length and identical elements. + */ +function arraysEqual(left: string[], right: string[]): boolean { + return ( + left.length === right.length && left.every((val, idx) => val === right[idx]) + ); +} + +/** + * Checks if two accessibility nodes have changed. + * + * @param a - First node to compare. + * @param b - Second node to compare. + * @returns True if any property differs between the nodes. + */ +export function nodeChanged(a: A11yNodeTrimmed, b: A11yNodeTrimmed): boolean { + return ( + a.name !== b.name || + a.role !== b.role || + a.disabled !== b.disabled || + a.checked !== b.checked || + a.expanded !== b.expanded || + a.testId !== b.testId || + a.textContent !== b.textContent || + !arraysEqual(a.path, b.path) + ); +} + +/** + * Computes the diff between two observations, returning only changed or new nodes. + * + * @param current - The current observation to compare. + * @param previous - The previous observation to compare against. + * @returns A new observation containing only changed/new nodes with diff metadata. 
+ */ +export function diffObservation( + current: StepRecordObservation, + previous: StepRecordObservation, +): StepRecordObservation { + const prevMap = new Map( + previous.a11y.nodes.map((node) => [node.ref, node] as const), + ); + const currMap = new Map( + current.a11y.nodes.map((node) => [node.ref, node] as const), + ); + const changedOrNewNodes: A11yNodeTrimmed[] = []; + const addedRefs: string[] = []; + const removedRefs: string[] = []; + let unchangedCount = 0; + + for (const [ref, currNode] of currMap) { + const prevNode = prevMap.get(ref); + + if (!prevNode) { + addedRefs.push(ref); + changedOrNewNodes.push(currNode); + continue; + } + + if (nodeChanged(currNode, prevNode)) { + changedOrNewNodes.push(currNode); + continue; + } + + unchangedCount += 1; + } + + for (const ref of prevMap.keys()) { + if (!currMap.has(ref)) { + removedRefs.push(ref); + } + } + + return { + state: current.state, + testIds: current.testIds, + a11y: { + nodes: changedOrNewNodes, + diff: { + added: addedRefs, + removed: removedRefs, + unchanged: unchangedCount, + }, + }, + priorKnowledge: current.priorKnowledge, + }; +} diff --git a/src/server/port-allocator.test.ts b/src/server/port-allocator.test.ts new file mode 100644 index 0000000..c2f89c5 --- /dev/null +++ b/src/server/port-allocator.test.ts @@ -0,0 +1,48 @@ +import { describe, it, expect } from 'vitest'; + +import { allocatePort } from './port-allocator.js'; + +describe('allocatePort', () => { + it('returns a valid port number', async () => { + const { port, server } = await allocatePort(); + try { + expect(port).toBeGreaterThan(0); + expect(port).toBeLessThan(65536); + } finally { + server.close(); + } + }); + + it('returns different ports on concurrent calls', async () => { + const [a, b] = await Promise.all([allocatePort(), allocatePort()]); + try { + expect(a.port).not.toBe(b.port); + } finally { + a.server.close(); + b.server.close(); + } + }); + + it('returns a server that is already listening', async () => { + const 
{ server } = await allocatePort(); + try { + expect(server.listening).toBe(true); + } finally { + server.close(); + } + }); + + it('binds to 127.0.0.1', async () => { + const { server } = await allocatePort(); + try { + const address = server.address(); + expect(address).not.toBeNull(); + expect(typeof address).toBe('object'); + if (typeof address === 'object' && address !== null) { + expect(address.address).toBe('127.0.0.1'); + } + } finally { + server.close(); + } + }); +}); diff --git a/src/server/port-allocator.ts b/src/server/port-allocator.ts new file mode 100644 index 0000000..dbc7982 --- /dev/null +++ b/src/server/port-allocator.ts @@ -0,0 +1,27 @@ +import * as net from 'node:net'; + +/** + * Allocates an available port by binding to port 0. + * Returns both the port number AND the bound server to avoid port-grab race conditions. + * The caller is responsible for passing the server to Express or closing it. + * + * @returns The allocated port and bound server. + */ +export async function allocatePort(): Promise<{ + port: number; + server: net.Server; +}> { + return new Promise((resolve, reject) => { + const server = net.createServer(); + server.listen(0, '127.0.0.1', () => { + const address = server.address(); + if (!address || typeof address === 'string') { + server.close(); + reject(new Error('Failed to get server address')); + return; + } + resolve({ port: address.port, server }); + }); + server.on('error', reject); + }); +} diff --git a/src/server/request-queue.test.ts b/src/server/request-queue.test.ts new file mode 100644 index 0000000..7729a3f --- /dev/null +++ b/src/server/request-queue.test.ts @@ -0,0 +1,92 @@ +import { describe, it, expect } from 'vitest'; + +import { RequestQueue } from './request-queue.js'; + +async function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +describe('RequestQueue', () => { + it('executes enqueued functions sequentially', async () => { + const queue = new 
RequestQueue(); + const results: number[] = []; + + await Promise.all([ + queue.enqueue(async () => { + await sleep(30); + results.push(1); + }), + queue.enqueue(async () => { + results.push(2); + }), + queue.enqueue(async () => { + results.push(3); + }), + ]); + + expect(results).toStrictEqual([1, 2, 3]); + }); + + it('returns the value produced by the enqueued function', async () => { + const queue = new RequestQueue(); + const result = await queue.enqueue(async () => 42); + expect(result).toBe(42); + }); + + it('returns values from concurrent enqueues in order', async () => { + const queue = new RequestQueue(); + const [a, b, c] = await Promise.all([ + queue.enqueue(async () => 'first'), + queue.enqueue(async () => 'second'), + queue.enqueue(async () => 'third'), + ]); + + expect(a).toBe('first'); + expect(b).toBe('second'); + expect(c).toBe('third'); + }); + + it('rejects when the function exceeds the timeout', async () => { + const queue = new RequestQueue(50); + + await expect( + queue.enqueue( + async () => new Promise((resolve) => setTimeout(resolve, 500)), + ), + ).rejects.toThrowError('timed out'); + }); + + it('remains functional after a timeout rejection', async () => { + const queue = new RequestQueue(50); + + await queue + .enqueue(async () => new Promise((resolve) => setTimeout(resolve, 500))) + .catch(() => {}); + + const result = await queue.enqueue(async () => 'recovered'); + expect(result).toBe('recovered'); + }); + + it('propagates errors thrown by the enqueued function', async () => { + const queue = new RequestQueue(); + + await expect( + queue.enqueue(async () => { + throw new Error('task failed'); + }), + ).rejects.toThrowError('task failed'); + }); + + it('continues processing after an error in a previous task', async () => { + const queue = new RequestQueue(); + + await queue + .enqueue(async () => { + throw new Error('fail'); + }) + .catch(() => {}); + + const result = await queue.enqueue(async () => 'after-error'); + 
expect(result).toBe('after-error'); + }); +}); diff --git a/src/server/request-queue.ts b/src/server/request-queue.ts new file mode 100644 index 0000000..79f52b3 --- /dev/null +++ b/src/server/request-queue.ts @@ -0,0 +1,64 @@ +import { debugWarn } from '../utils'; + +/** + * Async mutex for serializing concurrent tool requests. + * Ensures only one tool executes at a time. + */ +export class RequestQueue { + #queue: Promise = Promise.resolve(); + + readonly #timeoutMs: number; + + /** + * @param timeoutMs - Maximum milliseconds a queued task may run. + */ + constructor(timeoutMs = 30_000) { + this.#timeoutMs = timeoutMs; + } + + /** + * Enqueues an async task for serial execution with a timeout. + * + * @param fn - The async function to execute. + * @returns The resolved value of the provided function. + */ + async enqueue(fn: () => Promise): Promise { + let release!: () => void; + const next = new Promise((resolve) => { + release = resolve; + }); + const prev = this.#queue; + this.#queue = next; + await prev; + let timer: ReturnType | undefined; + const fnPromise = fn(); + try { + return await Promise.race([ + fnPromise, + new Promise((_resolve, reject) => { + timer = setTimeout( + () => + reject( + new Error( + `Tool execution timed out after ${this.#timeoutMs}ms`, + ), + ), + this.#timeoutMs, + ); + }), + ]); + } finally { + if (timer !== undefined) { + clearTimeout(timer); + } + // Wait for the task to actually settle before releasing the mutex, + // even after a timeout rejection. This preserves the serialization + // guarantee — the next task cannot start while a timed-out task + // is still running and potentially mutating shared state. 
+ await fnPromise.catch((error) => { + debugWarn('request-queue.enqueue', error); + }); + release(); + } + } +} diff --git a/src/mcp-server/session-manager.ts b/src/server/session-manager.ts similarity index 85% rename from src/mcp-server/session-manager.ts rename to src/server/session-manager.ts index 5de75bc..2ac10fe 100644 --- a/src/mcp-server/session-manager.ts +++ b/src/server/session-manager.ts @@ -1,5 +1,5 @@ /** - * Generic Session Manager Interface for Browser Extension MCP Servers. + * Generic Session Manager Interface for Browser Extension HTTP Servers. * * This module defines the interface that concrete session managers must implement. * The interface abstracts browser session management, page tracking, and extension state. @@ -10,8 +10,10 @@ import type { Page, BrowserContext } from '@playwright/test'; -import type { TabRole, SessionState, SessionMetadata } from './types'; -import type { EnvironmentMode } from '../capabilities/context.js'; +import type { + EnvironmentMode, + WorkflowContext, +} from '../capabilities/context.js'; import type { ExtensionState, BuildCapability, @@ -21,6 +23,7 @@ import type { StateSnapshotCapability, ScreenshotResult, } from '../capabilities/types.js'; +import type { TabRole, SessionState, SessionMetadata } from '../tools/types'; /** * Represents a tracked browser page with its role and URL. @@ -265,6 +268,18 @@ export type ISessionManager = { // Environment Configuration // ----------------------------------------------------------------------------- + /** + * Set the workflow context created by the server's context factory. + * + * Called by `createServer` during startup so that the session manager has + * access to the same capability objects that tools receive. Implementations + * should store the context and expose its capabilities through the + * individual capability getters. + * + * @param context - The workflow context produced by the configured `contextFactory`. 
+ */ + setWorkflowContext(context: WorkflowContext): void; + /** * Get the current environment mode. * @@ -295,47 +310,3 @@ export type ISessionManager = { canSwitchContext: boolean; }; }; - -/** - * Session manager instance holder. - * - * In the core package, this is undefined by default. - * Extension implementations should call setSessionManager() to inject - * their concrete implementation. - */ -let _sessionManager: ISessionManager | undefined; - -/** - * Set the session manager instance. - * - * This should be called by extension-specific code during server initialization. - * - * @param manager The session manager implementation to inject - */ -export function setSessionManager(manager: ISessionManager): void { - _sessionManager = manager; -} - -/** - * Get the session manager instance. - * - * @throws Error if no session manager has been set - * @returns The session manager instance - */ -export function getSessionManager(): ISessionManager { - if (!_sessionManager) { - throw new Error( - 'Session manager not initialized. Call setSessionManager() first.', - ); - } - return _sessionManager; -} - -/** - * Check if a session manager has been set. 
- * - * @returns True if a session manager is set, false otherwise - */ -export function hasSessionManager(): boolean { - return _sessionManager !== undefined; -} diff --git a/src/tools/batch.test.ts b/src/tools/batch.test.ts new file mode 100644 index 0000000..2cfffab --- /dev/null +++ b/src/tools/batch.test.ts @@ -0,0 +1,627 @@ +import { describe, it, expect, vi } from 'vitest'; + +import { runStepsTool } from './batch.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext, ToolFunction } from '../types/http.js'; + +function createMockContext( + options: { + hasActive?: boolean; + toolRegistry?: Map>; + } = {}, +): ToolContext { + const { hasActive = true, toolRegistry } = options; + + return { + sessionManager: createMockSessionManager({ hasActive }), + page: {} as ToolContext['page'], + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + toolRegistry, + } as unknown as ToolContext; +} + +describe('runStepsTool', () => { + it('returns error when no active session', async () => { + const context = createMockContext({ hasActive: false }); + + const result = await runStepsTool( + { steps: [{ tool: 'click', args: { testId: 'button' } }] }, + context, + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + + it('returns internal error when tool registry is missing', async () => { + const context = createMockContext(); + + const result = await runStepsTool( + { steps: [{ tool: 'click', args: { testId: 'button' } }] }, + context, + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INTERNAL_ERROR); + expect(result.error.message).toContain('Tool registry not available'); + } + }); + + it('executes a single step successfully', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: true, + result: 'clicked', 
+ }); + const context = createMockContext({ + toolRegistry: new Map([['click', clickHandler]]), + }); + + const result = await runStepsTool( + { steps: [{ tool: 'click', args: { testId: 'button' } }] }, + context, + ); + + expect(clickHandler).toHaveBeenCalledWith( + { testId: 'button', timeoutMs: 15000 }, + context, + ); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps).toHaveLength(1); + expect(result.result.steps[0]).toMatchObject({ + tool: 'click', + ok: true, + result: 'clicked', + }); + expect(result.result.steps[0].meta.durationMs).toBeGreaterThanOrEqual(0); + expect(result.result.steps[0].meta.timestamp).toStrictEqual( + expect.any(String), + ); + expect(result.result.summary).toMatchObject({ + ok: true, + total: 1, + succeeded: 1, + failed: 0, + }); + } + }); + + it('returns unknown tool error in the step result', async () => { + const context = createMockContext({ toolRegistry: new Map() }); + + const result = await runStepsTool( + { steps: [{ tool: 'unknown_tool', args: {} }] }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps[0]).toMatchObject({ + tool: 'unknown_tool', + ok: false, + error: { + code: ErrorCodes.MM_UNKNOWN_TOOL, + message: 'Unknown tool: unknown_tool', + }, + }); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 1, + succeeded: 0, + failed: 1, + }); + } + }); + + it('records a failed step when a handler returns ok false', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: false, + error: { code: 'MM_CLICK_FAILED', message: 'Click failed' }, + }); + const context = createMockContext({ + toolRegistry: new Map([['click', clickHandler]]), + }); + + const result = await runStepsTool( + { steps: [{ tool: 'click', args: { testId: 'btn' } }] }, + context, + ); + + expect(clickHandler).toHaveBeenCalledWith( + { testId: 'btn', timeoutMs: 15000 }, + context, + ); + expect(result.ok).toBe(true); + if (result.ok) { + 
expect(result.result.steps[0]).toMatchObject({ + tool: 'click', + ok: false, + error: { code: 'MM_CLICK_FAILED', message: 'Click failed' }, + }); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 1, + succeeded: 0, + failed: 1, + }); + } + }); + + it('stops on error when stopOnError is true', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: false, + error: { code: 'ERR', message: 'fail' }, + }); + const typeHandler = vi + .fn() + .mockResolvedValue({ ok: true, result: 'typed' }); + const context = createMockContext({ + toolRegistry: new Map([ + ['click', clickHandler], + ['type', typeHandler], + ]), + }); + + const result = await runStepsTool( + { + steps: [ + { tool: 'click', args: { testId: 'btn' } }, + { tool: 'type', args: { testId: 'input', text: 'hello' } }, + ], + stopOnError: true, + }, + context, + ); + + expect(clickHandler).toHaveBeenCalledTimes(1); + expect(typeHandler).not.toHaveBeenCalled(); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps).toHaveLength(1); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 1, + succeeded: 0, + failed: 1, + }); + } + }); + + it('collects multiple step results with mixed outcomes', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: true, + result: 'clicked', + }); + const typeHandler = vi.fn().mockResolvedValue({ + ok: false, + error: { code: 'MM_TYPE_FAILED', message: 'Type failed' }, + }); + const context = createMockContext({ + toolRegistry: new Map([ + ['click', clickHandler], + ['type', typeHandler], + ]), + }); + + const result = await runStepsTool( + { + steps: [ + { tool: 'click', args: { testId: 'button' } }, + { tool: 'unknown_tool', args: {} }, + { tool: 'type', args: { testId: 'input', text: 'hello' } }, + ], + }, + context, + ); + + expect(clickHandler).toHaveBeenCalledTimes(1); + expect(typeHandler).toHaveBeenCalledTimes(1); + expect(result.ok).toBe(true); + if (result.ok) { + 
expect(result.result.steps).toHaveLength(3); + expect(result.result.steps.map((step) => step.ok)).toStrictEqual([ + true, + false, + false, + ]); + expect(result.result.steps[1].error?.code).toBe( + ErrorCodes.MM_UNKNOWN_TOOL, + ); + expect(result.result.steps[2].error?.code).toBe('MM_TYPE_FAILED'); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 3, + succeeded: 1, + failed: 2, + }); + expect(result.result.summary.durationMs).toBeGreaterThanOrEqual(0); + } + }); + + it('records internal error when a handler throws', async () => { + const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout')); + const context = createMockContext({ + toolRegistry: new Map([['click', clickHandler]]), + }); + + const result = await runStepsTool( + { steps: [{ tool: 'click', args: { testId: 'btn' } }] }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps[0]).toMatchObject({ + tool: 'click', + ok: false, + error: { + code: ErrorCodes.MM_INTERNAL_ERROR, + }, + }); + expect(result.result.steps[0].error?.message).toContain('Timeout'); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 1, + succeeded: 0, + failed: 1, + }); + } + }); + + it('stops on error for unknown tool when stopOnError is true', async () => { + const typeHandler = vi + .fn() + .mockResolvedValue({ ok: true, result: 'typed' }); + const context = createMockContext({ + toolRegistry: new Map([['type', typeHandler]]), + }); + + const result = await runStepsTool( + { + steps: [ + { tool: 'unknown_tool', args: {} }, + { tool: 'type', args: { testId: 'input', text: 'hello' } }, + ], + stopOnError: true, + }, + context, + ); + + expect(typeHandler).not.toHaveBeenCalled(); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps).toHaveLength(1); + expect(result.result.steps[0]).toMatchObject({ + tool: 'unknown_tool', + ok: false, + error: { + code: ErrorCodes.MM_UNKNOWN_TOOL, + }, + }); + 
expect(result.result.summary).toMatchObject({ + ok: false, + total: 1, + succeeded: 0, + failed: 1, + }); + } + }); + + it('returns validation error for invalid tool args', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: true, + result: 'clicked', + }); + const typeHandler = vi + .fn() + .mockResolvedValue({ ok: true, result: 'typed' }); + const context = createMockContext({ + toolRegistry: new Map([ + ['click', clickHandler], + ['type', typeHandler], + ]), + }); + + const result = await runStepsTool( + { + steps: [ + { tool: 'click', args: {} }, + { tool: 'type', args: { testId: 'input', text: 'hello' } }, + ], + }, + context, + ); + + expect(clickHandler).not.toHaveBeenCalled(); + expect(typeHandler).toHaveBeenCalledTimes(1); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps).toHaveLength(2); + expect(result.result.steps[0]).toMatchObject({ + tool: 'click', + ok: false, + error: { + code: 'VALIDATION_ERROR', + }, + }); + expect(result.result.steps[0].error?.message).toContain('Exactly one of'); + expect(result.result.steps[1]).toMatchObject({ + tool: 'type', + ok: true, + }); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 2, + succeeded: 1, + failed: 1, + }); + } + }); + + it('stops on validation error when stopOnError is true', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: true, + result: 'clicked', + }); + const typeHandler = vi + .fn() + .mockResolvedValue({ ok: true, result: 'typed' }); + const context = createMockContext({ + toolRegistry: new Map([ + ['click', clickHandler], + ['type', typeHandler], + ]), + }); + + const result = await runStepsTool( + { + steps: [ + { tool: 'click', args: {} }, + { tool: 'type', args: { testId: 'input', text: 'hello' } }, + ], + stopOnError: true, + }, + context, + ); + + expect(clickHandler).not.toHaveBeenCalled(); + expect(typeHandler).not.toHaveBeenCalled(); + expect(result.ok).toBe(true); + if (result.ok) { + 
expect(result.result.steps).toHaveLength(1); + expect(result.result.steps[0]).toMatchObject({ + tool: 'click', + ok: false, + error: { + code: 'VALIDATION_ERROR', + }, + }); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 1, + succeeded: 0, + failed: 1, + }); + } + }); + + it('stops on handler throw when stopOnError is true', async () => { + const clickHandler = vi.fn().mockRejectedValue(new Error('Timeout')); + const typeHandler = vi + .fn() + .mockResolvedValue({ ok: true, result: 'typed' }); + const context = createMockContext({ + toolRegistry: new Map([ + ['click', clickHandler], + ['type', typeHandler], + ]), + }); + + const result = await runStepsTool( + { + steps: [ + { tool: 'click', args: { testId: 'btn' } }, + { tool: 'type', args: { testId: 'input', text: 'hello' } }, + ], + stopOnError: true, + }, + context, + ); + + expect(clickHandler).toHaveBeenCalledTimes(1); + expect(typeHandler).not.toHaveBeenCalled(); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps).toHaveLength(1); + expect(result.result.steps[0]).toMatchObject({ + tool: 'click', + ok: false, + error: { + code: ErrorCodes.MM_INTERNAL_ERROR, + }, + }); + expect(result.result.steps[0].error?.message).toContain('Timeout'); + expect(result.result.summary).toMatchObject({ + ok: false, + total: 1, + succeeded: 0, + failed: 1, + }); + } + }); + + it('excludes observations when includeObservations is "none"', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: true, + result: { clicked: true }, + }); + const context = createMockContext({ + toolRegistry: new Map([['click', clickHandler]]), + }); + + const result = await runStepsTool( + { + steps: [{ tool: 'click', args: { testId: 'btn' } }], + includeObservations: 'none', + }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps[0]).not.toHaveProperty('observation'); + } + }); + + it('marks remaining steps as skipped when batchTimeoutMs 
is exceeded', async () => { + const clickHandler = vi.fn().mockImplementation( + async () => + new Promise((resolve) => { + setTimeout(() => resolve({ ok: true, result: 'clicked' }), 50); + }), + ); + const typeHandler = vi.fn().mockResolvedValue({ + ok: true, + result: 'typed', + }); + const context = createMockContext({ + toolRegistry: new Map([ + ['click', clickHandler], + ['type', typeHandler], + ]), + }); + + const result = await runStepsTool( + { + steps: [ + { tool: 'click', args: { testId: 'btn' } }, + { tool: 'type', args: { testId: 'input', text: 'hello' } }, + { tool: 'click', args: { testId: 'submit' } }, + ], + batchTimeoutMs: 1, + }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps).toHaveLength(3); + // First step may succeed or be skipped depending on timing + // Steps after deadline should be skipped + const skippedSteps = result.result.steps.filter( + (step) => step.meta.skipped === true, + ); + expect(skippedSteps.length).toBeGreaterThan(0); + skippedSteps.forEach((step) => { + expect(step.ok).toBe(false); + expect(step.error?.code).toBe('MM_BATCH_TIMEOUT'); + }); + expect(result.result.summary.skipped).toBeGreaterThan(0); + } + }); + + it('resolves navigate_home alias to navigate with screen: home', async () => { + const navigateHandler = vi.fn().mockResolvedValue({ + ok: true, + result: { navigated: true }, + }); + const context = createMockContext({ + toolRegistry: new Map([['navigate', navigateHandler]]), + }); + + const result = await runStepsTool( + { steps: [{ tool: 'navigate_home' }] }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps[0].ok).toBe(true); + } + expect(navigateHandler).toHaveBeenCalledWith({ screen: 'home' }, context); + }); + + it('resolves navigate-home (hyphenated) alias to navigate with screen: home', async () => { + const navigateHandler = vi.fn().mockResolvedValue({ + ok: true, + result: { navigated: true }, + }); + const 
context = createMockContext({ + toolRegistry: new Map([['navigate', navigateHandler]]), + }); + + const result = await runStepsTool( + { steps: [{ tool: 'navigate-home' }] }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps[0].ok).toBe(true); + } + expect(navigateHandler).toHaveBeenCalledWith({ screen: 'home' }, context); + }); + + it('resolves navigate_settings alias to navigate with screen: settings', async () => { + const navigateHandler = vi.fn().mockResolvedValue({ + ok: true, + result: { navigated: true }, + }); + const context = createMockContext({ + toolRegistry: new Map([['navigate', navigateHandler]]), + }); + + const result = await runStepsTool( + { steps: [{ tool: 'navigate_settings' }] }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.steps[0].ok).toBe(true); + } + expect(navigateHandler).toHaveBeenCalledWith( + { screen: 'settings' }, + context, + ); + }); + + it('normalises within.ref to within.a11yRef in step args', async () => { + const clickHandler = vi.fn().mockResolvedValue({ + ok: true, + result: 'clicked', + }); + const context = createMockContext({ + toolRegistry: new Map([['click', clickHandler]]), + }); + + const result = await runStepsTool( + { + steps: [ + { + tool: 'click', + args: { testId: 'btn', within: { ref: 'e1' } }, + }, + ], + }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(clickHandler).toHaveBeenCalledWith( + expect.objectContaining({ + testId: 'btn', + within: { a11yRef: 'e1' }, + }), + context, + ); + } + }); +}); diff --git a/src/tools/batch.ts b/src/tools/batch.ts new file mode 100644 index 0000000..dc88651 --- /dev/null +++ b/src/tools/batch.ts @@ -0,0 +1,251 @@ +import type { RunStepsInput, RunStepsResult, StepResult } from './types'; +import { ErrorCodes } from './types'; +import { createToolError, createToolSuccess } from './utils.js'; +import type { ToolContext, ToolFunction, ToolResponse } 
from '../types/http.js'; +import { extractErrorMessage } from '../utils'; +import type { ToolName } from '../validation/schemas.js'; +import { toolSchemas } from '../validation/schemas.js'; + +/** Tools whose args include a target selection (a11yRef/testId/selector). */ +const TARGET_TOOLS = new Set(['click', 'type', 'wait_for']); + +/** + * Maps CLI-style compound tool names to their registry name + injected args. + * The CLI handles these conversions for standalone commands, but agents using + * run-steps bypass CLI parsing and may send compound names directly. + */ +const TOOL_ALIASES: Record< + string, + { tool: string; inject: Record } +> = { + navigate_home: { tool: 'navigate', inject: { screen: 'home' } }, + 'navigate-home': { tool: 'navigate', inject: { screen: 'home' } }, + navigate_settings: { tool: 'navigate', inject: { screen: 'settings' } }, + 'navigate-settings': { tool: 'navigate', inject: { screen: 'settings' } }, + navigate_notification: { + tool: 'navigate', + inject: { screen: 'notification' }, + }, + 'navigate-notification': { + tool: 'navigate', + inject: { screen: 'notification' }, + }, +}; + +type NormalisedStep = { + tool: string; + args: Record; +}; + +/** + * Resolves tool aliases and normalises shorthand arg keys. + * + * @param tool - Raw tool name (may be an alias like `navigate_home`). + * @param args - Raw step arguments. + * @returns Resolved tool name and normalised arguments. + */ +function normaliseStep( + tool: string, + args: Record, +): NormalisedStep { + const alias = TOOL_ALIASES[tool]; + const resolvedTool = alias ? alias.tool : tool; + let normalised = alias ? 
{ ...alias.inject, ...args } : args; + + if (TARGET_TOOLS.has(resolvedTool)) { + if ('ref' in normalised && !('a11yRef' in normalised)) { + const { ref, ...rest } = normalised; + normalised = { a11yRef: ref, ...rest }; + } + + if (typeof normalised.within === 'object' && normalised.within !== null) { + const withinObj = normalised.within as Record; + if ('ref' in withinObj && !('a11yRef' in withinObj)) { + const { ref: withinRef, ...withinRest } = withinObj; + normalised = { + ...normalised, + within: { a11yRef: withinRef, ...withinRest }, + }; + } + } + } + + return { tool: resolvedTool, args: normalised }; +} + +/** + * Executes a batch of tool steps sequentially. + * + * @param input - The batch step definitions and options. + * @param context - The tool execution context. + * @returns The aggregated step results and summary. + */ +export async function runStepsTool( + input: RunStepsInput, + context: ToolContext, +): Promise> { + if (!context.sessionManager.hasActiveSession()) { + return createToolError( + ErrorCodes.MM_NO_ACTIVE_SESSION, + 'No active session. Call launch first.', + ); + } + + if (!context.toolRegistry) { + return createToolError( + ErrorCodes.MM_INTERNAL_ERROR, + 'Tool registry not available.', + ); + } + + const { steps: stepInputs, stopOnError = false, batchTimeoutMs } = input; + const stepResults: StepResult[] = []; + let succeeded = 0; + let failed = 0; + let skipped = 0; + const batchStartTime = Date.now(); + const batchDeadline = batchTimeoutMs + ? 
batchStartTime + batchTimeoutMs + : undefined; + + for (const stepInput of stepInputs) { + if (batchDeadline && Date.now() > batchDeadline) { + const remainingIndex = stepInputs.indexOf(stepInput); + for (const remaining of stepInputs.slice(remainingIndex)) { + stepResults.push({ + tool: remaining.tool, + ok: false, + error: { + code: 'MM_BATCH_TIMEOUT', + message: `Batch deadline exceeded after ${batchTimeoutMs}ms`, + }, + meta: { + durationMs: 0, + timestamp: new Date().toISOString(), + skipped: true, + }, + }); + skipped += 1; + failed += 1; + } + break; + } + const stepStartTime = Date.now(); + const { tool: rawTool, args: rawArgs = {} } = stepInput; + const { tool, args } = normaliseStep(rawTool, rawArgs); + const handler = context.toolRegistry.get(tool) as + | ToolFunction, unknown> + | undefined; + + if (!handler) { + stepResults.push({ + tool, + ok: false, + error: { + code: ErrorCodes.MM_UNKNOWN_TOOL, + message: `Unknown tool: ${tool}`, + }, + meta: { + durationMs: Date.now() - stepStartTime, + timestamp: new Date().toISOString(), + }, + }); + failed += 1; + + if (stopOnError) { + break; + } + + continue; + } + + const schema = + tool in toolSchemas ? toolSchemas[tool as ToolName] : undefined; + let validatedArgs: Record = args; + if (schema) { + const parsed = schema.safeParse(args); + if (!parsed.success) { + stepResults.push({ + tool, + ok: false, + error: { + code: 'VALIDATION_ERROR', + message: parsed.error.issues + .map((i) => + i.path.length > 0 + ? `${i.path.join('.')}: ${i.message}` + : i.message, + ) + .join('; '), + }, + meta: { + durationMs: Date.now() - stepStartTime, + timestamp: new Date().toISOString(), + }, + }); + failed += 1; + + if (stopOnError) { + break; + } + + continue; + } + validatedArgs = parsed.data as Record; + } + + try { + const response = await handler(validatedArgs, context); + + stepResults.push({ + tool, + ok: response.ok, + result: response.ok ? response.result : undefined, + error: response.ok ? 
undefined : response.error, + meta: { + durationMs: Date.now() - stepStartTime, + timestamp: new Date().toISOString(), + }, + }); + + if (response.ok) { + succeeded += 1; + } else { + failed += 1; + if (stopOnError) { + break; + } + } + } catch (error) { + stepResults.push({ + tool, + ok: false, + error: { + code: ErrorCodes.MM_INTERNAL_ERROR, + message: `Unexpected error: ${extractErrorMessage(error)}`, + }, + meta: { + durationMs: Date.now() - stepStartTime, + timestamp: new Date().toISOString(), + }, + }); + failed += 1; + + if (stopOnError) { + break; + } + } + } + + return createToolSuccess({ + steps: stepResults, + summary: { + ok: failed === 0, + total: stepResults.length, + succeeded, + failed, + skipped, + durationMs: Date.now() - batchStartTime, + }, + }); +} diff --git a/src/tools/build.test.ts b/src/tools/build.test.ts new file mode 100644 index 0000000..4429237 --- /dev/null +++ b/src/tools/build.test.ts @@ -0,0 +1,192 @@ +/** + * Unit tests for build tool handler. + * + * Tests the build handler with BuildCapability and legacy build paths, + * including success/failure scenarios and build options handling. 
+ */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import { buildTool } from './build.js'; +import type { BuildCapability } from '../capabilities/types.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockContext( + options: { buildCapability?: BuildCapability } = {}, +) { + const sessionManager = createMockSessionManager({ + hasActive: true, + sessionId: 'test-session-123', + sessionMetadata: { + schemaVersion: 1, + sessionId: 'test-session-123', + createdAt: new Date().toISOString(), + flowTags: [], + tags: [], + launch: { stateMode: 'default' }, + }, + }); + + sessionManager.getBuildCapability.mockReturnValue(options.buildCapability); + + return { + sessionManager, + page: {}, + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext; +} + +describe('buildTool', () => { + let mockBuildCapability: BuildCapability; + + beforeEach(() => { + mockBuildCapability = { + build: vi.fn(), + getExtensionPath: vi.fn(), + isBuilt: vi.fn(), + }; + }); + + describe('with capability', () => { + it('builds extension successfully with default buildType', async () => { + vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({ + success: true, + extensionPath: '/path/to/dist/chrome', + durationMs: 5000, + }); + const context = createMockContext({ + buildCapability: mockBuildCapability, + }); + + const result = await buildTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.buildType).toBe('build:test'); + expect(result.result.extensionPathResolved).toBe( + '/path/to/dist/chrome', + ); + } + expect(mockBuildCapability.build).toHaveBeenCalledWith({ + buildType: undefined, + force: undefined, + }); + }); + + it('builds extension with explicit buildType', async () => { + vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({ + 
success: true, + extensionPath: '/path/to/dist/chrome', + durationMs: 5000, + }); + const context = createMockContext({ + buildCapability: mockBuildCapability, + }); + + const result = await buildTool({ buildType: 'build:test' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.buildType).toBe('build:test'); + expect(result.result.extensionPathResolved).toBe( + '/path/to/dist/chrome', + ); + } + expect(mockBuildCapability.build).toHaveBeenCalledWith({ + buildType: 'build:test', + force: undefined, + }); + }); + + it('builds extension with force flag', async () => { + vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({ + success: true, + extensionPath: '/path/to/dist/chrome', + durationMs: 5000, + }); + const context = createMockContext({ + buildCapability: mockBuildCapability, + }); + + const result = await buildTool({ force: true }, context); + + expect(result.ok).toBe(true); + expect(mockBuildCapability.build).toHaveBeenCalledWith({ + buildType: undefined, + force: true, + }); + }); + + it('returns error when build fails with error message', async () => { + vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({ + success: false, + extensionPath: '', + durationMs: 1000, + error: 'Compilation error', + }); + const context = createMockContext({ + buildCapability: mockBuildCapability, + }); + + const result = await buildTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED); + expect(result.error.message).toContain('Compilation error'); + } + }); + + it('returns error when build fails without error message', async () => { + vi.spyOn(mockBuildCapability, 'build').mockResolvedValue({ + success: false, + extensionPath: '', + durationMs: 1000, + }); + const context = createMockContext({ + buildCapability: mockBuildCapability, + }); + + const result = await buildTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + 
expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED); + expect(result.error.message).toContain('Unknown error'); + } + }); + + it('returns error when build throws exception', async () => { + vi.spyOn(mockBuildCapability, 'build').mockRejectedValue( + new Error('Build process crashed'), + ); + const context = createMockContext({ + buildCapability: mockBuildCapability, + }); + + const result = await buildTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_BUILD_FAILED); + expect(result.error.message).toContain('Build process crashed'); + } + }); + }); + + it('returns error when build capability is unavailable', async () => { + const context = createMockContext(); + + const result = await buildTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); + expect(result.error.message).toContain('BuildCapability not available'); + } + }); +}); diff --git a/src/tools/build.ts b/src/tools/build.ts new file mode 100644 index 0000000..316b756 --- /dev/null +++ b/src/tools/build.ts @@ -0,0 +1,52 @@ +import type { BuildInput, BuildToolResult } from './types'; +import { ErrorCodes } from './types'; +import { createToolError, createToolSuccess } from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; +import { extractErrorMessage } from '../utils'; + +/** + * Triggers an extension build using the configured build capability. + * + * @param input - The build configuration options. + * @param context - The tool execution context. + * @returns The build result with the resolved extension path. + */ +export async function buildTool( + input: BuildInput, + context: ToolContext, +): Promise> { + const buildCapability = + context.workflowContext.build ?? 
+ context.sessionManager.getBuildCapability(); + + if (!buildCapability) { + return createToolError( + ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE, + 'BuildCapability not available. The mm_build tool requires either: (1) running in e2e mode with the MetaMask extension wrapper, or (2) running directly in the metamask-extension repository with dependencies installed.', + ); + } + + try { + const result = await buildCapability.build({ + buildType: input.buildType, + force: input.force, + }); + + if (!result.success) { + return createToolError( + ErrorCodes.MM_BUILD_FAILED, + `Build failed: ${result.error ?? 'Unknown error'}`, + ); + } + + return createToolSuccess({ + buildType: input.buildType ?? 'build:test', + extensionPathResolved: result.extensionPath, + }); + } catch (error) { + return createToolError( + ErrorCodes.MM_BUILD_FAILED, + `Build failed: ${extractErrorMessage(error)}`, + ); + } +} diff --git a/src/tools/cleanup.test.ts b/src/tools/cleanup.test.ts new file mode 100644 index 0000000..5348703 --- /dev/null +++ b/src/tools/cleanup.test.ts @@ -0,0 +1,74 @@ +/** + * Unit tests for cleanup tool handler. + * + * Tests session cleanup with various session states. 
+ */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +import { cleanupTool } from './cleanup.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockContext(hasActive = false): ToolContext { + return { + sessionManager: createMockSessionManager({ hasActive }), + page: {}, + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext; +} + +describe('cleanupTool', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('cleans up active session successfully', async () => { + const context = createMockContext(true); + vi.spyOn(context.sessionManager, 'cleanup').mockResolvedValue(true); + + const result = await cleanupTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.cleanedUp).toBe(true); + } + expect(context.sessionManager.cleanup).toHaveBeenCalled(); + }); + + it('returns false when no session to clean up', async () => { + const context = createMockContext(false); + vi.spyOn(context.sessionManager, 'cleanup').mockResolvedValue(false); + + const result = await cleanupTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.cleanedUp).toBe(false); + } + }); + + it('cleans up multiple times without error', async () => { + const context = createMockContext(true); + vi.spyOn(context.sessionManager, 'cleanup') + .mockResolvedValueOnce(true) + .mockResolvedValueOnce(false); + + const result1 = await cleanupTool({}, context); + const result2 = await cleanupTool({}, context); + + expect(result1.ok).toBe(true); + if (result1.ok) { + expect(result1.result.cleanedUp).toBe(true); + } + + expect(result2.ok).toBe(true); + if (result2.ok) { + expect(result2.result.cleanedUp).toBe(false); + } + + expect(context.sessionManager.cleanup).toHaveBeenCalledTimes(2); + }); +}); diff --git a/src/tools/cleanup.ts b/src/tools/cleanup.ts new file mode 
100644 index 0000000..cae4b36 --- /dev/null +++ b/src/tools/cleanup.ts @@ -0,0 +1,19 @@ +import type { CleanupInput, CleanupResult } from './types'; +import { createToolSuccess } from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; + +/** + * Tears down the active browser session and cleans up resources. + * + * @param _input - Unused input parameters. + * @param context - The tool execution context. + * @returns The cleanup result indicating what was cleaned up. + */ +export async function cleanupTool( + _input: CleanupInput, + context: ToolContext, +): Promise> { + const cleanedUp = await context.sessionManager.cleanup(); + + return createToolSuccess({ cleanedUp }); +} diff --git a/src/tools/clipboard.test.ts b/src/tools/clipboard.test.ts new file mode 100644 index 0000000..d067712 --- /dev/null +++ b/src/tools/clipboard.test.ts @@ -0,0 +1,215 @@ +/** + * Unit tests for clipboard tool handler. + * + * Tests CDP-based clipboard operations (read/write) with proper mocking. + */ + +import { describe, it, expect, vi } from 'vitest'; + +import { clipboardTool } from './clipboard.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockContext( + options: { + hasActive?: boolean; + cdpSession?: { + send: ReturnType; + detach: ReturnType; + }; + } = {}, +): ToolContext { + const { hasActive = true, cdpSession } = options; + + const mockCdpSession = cdpSession ?? 
{ + send: vi.fn().mockResolvedValue(undefined), + detach: vi.fn().mockResolvedValue(undefined), + }; + + const mockPage = { + context: vi.fn().mockReturnValue({ + newCDPSession: vi.fn().mockResolvedValue(mockCdpSession), + }), + }; + + return { + sessionManager: createMockSessionManager({ hasActive }), + page: mockPage, + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext; +} + +describe('clipboardTool', () => { + describe('write action', () => { + it('writes text to clipboard via CDP', async () => { + const cdpSession = { + send: vi.fn().mockResolvedValue(undefined), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ cdpSession }); + + const result = await clipboardTool( + { action: 'write', text: 'test content' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.action).toBe('write'); + expect(result.result.success).toBe(true); + expect(result.result.text).toBe('test content'); + } + expect(cdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', { + expression: 'navigator.clipboard.writeText("test content")', + awaitPromise: true, + userGesture: true, + }); + expect(cdpSession.detach).toHaveBeenCalled(); + }); + + it('detaches CDP session even if write fails', async () => { + const cdpSession = { + send: vi.fn().mockRejectedValue(new Error('Write failed')), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ cdpSession }); + + const result = await clipboardTool( + { action: 'write', text: 'test' }, + context, + ); + + expect(result.ok).toBe(false); + expect(cdpSession.detach).toHaveBeenCalled(); + }); + }); + + describe('read action', () => { + it('reads text from clipboard via CDP', async () => { + const cdpSession = { + send: vi.fn().mockResolvedValue({ + result: { value: 'clipboard content' }, + }), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ 
cdpSession }); + + const result = await clipboardTool({ action: 'read' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.action).toBe('read'); + expect(result.result.success).toBe(true); + expect(result.result.text).toBe('clipboard content'); + } + expect(cdpSession.send).toHaveBeenCalledWith('Runtime.evaluate', { + expression: 'navigator.clipboard.readText()', + awaitPromise: true, + userGesture: true, + }); + }); + + it('uses description when value is missing', async () => { + const cdpSession = { + send: vi.fn().mockResolvedValue({ + result: { description: 'fallback content' }, + }), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ cdpSession }); + + const result = await clipboardTool({ action: 'read' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.text).toBe('fallback content'); + } + }); + + it('returns empty string when result is missing', async () => { + const cdpSession = { + send: vi.fn().mockResolvedValue({ result: {} }), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ cdpSession }); + + const result = await clipboardTool({ action: 'read' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.text).toBe(''); + } + }); + }); + + describe('error classification', () => { + it('classifies permission denied errors', async () => { + const cdpSession = { + send: vi.fn().mockRejectedValue(new Error('permissions denied')), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ cdpSession }); + + const result = await clipboardTool({ action: 'read' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe('MM_CLIPBOARD_PERMISSION_DENIED'); + expect(result.error.message).toContain('Clipboard permission denied'); + } + }); + + it('classifies LavaMoat blocked errors', async () => { + const 
cdpSession = { + send: vi.fn().mockRejectedValue(new Error('LavaMoat policy violation')), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ cdpSession }); + + const result = await clipboardTool( + { action: 'write', text: 'test' }, + context, + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe('MM_CLIPBOARD_LAVAMOAT_BLOCKED'); + expect(result.error.message).toContain( + 'Clipboard blocked by LavaMoat policy', + ); + } + }); + + it('classifies generic clipboard errors', async () => { + const cdpSession = { + send: vi.fn().mockRejectedValue(new Error('Unknown error')), + detach: vi.fn().mockResolvedValue(undefined), + }; + const context = createMockContext({ cdpSession }); + + const result = await clipboardTool({ action: 'read' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe('MM_CLIPBOARD_FAILED'); + expect(result.error.message).toContain('Clipboard operation failed'); + } + }); + }); + + describe('session validation', () => { + it('returns error when no active session', async () => { + const context = createMockContext({ hasActive: false }); + + const result = await clipboardTool({ action: 'read' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + }); +}); diff --git a/src/tools/clipboard.ts b/src/tools/clipboard.ts new file mode 100644 index 0000000..ade71a3 --- /dev/null +++ b/src/tools/clipboard.ts @@ -0,0 +1,82 @@ +import type { ClipboardInput, ClipboardResult } from './types'; +import { + createToolError, + createToolSuccess, + requireActiveSession, +} from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; + +/** + * Reads from or writes to the system clipboard via CDP. + * + * @param input - The clipboard action and optional text payload. + * @param context - The tool execution context. 
+ * @returns The clipboard operation result with the text content. + */ +export async function clipboardTool( + input: ClipboardInput, + context: ToolContext, +): Promise> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + try { + const cdpSession = await context.page.context().newCDPSession(context.page); + + try { + if (input.action === 'write') { + await cdpSession.send('Runtime.evaluate', { + expression: `navigator.clipboard.writeText(${JSON.stringify(input.text)})`, + awaitPromise: true, + userGesture: true, + }); + + return createToolSuccess({ + action: 'write', + success: true, + text: input.text, + }); + } + + const result = await cdpSession.send('Runtime.evaluate', { + expression: 'navigator.clipboard.readText()', + awaitPromise: true, + userGesture: true, + }); + + const clipboardText = + result.result?.value ?? result.result?.description ?? ''; + + return createToolSuccess({ + action: 'read', + success: true, + text: clipboardText as string, + }); + } finally { + await cdpSession.detach().catch(() => undefined); + } + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + + if (message.includes('permissions') || message.includes('denied')) { + return createToolError( + 'MM_CLIPBOARD_PERMISSION_DENIED', + `Clipboard permission denied: ${message}`, + ); + } + + if (message.includes('LavaMoat') || message.includes('policy')) { + return createToolError( + 'MM_CLIPBOARD_LAVAMOAT_BLOCKED', + `Clipboard blocked by LavaMoat policy: ${message}`, + ); + } + + return createToolError( + 'MM_CLIPBOARD_FAILED', + `Clipboard operation failed: ${message}`, + ); + } +} diff --git a/src/tools/context.test.ts b/src/tools/context.test.ts new file mode 100644 index 0000000..2af9de5 --- /dev/null +++ b/src/tools/context.test.ts @@ -0,0 +1,176 @@ +/** + * Unit tests for context tool handlers. + * + * Tests context switching (e2e/prod) and context info retrieval. 
+ */ + +import { describe, it, expect, vi } from 'vitest'; + +import { setContextTool, getContextTool } from './context.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockContext( + options: { + hasActive?: boolean; + sessionId?: string; + environmentMode?: 'e2e' | 'prod'; + } = {}, +): ToolContext { + return { + sessionManager: createMockSessionManager(options), + page: {} as ToolContext['page'], + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext; +} + +describe('setContextTool', () => { + it('switches context from e2e to prod', async () => { + const context = createMockContext({ environmentMode: 'e2e' }); + vi.mocked(context.sessionManager.getContextInfo).mockReturnValue({ + currentContext: 'prod', + hasActiveSession: false, + sessionId: null, + capabilities: { available: ['build', 'fixture'] }, + canSwitchContext: true, + }); + + const result = await setContextTool({ context: 'prod' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.previousContext).toBe('e2e'); + expect(result.result.newContext).toBe('prod'); + expect(result.result.availableCapabilities).toStrictEqual([ + 'build', + 'fixture', + ]); + } + expect(context.sessionManager.setContext).toHaveBeenCalledWith( + 'prod', + undefined, + ); + }); + + it('forwards context options to session manager', async () => { + const context = createMockContext({ environmentMode: 'e2e' }); + vi.mocked(context.sessionManager.getContextInfo).mockReturnValue({ + currentContext: 'e2e', + hasActiveSession: false, + sessionId: null, + capabilities: { available: ['build', 'fixture', 'chain'] }, + canSwitchContext: true, + }); + + const contextOptions = { + mockServer: { + enabled: true, + port: 18000, + }, + }; + + const result = await setContextTool( + { + context: 'e2e', + options: 
contextOptions, + }, + context, + ); + + expect(result.ok).toBe(true); + expect(context.sessionManager.setContext).toHaveBeenCalledWith( + 'e2e', + contextOptions, + ); + }); + + it('switches context from prod to e2e', async () => { + const context = createMockContext({ environmentMode: 'prod' }); + vi.mocked(context.sessionManager.getContextInfo).mockReturnValue({ + currentContext: 'e2e', + hasActiveSession: false, + sessionId: null, + capabilities: { available: ['build', 'fixture', 'chain', 'seeding'] }, + canSwitchContext: true, + }); + + const result = await setContextTool({ context: 'e2e' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.previousContext).toBe('prod'); + expect(result.result.newContext).toBe('e2e'); + expect(result.result.availableCapabilities).toStrictEqual([ + 'build', + 'fixture', + 'chain', + 'seeding', + ]); + } + }); + + it('classifies context switch blocked errors', async () => { + const context = createMockContext({ environmentMode: 'e2e' }); + vi.mocked(context.sessionManager.setContext).mockImplementation(() => { + throw new Error(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED); + }); + + const result = await setContextTool({ context: 'prod' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED); + expect(result.error.message).toBe(ErrorCodes.MM_CONTEXT_SWITCH_BLOCKED); + } + }); + + it('classifies generic context errors', async () => { + const context = createMockContext({ environmentMode: 'e2e' }); + vi.mocked(context.sessionManager.setContext).mockImplementation(() => { + throw new Error('Unknown error'); + }); + + const result = await setContextTool({ context: 'prod' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_SET_CONTEXT_FAILED); + expect(result.error.message).toContain('Context switch failed'); + } + }); +}); + +describe('getContextTool', () => { + 
it('returns context info when getContextInfo is available', async () => { + const context = createMockContext({ + hasActive: true, + sessionId: 'test-session-123', + environmentMode: 'e2e', + }); + vi.mocked(context.sessionManager.getContextInfo).mockReturnValue({ + currentContext: 'e2e', + hasActiveSession: true, + sessionId: 'test-session-123', + capabilities: { available: ['build', 'fixture', 'chain'] }, + canSwitchContext: false, + }); + + const result = await getContextTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.currentContext).toBe('e2e'); + expect(result.result.hasActiveSession).toBe(true); + expect(result.result.sessionId).toBe('test-session-123'); + expect(result.result.capabilities.available).toStrictEqual([ + 'build', + 'fixture', + 'chain', + ]); + expect(result.result.canSwitchContext).toBe(false); + } + }); +}); diff --git a/src/tools/context.ts b/src/tools/context.ts new file mode 100644 index 0000000..65f501d --- /dev/null +++ b/src/tools/context.ts @@ -0,0 +1,55 @@ +import { classifyContextError } from './error-classification.js'; +import type { SetContextInput } from './types/tool-inputs.js'; +import type { + SetContextResult, + GetContextResult, +} from './types/tool-outputs.js'; +import { createToolError, createToolSuccess } from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; + +export type { SetContextInput } from './types/tool-inputs.js'; +export type { + SetContextResult, + GetContextResult, +} from './types/tool-outputs.js'; + +/** + * Switches the session environment context between e2e and prod modes. + * + * @param input - The target context and optional configuration. + * @param context - The tool execution context. + * @returns The previous and new context with available capabilities. 
+ */ +export async function setContextTool( + input: SetContextInput, + context: ToolContext, +): Promise> { + try { + const previousContext = context.sessionManager.getEnvironmentMode(); + context.sessionManager.setContext(input.context, input.options); + const info = context.sessionManager.getContextInfo(); + + return createToolSuccess({ + previousContext, + newContext: input.context, + availableCapabilities: info.capabilities.available, + }); + } catch (error) { + const errorInfo = classifyContextError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Retrieves the current session context, capabilities, and status. + * + * @param _input - Unused input parameters. + * @param context - The tool execution context. + * @returns The current context information. + */ +export async function getContextTool( + _input: Record, + context: ToolContext, +): Promise> { + return createToolSuccess(context.sessionManager.getContextInfo()); +} diff --git a/src/mcp-server/tools/discovery-tools.test.ts b/src/tools/discovery-tools.test.ts similarity index 58% rename from src/mcp-server/tools/discovery-tools.test.ts rename to src/tools/discovery-tools.test.ts index 8b5a248..683a7af 100644 --- a/src/mcp-server/tools/discovery-tools.test.ts +++ b/src/tools/discovery-tools.test.ts @@ -8,19 +8,18 @@ */ import type { Page } from '@playwright/test'; -import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { - handleListTestIds, - handleAccessibilitySnapshot, - handleDescribeScreen, + accessibilitySnapshotTool, + describeScreenTool, + listTestIdsTool, } from './discovery-tools.js'; -import { ScreenshotResult } from '../../capabilities/types.js'; -import * as discoveryModule from '../discovery.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from 
'../test-utils/mock-factories.js'; -import type { TestIdItem, A11yNodeTrimmed } from '../types'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import type { A11yNodeTrimmed, TestIdItem } from './types'; +import { ErrorCodes } from './types/errors.js'; +import * as discoveryModule from './utils/discovery.js'; +import type { ToolContext } from '../types/http.js'; function createMockPage(): Page { return { @@ -28,12 +27,16 @@ function createMockPage(): Page { } as unknown as Page; } -describe('discovery-tools', () => { - beforeEach(() => { - vi.clearAllMocks(); +function createMockContext( + options: { + hasActive?: boolean; + } = {}, +): ToolContext { + const { hasActive = true } = options; - const mockSessionManager = createMockSessionManager({ - hasActive: true, + return { + sessionManager: createMockSessionManager({ + hasActive, sessionId: 'test-session-123', sessionMetadata: { schemaVersion: 1, @@ -43,34 +46,27 @@ describe('discovery-tools', () => { flowTags: ['discovery'], tags: [], launch: { - stateMode: 'default' as const, + stateMode: 'default', }, }, - }); - - vi.spyOn(mockSessionManager, 'getPage').mockReturnValue(createMockPage()); - - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue({ - recordStep: vi.fn().mockResolvedValue(undefined), - getLastSteps: vi.fn().mockResolvedValue([]), - searchSteps: vi.fn().mockResolvedValue([]), - summarizeSession: vi.fn().mockResolvedValue({ - sessionId: 'test-session-123', - stepCount: 0, - recipe: [], - }), - listSessions: vi.fn().mockResolvedValue([]), + }), + page: createMockPage(), + refMap: new Map(), + workflowContext: {}, + knowledgeStore: { generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session-123'), - } as any); + }, + } as unknown as ToolContext; +} + +describe('discovery-tools', () => { + 
beforeEach(() => { + vi.clearAllMocks(); }); - describe('handleListTestIds', () => { + describe('listTestIdsTool', () => { it('returns list of test IDs with default limit', async () => { + const context = createMockContext(); const mockItems: TestIdItem[] = [ { testId: 'button-1', tag: 'button', text: 'Click', visible: true }, { testId: 'input-1', tag: 'input', visible: true }, @@ -84,24 +80,24 @@ describe('discovery-tools', () => { }, ); - const result = await handleListTestIds({}); + const result = await listTestIdsTool({}, context); expect(result.ok).toBe(true); if (result.ok) { expect(result.result.items).toStrictEqual(mockItems); - expect(discoveryModule.collectTestIds).toHaveBeenCalledWith( - expect.anything(), - 150, - ); } + expect(discoveryModule.collectTestIds).toHaveBeenCalledWith( + context.page, + 150, + ); }); it('respects custom limit', async () => { - const mockItems: TestIdItem[] = [ - { testId: 'item-1', tag: 'div', visible: true }, - ]; + const context = createMockContext(); - vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue(mockItems); + vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([ + { testId: 'item-1', tag: 'div', visible: true }, + ]); vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( { nodes: [], @@ -109,18 +105,17 @@ describe('discovery-tools', () => { }, ); - const result = await handleListTestIds({ limit: 50 }); + const result = await listTestIdsTool({ limit: 50 }, context); expect(result.ok).toBe(true); - if (result.ok) { - expect(discoveryModule.collectTestIds).toHaveBeenCalledWith( - expect.anything(), - 50, - ); - } + expect(discoveryModule.collectTestIds).toHaveBeenCalledWith( + context.page, + 50, + ); }); it('updates refMap in session manager', async () => { + const context = createMockContext(); const mockRefMap = new Map([['e1', 'role=button[name="Submit"]']]); vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); @@ -131,53 +126,30 @@ 
describe('discovery-tools', () => { }, ); - const sessionManager = sessionManagerModule.getSessionManager(); + await listTestIdsTool({}, context); - await handleListTestIds({}); - - expect(sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap); - }); - - it('records step to knowledge store', async () => { - const mockItems: TestIdItem[] = [ - { testId: 'test-1', tag: 'button', visible: true }, - ]; - - vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue(mockItems); - vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( - { - nodes: [], - refMap: new Map(), - }, - ); - - const { knowledgeStore } = knowledgeStoreModule; - - await handleListTestIds({}); - - expect(knowledgeStore.recordStep).toHaveBeenCalled(); + expect(context.sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap); }); it('returns error when no active session', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); + const context = createMockContext({ hasActive: false }); - const result = await handleListTestIds({}); + const result = await listTestIdsTool({}, context); expect(result.ok).toBe(false); if (!result.ok) { - expect(result.error.code).toBe('MM_NO_ACTIVE_SESSION'); + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); } }); it('handles discovery errors', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTestIds').mockRejectedValue( new Error('Page closed'), ); - const result = await handleListTestIds({}); + const result = await listTestIdsTool({}, context); expect(result.ok).toBe(false); if (!result.ok) { @@ -186,8 +158,9 @@ describe('discovery-tools', () => { }); }); - describe('handleAccessibilitySnapshot', () => { + describe('accessibilitySnapshotTool', () => { it('returns accessibility tree with refs', async () => { + const context = createMockContext(); const 
mockNodes: A11yNodeTrimmed[] = [ { ref: 'e1', role: 'button', name: 'Submit', path: [] }, { ref: 'e2', role: 'link', name: 'Cancel', path: [] }, @@ -205,7 +178,7 @@ describe('discovery-tools', () => { ); vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); - const result = await handleAccessibilitySnapshot({}); + const result = await accessibilitySnapshotTool({}, context); expect(result.ok).toBe(true); if (result.ok) { @@ -214,6 +187,8 @@ describe('discovery-tools', () => { }); it('uses root selector when provided', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( { nodes: [], @@ -222,15 +197,16 @@ describe('discovery-tools', () => { ); vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); - await handleAccessibilitySnapshot({ rootSelector: '.modal' }); + await accessibilitySnapshotTool({ rootSelector: '.modal' }, context); expect(discoveryModule.collectTrimmedA11ySnapshot).toHaveBeenCalledWith( - expect.anything(), + context.page, '.modal', ); }); it('updates refMap in session manager', async () => { + const context = createMockContext(); const mockRefMap = new Map([['e1', 'role=button[name="OK"]']]); vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( @@ -241,14 +217,14 @@ describe('discovery-tools', () => { ); vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); - const sessionManager = sessionManagerModule.getSessionManager(); - - await handleAccessibilitySnapshot({}); + await accessibilitySnapshotTool({}, context); - expect(sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap); + expect(context.sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap); }); - it('records step to knowledge store', async () => { + it('collects test ids with observation limit', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( { nodes: [], @@ -257,33 
+233,33 @@ describe('discovery-tools', () => { ); vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); - const { knowledgeStore } = knowledgeStoreModule; + await accessibilitySnapshotTool({}, context); - await handleAccessibilitySnapshot({}); - - expect(knowledgeStore.recordStep).toHaveBeenCalled(); + expect(discoveryModule.collectTestIds).toHaveBeenCalledWith( + context.page, + 50, + ); }); it('returns error when no active session', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); + const context = createMockContext({ hasActive: false }); - const result = await handleAccessibilitySnapshot({}); + const result = await accessibilitySnapshotTool({}, context); expect(result.ok).toBe(false); if (!result.ok) { - expect(result.error.code).toBe('MM_NO_ACTIVE_SESSION'); + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); } }); it('handles discovery errors', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockRejectedValue( new Error('Discovery failed'), ); - const result = await handleAccessibilitySnapshot({}); + const result = await accessibilitySnapshotTool({}, context); expect(result.ok).toBe(false); if (!result.ok) { @@ -292,8 +268,9 @@ describe('discovery-tools', () => { }); }); - describe('handleDescribeScreen', () => { + describe('describeScreenTool', () => { it('returns comprehensive screen state', async () => { + const context = createMockContext(); const mockTestIds: TestIdItem[] = [ { testId: 'button-1', tag: 'button', visible: true }, ]; @@ -311,7 +288,7 @@ describe('discovery-tools', () => { }, ); - const result = await handleDescribeScreen({}); + const result = await describeScreenTool({}, context); expect(result.ok).toBe(true); if (result.ok) { @@ -323,6 +300,8 @@ describe('discovery-tools', () => { }); it('includes screenshot when 
requested', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( { @@ -330,19 +309,20 @@ describe('discovery-tools', () => { refMap: new Map(), }, ); - - const sessionManager = sessionManagerModule.getSessionManager(); - vi.spyOn(sessionManager, 'screenshot').mockResolvedValue({ + vi.mocked(context.sessionManager.screenshot).mockResolvedValue({ path: '/path/to/screenshot.png', width: 1280, height: 720, base64: 'base64data', }); - const result = await handleDescribeScreen({ - includeScreenshot: true, - screenshotName: 'test-screen', - }); + const result = await describeScreenTool( + { + includeScreenshot: true, + screenshotName: 'test-screen', + }, + context, + ); expect(result.ok).toBe(true); if (result.ok) { @@ -352,14 +332,16 @@ describe('discovery-tools', () => { height: 720, base64: null, }); - expect(sessionManager.screenshot).toHaveBeenCalledWith({ - name: 'test-screen', - fullPage: true, - }); } + expect(context.sessionManager.screenshot).toHaveBeenCalledWith({ + name: 'test-screen', + fullPage: true, + }); }); it('includes base64 in screenshot when requested', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( { @@ -367,19 +349,20 @@ describe('discovery-tools', () => { refMap: new Map(), }, ); - - const sessionManager = sessionManagerModule.getSessionManager(); - vi.spyOn(sessionManager, 'screenshot').mockResolvedValue({ + vi.mocked(context.sessionManager.screenshot).mockResolvedValue({ path: '/path/to/screenshot.png', width: 1280, height: 720, base64: 'base64data', }); - const result = await handleDescribeScreen({ - includeScreenshot: true, - includeScreenshotBase64: true, - }); + const result = await describeScreenTool( + { + includeScreenshot: true, + 
includeScreenshotBase64: true, + }, + context, + ); expect(result.ok).toBe(true); if (result.ok) { @@ -388,6 +371,8 @@ describe('discovery-tools', () => { }); it('uses default screenshot name when not provided', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( { @@ -396,30 +381,36 @@ describe('discovery-tools', () => { }, ); - const sessionManager = sessionManagerModule.getSessionManager(); - const mockedScreenshot = vi - .spyOn(sessionManager, 'screenshot') - .mockResolvedValue({ - path: '/path/to/screenshot.png', - width: 1280, - height: 720, - } as ScreenshotResult); + await describeScreenTool({ includeScreenshot: true }, context); - await handleDescribeScreen({ includeScreenshot: true }); - - expect(mockedScreenshot).toHaveBeenCalledWith({ + expect(context.sessionManager.screenshot).toHaveBeenCalledWith({ name: 'describe-screen', fullPage: true, }); }); it('generates prior knowledge from context', async () => { + const context = createMockContext(); const mockTestIds: TestIdItem[] = [ { testId: 'send-btn', tag: 'button', visible: true }, ]; const mockNodes: A11yNodeTrimmed[] = [ { ref: 'e1', role: 'button', name: 'Send', path: [] }, ]; + const mockPriorKnowledge = { + schemaVersion: 1 as const, + generatedAt: '2026-02-04T00:00:00.000Z', + query: { + currentScreen: 'home', + currentUrl: 'chrome-extension://ext-123/home.html', + visibleTestIds: mockTestIds, + a11yNodes: mockNodes, + currentSessionFlowTags: ['discovery'], + }, + relatedSessions: [], + similarSteps: [], + suggestedNextActions: [], + }; vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue( mockTestIds, @@ -430,37 +421,31 @@ describe('discovery-tools', () => { refMap: new Map([['e1', 'role=button[name="Send"]']]), }, ); + vi.mocked( + context.knowledgeStore.generatePriorKnowledge, + ).mockResolvedValue(mockPriorKnowledge as any); - const 
mockPriorKnowledge = { - version: 1 as const, - hints: [ - { type: 'similar_flow' as const, content: 'Previous send flow' }, - ], - }; - - const { knowledgeStore } = knowledgeStoreModule; - vi.spyOn(knowledgeStore, 'generatePriorKnowledge').mockResolvedValue( - mockPriorKnowledge as any, - ); - - const result = await handleDescribeScreen({}); + const result = await describeScreenTool({}, context); expect(result.ok).toBe(true); if (result.ok) { expect(result.result.priorKnowledge).toStrictEqual(mockPriorKnowledge); - expect(knowledgeStore.generatePriorKnowledge).toHaveBeenCalledWith( - expect.objectContaining({ - currentScreen: 'home', - visibleTestIds: mockTestIds, - a11yNodes: mockNodes, - currentSessionFlowTags: ['discovery'], - }), - 'test-session-123', - ); } + expect( + context.knowledgeStore.generatePriorKnowledge, + ).toHaveBeenCalledWith( + expect.objectContaining({ + currentScreen: 'home', + visibleTestIds: mockTestIds, + a11yNodes: mockNodes, + currentSessionFlowTags: ['discovery'], + }), + 'test-session-123', + ); }); it('updates refMap in session manager', async () => { + const context = createMockContext(); const mockRefMap = new Map([['e1', 'role=button[name="OK"]']]); vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); @@ -471,49 +456,30 @@ describe('discovery-tools', () => { }, ); - const sessionManager = sessionManagerModule.getSessionManager(); - - await handleDescribeScreen({}); + await describeScreenTool({}, context); - expect(sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap); - }); - - it('records step to knowledge store', async () => { - vi.spyOn(discoveryModule, 'collectTestIds').mockResolvedValue([]); - vi.spyOn(discoveryModule, 'collectTrimmedA11ySnapshot').mockResolvedValue( - { - nodes: [], - refMap: new Map(), - }, - ); - - const { knowledgeStore } = knowledgeStoreModule; - - await handleDescribeScreen({}); - - expect(knowledgeStore.recordStep).toHaveBeenCalled(); + 
expect(context.sessionManager.setRefMap).toHaveBeenCalledWith(mockRefMap); }); it('returns error when no active session', async () => { - const mockSessionManager = createMockSessionManager({ hasActive: false }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); + const context = createMockContext({ hasActive: false }); - const result = await handleDescribeScreen({}); + const result = await describeScreenTool({}, context); expect(result.ok).toBe(false); if (!result.ok) { - expect(result.error.code).toBe('MM_NO_ACTIVE_SESSION'); + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); } }); it('handles discovery errors', async () => { + const context = createMockContext(); + vi.spyOn(discoveryModule, 'collectTestIds').mockRejectedValue( new Error('Page closed'), ); - const result = await handleDescribeScreen({}); + const result = await describeScreenTool({}, context); expect(result.ok).toBe(false); if (!result.ok) { diff --git a/src/tools/discovery-tools.ts b/src/tools/discovery-tools.ts new file mode 100644 index 0000000..60962fc --- /dev/null +++ b/src/tools/discovery-tools.ts @@ -0,0 +1,163 @@ +import { classifyDiscoveryError } from './error-classification.js'; +import type { + AccessibilitySnapshotInput, + AccessibilitySnapshotResult, + DescribeScreenInput, + DescribeScreenResult, + ListTestIdsInput, + ListTestIdsResult, + PriorKnowledgeContext, +} from './types'; +import { + DEFAULT_TESTID_LIMIT, + OBSERVATION_TESTID_LIMIT, +} from './utils/constants.js'; +import { + collectTestIds, + collectTrimmedA11ySnapshot, +} from './utils/discovery.js'; +import { + createToolError, + createToolSuccess, + requireActiveSession, +} from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; + +/** + * Collects visible test IDs from the current page. + * + * @param input - The test ID collection options including limit. + * @param context - The tool execution context. 
+ * @returns The list of discovered test ID items. + */ +export async function listTestIdsTool( + input: ListTestIdsInput, + context: ToolContext, +): Promise> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + const limit = input.limit ?? DEFAULT_TESTID_LIMIT; + + try { + const items = await collectTestIds(context.page, limit); + const { refMap } = await collectTrimmedA11ySnapshot(context.page); + + context.sessionManager.setRefMap(refMap); + + return createToolSuccess({ items }); + } catch (error) { + const errorInfo = classifyDiscoveryError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Captures a trimmed accessibility tree snapshot of the current page. + * + * @param input - The snapshot options including optional root selector. + * @param context - The tool execution context. + * @returns The accessibility snapshot nodes. + */ +export async function accessibilitySnapshotTool( + input: AccessibilitySnapshotInput, + context: ToolContext, +): Promise> { + const missingSession = + requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + try { + const { nodes, refMap } = await collectTrimmedA11ySnapshot( + context.page, + input.rootSelector, + ); + + context.sessionManager.setRefMap(refMap); + await collectTestIds(context.page, OBSERVATION_TESTID_LIMIT); + + return createToolSuccess({ nodes }); + } catch (error) { + const errorInfo = classifyDiscoveryError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Captures a full screen description including state, test IDs, a11y, and prior knowledge. + * + * @param input - The describe-screen options including screenshot flags. + * @param context - The tool execution context. + * @returns The composite screen description result. 
+ */ +export async function describeScreenTool( + input: DescribeScreenInput, + context: ToolContext, +): Promise> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + try { + const state = await context.sessionManager.getExtensionState(); + const testIds = await collectTestIds(context.page, DEFAULT_TESTID_LIMIT); + const { nodes, refMap } = await collectTrimmedA11ySnapshot(context.page); + + context.sessionManager.setRefMap(refMap); + + const trackedPages = context.sessionManager.getTrackedPages(); + const activePage = context.sessionManager.getPage(); + const activeTracked = trackedPages.find((tp) => tp.page === activePage); + const activeTab = activeTracked + ? { role: activeTracked.role, url: activePage.url() } + : undefined; + + let screenshot: DescribeScreenResult['screenshot'] = null; + + if (input.includeScreenshot) { + const screenshotName = input.screenshotName ?? 'describe-screen'; + const result = await context.sessionManager.screenshot({ + name: screenshotName, + fullPage: true, + }); + + screenshot = { + path: result.path, + width: result.width, + height: result.height, + base64: input.includeScreenshotBase64 ? 
result.base64 : null, + }; + } + + const sessionMetadata = context.sessionManager.getSessionMetadata(); + const priorKnowledgeContext: PriorKnowledgeContext = { + currentScreen: state.currentScreen, + currentUrl: state.currentUrl, + visibleTestIds: testIds, + a11yNodes: nodes, + currentSessionFlowTags: sessionMetadata?.flowTags, + }; + + const priorKnowledge = await context.knowledgeStore.generatePriorKnowledge( + priorKnowledgeContext, + context.sessionManager.getSessionId(), + ); + + return createToolSuccess({ + state, + activeTab, + testIds: { items: testIds }, + a11y: { nodes }, + screenshot, + priorKnowledge, + }); + } catch (error) { + const errorInfo = classifyDiscoveryError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} diff --git a/src/mcp-server/tools/error-classification.test.ts b/src/tools/error-classification.test.ts similarity index 99% rename from src/mcp-server/tools/error-classification.test.ts rename to src/tools/error-classification.test.ts index b9ba1bb..e141e12 100644 --- a/src/mcp-server/tools/error-classification.test.ts +++ b/src/tools/error-classification.test.ts @@ -20,8 +20,8 @@ import { classifyStateError, classifySeedingError, classifyContextError, -} from './error-classification'; -import { ErrorCodes } from '../types'; +} from './error-classification.js'; +import { ErrorCodes } from './types'; describe('error-classification', () => { describe('isPageClosedError', () => { diff --git a/src/mcp-server/tools/error-classification.ts b/src/tools/error-classification.ts similarity index 99% rename from src/mcp-server/tools/error-classification.ts rename to src/tools/error-classification.ts index c424d91..9b844f3 100644 --- a/src/mcp-server/tools/error-classification.ts +++ b/src/tools/error-classification.ts @@ -5,7 +5,7 @@ * based on error message patterns. 
*/ -import { ErrorCodes } from '../types'; +import { ErrorCodes } from './types'; import { extractErrorMessage } from '../utils'; const ERROR_PATTERNS = { diff --git a/src/tools/index.ts b/src/tools/index.ts new file mode 100644 index 0000000..c75bad2 --- /dev/null +++ b/src/tools/index.ts @@ -0,0 +1,15 @@ +export * from './batch.js'; +export * from './build.js'; +export * from './cleanup.js'; +export * from './clipboard.js'; +export * from './context.js'; +export * from './discovery-tools.js'; +export * from './interaction.js'; +export * from './knowledge.js'; +export * from './launch.js'; +export * from './navigation.js'; +export * from './registry.js'; +export * from './screenshot.js'; +export * from './seeding.js'; +export * from './state.js'; +export * from './utils.js'; diff --git a/src/tools/interaction.test.ts b/src/tools/interaction.test.ts new file mode 100644 index 0000000..4099384 --- /dev/null +++ b/src/tools/interaction.test.ts @@ -0,0 +1,894 @@ +/** + * Unit tests for interaction tool handlers. + * + * Tests handleClick, handleType, and handleWaitFor with various target types, + * error scenarios, and page closure detection. 
+ */ + +import { describe, it, expect, vi, afterEach } from 'vitest'; + +import { + clickTool, + getTextTool, + typeTool, + waitForTool, +} from './interaction.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import * as discoveryModule from './utils/discovery.js'; +import * as targetsModule from './utils/targets.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockLocator() { + return { + click: vi.fn().mockResolvedValue(undefined), + fill: vi.fn().mockResolvedValue(undefined), + waitFor: vi.fn().mockResolvedValue(undefined), + textContent: vi.fn().mockResolvedValue('Hello World'), + }; +} + +function createMockContext( + options: { + hasActive?: boolean; + page?: object; + refMap?: Map; + } = {}, +): ToolContext { + return { + sessionManager: createMockSessionManager({ + hasActive: options.hasActive ?? true, + }), + page: (options.page ?? {}) as ToolContext['page'], + refMap: options.refMap ?? 
new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext; +} + +describe('interaction', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe('clickTool', () => { + it('clicks element by testId', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await clickTool({ testId: 'my-button' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.clicked).toBe(true); + expect(result.result.target).toBe('testId:my-button'); + } + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'testId', + 'my-button', + context.refMap, + 15000, + undefined, + ); + expect(locator.click).toHaveBeenCalled(); + }); + + it('uses custom timeout when provided', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + await clickTool({ testId: 'my-button', timeoutMs: 5000 }, context); + + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'testId', + 'my-button', + context.refMap, + 5000, + undefined, + ); + }); + + it('passes within scope to waitForTarget', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await clickTool( + { testId: 'btn', within: { testId: 'parent' } }, + context, + ); + + expect(result.ok).toBe(true); + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'testId', + 'btn', + context.refMap, + 15000, + { type: 'testId', value: 'parent' }, + ); + }); + + it('clicks element by CSS selector', async () => { + const page = {}; + const locator = 
createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await clickTool({ selector: 'button.primary' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.clicked).toBe(true); + expect(result.result.target).toBe('selector:button.primary'); + } + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'selector', + 'button.primary', + context.refMap, + 15000, + undefined, + ); + }); + + it('clicks element by accessibility reference', async () => { + const page = {}; + const locator = createMockLocator(); + const refMap = new Map([['e5', 'button[aria-label="Submit"]']]); + const context = createMockContext({ page, refMap }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await clickTool({ a11yRef: 'e5' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.clicked).toBe(true); + expect(result.result.target).toBe('a11yRef:e5'); + } + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'a11yRef', + 'e5', + refMap, + 15000, + undefined, + ); + }); + + it('returns error when no target specified', async () => { + const result = await clickTool({} as any, createMockContext()); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('Exactly one'); + } + }); + + it('returns error when multiple targets specified', async () => { + const result = await clickTool( + { testId: 'button', selector: '.button' } as any, + createMockContext(), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('Exactly one'); + } + }); + + it('returns error when validation result is invalid but not caught by 
isInvalidTargetSelection', async () => { + vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({ + valid: true, + } as any); + + const result = await clickTool({ testId: 'button' }, createMockContext()); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toBe('Invalid target selection'); + } + }); + + it('handles page closure gracefully', async () => { + const locator = createMockLocator(); + locator.click.mockRejectedValue( + new Error('Target page, context or browser has been closed'), + ); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await clickTool({ testId: 'close-btn' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.clicked).toBe(true); + expect(result.result.pageClosedAfterClick).toBe(true); + expect(result.result.target).toBe('testId:close-btn'); + } + }); + + it('handles browser closed error gracefully', async () => { + const locator = createMockLocator(); + locator.click.mockRejectedValue(new Error('browser has been closed')); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await clickTool({ testId: 'close-btn' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.pageClosedAfterClick).toBe(true); + } + }); + + it('returns error when click fails with non-closure error', async () => { + const locator = createMockLocator(); + locator.click.mockRejectedValue(new Error('Element is not clickable')); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await clickTool({ testId: 'my-button' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + 
expect(result.error.code).toBe(ErrorCodes.MM_CLICK_FAILED); + } + }); + + it('returns error when element not found', async () => { + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( + new Error('Timeout waiting for element'), + ); + + const result = await clickTool({ testId: 'nonexistent' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); + } + }); + + it('returns error when no session active', async () => { + const result = await clickTool( + { testId: 'my-button' }, + createMockContext({ hasActive: false }), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + }); + + describe('typeTool', () => { + it('types text into element by testId', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await typeTool( + { testId: 'amount-input', text: '0.5' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.typed).toBe(true); + expect(result.result.target).toBe('testId:amount-input'); + expect(result.result.textLength).toBe(3); + } + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'testId', + 'amount-input', + context.refMap, + 15000, + undefined, + ); + expect(locator.fill).toHaveBeenCalledWith('0.5'); + }); + + it('uses custom timeout when provided', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + await typeTool( + { testId: 'input', text: 'test', timeoutMs: 3000 }, + context, + ); + + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 
'testId', + 'input', + context.refMap, + 3000, + undefined, + ); + }); + + it('passes within scope to waitForTarget', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await typeTool( + { testId: 'input', text: 'hello', within: { selector: '.form' } }, + context, + ); + + expect(result.ok).toBe(true); + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'testId', + 'input', + context.refMap, + 15000, + { type: 'selector', value: '.form' }, + ); + }); + + it('types text into element by CSS selector', async () => { + const locator = createMockLocator(); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await typeTool( + { selector: 'input[name="email"]', text: 'test@example.com' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.typed).toBe(true); + expect(result.result.target).toBe('selector:input[name="email"]'); + expect(result.result.textLength).toBe(16); + } + expect(locator.fill).toHaveBeenCalledWith('test@example.com'); + }); + + it('types text into element by accessibility reference', async () => { + const page = {}; + const locator = createMockLocator(); + const refMap = new Map([['e3', 'input[aria-label="Amount"]']]); + const context = createMockContext({ page, refMap }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await typeTool({ a11yRef: 'e3', text: '100' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.typed).toBe(true); + expect(result.result.target).toBe('a11yRef:e3'); + expect(result.result.textLength).toBe(3); + } + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'a11yRef', + 'e3', + refMap, + 
15000, + undefined, + ); + }); + + it('types empty string and reports zero length', async () => { + const locator = createMockLocator(); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await typeTool({ testId: 'input', text: '' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.typed).toBe(true); + expect(result.result.textLength).toBe(0); + } + expect(locator.fill).toHaveBeenCalledWith(''); + }); + + it('returns error when no target specified', async () => { + const result = await typeTool( + { text: 'test' } as any, + createMockContext(), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('Exactly one'); + } + }); + + it('returns error when multiple targets specified', async () => { + const result = await typeTool( + { testId: 'input', selector: 'input', text: 'test' } as any, + createMockContext(), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('Exactly one'); + } + }); + + it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => { + vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({ + valid: true, + } as any); + + const result = await typeTool( + { testId: 'input', text: 'test' }, + createMockContext(), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toBe('Invalid target selection'); + } + }); + + it('returns error when fill fails', async () => { + const locator = createMockLocator(); + locator.fill.mockRejectedValue(new Error('Element is not editable')); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 
'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await typeTool({ testId: 'input', text: 'test' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_TYPE_FAILED); + } + }); + + it('returns error when element not found', async () => { + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( + new Error('Timeout waiting for element'), + ); + + const result = await typeTool( + { testId: 'nonexistent', text: 'test' }, + context, + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); + } + }); + + it('returns error when no session active', async () => { + const result = await typeTool( + { testId: 'input', text: 'test' }, + createMockContext({ hasActive: false }), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + }); + + describe('waitForTool', () => { + it('waits for element by testId', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await waitForTool({ testId: 'loading-spinner' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.found).toBe(true); + expect(result.result.target).toBe('testId:loading-spinner'); + } + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'testId', + 'loading-spinner', + context.refMap, + 15000, + undefined, + ); + }); + + it('uses custom timeout when provided', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + await waitForTool({ testId: 'element', timeoutMs: 
30000 }, context); + + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'testId', + 'element', + context.refMap, + 30000, + undefined, + ); + }); + + it('passes within scope to waitForTarget', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await waitForTool( + { a11yRef: 'e5', within: { a11yRef: 'e1' } }, + context, + ); + + expect(result.ok).toBe(true); + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'a11yRef', + 'e5', + context.refMap, + 15000, + { type: 'a11yRef', value: 'e1' }, + ); + }); + + it('waits for element by CSS selector', async () => { + const page = {}; + const locator = createMockLocator(); + const context = createMockContext({ page }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await waitForTool( + { selector: '.success-message' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.found).toBe(true); + expect(result.result.target).toBe('selector:.success-message'); + } + expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'selector', + '.success-message', + context.refMap, + 15000, + undefined, + ); + }); + + it('waits for element by accessibility reference', async () => { + const page = {}; + const locator = createMockLocator(); + const refMap = new Map([['e10', 'button[aria-label="Confirm"]']]); + const context = createMockContext({ page, refMap }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await waitForTool({ a11yRef: 'e10' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.found).toBe(true); + expect(result.result.target).toBe('a11yRef:e10'); + } + 
expect(discoveryModule.waitForTarget).toHaveBeenCalledWith( + page, + 'a11yRef', + 'e10', + refMap, + 15000, + undefined, + ); + }); + + it('returns error when no target specified', async () => { + const result = await waitForTool({} as any, createMockContext()); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('Exactly one'); + } + }); + + it('returns error when multiple targets specified', async () => { + const result = await waitForTool( + { testId: 'element', selector: '.element' } as any, + createMockContext(), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('Exactly one'); + } + }); + + it('returns error when validation result is invalid but not caught by isInvalidTargetSelection', async () => { + vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({ + valid: true, + } as any); + + const result = await waitForTool( + { testId: 'element' }, + createMockContext(), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toBe('Invalid target selection'); + } + }); + + it('returns error when element not found within timeout', async () => { + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( + new Error('Timeout 15000ms exceeded'), + ); + + const result = await waitForTool({ testId: 'nonexistent' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); + } + }); + + it('returns error when page closed during wait', async () => { + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( + new Error('Target page has been closed'), + ); + + const result = await 
waitForTool({ testId: 'element' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_WAIT_TIMEOUT); + } + }); + + it('returns error when no session active', async () => { + const result = await waitForTool( + { testId: 'element' }, + createMockContext({ hasActive: false }), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + }); + + describe('getTextTool', () => { + it('returns textContent by testId', async () => { + const locator = createMockLocator(); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await getTextTool({ testId: 'my-element' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.text).toBe('Hello World'); + expect(result.result).toHaveLength(11); + expect(result.result.target).toBe('testId:my-element'); + } + }); + + it('returns empty string when textContent is null', async () => { + const locator = createMockLocator(); + locator.textContent.mockResolvedValue(null); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await getTextTool({ testId: 'empty-node' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.text).toBe(''); + expect(result.result).toHaveLength(0); + } + }); + + it('returns textContent by a11yRef', async () => { + const locator = createMockLocator(); + locator.textContent.mockResolvedValue('Ref content'); + const context = createMockContext({ + refMap: new Map([['e1', 'button[name="Submit"]']]), + }); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await getTextTool({ a11yRef: 'e1' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + 
expect(result.result.text).toBe('Ref content'); + } + }); + + it('returns textContent by CSS selector', async () => { + const locator = createMockLocator(); + locator.textContent.mockResolvedValue('Selector content'); + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockResolvedValue( + locator as any, + ); + + const result = await getTextTool({ selector: '#result-text' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.text).toBe('Selector content'); + expect(result.result.target).toBe('selector:#result-text'); + } + }); + + it('returns error when element not found', async () => { + const context = createMockContext(); + + vi.spyOn(discoveryModule, 'waitForTarget').mockRejectedValue( + new Error('Timeout waiting for selector'), + ); + + const result = await getTextTool({ testId: 'missing' }, context); + + expect(result.ok).toBe(false); + }); + + it('returns error with invalid target selection', async () => { + const context = createMockContext(); + + vi.spyOn(targetsModule, 'validateTargetSelection').mockReturnValue({ + valid: false, + error: 'No target provided', + } as any); + + const result = await getTextTool({} as any, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + } + }); + + it('returns error when no session active', async () => { + const result = await getTextTool( + { testId: 'element' }, + createMockContext({ hasActive: false }), + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + + it('supports --within scoping', async () => { + const locator = createMockLocator(); + locator.textContent.mockResolvedValue('Scoped text'); + const context = createMockContext(); + const spy = vi + .spyOn(discoveryModule, 'waitForTarget') + .mockResolvedValue(locator as any); + + const result = await getTextTool( + { + testId: 
'child-element', + within: { testId: 'parent-container' }, + }, + context, + ); + + expect(result.ok).toBe(true); + expect(spy).toHaveBeenCalledWith( + expect.anything(), + 'testId', + 'child-element', + expect.any(Map), + expect.any(Number), + { type: 'testId', value: 'parent-container' }, + ); + }); + }); +}); diff --git a/src/tools/interaction.ts b/src/tools/interaction.ts new file mode 100644 index 0000000..5e3c3af --- /dev/null +++ b/src/tools/interaction.ts @@ -0,0 +1,283 @@ +import { + classifyClickError, + classifyTypeError, + classifyWaitError, + isPageClosedError, +} from './error-classification.js'; +import type { + ClickInput, + ClickResult, + GetTextInput, + GetTextResult, + TypeInput, + TypeResult, + WaitForInput, + WaitForResult, + WithinTarget, +} from './types'; +import { ErrorCodes } from './types'; +import { DEFAULT_INTERACTION_TIMEOUT_MS } from './utils/constants.js'; +import { waitForTarget } from './utils/discovery.js'; +import type { WithinScope } from './utils/discovery.js'; +import { validateTargetSelection } from './utils/targets.js'; +import { + isInvalidTargetSelection, + isValidTargetSelection, +} from './utils/type-guards.js'; +import { + createToolError, + createToolSuccess, + requireActiveSession, +} from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; + +/** + * Converts a WithinTarget input to the WithinScope format expected by waitForTarget. + * + * @param within - The optional within target from tool input. + * @returns The resolved scope, or undefined if no within target is provided. 
+ */ +function resolveWithinScope( + within: WithinTarget | undefined, +): WithinScope | undefined { + if (!within) { + return undefined; + } + if (within.a11yRef) { + return { type: 'a11yRef', value: within.a11yRef }; + } + if (within.testId) { + return { type: 'testId', value: within.testId }; + } + if (within.selector) { + return { type: 'selector', value: within.selector }; + } + return undefined; +} + +/** + * Clicks an element identified by ref, test ID, or selector. + * + * If the click throws an error that isPageClosedError recognises, the click is + * still reported as successful with pageClosedAfterClick set to true. Any other + * failure, including a failure to locate the target, is mapped through + * classifyClickError. + * + * @param input - The click target and timeout options. + * @param context - The tool execution context. + * @returns The click operation result, the response produced by + * requireActiveSession when it returns one, or an MM_INVALID_INPUT error when + * target validation fails. + */ +export async function clickTool( + input: ClickInput, + context: ToolContext, +): Promise<ToolResponse<ClickResult>> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS; + const validation = validateTargetSelection(input); + + if (isInvalidTargetSelection(validation)) { + return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error); + } + + if (!isValidTargetSelection(validation)) { + return createToolError( + ErrorCodes.MM_INVALID_INPUT, + 'Invalid target selection', + ); + } + + const { type: targetType, value: targetValue } = validation; + + try { + const locator = await waitForTarget( + context.page, + targetType, + targetValue, + context.refMap, + timeoutMs, + resolveWithinScope(input.within), + ); + + try { + await locator.click(); + return createToolSuccess({ + clicked: true, + target: `${targetType}:${targetValue}`, + }); + } catch (clickError) { + if (isPageClosedError(clickError)) { + return createToolSuccess({ + clicked: true, + target: `${targetType}:${targetValue}`, + pageClosedAfterClick: true, + }); + } + + throw clickError; + } + } catch (error) { + const errorInfo = classifyClickError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Types text into an element identified by
ref, test ID, or selector. + * + * The located element is filled via locator.fill, and the result reports the + * length of the supplied text rather than the text itself. Failures are mapped + * through classifyTypeError. + * + * @param input - The type target, text content, and timeout options. + * @param context - The tool execution context. + * @returns The type operation result. + */ +export async function typeTool( + input: TypeInput, + context: ToolContext, +): Promise<ToolResponse<TypeResult>> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS; + const validation = validateTargetSelection(input); + + if (isInvalidTargetSelection(validation)) { + return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error); + } + + if (!isValidTargetSelection(validation)) { + return createToolError( + ErrorCodes.MM_INVALID_INPUT, + 'Invalid target selection', + ); + } + + const { type: targetType, value: targetValue } = validation; + + try { + const locator = await waitForTarget( + context.page, + targetType, + targetValue, + context.refMap, + timeoutMs, + resolveWithinScope(input.within), + ); + + await locator.fill(input.text); + + return createToolSuccess({ + typed: true, + target: `${targetType}:${targetValue}`, + textLength: input.text.length, + }); + } catch (error) { + const errorInfo = classifyTypeError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Waits for an element to appear on the page within a timeout. + * + * The timeout falls back to DEFAULT_INTERACTION_TIMEOUT_MS when + * input.timeoutMs is not set. + * + * @param input - The wait target and timeout options. + * @param context - The tool execution context. + * @returns The wait result indicating whether the element was found. + */ +export async function waitForTool( + input: WaitForInput, + context: ToolContext, +): Promise<ToolResponse<WaitForResult>> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + const timeoutMs = input.timeoutMs ??
DEFAULT_INTERACTION_TIMEOUT_MS; + const validation = validateTargetSelection(input); + + if (isInvalidTargetSelection(validation)) { + return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error); + } + + if (!isValidTargetSelection(validation)) { + return createToolError( + ErrorCodes.MM_INVALID_INPUT, + 'Invalid target selection', + ); + } + + const { type: targetType, value: targetValue } = validation; + + try { + await waitForTarget( + context.page, + targetType, + targetValue, + context.refMap, + timeoutMs, + resolveWithinScope(input.within), + ); + + return createToolSuccess({ + found: true, + target: `${targetType}:${targetValue}`, + }); + } catch (error) { + const errorInfo = classifyWaitError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Reads the text content of an element identified by ref, test ID, or selector. + * + * A null textContent is reported as an empty string, and the result also + * carries the length of the returned text. Failures are mapped through + * classifyWaitError. + * + * @param input - The target element and timeout options. + * @param context - The tool execution context. + * @returns The text content of the matched element. + */ +export async function getTextTool( + input: GetTextInput, + context: ToolContext, +): Promise<ToolResponse<GetTextResult>> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS; + const validation = validateTargetSelection(input); + + if (isInvalidTargetSelection(validation)) { + return createToolError(ErrorCodes.MM_INVALID_INPUT, validation.error); + } + + if (!isValidTargetSelection(validation)) { + return createToolError( + ErrorCodes.MM_INVALID_INPUT, + 'Invalid target selection', + ); + } + + const { type: targetType, value: targetValue } = validation; + + try { + const locator = await waitForTarget( + context.page, + targetType, + targetValue, + context.refMap, + timeoutMs, + resolveWithinScope(input.within), + ); + + const text = (await locator.textContent()) ??
''; + + return createToolSuccess({ + text, + target: `${targetType}:${targetValue}`, + length: text.length, + }); + } catch (error) { + const errorInfo = classifyWaitError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} diff --git a/src/mcp-server/tools/knowledge.test.ts b/src/tools/knowledge.test.ts similarity index 53% rename from src/mcp-server/tools/knowledge.test.ts rename to src/tools/knowledge.test.ts index afb0233..5eaa60c 100644 --- a/src/mcp-server/tools/knowledge.test.ts +++ b/src/tools/knowledge.test.ts @@ -5,25 +5,21 @@ * summarize, and session listing with various filter combinations. */ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; import { - handleKnowledgeLast, - handleKnowledgeSearch, - handleKnowledgeSummarize, - handleKnowledgeSessions, + knowledgeLastTool, + knowledgeSearchTool, + knowledgeSummarizeTool, + knowledgeSessionsTool, } from './knowledge.js'; -import * as knowledgeStoreModule from '../knowledge-store.js'; -import * as sessionManagerModule from '../session-manager.js'; -import { createMockSessionManager } from '../test-utils'; -import { ErrorCodes } from '../types/errors.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; -describe('knowledge', () => { - let mockSessionManager: ReturnType; - let mockKnowledgeStore: any; - - beforeEach(() => { - mockSessionManager = createMockSessionManager({ +function createMockContext(): ToolContext { + return { + sessionManager: createMockSessionManager({ hasActive: true, sessionId: 'test-session-123', sessionMetadata: { @@ -34,14 +30,11 @@ describe('knowledge', () => { tags: [], launch: { stateMode: 'default' }, }, - }); - vi.spyOn(sessionManagerModule, 'getSessionManager').mockReturnValue( - mockSessionManager, - ); - - // Mock knowledge store to 
prevent "not initialized" errors - mockKnowledgeStore = { - recordStep: vi.fn().mockResolvedValue(undefined), + }), + page: {}, + refMap: new Map(), + workflowContext: {}, + knowledgeStore: { getLastSteps: vi.fn().mockResolvedValue([]), searchSteps: vi.fn().mockResolvedValue([]), summarizeSession: vi.fn().mockResolvedValue({ @@ -50,59 +43,71 @@ describe('knowledge', () => { recipe: [], }), listSessions: vi.fn().mockResolvedValue([]), - generatePriorKnowledge: vi.fn().mockResolvedValue(undefined), - writeSessionMetadata: vi.fn().mockResolvedValue('test-session'), - }; - vi.spyOn(knowledgeStoreModule, 'knowledgeStore', 'get').mockReturnValue( - mockKnowledgeStore, - ); - }); + }, + } as unknown as ToolContext; +} + +describe('knowledge', () => { + let context: ToolContext; - afterEach(() => { - vi.restoreAllMocks(); + beforeEach(() => { + context = createMockContext(); }); - describe('handleKnowledgeLast', () => { + describe('knowledgeLastTool', () => { it('retrieves last N steps with default parameters', async () => { - // Arrange const mockSteps = [ - { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' }, - { timestamp: '2026-02-04T10:01:00Z', tool: 'mm_type', screen: 'home' }, + { + timestamp: '2026-02-04T10:00:00Z', + tool: 'click', + screen: 'home', + snippet: 'Clicked send', + }, + { + timestamp: '2026-02-04T10:01:00Z', + tool: 'type', + screen: 'home', + snippet: 'Entered amount', + }, ]; - mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps); + vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue( + mockSteps, + ); - // Act - const result = await handleKnowledgeLast({}); + const result = await knowledgeLastTool({}, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.steps).toStrictEqual(mockSteps); } - expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith( - 20, // default n - 'current', // default scope + expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith( + 20, 
+ 'current', 'test-session-123', - undefined, // no filters + undefined, ); }); it('retrieves last N steps with custom n parameter', async () => { - // Arrange const mockSteps = [ - { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' }, + { + timestamp: '2026-02-04T10:00:00Z', + tool: 'click', + screen: 'home', + snippet: 'Clicked send', + }, ]; - mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps); + vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue( + mockSteps, + ); - // Act - const result = await handleKnowledgeLast({ n: 5 }); + const result = await knowledgeLastTool({ n: 5 }, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.steps).toStrictEqual(mockSteps); } - expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith( + expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith( 5, 'current', 'test-session-123', @@ -111,18 +116,22 @@ describe('knowledge', () => { }); it('retrieves steps with scope "all"', async () => { - // Arrange const mockSteps = [ - { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' }, + { + timestamp: '2026-02-04T10:00:00Z', + tool: 'click', + screen: 'home', + snippet: 'Clicked send', + }, ]; - mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps); + vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue( + mockSteps, + ); - // Act - const result = await handleKnowledgeLast({ scope: 'all' }); + const result = await knowledgeLastTool({ scope: 'all' }, context); - // Assert expect(result.ok).toBe(true); - expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith( + expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith( 20, 'all', 'test-session-123', @@ -131,26 +140,30 @@ describe('knowledge', () => { }); it('retrieves steps with filters', async () => { - // Arrange const mockSteps = [ - { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'send' }, + { + timestamp: '2026-02-04T10:00:00Z', + 
tool: 'click', + screen: 'send', + snippet: 'Clicked confirm', + }, ]; - mockKnowledgeStore.getLastSteps.mockResolvedValue(mockSteps); const filters = { flowTag: 'send', screen: 'send', sinceHours: 24, }; + vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue( + mockSteps, + ); - // Act - const result = await handleKnowledgeLast({ n: 10, filters }); + const result = await knowledgeLastTool({ n: 10, filters }, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.steps).toStrictEqual(mockSteps); } - expect(mockKnowledgeStore.getLastSteps).toHaveBeenCalledWith( + expect(context.knowledgeStore.getLastSteps).toHaveBeenCalledWith( 10, 'current', 'test-session-123', @@ -159,13 +172,10 @@ describe('knowledge', () => { }); it('returns empty array when no steps found', async () => { - // Arrange - mockKnowledgeStore.getLastSteps.mockResolvedValue([]); + vi.mocked(context.knowledgeStore.getLastSteps).mockResolvedValue([]); - // Act - const result = await handleKnowledgeLast({ n: 10 }); + const result = await knowledgeLastTool({ n: 10 }, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.steps).toStrictEqual([]); @@ -173,15 +183,12 @@ describe('knowledge', () => { }); it('returns error when knowledge store fails', async () => { - // Arrange - mockKnowledgeStore.getLastSteps.mockRejectedValue( + vi.mocked(context.knowledgeStore.getLastSteps).mockRejectedValue( new Error('Database connection failed'), ); - // Act - const result = await handleKnowledgeLast({ n: 10 }); + const result = await knowledgeLastTool({ n: 10 }, context); - // Assert expect(result.ok).toBe(false); if (!result.ok) { expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR); @@ -191,52 +198,60 @@ describe('knowledge', () => { }); }); - describe('handleKnowledgeSearch', () => { + describe('knowledgeSearchTool', () => { it('searches steps with default parameters', async () => { - // Arrange const mockMatches = [ - { 
timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' }, + { + timestamp: '2026-02-04T10:00:00Z', + tool: 'click', + screen: 'home', + snippet: 'Clicked send', + }, ]; - mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches); + vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue( + mockMatches, + ); - // Act - const result = await handleKnowledgeSearch({ query: 'mm_click' }); + const result = await knowledgeSearchTool({ query: 'click' }, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.matches).toStrictEqual(mockMatches); - expect(result.result.query).toBe('mm_click'); + expect(result.result.query).toBe('click'); } - expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith( - 'mm_click', - 20, // default limit - 'all', // default scope + expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith( + 'click', + 20, + 'all', 'test-session-123', - undefined, // no filters + undefined, ); }); it('searches steps with custom limit', async () => { - // Arrange const mockMatches = [ - { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_type', screen: 'send' }, + { + timestamp: '2026-02-04T10:00:00Z', + tool: 'type', + screen: 'send', + snippet: 'Entered recipient', + }, ]; - mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches); + vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue( + mockMatches, + ); - // Act - const result = await handleKnowledgeSearch({ - query: 'mm_type', - limit: 50, - }); + const result = await knowledgeSearchTool( + { query: 'type', limit: 50 }, + context, + ); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.matches).toStrictEqual(mockMatches); } - expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith( - 'mm_type', + expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith( + 'type', 50, 'all', 'test-session-123', @@ -245,22 +260,26 @@ describe('knowledge', () => { }); it('searches steps with scope "current"', 
async () => { - // Arrange const mockMatches = [ - { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'home' }, + { + timestamp: '2026-02-04T10:00:00Z', + tool: 'click', + screen: 'home', + snippet: 'Clicked send', + }, ]; - mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches); + vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue( + mockMatches, + ); - // Act - const result = await handleKnowledgeSearch({ - query: 'mm_click', - scope: 'current', - }); + const result = await knowledgeSearchTool( + { query: 'click', scope: 'current' }, + context, + ); - // Assert expect(result.ok).toBe(true); - expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith( - 'mm_click', + expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith( + 'click', 20, 'current', 'test-session-123', @@ -269,30 +288,33 @@ describe('knowledge', () => { }); it('searches steps with filters', async () => { - // Arrange const mockMatches = [ - { timestamp: '2026-02-04T10:00:00Z', tool: 'mm_click', screen: 'send' }, + { + timestamp: '2026-02-04T10:00:00Z', + tool: 'click', + screen: 'send', + snippet: 'Confirmed transaction', + }, ]; - mockKnowledgeStore.searchSteps.mockResolvedValue(mockMatches); const filters = { flowTag: 'send', tag: 'transaction', screen: 'send', }; + vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue( + mockMatches, + ); - // Act - const result = await handleKnowledgeSearch({ - query: 'confirm', - limit: 10, - filters, - }); + const result = await knowledgeSearchTool( + { query: 'confirm', limit: 10, filters }, + context, + ); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.matches).toStrictEqual(mockMatches); } - expect(mockKnowledgeStore.searchSteps).toHaveBeenCalledWith( + expect(context.knowledgeStore.searchSteps).toHaveBeenCalledWith( 'confirm', 10, 'all', @@ -302,13 +324,13 @@ describe('knowledge', () => { }); it('returns empty array when no matches found', async () => { - // Arrange - 
mockKnowledgeStore.searchSteps.mockResolvedValue([]); + vi.mocked(context.knowledgeStore.searchSteps).mockResolvedValue([]); - // Act - const result = await handleKnowledgeSearch({ query: 'nonexistent' }); + const result = await knowledgeSearchTool( + { query: 'nonexistent' }, + context, + ); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.matches).toStrictEqual([]); @@ -317,15 +339,12 @@ describe('knowledge', () => { }); it('returns error when search fails', async () => { - // Arrange - mockKnowledgeStore.searchSteps.mockRejectedValue( + vi.mocked(context.knowledgeStore.searchSteps).mockRejectedValue( new Error('Search index corrupted'), ); - // Act - const result = await handleKnowledgeSearch({ query: 'test' }); + const result = await knowledgeSearchTool({ query: 'test' }, context); - // Assert expect(result.ok).toBe(false); if (!result.ok) { expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR); @@ -335,109 +354,106 @@ describe('knowledge', () => { }); }); - describe('handleKnowledgeSummarize', () => { + describe('knowledgeSummarizeTool', () => { it('summarizes current session by default', async () => { - // Arrange const mockSummary = { sessionId: 'test-session-123', stepCount: 5, recipe: [ - { stepNumber: 1, tool: 'mm_click', notes: 'Clicked send button' }, - { stepNumber: 2, tool: 'mm_type', notes: 'Entered amount' }, + { stepNumber: 1, tool: 'click', notes: 'Clicked send button' }, + { stepNumber: 2, tool: 'type', notes: 'Entered amount' }, ], }; - mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary); + vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue( + mockSummary, + ); - // Act - const result = await handleKnowledgeSummarize({}); + const result = await knowledgeSummarizeTool({}, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result).toStrictEqual(mockSummary); } - expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith( + 
expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith( 'test-session-123', ); }); it('summarizes current session with scope "current"', async () => { - // Arrange const mockSummary = { sessionId: 'test-session-123', stepCount: 3, recipe: [], }; - mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary); + vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue( + mockSummary, + ); - // Act - const result = await handleKnowledgeSummarize({ scope: 'current' }); + const result = await knowledgeSummarizeTool( + { scope: 'current' }, + context, + ); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result).toStrictEqual(mockSummary); } - expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith( + expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith( 'test-session-123', ); }); it('summarizes specific session by sessionId', async () => { - // Arrange const mockSummary = { sessionId: 'other-session-456', stepCount: 10, - recipe: [ - { stepNumber: 1, tool: 'mm_launch', notes: 'Launched browser' }, - ], + recipe: [{ stepNumber: 1, tool: 'launch', notes: 'Launched browser' }], }; - mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary); + vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue( + mockSummary, + ); - // Act - const result = await handleKnowledgeSummarize({ - sessionId: 'other-session-456', - }); + const result = await knowledgeSummarizeTool( + { sessionId: 'other-session-456' }, + context, + ); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result).toStrictEqual(mockSummary); } - expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith( + expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith( 'other-session-456', ); }); it('summarizes session with scope object containing sessionId', async () => { - // Arrange const mockSummary = { sessionId: 'scoped-session-789', stepCount: 7, recipe: [], }; - 
mockKnowledgeStore.summarizeSession.mockResolvedValue(mockSummary); + vi.mocked(context.knowledgeStore.summarizeSession).mockResolvedValue( + mockSummary, + ); - // Act - const result = await handleKnowledgeSummarize({ - scope: { sessionId: 'scoped-session-789' }, - }); + const result = await knowledgeSummarizeTool( + { scope: { sessionId: 'scoped-session-789' } }, + context, + ); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result).toStrictEqual(mockSummary); } - expect(mockKnowledgeStore.summarizeSession).toHaveBeenCalledWith( + expect(context.knowledgeStore.summarizeSession).toHaveBeenCalledWith( 'scoped-session-789', ); }); it('returns error when scope is "all"', async () => { - // Act - const result = await handleKnowledgeSummarize({ scope: 'all' }); + const result = await knowledgeSummarizeTool({ scope: 'all' }, context); - // Assert expect(result.ok).toBe(false); if (!result.ok) { expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); @@ -446,13 +462,10 @@ describe('knowledge', () => { }); it('returns error when no sessionId can be determined', async () => { - // Arrange - vi.spyOn(mockSessionManager, 'getSessionId').mockReturnValue(undefined); + vi.mocked(context.sessionManager.getSessionId).mockReturnValue(undefined); - // Act - const result = await handleKnowledgeSummarize({}); + const result = await knowledgeSummarizeTool({}, context); - // Assert expect(result.ok).toBe(false); if (!result.ok) { expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); @@ -461,17 +474,15 @@ describe('knowledge', () => { }); it('returns error when summarize fails', async () => { - // Arrange - mockKnowledgeStore.summarizeSession.mockRejectedValue( + vi.mocked(context.knowledgeStore.summarizeSession).mockRejectedValue( new Error('Session not found'), ); - // Act - const result = await handleKnowledgeSummarize({ - sessionId: 'nonexistent-session', - }); + const result = await knowledgeSummarizeTool( + { sessionId: 'nonexistent-session' 
}, + context, + ); - // Assert expect(result.ok).toBe(false); if (!result.ok) { expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR); @@ -481,9 +492,8 @@ describe('knowledge', () => { }); }); - describe('handleKnowledgeSessions', () => { + describe('knowledgeSessionsTool', () => { it('lists sessions with default limit', async () => { - // Arrange const mockSessions = [ { sessionId: 'session-1', @@ -499,24 +509,23 @@ describe('knowledge', () => { tags: ['test'], }, ]; - mockKnowledgeStore.listSessions.mockResolvedValue(mockSessions); + vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue( + mockSessions, + ); - // Act - const result = await handleKnowledgeSessions({}); + const result = await knowledgeSessionsTool({}, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.sessions).toStrictEqual(mockSessions); } - expect(mockKnowledgeStore.listSessions).toHaveBeenCalledWith( - 10, // default limit - undefined, // no filters + expect(context.knowledgeStore.listSessions).toHaveBeenCalledWith( + 10, + undefined, ); }); it('lists sessions with custom limit', async () => { - // Arrange const mockSessions = [ { sessionId: 'session-1', @@ -525,24 +534,23 @@ describe('knowledge', () => { tags: [], }, ]; - mockKnowledgeStore.listSessions.mockResolvedValue(mockSessions); + vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue( + mockSessions, + ); - // Act - const result = await handleKnowledgeSessions({ limit: 25 }); + const result = await knowledgeSessionsTool({ limit: 25 }, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.sessions).toStrictEqual(mockSessions); } - expect(mockKnowledgeStore.listSessions).toHaveBeenCalledWith( + expect(context.knowledgeStore.listSessions).toHaveBeenCalledWith( 25, undefined, ); }); it('lists sessions with filters', async () => { - // Arrange const mockSessions = [ { sessionId: 'session-1', @@ -551,31 +559,34 @@ describe('knowledge', () 
=> { tags: [], }, ]; - mockKnowledgeStore.listSessions.mockResolvedValue(mockSessions); const filters = { flowTag: 'send', sinceHours: 48, }; + vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue( + mockSessions, + ); - // Act - const result = await handleKnowledgeSessions({ limit: 20, filters }); + const result = await knowledgeSessionsTool( + { limit: 20, filters }, + context, + ); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.sessions).toStrictEqual(mockSessions); } - expect(mockKnowledgeStore.listSessions).toHaveBeenCalledWith(20, filters); + expect(context.knowledgeStore.listSessions).toHaveBeenCalledWith( + 20, + filters, + ); }); it('returns empty array when no sessions found', async () => { - // Arrange - mockKnowledgeStore.listSessions.mockResolvedValue([]); + vi.mocked(context.knowledgeStore.listSessions).mockResolvedValue([]); - // Act - const result = await handleKnowledgeSessions({ limit: 10 }); + const result = await knowledgeSessionsTool({ limit: 10 }, context); - // Assert expect(result.ok).toBe(true); if (result.ok) { expect(result.result.sessions).toStrictEqual([]); @@ -583,15 +594,12 @@ describe('knowledge', () => { }); it('returns error when listing fails', async () => { - // Arrange - mockKnowledgeStore.listSessions.mockRejectedValue( + vi.mocked(context.knowledgeStore.listSessions).mockRejectedValue( new Error('Database unavailable'), ); - // Act - const result = await handleKnowledgeSessions({}); + const result = await knowledgeSessionsTool({}, context); - // Assert expect(result.ok).toBe(false); if (!result.ok) { expect(result.error.code).toBe(ErrorCodes.MM_KNOWLEDGE_ERROR); diff --git a/src/tools/knowledge.ts b/src/tools/knowledge.ts new file mode 100644 index 0000000..27a3939 --- /dev/null +++ b/src/tools/knowledge.ts @@ -0,0 +1,164 @@ +import { extractErrorMessage } from '../utils'; +import type { + KnowledgeLastInput, + KnowledgeLastResult, + KnowledgeScope, + KnowledgeSearchInput, + 
KnowledgeSearchResult, + KnowledgeSessionsInput, + KnowledgeSessionsResult, + KnowledgeSummarizeInput, + KnowledgeSummarizeResult, +} from './types'; +import { ErrorCodes } from './types'; +import { createToolError, createToolSuccess } from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; + +/** + * Retrieves the most recent knowledge steps from the store. + * + * @param input - The step retrieval options including count and scope. + * @param context - The tool execution context. + * @returns The retrieved knowledge steps. + */ +export async function knowledgeLastTool( + input: KnowledgeLastInput, + context: ToolContext, +): Promise> { + const sessionId = context.sessionManager.getSessionId(); + const nSteps = input.n ?? 20; + const scope: KnowledgeScope = input.scope ?? 'current'; + + try { + const steps = await context.knowledgeStore.getLastSteps( + nSteps, + scope, + sessionId, + input.filters, + ); + + return createToolSuccess({ steps }); + } catch (error) { + return createToolError( + ErrorCodes.MM_KNOWLEDGE_ERROR, + `Failed to retrieve steps: ${extractErrorMessage(error)}`, + ); + } +} + +/** + * Searches knowledge steps by query string. + * + * @param input - The search query, limit, scope, and filters. + * @param context - The tool execution context. + * @returns The matching knowledge steps and query. + */ +export async function knowledgeSearchTool( + input: KnowledgeSearchInput, + context: ToolContext, +): Promise> { + const sessionId = context.sessionManager.getSessionId(); + const limit = input.limit ?? 20; + const scope: KnowledgeScope = input.scope ?? 
'all'; + + try { + const matches = await context.knowledgeStore.searchSteps( + input.query, + limit, + scope, + sessionId, + input.filters, + ); + + return createToolSuccess({ + matches, + query: input.query, + }); + } catch (error) { + return createToolError( + ErrorCodes.MM_KNOWLEDGE_ERROR, + `Search failed: ${extractErrorMessage(error)}`, + ); + } +} + +/** + * Generates a summary of a knowledge session. + * + * @param input - The session ID or scope to summarize. + * @param context - The tool execution context. + * @returns The session summary. + */ +export async function knowledgeSummarizeTool( + input: KnowledgeSummarizeInput, + context: ToolContext, +): Promise> { + const currentSessionId = context.sessionManager.getSessionId(); + + let targetSessionId: string | undefined; + + if (input.sessionId) { + targetSessionId = input.sessionId; + } else if (input.scope) { + if (input.scope === 'all') { + return createToolError( + ErrorCodes.MM_INVALID_INPUT, + 'Cannot summarize all sessions. Use scope="current" or provide a specific sessionId.', + ); + } + + if (input.scope === 'current') { + targetSessionId = currentSessionId; + } else if (typeof input.scope === 'object' && 'sessionId' in input.scope) { + targetSessionId = input.scope.sessionId; + } + } else { + targetSessionId = currentSessionId; + } + + if (!targetSessionId) { + return createToolError( + ErrorCodes.MM_INVALID_INPUT, + 'No sessionId provided and no active session', + ); + } + + try { + const summary = + await context.knowledgeStore.summarizeSession(targetSessionId); + return createToolSuccess(summary); + } catch (error) { + return createToolError( + ErrorCodes.MM_KNOWLEDGE_ERROR, + `Summarize failed: ${extractErrorMessage(error)}`, + ); + } +} + +/** + * Lists available knowledge sessions with optional filters. + * + * @param input - The listing options including limit and filters. + * @param context - The tool execution context. + * @returns The list of knowledge sessions. 
+ */ +export async function knowledgeSessionsTool( + input: KnowledgeSessionsInput, + context: ToolContext, +): Promise> { + const limit = input.limit ?? 10; + + try { + const sessions = await context.knowledgeStore.listSessions( + limit, + input.filters, + ); + + return createToolSuccess({ sessions }); + } catch (error) { + return createToolError( + ErrorCodes.MM_KNOWLEDGE_ERROR, + `Failed to list sessions: ${extractErrorMessage(error)}`, + ); + } +} diff --git a/src/tools/launch.test.ts b/src/tools/launch.test.ts new file mode 100644 index 0000000..b198fa0 --- /dev/null +++ b/src/tools/launch.test.ts @@ -0,0 +1,295 @@ +/** + * Unit tests for launch tool handler. + * + * Tests session launch with various states and error scenarios. + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +import { launchTool } from './launch.js'; +import type { LaunchInput } from './types'; +import type { ExtensionState } from '../capabilities/types.js'; +import type { SessionLaunchResult } from '../server/session-manager.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockContext( + options: { + hasActive?: boolean; + launchResult?: SessionLaunchResult; + environmentMode?: 'e2e' | 'prod'; + } = {}, +): ToolContext { + return { + sessionManager: createMockSessionManager(options), + page: {} as ToolContext['page'], + refMap: new Map(), + workflowContext: {}, + knowledgeStore: { + writeSessionMetadata: vi.fn().mockResolvedValue('test-session-123'), + }, + } as unknown as ToolContext; +} + +describe('launchTool', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('successful launch', () => { + it('returns session info on successful launch', async () => { + const mockState: ExtensionState = { + isLoaded: true, + currentUrl: 'chrome-extension://ext-123/home.html', + extensionId: 'ext-123', + 
isUnlocked: false, + currentScreen: 'home', + accountAddress: null, + networkName: null, + chainId: null, + balance: null, + }; + + const mockLaunchResult: SessionLaunchResult = { + sessionId: 'test-session-123', + extensionId: 'ext-123', + state: mockState, + }; + + const context = createMockContext({ + hasActive: false, + launchResult: mockLaunchResult, + }); + const input: LaunchInput = { stateMode: 'default' }; + + const result = await launchTool(input, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.sessionId).toBe('test-session-123'); + expect(result.result.extensionId).toBe('ext-123'); + expect(result.result.state).toStrictEqual(mockState); + } + expect(context.sessionManager.launch).toHaveBeenCalledWith(input); + }); + + it('includes prerequisites in prod mode', async () => { + const mockState: ExtensionState = { + isLoaded: true, + currentUrl: 'chrome-extension://ext-456/home.html', + extensionId: 'ext-456', + isUnlocked: true, + currentScreen: 'home', + accountAddress: '0x1234', + networkName: 'Ethereum Mainnet', + chainId: 1, + balance: '10 ETH', + }; + + const mockLaunchResult: SessionLaunchResult = { + sessionId: 'prod-session-456', + extensionId: 'ext-456', + state: mockState, + }; + + const context = createMockContext({ + hasActive: false, + launchResult: mockLaunchResult, + environmentMode: 'prod', + }); + + const result = await launchTool({ stateMode: 'default' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.prerequisites).toBeDefined(); + expect(result.result.prerequisites).toHaveLength(3); + expect(result.result.prerequisites?.[0].step).toBe('Unlock Wallet'); + expect(result.result.prerequisites?.[1].step).toBe('Configure Network'); + expect(result.result.prerequisites?.[2].step).toBe('Set Up Accounts'); + } + }); + + it('does not include prerequisites in e2e mode', async () => { + const mockState: ExtensionState = { + isLoaded: true, + currentUrl: 
'chrome-extension://ext-123/home.html', + extensionId: 'ext-123', + isUnlocked: false, + currentScreen: 'home', + accountAddress: null, + networkName: null, + chainId: null, + balance: null, + }; + + const mockLaunchResult: SessionLaunchResult = { + sessionId: 'e2e-session-789', + extensionId: 'ext-123', + state: mockState, + }; + + const context = createMockContext({ + hasActive: false, + launchResult: mockLaunchResult, + environmentMode: 'e2e', + }); + + const result = await launchTool({ stateMode: 'default' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.prerequisites).toBeUndefined(); + } + }); + + it('passes through all launch input parameters', async () => { + const context = createMockContext({ hasActive: false }); + const input: LaunchInput = { + stateMode: 'custom', + fixturePreset: 'test-preset', + autoBuild: false, + slowMo: 100, + goal: 'Test send flow', + flowTags: ['send', 'transaction'], + tags: ['smoke-test'], + seedContracts: ['hst', 'nfts'], + ports: { + anvil: 8546, + fixtureServer: 12346, + }, + }; + + const result = await launchTool(input, context); + + expect(result.ok).toBe(true); + expect(context.sessionManager.launch).toHaveBeenCalledWith(input); + }); + + it('calls setContext before launch when context is provided', async () => { + const context = createMockContext({ hasActive: false }); + const input: LaunchInput = { + context: 'prod', + stateMode: 'onboarding', + }; + + await launchTool(input, context); + + expect(context.sessionManager.setContext).toHaveBeenCalledWith('prod'); + expect(context.sessionManager.launch).toHaveBeenCalledWith(input); + const setContextOrder = ( + context.sessionManager.setContext as ReturnType + ).mock.invocationCallOrder[0]; + const launchOrder = ( + context.sessionManager.launch as ReturnType + ).mock.invocationCallOrder[0]; + expect(setContextOrder).toBeLessThan(launchOrder); + }); + + it('does not call setContext when context is not provided', async () => { + const 
context = createMockContext({ hasActive: false }); + const input: LaunchInput = { stateMode: 'default' }; + + await launchTool(input, context); + + expect(context.sessionManager.setContext).not.toHaveBeenCalled(); + }); + }); + + describe('session already running', () => { + it('returns error when session already active', async () => { + const context = createMockContext({ hasActive: true }); + + const result = await launchTool({ stateMode: 'default' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_SESSION_ALREADY_RUNNING); + expect(result.error.message).toBe( + 'A session is already running. Call cleanup first, or use --force.', + ); + } + expect(context.sessionManager.launch).not.toHaveBeenCalled(); + }); + + it('cleans up and relaunches when force is true', async () => { + const context = createMockContext({ hasActive: true }); + vi.spyOn(context.sessionManager, 'cleanup').mockResolvedValue(true); + + const result = await launchTool( + { stateMode: 'default', force: true }, + context, + ); + + expect(result.ok).toBe(true); + expect(context.sessionManager.cleanup).toHaveBeenCalled(); + expect(context.sessionManager.launch).toHaveBeenCalled(); + }); + }); + + describe('launch failures', () => { + it('returns port conflict error for EADDRINUSE', async () => { + const context = createMockContext({ hasActive: false }); + vi.spyOn(context.sessionManager, 'launch').mockRejectedValue( + new Error('listen EADDRINUSE: address already in use :::8545'), + ); + + const input: LaunchInput = { stateMode: 'default' }; + const result = await launchTool(input, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE); + expect(result.error.message).toContain('Port conflict'); + expect(result.error.message).toContain('EADDRINUSE'); + } + }); + + it('returns port conflict error for port keyword in message', async () => { + const context = 
createMockContext({ hasActive: false }); + vi.spyOn(context.sessionManager, 'launch').mockRejectedValue( + new Error('port 8545 is already in use'), + ); + + const result = await launchTool({ stateMode: 'default' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_PORT_IN_USE); + expect(result.error.message).toContain('Port conflict'); + } + }); + + it('returns generic launch failed error for other errors', async () => { + const context = createMockContext({ hasActive: false }); + vi.spyOn(context.sessionManager, 'launch').mockRejectedValue( + new Error('Browser failed to start'), + ); + + const result = await launchTool({ stateMode: 'default' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED); + expect(result.error.message).toContain('Launch failed'); + expect(result.error.message).toContain('Browser failed to start'); + } + }); + + it('handles non-Error exceptions', async () => { + const context = createMockContext({ hasActive: false }); + vi.spyOn(context.sessionManager, 'launch').mockRejectedValue( + 'string error', + ); + + const result = await launchTool({ stateMode: 'default' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_LAUNCH_FAILED); + expect(result.error.message).toContain('Launch failed'); + } + }); + }); +}); diff --git a/src/tools/launch.ts b/src/tools/launch.ts new file mode 100644 index 0000000..16a7bc2 --- /dev/null +++ b/src/tools/launch.ts @@ -0,0 +1,76 @@ +import type { LaunchInput, LaunchPrerequisite, LaunchResult } from './types'; +import { ErrorCodes } from './types'; +import { createToolError, createToolSuccess } from './utils.js'; +import type { ToolContext, ToolResponse } from '../types/http.js'; +import { extractErrorMessage } from '../utils'; + +const PROD_MODE_PREREQUISITES: LaunchPrerequisite[] = [ + { + step: 'Unlock Wallet', + 
description: + 'The wallet must be unlocked before interacting with it. Use the extension UI to enter your password.', + }, + { + step: 'Configure Network', + description: + 'Ensure the correct network is selected (e.g., Ethereum Mainnet, Sepolia, or custom network).', + }, + { + step: 'Set Up Accounts', + description: + 'Import or create accounts as needed. Ensure the active account has sufficient funds for transactions.', + }, +]; + +/** + * Launches a new browser session with the configured extension. + * + * @param input - The launch configuration options. + * @param context - The tool execution context. + * @returns The launch result with session details and prerequisites. + */ +export async function launchTool( + input: LaunchInput, + context: ToolContext, +): Promise> { + const { sessionManager } = context; + + try { + if (sessionManager.hasActiveSession()) { + if (input.force) { + await sessionManager.cleanup(); + } else { + return createToolError( + ErrorCodes.MM_SESSION_ALREADY_RUNNING, + 'A session is already running. Call cleanup first, or use --force.', + ); + } + } + + if (input.context) { + sessionManager.setContext(input.context); + } + + const result = await sessionManager.launch(input); + const isProdMode = sessionManager.getEnvironmentMode() === 'prod'; + + return createToolSuccess({ + ...result, + ...(isProdMode && { prerequisites: PROD_MODE_PREREQUISITES }), + }); + } catch (error) { + const message = extractErrorMessage(error); + + if (message.includes('EADDRINUSE') || message.includes('port')) { + return createToolError( + ErrorCodes.MM_PORT_IN_USE, + `Port conflict: ${message}`, + ); + } + + return createToolError( + ErrorCodes.MM_LAUNCH_FAILED, + `Launch failed: ${message}`, + ); + } +} diff --git a/src/tools/navigation.test.ts b/src/tools/navigation.test.ts new file mode 100644 index 0000000..c3ef76b --- /dev/null +++ b/src/tools/navigation.test.ts @@ -0,0 +1,471 @@ +/** + * Unit tests for navigation tool handlers. 
+ * + * Tests navigateTool, waitForNotificationTool, switchToTabTool, and closeTabTool + * with various navigation targets, tab operations, and error scenarios. + */ + +import { describe, it, expect, vi } from 'vitest'; + +import { + navigateTool, + waitForNotificationTool, + switchToTabTool, + closeTabTool, +} from './navigation.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockPage(url = 'about:blank') { + return { + url: vi.fn().mockReturnValue(url), + bringToFront: vi.fn().mockResolvedValue(undefined), + close: vi.fn().mockResolvedValue(undefined), + }; +} + +function createMockContext( + options: { + hasActive?: boolean; + page?: ReturnType; + trackedPages?: { page: unknown; role: string; url: string }[]; + } = {}, +): ToolContext { + const page = options.page ?? createMockPage(); + const sessionManager = createMockSessionManager({ + hasActive: options.hasActive ?? 
true, + trackedPages: options.trackedPages as never, + }); + + return { + sessionManager, + page: page as never, + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext; +} + +describe('navigation', () => { + describe('navigateTool', () => { + it('navigates to home screen', async () => { + const page = createMockPage('chrome-extension://ext-123/home.html'); + const context = createMockContext({ page }); + + const result = await navigateTool({ screen: 'home' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.navigated).toBe(true); + expect(result.result.currentUrl).toBe( + 'chrome-extension://ext-123/home.html', + ); + } + expect(context.sessionManager.navigateToHome).toHaveBeenCalled(); + }); + + it('navigates to settings screen', async () => { + const page = createMockPage('chrome-extension://ext-123/settings.html'); + const context = createMockContext({ page }); + + const result = await navigateTool({ screen: 'settings' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.currentUrl).toBe( + 'chrome-extension://ext-123/settings.html', + ); + } + expect(context.sessionManager.navigateToSettings).toHaveBeenCalled(); + }); + + it('navigates to notification screen', async () => { + const page = createMockPage( + 'chrome-extension://ext-123/notification.html', + ); + const context = createMockContext({ page }); + + const result = await navigateTool({ screen: 'notification' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.currentUrl).toBe( + 'chrome-extension://ext-123/notification.html', + ); + } + expect(context.sessionManager.navigateToNotification).toHaveBeenCalled(); + }); + + it('navigates to a custom URL', async () => { + const page = createMockPage('https://app.uniswap.org'); + const context = createMockContext({ page }); + vi.spyOn(context.sessionManager, 'navigateToUrl').mockResolvedValue( + page as never, + 
); + + const result = await navigateTool( + { screen: 'url', url: 'https://app.uniswap.org' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.currentUrl).toBe('https://app.uniswap.org'); + } + expect(context.sessionManager.navigateToUrl).toHaveBeenCalledWith( + 'https://app.uniswap.org', + ); + }); + + it('returns error when URL is missing', async () => { + const context = createMockContext(); + + const result = await navigateTool({ screen: 'url' } as never, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('url is required'); + } + }); + + it('returns error for unknown screen', async () => { + const context = createMockContext(); + + const result = await navigateTool( + { screen: 'invalid' } as never, + context, + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + expect(result.error.message).toContain('Unknown screen'); + } + }); + + it('classifies navigation failures', async () => { + const context = createMockContext(); + vi.spyOn(context.sessionManager, 'navigateToHome').mockRejectedValue( + new Error('Navigation failed'), + ); + + const result = await navigateTool({ screen: 'home' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NAVIGATION_FAILED); + } + }); + + it('returns no active session error when session is missing', async () => { + const context = createMockContext({ hasActive: false }); + + const result = await navigateTool({ screen: 'home' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + }); + + describe('waitForNotificationTool', () => { + it('waits for notification popup with default timeout', async () => { + const notificationPage = createMockPage( + 
'chrome-extension://ext-123/notification.html', + ); + const context = createMockContext(); + vi.spyOn( + context.sessionManager, + 'waitForNotificationPage', + ).mockResolvedValue(notificationPage as never); + + const result = await waitForNotificationTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.found).toBe(true); + expect(result.result.pageUrl).toBe( + 'chrome-extension://ext-123/notification.html', + ); + } + expect( + context.sessionManager.waitForNotificationPage, + ).toHaveBeenCalledWith(15000); + }); + + it('uses custom timeout value', async () => { + const notificationPage = createMockPage( + 'chrome-extension://ext-123/notification.html', + ); + const context = createMockContext(); + vi.spyOn( + context.sessionManager, + 'waitForNotificationPage', + ).mockResolvedValue(notificationPage as never); + + const result = await waitForNotificationTool( + { timeoutMs: 30000 }, + context, + ); + + expect(result.ok).toBe(true); + expect( + context.sessionManager.waitForNotificationPage, + ).toHaveBeenCalledWith(30000); + }); + + it('classifies notification timeout errors', async () => { + const context = createMockContext(); + vi.spyOn( + context.sessionManager, + 'waitForNotificationPage', + ).mockRejectedValue(new Error('Timeout 15000ms exceeded')); + + const result = await waitForNotificationTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NOTIFICATION_TIMEOUT); + } + }); + + it('returns no active session error when session is missing', async () => { + const context = createMockContext({ hasActive: false }); + + const result = await waitForNotificationTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + }); + + describe('switchToTabTool', () => { + it('switches to tab by role', async () => { + const extensionPage = createMockPage( + 
'chrome-extension://ext-123/home.html', + ); + const dappPage = createMockPage('https://app.uniswap.org'); + const context = createMockContext({ + page: extensionPage, + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + { page: dappPage, role: 'dapp', url: 'https://app.uniswap.org' }, + ], + }); + + const result = await switchToTabTool({ role: 'dapp' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.switched).toBe(true); + expect(result.result.activeTab.role).toBe('dapp'); + expect(result.result.activeTab.url).toBe('https://app.uniswap.org'); + } + expect(dappPage.bringToFront).toHaveBeenCalled(); + expect(context.sessionManager.setActivePage).toHaveBeenCalledWith( + dappPage, + ); + }); + + it('switches to tab by URL prefix', async () => { + const extensionPage = createMockPage( + 'chrome-extension://ext-123/home.html', + ); + const dappPage = createMockPage('https://app.uniswap.org/swap'); + const context = createMockContext({ + page: extensionPage, + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + { + page: dappPage, + role: 'dapp', + url: 'https://app.uniswap.org/swap', + }, + ], + }); + + const result = await switchToTabTool( + { url: 'https://app.uniswap.org' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.activeTab.url).toBe( + 'https://app.uniswap.org/swap', + ); + } + expect(dappPage.bringToFront).toHaveBeenCalled(); + }); + + it('returns invalid input when neither role nor url is provided', async () => { + const context = createMockContext(); + + const result = await switchToTabTool({} as never, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + } + }); + + it('returns tab not found when no matching tab exists', async () => { + const extensionPage = 
createMockPage( + 'chrome-extension://ext-123/home.html', + ); + const context = createMockContext({ + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + ], + }); + + const result = await switchToTabTool({ role: 'dapp' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND); + expect(result.error.message).toContain('No tab found matching: dapp'); + } + }); + }); + + describe('closeTabTool', () => { + it('closes tab by role', async () => { + const extensionPage = createMockPage( + 'chrome-extension://ext-123/home.html', + ); + const dappPage = createMockPage('https://app.uniswap.org'); + const context = createMockContext({ + page: extensionPage, + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + { page: dappPage, role: 'dapp', url: 'https://app.uniswap.org' }, + ], + }); + + const result = await closeTabTool({ role: 'dapp' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.closed).toBe(true); + expect(result.result.closedUrl).toBe('https://app.uniswap.org'); + } + expect(dappPage.close).toHaveBeenCalled(); + }); + + it('closes tab by URL prefix', async () => { + const extensionPage = createMockPage( + 'chrome-extension://ext-123/home.html', + ); + const dappPage = createMockPage('https://app.uniswap.org/swap'); + const context = createMockContext({ + page: extensionPage, + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + { + page: dappPage, + role: 'dapp', + url: 'https://app.uniswap.org/swap', + }, + ], + }); + + const result = await closeTabTool( + { url: 'https://app.uniswap.org' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.closedUrl).toBe('https://app.uniswap.org/swap'); + } + 
expect(dappPage.close).toHaveBeenCalled(); + }); + + it('switches to extension tab when closing the active tab', async () => { + const extensionPage = createMockPage( + 'chrome-extension://ext-123/home.html', + ); + const dappPage = createMockPage('https://app.uniswap.org'); + const context = createMockContext({ + page: dappPage, + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + { page: dappPage, role: 'dapp', url: 'https://app.uniswap.org' }, + ], + }); + + const result = await closeTabTool({ role: 'dapp' }, context); + + expect(result.ok).toBe(true); + expect(extensionPage.bringToFront).toHaveBeenCalled(); + expect(context.sessionManager.setActivePage).toHaveBeenCalledWith( + extensionPage, + ); + expect(dappPage.close).toHaveBeenCalled(); + }); + + it('returns invalid input when neither role nor url is provided', async () => { + const context = createMockContext(); + + const result = await closeTabTool({} as never, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_INVALID_INPUT); + } + }); + + it('returns tab not found when no matching tab exists', async () => { + const extensionPage = createMockPage( + 'chrome-extension://ext-123/home.html', + ); + const context = createMockContext({ + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + ], + }); + + const result = await closeTabTool({ role: 'dapp' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_TAB_NOT_FOUND); + expect(result.error.message).toContain('No tab found matching: dapp'); + } + }); + }); +}); diff --git a/src/tools/navigation.ts b/src/tools/navigation.ts new file mode 100644 index 0000000..5844a5f --- /dev/null +++ b/src/tools/navigation.ts @@ -0,0 +1,247 @@ +import { + classifyNavigationError, + classifyNotificationError, + 
classifyTabError,
+} from './error-classification.js';
+import type {
+  CloseTabInput,
+  CloseTabResult,
+  NavigateInput,
+  NavigateResult,
+  SwitchToTabInput,
+  SwitchToTabResult,
+  WaitForNotificationInput,
+  WaitForNotificationResult,
+} from './types';
+import { ErrorCodes } from './types';
+import { DEFAULT_INTERACTION_TIMEOUT_MS } from './utils/constants.js';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Navigates the active session to a built-in screen or to an arbitrary URL.
+ *
+ * @param input - Target `screen`; `url` is required when `screen` is "url".
+ * @param context - Tool context providing the session manager and page.
+ * @returns `context.page`'s URL after navigating, or a classified tool error.
+ */
+export async function navigateTool(
+  input: NavigateInput,
+  context: ToolContext,
+): Promise<ToolResponse<NavigateResult>> {
+  const missingSession = requireActiveSession<NavigateResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  if (input.screen === 'url' && !input.url) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'url is required when screen is "url"',
+    );
+  }
+
+  const validScreens = ['home', 'settings', 'url', 'notification'];
+  if (!validScreens.includes(input.screen)) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      `Unknown screen: ${String(input.screen)}`,
+    );
+  }
+
+  try {
+    switch (input.screen) {
+      case 'home':
+        await context.sessionManager.navigateToHome();
+        break;
+      case 'settings':
+        await context.sessionManager.navigateToSettings();
+        break;
+      case 'url':
+        await context.sessionManager.navigateToUrl(input.url as string);
+        break;
+      case 'notification':
+        await context.sessionManager.navigateToNotification();
+        break;
+      default: // guarded by the validScreens check above; kept as a backstop
+        throw new Error(`Unsupported screen: ${String(input.screen)}`);
+    }
+
+    return createToolSuccess({
+      navigated: true,
+      currentUrl: context.page.url(),
+    });
+  } catch (error) {
+    const errorInfo = classifyNavigationError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
+
+/**
+ * Waits for the session manager to report a notification page.
+ *
+ * @param input - `timeoutMs` falls back to DEFAULT_INTERACTION_TIMEOUT_MS.
+ * @param context - The tool execution context.
+ * @returns The notification page URL, or a classified error on failure.
+ */
+export async function waitForNotificationTool(
+  input: WaitForNotificationInput,
+  context: ToolContext,
+): Promise<ToolResponse<WaitForNotificationResult>> {
+  const missingSession =
+    requireActiveSession<WaitForNotificationResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  const timeoutMs = input.timeoutMs ?? DEFAULT_INTERACTION_TIMEOUT_MS;
+
+  try {
+    const notificationPage =
+      await context.sessionManager.waitForNotificationPage(timeoutMs);
+
+    return createToolSuccess({
+      found: true,
+      pageUrl: notificationPage.url(),
+    });
+  } catch (error) {
+    const errorInfo = classifyNotificationError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
+
+/**
+ * Switches the active page to the first tracked tab matching role or URL.
+ *
+ * @param input - `role` (checked first) or a `url` prefix; one is required.
+ * @param context - The tool execution context.
+ * @returns The active tab's role and URL after switching.
+ */
+export async function switchToTabTool(
+  input: SwitchToTabInput,
+  context: ToolContext,
+): Promise<ToolResponse<SwitchToTabResult>> {
+  const missingSession = requireActiveSession<SwitchToTabResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  if (!input.role && !input.url) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Either role or url must be provided',
+    );
+  }
+
+  try {
+    const trackedPages = context.sessionManager.getTrackedPages();
+    const targetPage = trackedPages.find((trackedPage) => {
+      if (input.role) { // role takes precedence over url
+        return trackedPage.role === input.role;
+      }
+      if (input.url) { // url is matched as a prefix of the tracked URL
+        return trackedPage.url.startsWith(input.url);
+      }
+      return false;
+    });
+
+    if (!targetPage) {
+      const availableTabs = trackedPages.map((trackedPage) => ({
+        role: trackedPage.role,
+        url: trackedPage.url,
+      }));
+      throw new Error(
+        `No tab found matching: ${input.role ?? input.url}. Available tabs: ${JSON.stringify(availableTabs)}`,
+      );
+    }
+
+    await targetPage.page.bringToFront();
+    context.sessionManager.setActivePage(targetPage.page);
+
+    const activeTabInfo = context.sessionManager
+      .getTrackedPages() // re-read: the role is taken from the current list
+      .find((trackedPage) => trackedPage.page === targetPage.page);
+
+    return createToolSuccess({
+      switched: true,
+      activeTab: {
+        role: activeTabInfo?.role ?? 'other',
+        url: targetPage.page.url(),
+      },
+    });
+  } catch (error) {
+    const errorInfo = classifyTabError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
+
+/**
+ * Closes the first tracked tab matching the given role or URL prefix.
+ *
+ * @param input - `role` (checked first) or a `url` prefix; one is required.
+ * @param context - The tool execution context.
+ * @returns The tracked URL of the closed tab.
+ */
+export async function closeTabTool(
+  input: CloseTabInput,
+  context: ToolContext,
+): Promise<ToolResponse<CloseTabResult>> {
+  const missingSession = requireActiveSession<CloseTabResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  if (!input.role && !input.url) {
+    return createToolError(
+      ErrorCodes.MM_INVALID_INPUT,
+      'Either role or url must be provided',
+    );
+  }
+
+  try {
+    const trackedPages = context.sessionManager.getTrackedPages();
+    const targetPage = trackedPages.find((trackedPage) => {
+      if (input.role) { // role takes precedence over url
+        return trackedPage.role === input.role;
+      }
+      if (input.url) { // url is matched as a prefix of the tracked URL
+        return trackedPage.url.startsWith(input.url);
+      }
+      return false;
+    });
+
+    if (!targetPage) {
+      throw new Error(`No tab found matching: ${input.role ?? input.url}`);
+    }
+
+    const closedUrl = targetPage.url;
+
+    if (targetPage.page === context.page) { // closing the active tab
+      const otherPages = trackedPages.filter(
+        (trackedPage) => trackedPage.page !== targetPage.page,
+      );
+      const fallbackPage =
+        otherPages.find((trackedPage) => trackedPage.role === 'extension') ??
+        otherPages[0];
+
+      if (fallbackPage) { // activate the replacement before closing
+        await fallbackPage.page.bringToFront();
+        context.sessionManager.setActivePage(fallbackPage.page);
+      }
+    }
+
+    await targetPage.page.close();
+
+    return createToolSuccess({
+      closed: true,
+      closedUrl,
+    });
+  } catch (error) {
+    const errorInfo = classifyTabError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
diff --git a/src/tools/registry.test.ts b/src/tools/registry.test.ts
new file mode 100644
index 0000000..98f54ff
--- /dev/null
+++ b/src/tools/registry.test.ts
@@ -0,0 +1,82 @@
+import { describe, expect, it } from 'vitest';
+
+import { toolRegistry, TOOL_CATEGORIES, getToolCategory } from './registry.js';
+
+describe('toolRegistry', () => {
+  it('has expected tool entries', () => {
+    const expectedTools = [
+      'build',
+      'launch',
+      'cleanup',
+      'click',
+      'type',
+      'navigate',
+      'screenshot',
+      'describe_screen',
+      'clipboard',
+      'run_steps',
+    ];
+
+    for (const toolName of expectedTools) {
+      expect(toolRegistry.has(toolName)).toBe(true);
+    }
+  });
+
+  it('returns a function for launch', () => {
+    expect(typeof toolRegistry.get('launch')).toBe('function');
+  });
+
+  it('returns undefined for a nonexistent tool', () => {
+    expect(toolRegistry.get('nonexistent')).toBeUndefined();
+  });
+
+  it('has the expected number of entries', () => {
+    expect(toolRegistry.size).toBe(28);
+  });
+
+  it('stores only functions as values', () => {
+    for (const handler of toolRegistry.values()) {
+      expect(typeof handler).toBe('function');
+    }
+  });
+
+  it('uses unprefixed keys', () => {
+    for (const key of toolRegistry.keys()) {
+      expect(key.startsWith('mm_')).toBe(false);
+    }
+  });
+});
+
+describe('TOOL_CATEGORIES and getToolCategory', () => {
+  it('every key in toolRegistry exists in TOOL_CATEGORIES', () => {
+    for (const key of toolRegistry.keys()) {
+      expect(TOOL_CATEGORIES).toHaveProperty(key);
+    }
+  });
+
+  it('every key in TOOL_CATEGORIES exists in toolRegistry', () => {
+    for (const key of
Object.keys(TOOL_CATEGORIES)) {
+      expect(toolRegistry.has(key)).toBe(true);
+    }
+  });
+
+  it('getToolCategory returns mutating for nonexistent tool', () => {
+    expect(getToolCategory('nonexistent_tool')).toBe('mutating');
+  });
+
+  it('getToolCategory returns mutating for click', () => {
+    expect(getToolCategory('click')).toBe('mutating');
+  });
+
+  it('getToolCategory returns readonly for knowledge_last', () => {
+    expect(getToolCategory('knowledge_last')).toBe('readonly');
+  });
+
+  it('getToolCategory returns discovery for describe_screen', () => {
+    expect(getToolCategory('describe_screen')).toBe('discovery');
+  });
+
+  it('getToolCategory returns batch for run_steps', () => {
+    expect(getToolCategory('run_steps')).toBe('batch');
+  });
+});
diff --git a/src/tools/registry.ts b/src/tools/registry.ts
new file mode 100644
index 0000000..0df43dd
--- /dev/null
+++ b/src/tools/registry.ts
@@ -0,0 +1,135 @@
+import { runStepsTool } from './batch.js';
+import { buildTool } from './build.js';
+import { cleanupTool } from './cleanup.js';
+import { clipboardTool } from './clipboard.js';
+import { getContextTool, setContextTool } from './context.js';
+import {
+  accessibilitySnapshotTool,
+  describeScreenTool,
+  listTestIdsTool,
+} from './discovery-tools.js';
+import {
+  clickTool,
+  getTextTool,
+  typeTool,
+  waitForTool,
+} from './interaction.js';
+import {
+  knowledgeLastTool,
+  knowledgeSearchTool,
+  knowledgeSessionsTool,
+  knowledgeSummarizeTool,
+} from './knowledge.js';
+import { launchTool } from './launch.js';
+import {
+  closeTabTool,
+  navigateTool,
+  switchToTabTool,
+  waitForNotificationTool,
+} from './navigation.js';
+import { screenshotTool } from './screenshot.js';
+import {
+  getContractAddressTool,
+  listContractsTool,
+  seedContractTool,
+  seedContractsTool,
+} from './seeding.js';
+import { getStateTool } from './state.js';
+import type { ToolFunction } from '../types/http.js';
+
+// The registry holds tools with heterogeneous parameter types. TypeScript's
+// contravariant function parameters prevent assigning a ToolFunction with a
+// specific input type to a ToolFunction with a wider one, so `any` is the
+// standard pattern for type-erased function maps. Input safety is enforced
+// at the Zod validation boundary.
+// NOTE(review): the type arguments on the Map below were reconstructed;
+// confirm them against the ToolFunction declaration in ../types/http.js.
+export const toolRegistry = new Map<string, ToolFunction<any, any>>([
+  ['build', buildTool],
+  ['launch', launchTool],
+  ['cleanup', cleanupTool],
+  ['get_state', getStateTool],
+  ['navigate', navigateTool],
+  ['wait_for_notification', waitForNotificationTool],
+  ['switch_to_tab', switchToTabTool],
+  ['close_tab', closeTabTool],
+  ['list_testids', listTestIdsTool],
+  ['accessibility_snapshot', accessibilitySnapshotTool],
+  ['describe_screen', describeScreenTool],
+  ['screenshot', screenshotTool],
+  ['click', clickTool],
+  ['type', typeTool],
+  ['wait_for', waitForTool],
+  ['get_text', getTextTool],
+  ['knowledge_last', knowledgeLastTool],
+  ['knowledge_search', knowledgeSearchTool],
+  ['knowledge_summarize', knowledgeSummarizeTool],
+  ['knowledge_sessions', knowledgeSessionsTool],
+  ['seed_contract', seedContractTool],
+  ['seed_contracts', seedContractsTool],
+  ['get_contract_address', getContractAddressTool],
+  ['list_contracts', listContractsTool],
+  ['run_steps', runStepsTool],
+  ['set_context', setContextTool],
+  ['get_context', getContextTool],
+  ['clipboard', clipboardTool],
+]);
+
+export type ToolCategory = 'mutating' | 'readonly' | 'discovery' | 'batch';
+
+export const TOOL_CATEGORIES: Record<string, ToolCategory> = {
+  // MUTATING (13)
+  click: 'mutating',
+  type: 'mutating',
+  navigate: 'mutating',
+  launch: 'mutating',
+  cleanup: 'mutating',
+  switch_to_tab: 'mutating',
+  close_tab: 'mutating',
+  clipboard: 'mutating',
+  build: 'mutating',
+  wait_for: 'mutating',
+  wait_for_notification: 'mutating',
+  seed_contract: 'mutating',
+  seed_contracts: 'mutating',
+  // READONLY (10)
+  knowledge_last: 'readonly',
+  knowledge_search: 'readonly',
+  knowledge_summarize: 'readonly',
+  knowledge_sessions: 'readonly',
+  get_text: 'readonly',
+  get_state: 'readonly',
+  get_context: 'readonly',
+  // set_context is blocked while a session is active (MM_CONTEXT_SWITCH_BLOCKED),
+  // so Playwright observations would never be collected. Classified as readonly
+  // since it never runs in a state where page observations are meaningful.
+  set_context: 'readonly',
+  list_contracts: 'readonly',
+  get_contract_address: 'readonly',
+  // DISCOVERY (4)
+  describe_screen: 'discovery',
+  list_testids: 'discovery',
+  accessibility_snapshot: 'discovery',
+  screenshot: 'discovery',
+  // BATCH (1)
+  run_steps: 'batch',
+};
+
+/**
+ * Returns the category for a registered tool name.
+ * Unknown tools default to 'mutating' — the safe default that ensures
+ * new tools get observations until explicitly categorized.
+ *
+ * Only own keys of TOOL_CATEGORIES are consulted, so names that happen to
+ * match inherited Object.prototype members (e.g. 'constructor', 'toString')
+ * are treated as unknown instead of returning a non-category value.
+ *
+ * @param toolName - The registered tool name to look up.
+ * @returns The tool's category, or 'mutating' for unknown tools.
+ */
+export function getToolCategory(toolName: string): ToolCategory {
+  if (!Object.prototype.hasOwnProperty.call(TOOL_CATEGORIES, toolName)) {
+    return 'mutating';
+  }
+  return TOOL_CATEGORIES[toolName] ?? 'mutating';
+}
diff --git a/src/tools/screenshot.test.ts b/src/tools/screenshot.test.ts
new file mode 100644
index 0000000..1b2ee2e
--- /dev/null
+++ b/src/tools/screenshot.test.ts
@@ -0,0 +1,261 @@
+/**
+ * Unit tests for screenshot tool handler.
+ *
+ * Tests screenshotTool with various options including base64 encoding,
+ * selector scoping, and error handling.
+ */ + +import { describe, it, expect, vi } from 'vitest'; + +import { screenshotTool } from './screenshot.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockContext( + options: { + hasActive?: boolean; + } = {}, +): ToolContext { + const { hasActive = true } = options; + + return { + sessionManager: createMockSessionManager({ hasActive }), + page: {} as ToolContext['page'], + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext; +} + +describe('screenshotTool', () => { + describe('basic screenshot', () => { + it('captures full page screenshot by default', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({ + path: '/path/to/screenshot.png', + width: 1280, + height: 720, + base64: 'mock-base64', + }); + + const result = await screenshotTool({ name: 'test-screenshot' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.path).toBe('/path/to/screenshot.png'); + expect(result.result.width).toBe(1280); + expect(result.result.height).toBe(720); + expect(result.result.base64).toBeUndefined(); + } + expect(context.sessionManager.screenshot).toHaveBeenCalledWith({ + name: 'test-screenshot', + fullPage: true, + selector: undefined, + }); + }); + + it('captures viewport-only screenshot when fullPage is false', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({ + path: '/path/to/screenshot.png', + width: 1280, + height: 720, + base64: 'mock-base64', + }); + + const result = await screenshotTool( + { + name: 'viewport-screenshot', + fullPage: false, + }, + context, + ); + + expect(result.ok).toBe(true); + expect(context.sessionManager.screenshot).toHaveBeenCalledWith({ + name: 'viewport-screenshot', + 
fullPage: false, + selector: undefined, + }); + }); + }); + + describe('with base64 encoding', () => { + it('includes base64 when includeBase64 is true', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({ + path: '/path/to/screenshot.png', + width: 1280, + height: 720, + base64: + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', + }); + + const result = await screenshotTool( + { + name: 'base64-screenshot', + includeBase64: true, + }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.base64).toBe( + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', + ); + } + }); + + it('excludes base64 when includeBase64 is false', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({ + path: '/path/to/screenshot.png', + width: 1280, + height: 720, + base64: + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', + }); + + const result = await screenshotTool( + { + name: 'no-base64-screenshot', + includeBase64: false, + }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.base64).toBeUndefined(); + } + }); + }); + + describe('with selector scoping', () => { + it('captures screenshot of specific element', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({ + path: '/path/to/element-screenshot.png', + width: 400, + height: 200, + base64: 'mock-base64', + }); + + const result = await screenshotTool( + { + name: 'element-screenshot', + selector: '[data-testid="account-menu"]', + }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.width).toBe(400); + expect(result.result.height).toBe(200); + } + 
expect(context.sessionManager.screenshot).toHaveBeenCalledWith({ + name: 'element-screenshot', + fullPage: true, + selector: '[data-testid="account-menu"]', + }); + }); + + it('combines selector with fullPage false', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({ + path: '/path/to/element-screenshot.png', + width: 400, + height: 200, + base64: 'mock-base64', + }); + + const result = await screenshotTool( + { + name: 'element-viewport-screenshot', + selector: '.modal-content', + fullPage: false, + }, + context, + ); + + expect(result.ok).toBe(true); + expect(context.sessionManager.screenshot).toHaveBeenCalledWith({ + name: 'element-viewport-screenshot', + fullPage: false, + selector: '.modal-content', + }); + }); + }); + + describe('error handling', () => { + it('generates default name when not provided', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockResolvedValue({ + path: '/path/to/screenshot.png', + width: 1280, + height: 720, + }); + + const result = await screenshotTool({}, context); + + expect(result.ok).toBe(true); + expect(context.sessionManager.screenshot).toHaveBeenCalledWith( + expect.objectContaining({ + name: expect.stringMatching(/^screenshot-\d+$/u), + }), + ); + }); + + it('returns error when no active session', async () => { + const context = createMockContext({ hasActive: false }); + + const result = await screenshotTool({ name: 'test-screenshot' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + + it('returns error when screenshot fails', async () => { + const context = createMockContext(); + + vi.spyOn(context.sessionManager, 'screenshot').mockRejectedValue( + new Error('Screenshot failed'), + ); + + const result = await screenshotTool({ name: 'test-screenshot' }, context); + + expect(result.ok).toBe(false); + if 
(!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_SCREENSHOT_FAILED);
+        expect(result.error.message).toContain('Screenshot failed');
+      }
+    });
+
+    it('returns error when page is closed', async () => {
+      const context = createMockContext();
+
+      vi.spyOn(context.sessionManager, 'screenshot').mockRejectedValue(
+        new Error('Target page, context or browser has been closed'),
+      );
+
+      const result = await screenshotTool({ name: 'test-screenshot' }, context);
+
+      expect(result.ok).toBe(false);
+      if (!result.ok) {
+        expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED);
+      }
+    });
+  });
+});
diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts
new file mode 100644
index 0000000..5a842c4
--- /dev/null
+++ b/src/tools/screenshot.ts
@@ -0,0 +1,51 @@
+import { classifyScreenshotError } from './error-classification.js';
+import type { ScreenshotInput, ScreenshotToolResult } from './types';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Captures a screenshot through the session manager.
+ *
+ * @param input - Optional name, fullPage (default true), selector, includeBase64.
+ * @param context - The tool execution context.
+ * @returns The screenshot path and dimensions, plus base64 when requested.
+ */
+export async function screenshotTool(
+  input: ScreenshotInput,
+  context: ToolContext,
+): Promise<ToolResponse<ScreenshotToolResult>> {
+  const missingSession = requireActiveSession<ScreenshotToolResult>(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  try {
+    // Fall back to a timestamp-based name when the caller supplies none.
+    const screenshotName = input.name ?? `screenshot-${Date.now()}`;
+    const result = await context.sessionManager.screenshot({
+      name: screenshotName,
+      fullPage: input.fullPage ?? true,
+      selector: input.selector,
+    });
+
+    const response: ScreenshotToolResult = {
+      path: result.path,
+      width: result.width,
+      height: result.height,
+    };
+
+    // base64 is copied into the response only when explicitly requested.
+    if (input.includeBase64) {
+      response.base64 = result.base64;
+    }
+
+    return createToolSuccess(response);
+  } catch (error) {
+    const errorInfo = classifyScreenshotError(error);
+    return createToolError(errorInfo.code, errorInfo.message);
+  }
+}
diff --git a/src/tools/seeding.test.ts b/src/tools/seeding.test.ts
new file mode 100644
index 0000000..81738a1
--- /dev/null
+++ b/src/tools/seeding.test.ts
@@ -0,0 +1,382 @@
+/**
+ * Unit tests for seeding tool handlers.
+ *
+ * Tests contract deployment handlers including single/multiple contract deployment,
+ * address lookup, and contract listing with ContractSeedingCapability.
+ */
+
+import { describe, it, expect, vi } from 'vitest';
+
+import {
+  seedContractTool,
+  seedContractsTool,
+  getContractAddressTool,
+  listContractsTool,
+} from './seeding.js';
+import { createMockSessionManager } from './test-utils/mock-factories.js';
+import { ErrorCodes } from './types';
+import type { ContractSeedingCapability } from '../capabilities/types.js';
+import type { ToolContext } from '../types/http.js';
+
+function createMockSeedingCapability(): ContractSeedingCapability {
+  return {
+    deployContract: vi.fn(),
+    deployContracts: vi.fn(),
+    getContractAddress: vi.fn(),
+    listDeployedContracts: vi.fn(),
+    getAvailableContracts: vi.fn(),
+    clearRegistry: vi.fn(),
+    initialize: vi.fn(),
+  };
+}
+
+function createMockContext(
+  options: {
+    hasActive?: boolean;
+    workflowCapability?: ContractSeedingCapability;
+    sessionCapability?: ContractSeedingCapability;
+  } = {},
+): ToolContext {
+  const { hasActive = true, workflowCapability, sessionCapability } = options;
+
+  const sessionManager = createMockSessionManager({ hasActive });
+  sessionManager.getContractSeedingCapability.mockReturnValue(
+    sessionCapability,
+  );
+
+  return {
+    sessionManager,
+    page: {} as
ToolContext['page'], + refMap: new Map(), + workflowContext: { + config: { + environment: 'e2e', + extensionName: 'MetaMask', + }, + contractSeeding: workflowCapability, + }, + knowledgeStore: {} as ToolContext['knowledgeStore'], + toolRegistry: new Map(), + } as unknown as ToolContext; +} + +describe('seeding tools', () => { + describe('seedContractTool', () => { + it('deploys a single contract using workflowContext capability', async () => { + const deployedAt = new Date().toISOString(); + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'deployContract').mockResolvedValue({ + name: 'hst', + address: '0x1234567890123456789012345678901234567890', + deployedAt, + }); + const context = createMockContext({ workflowCapability: capability }); + + const result = await seedContractTool({ contractName: 'hst' }, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result).toStrictEqual({ + contractName: 'hst', + contractAddress: '0x1234567890123456789012345678901234567890', + deployedAt, + }); + } + expect(capability.deployContract).toHaveBeenCalledWith('hst', { + hardfork: undefined, + deployerOptions: undefined, + }); + expect( + context.sessionManager.getContractSeedingCapability, + ).not.toHaveBeenCalled(); + }); + + it('falls back to session manager capability when workflowContext lacks one', async () => { + const deployedAt = new Date().toISOString(); + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'deployContract').mockResolvedValue({ + name: 'nfts', + address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd', + deployedAt, + }); + const context = createMockContext({ sessionCapability: capability }); + + const result = await seedContractTool( + { contractName: 'nfts', hardfork: 'shanghai' }, + context, + ); + + expect(result.ok).toBe(true); + expect(capability.deployContract).toHaveBeenCalledWith('nfts', { + hardfork: 'shanghai', + deployerOptions: undefined, + }); + expect( + 
context.sessionManager.getContractSeedingCapability, + ).toHaveBeenCalled(); + }); + + it('returns contract not found errors from deployment failures', async () => { + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'deployContract').mockRejectedValue( + new Error('Contract not found: unknown'), + ); + const context = createMockContext({ workflowCapability: capability }); + + const result = await seedContractTool({ contractName: 'hst' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_CONTRACT_NOT_FOUND); + expect(result.error.message).toContain('Contract not found'); + } + }); + + it('returns capability unavailable when no seeding capability exists', async () => { + const context = createMockContext(); + + const result = await seedContractTool({ contractName: 'hst' }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); + } + }); + }); + + describe('seedContractsTool', () => { + it('deploys multiple contracts and maps deployed and failed results', async () => { + const deployedAt = new Date().toISOString(); + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'deployContracts').mockResolvedValue({ + deployed: [ + { + name: 'hst', + address: '0x1234567890123456789012345678901234567890', + deployedAt, + }, + ], + failed: [ + { + name: 'nfts', + error: 'Contract deployment failed', + }, + ], + }); + const context = createMockContext({ workflowCapability: capability }); + + const result = await seedContractsTool( + { contracts: ['hst', 'nfts'] }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result).toStrictEqual({ + deployed: [ + { + contractName: 'hst', + contractAddress: '0x1234567890123456789012345678901234567890', + deployedAt, + }, + ], + failed: [ + { + contractName: 'nfts', + error: 'Contract deployment failed', + }, + ], + }); 
+ } + expect(capability.deployContracts).toHaveBeenCalledWith(['hst', 'nfts'], { + hardfork: undefined, + }); + }); + + it('returns seed failed errors for complete deployment failures', async () => { + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'deployContracts').mockRejectedValue( + new Error('Anvil not running'), + ); + const context = createMockContext({ workflowCapability: capability }); + + const result = await seedContractsTool({ contracts: ['hst'] }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED); + expect(result.error.message).toContain('Anvil not running'); + } + }); + + it('returns capability unavailable when no seeding capability exists', async () => { + const context = createMockContext(); + + const result = await seedContractsTool({ contracts: ['hst'] }, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); + } + }); + }); + + describe('getContractAddressTool', () => { + it('returns the contract address when found', async () => { + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'getContractAddress').mockReturnValue( + '0x1234567890123456789012345678901234567890', + ); + const context = createMockContext({ workflowCapability: capability }); + + const result = await getContractAddressTool( + { contractName: 'hst' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result).toStrictEqual({ + contractName: 'hst', + contractAddress: '0x1234567890123456789012345678901234567890', + }); + } + expect(capability.getContractAddress).toHaveBeenCalledWith('hst'); + }); + + it('returns null when the contract address is missing', async () => { + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'getContractAddress').mockReturnValue(null); + const context = createMockContext({ workflowCapability: 
capability }); + + const result = await getContractAddressTool( + { contractName: 'nfts' }, + context, + ); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result).toStrictEqual({ + contractName: 'nfts', + contractAddress: null, + }); + } + }); + + it('returns error when getContractAddress throws', async () => { + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'getContractAddress').mockImplementation(() => { + throw new Error('Connection lost'); + }); + const context = createMockContext({ workflowCapability: capability }); + + const result = await getContractAddressTool( + { contractName: 'hst' }, + context, + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED); + expect(result.error.message).toContain('Connection lost'); + } + }); + + it('returns capability unavailable when no seeding capability exists', async () => { + const context = createMockContext(); + + const result = await getContractAddressTool( + { contractName: 'hst' }, + context, + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); + } + }); + }); + + describe('listContractsTool', () => { + it('returns the list of deployed contracts', async () => { + const deployedAt1 = new Date().toISOString(); + const deployedAt2 = new Date(Date.now() + 1000).toISOString(); + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'listDeployedContracts').mockReturnValue([ + { + name: 'hst', + address: '0x1234567890123456789012345678901234567890', + deployedAt: deployedAt1, + }, + { + name: 'nfts', + address: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd', + deployedAt: deployedAt2, + }, + ]); + const context = createMockContext({ workflowCapability: capability }); + + const result = await listContractsTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result).toStrictEqual({ 
+ contracts: [ + { + contractName: 'hst', + contractAddress: '0x1234567890123456789012345678901234567890', + deployedAt: deployedAt1, + }, + { + contractName: 'nfts', + contractAddress: '0xabcdefabcdefabcdefabcdefabcdefabcdefabcd', + deployedAt: deployedAt2, + }, + ], + }); + } + expect(capability.listDeployedContracts).toHaveBeenCalled(); + }); + + it('returns capability unavailable when no seeding capability exists', async () => { + const context = createMockContext(); + + const result = await listContractsTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE); + expect(result.error.message).toContain( + 'ContractSeedingCapability not available', + ); + } + }); + + it('returns no active session when the session is missing', async () => { + const context = createMockContext({ hasActive: false }); + + const result = await listContractsTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + + it('returns error when listDeployedContracts throws', async () => { + const capability = createMockSeedingCapability(); + vi.spyOn(capability, 'listDeployedContracts').mockImplementation(() => { + throw new Error('Connection lost'); + }); + const context = createMockContext({ workflowCapability: capability }); + + const result = await listContractsTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_SEED_FAILED); + expect(result.error.message).toContain('Connection lost'); + } + }); + }); +}); diff --git a/src/tools/seeding.ts b/src/tools/seeding.ts new file mode 100644 index 0000000..1a36cbb --- /dev/null +++ b/src/tools/seeding.ts @@ -0,0 +1,187 @@ +import { classifySeedingError } from './error-classification.js'; +import type { + GetContractAddressInput, + GetContractAddressResult, + ListDeployedContractsInput, + 
ListDeployedContractsResult,
+  SeedContractInput,
+  SeedContractResult,
+  SeedContractsInput,
+  SeedContractsResult,
+} from './types';
+import { ErrorCodes } from './types';
+import {
+  createToolError,
+  createToolSuccess,
+  requireActiveSession,
+} from './utils.js';
+import type { ContractSeedingCapability } from '../capabilities/types.js';
+import type { ToolContext, ToolResponse } from '../types/http.js';
+
+/**
+ * Resolves the seeding capability (workflow context first, then session).
+ *
+ * @param context - The tool execution context.
+ * @returns The capability, or an error response if none or no active session.
+ */
+function getSeedingCapability(
+  context: ToolContext,
+): ContractSeedingCapability | ToolResponse {
+  const missingSession = requireActiveSession(context);
+  if (missingSession) {
+    return missingSession;
+  }
+
+  const capability =
+    context.workflowContext.contractSeeding ??
+    context.sessionManager.getContractSeedingCapability();
+
+  if (!capability) {
+    return createToolError(
+      ErrorCodes.MM_CAPABILITY_NOT_AVAILABLE,
+      'ContractSeedingCapability not available. Contract seeding tools require running in e2e mode with the MetaMask extension wrapper, which provides Anvil chain and contract deployment support.',
+    );
+  }
+
+  return capability;
+}
+
+/**
+ * Type guard telling a ToolResponse apart from a capability via its `ok` key.
+ *
+ * @param value - The capability or tool response to check.
+ * @returns True if the value has an `ok` property, i.e. is a ToolResponse.
+ */
+function isToolResponse(
+  value: ContractSeedingCapability | ToolResponse,
+): value is ToolResponse {
+  return 'ok' in value;
+}
+
+/**
+ * Deploys a single contract through the seeding capability.
+ *
+ * @param input - `contractName` plus optional `hardfork` / `deployerOptions`.
+ * @param context - The tool execution context.
+ * @returns The deployed contract's name, address and deployment timestamp.
+ */ +export async function seedContractTool( + input: SeedContractInput, + context: ToolContext, +): Promise> { + const capability = getSeedingCapability(context); + if (isToolResponse(capability)) { + return capability; + } + + try { + const deployed = await capability.deployContract(input.contractName, { + hardfork: input.hardfork, + deployerOptions: input.deployerOptions, + }); + + return createToolSuccess({ + contractName: deployed.name, + contractAddress: deployed.address, + deployedAt: deployed.deployedAt, + }); + } catch (error) { + const errorInfo = classifySeedingError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Deploys multiple smart contracts in batch to the local Anvil chain. + * + * @param input - The contract list and shared deployment options. + * @param context - The tool execution context. + * @returns The deployed and failed contract results. + */ +export async function seedContractsTool( + input: SeedContractsInput, + context: ToolContext, +): Promise> { + const capability = getSeedingCapability(context); + if (isToolResponse(capability)) { + return capability; + } + + try { + const seedResult = await capability.deployContracts(input.contracts, { + hardfork: input.hardfork, + }); + + return createToolSuccess({ + deployed: seedResult.deployed.map((deployedContract) => ({ + contractName: deployedContract.name, + contractAddress: deployedContract.address, + deployedAt: deployedContract.deployedAt, + })), + failed: seedResult.failed.map((failedDeployment) => ({ + contractName: failedDeployment.name, + error: failedDeployment.error, + })), + }); + } catch (error) { + const errorInfo = classifySeedingError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Looks up the deployed address of a contract by name. + * + * @param input - The contract name to look up. + * @param context - The tool execution context. + * @returns The contract name and its deployed address. 
+ */ +export async function getContractAddressTool( + input: GetContractAddressInput, + context: ToolContext, +): Promise> { + const capability = getSeedingCapability(context); + if (isToolResponse(capability)) { + return capability; + } + + try { + return createToolSuccess({ + contractName: input.contractName, + contractAddress: capability.getContractAddress(input.contractName), + }); + } catch (error) { + const errorInfo = classifySeedingError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} + +/** + * Lists all currently deployed contracts. + * + * @param _input - Unused input parameters. + * @param context - The tool execution context. + * @returns The list of deployed contracts with addresses and timestamps. + */ +export async function listContractsTool( + _input: ListDeployedContractsInput, + context: ToolContext, +): Promise> { + const capability = getSeedingCapability(context); + if (isToolResponse(capability)) { + return capability; + } + + try { + return createToolSuccess({ + contracts: capability.listDeployedContracts().map((deployedContract) => ({ + contractName: deployedContract.name, + contractAddress: deployedContract.address, + deployedAt: deployedContract.deployedAt, + })), + }); + } catch (error) { + const errorInfo = classifySeedingError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} diff --git a/src/tools/state.test.ts b/src/tools/state.test.ts new file mode 100644 index 0000000..3969f5a --- /dev/null +++ b/src/tools/state.test.ts @@ -0,0 +1,319 @@ +/** + * Unit tests for state tool handler. + * + * Tests handleGetState with various scenarios including state snapshot capability, + * tab tracking, and error handling. 
+ */ + +import { describe, it, expect, vi } from 'vitest'; + +import { getStateTool } from './state.js'; +import type { StateSnapshotCapability } from '../capabilities/types.js'; +import { createMockSessionManager } from './test-utils/mock-factories.js'; +import type { MockSessionManagerOptions } from './test-utils/mock-factories.js'; +import { ErrorCodes } from './types/errors.js'; +import type { ToolContext } from '../types/http.js'; + +function createMockPage(url = 'chrome-extension://ext-123/home.html') { + return { + url: vi.fn().mockReturnValue(url), + } as never; +} + +function createMockContext( + options: MockSessionManagerOptions & { + page?: ReturnType; + stateSnapshotCapability?: StateSnapshotCapability; + } = {}, +): ToolContext & { + sessionManager: ReturnType; +} { + const page = createMockPage(); + const sessionManager = createMockSessionManager(options); + + sessionManager.getPage.mockReturnValue(options.page ?? page); + sessionManager.getStateSnapshotCapability.mockReturnValue( + options.stateSnapshotCapability, + ); + + return { + sessionManager, + page: options.page ?? 
page, + refMap: new Map(), + workflowContext: {}, + knowledgeStore: {}, + } as unknown as ToolContext & { + sessionManager: ReturnType; + }; +} + +describe('getStateTool', () => { + describe('without state snapshot capability', () => { + it('returns extension state from session manager', async () => { + const page = createMockPage('chrome-extension://ext-123/home.html'); + const context = createMockContext({ + hasActive: true, + page, + extensionState: { + isLoaded: true, + currentUrl: 'chrome-extension://ext-123/home.html', + extensionId: 'ext-123', + isUnlocked: true, + currentScreen: 'home', + accountAddress: '0x1234567890123456789012345678901234567890', + networkName: 'Ethereum Mainnet', + chainId: 1, + balance: '1.5 ETH', + }, + trackedPages: [ + { + page, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + ], + }); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.state).toStrictEqual({ + isLoaded: true, + currentUrl: 'chrome-extension://ext-123/home.html', + extensionId: 'ext-123', + isUnlocked: true, + currentScreen: 'home', + accountAddress: '0x1234567890123456789012345678901234567890', + networkName: 'Ethereum Mainnet', + chainId: 1, + balance: '1.5 ETH', + }); + expect(result.result.tabs).toStrictEqual({ + active: { + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + tracked: [ + { + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + ], + }); + } + expect(context.sessionManager.getExtensionState).toHaveBeenCalled(); + }); + + it('includes multiple tracked pages in tabs', async () => { + const extensionPage = createMockPage( + 'chrome-extension://ext-123/home.html', + ); + const dappPage = createMockPage('https://app.uniswap.org'); + const context = createMockContext({ + hasActive: true, + page: extensionPage, + extensionState: { + isLoaded: true, + currentUrl: 'chrome-extension://ext-123/home.html', + 
extensionId: 'ext-123', + isUnlocked: true, + currentScreen: 'home', + accountAddress: '0x1234567890123456789012345678901234567890', + networkName: 'Ethereum Mainnet', + chainId: 1, + balance: '1.5 ETH', + }, + trackedPages: [ + { + page: extensionPage, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + { + page: dappPage, + role: 'dapp', + url: 'https://app.uniswap.org', + }, + ], + }); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.tabs).toBeDefined(); + expect(result.result.tabs?.tracked).toHaveLength(2); + expect(result.result.tabs?.tracked).toStrictEqual([ + { role: 'extension', url: 'chrome-extension://ext-123/home.html' }, + { role: 'dapp', url: 'https://app.uniswap.org' }, + ]); + } + }); + + it('handles active page without tracked page info', async () => { + const page = createMockPage('chrome-extension://ext-123/home.html'); + const context = createMockContext({ + hasActive: true, + page, + extensionState: { + isLoaded: true, + currentUrl: 'chrome-extension://ext-123/home.html', + extensionId: 'ext-123', + isUnlocked: false, + currentScreen: 'home', + accountAddress: null, + networkName: null, + chainId: null, + balance: null, + }, + trackedPages: [], + }); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.tabs).toBeDefined(); + expect(result.result.tabs?.active.role).toBe('other'); + expect(result.result.tabs?.active.url).toBe( + 'chrome-extension://ext-123/home.html', + ); + } + }); + }); + + describe('with state snapshot capability', () => { + it('uses state snapshot capability when provided', async () => { + const page = createMockPage('chrome-extension://ext-123/home.html'); + const stateSnapshotCapability: StateSnapshotCapability = { + getState: vi.fn().mockResolvedValue({ + isLoaded: true, + currentUrl: 'chrome-extension://ext-123/home.html', + extensionId: 'ext-123', 
+ isUnlocked: true, + currentScreen: 'home', + accountAddress: '0x1234567890123456789012345678901234567890', + networkName: 'Localhost 8545', + chainId: 1337, + balance: '25 ETH', + }), + detectCurrentScreen: vi.fn().mockResolvedValue('home'), + }; + const context = createMockContext({ + hasActive: true, + page, + sessionState: { + extensionId: 'ext-123', + ports: { anvil: 8545 }, + } as never, + trackedPages: [ + { + page, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + ], + stateSnapshotCapability, + }); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.result.state.chainId).toBe(1337); + expect(result.result.state.networkName).toBe('Localhost 8545'); + expect(result.result.state.balance).toBe('25 ETH'); + } + expect(stateSnapshotCapability.getState).toHaveBeenCalledWith(page, { + extensionId: 'ext-123', + chainId: 1337, + }); + expect(context.sessionManager.getExtensionState).not.toHaveBeenCalled(); + }); + + it('uses chainId 1 when anvil port not present', async () => { + const page = createMockPage('chrome-extension://ext-123/home.html'); + const stateSnapshotCapability: StateSnapshotCapability = { + getState: vi.fn().mockResolvedValue({ + isLoaded: true, + currentUrl: 'chrome-extension://ext-123/home.html', + extensionId: 'ext-123', + isUnlocked: true, + currentScreen: 'home', + accountAddress: '0x1234567890123456789012345678901234567890', + networkName: 'Ethereum Mainnet', + chainId: 1, + balance: '1.5 ETH', + }), + detectCurrentScreen: vi.fn().mockResolvedValue('home'), + }; + const context = createMockContext({ + hasActive: true, + page, + sessionState: { + extensionId: 'ext-123', + ports: {}, + } as never, + trackedPages: [ + { + page, + role: 'extension', + url: 'chrome-extension://ext-123/home.html', + }, + ], + stateSnapshotCapability, + }); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(true); + 
expect(stateSnapshotCapability.getState).toHaveBeenCalledWith(page, { + extensionId: 'ext-123', + chainId: 1, + }); + }); + }); + + describe('error handling', () => { + it('returns error when no active session', async () => { + const context = createMockContext({ hasActive: false }); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_NO_ACTIVE_SESSION); + } + }); + + it('returns error when getExtensionState fails', async () => { + const context = createMockContext({ hasActive: true }); + context.sessionManager.getExtensionState.mockRejectedValue( + new Error('Failed to get state'), + ); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_STATE_FAILED); + expect(result.error.message).toContain('Failed to get state'); + } + }); + + it('returns error when page is closed', async () => { + const context = createMockContext({ hasActive: true }); + context.sessionManager.getExtensionState.mockRejectedValue( + new Error('Target page, context or browser has been closed'), + ); + + const result = await getStateTool({}, context); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.code).toBe(ErrorCodes.MM_PAGE_CLOSED); + } + }); + }); +}); diff --git a/src/tools/state.ts b/src/tools/state.ts new file mode 100644 index 0000000..c974cce --- /dev/null +++ b/src/tools/state.ts @@ -0,0 +1,88 @@ +import type { Page } from '@playwright/test'; + +import { classifyStateError } from './error-classification.js'; +import type { GetStateResult } from './types'; +import { + createToolError, + createToolSuccess, + requireActiveSession, +} from './utils.js'; +import type { + ExtensionState, + StateSnapshotCapability, +} from '../capabilities/types.js'; +import type { ISessionManager } from '../server/session-manager.js'; +import type { ToolContext, ToolResponse } 
from '../types/http.js'; + +/** + * Retrieves the extension state using the snapshot capability or session manager. + * + * @param page - The active Playwright page. + * @param sessionManager - The session manager instance. + * @param stateSnapshotCapability - Optional capability for direct state snapshots. + * @returns The current extension state. + */ +async function getState( + page: Page, + sessionManager: ISessionManager, + stateSnapshotCapability?: StateSnapshotCapability, +): Promise { + if (stateSnapshotCapability) { + const extensionId = sessionManager.getSessionState()?.extensionId; + return stateSnapshotCapability.getState(page, { + extensionId, + chainId: sessionManager.getSessionState()?.ports?.anvil ? 1337 : 1, + }); + } + + return sessionManager.getExtensionState(); +} + +/** + * Retrieves the extension state and tracked tab information. + * + * @param _input - Unused input parameters. + * @param context - The tool execution context. + * @returns The extension state and tab details. + */ +export async function getStateTool( + _input: Record, + context: ToolContext, +): Promise> { + const missingSession = requireActiveSession(context); + if (missingSession) { + return missingSession; + } + + try { + const state = await getState( + context.page, + context.sessionManager, + context.workflowContext.stateSnapshot ?? + context.sessionManager.getStateSnapshotCapability(), + ); + + const trackedPages = context.sessionManager.getTrackedPages(); + const activePage = context.sessionManager.getPage(); + const activeTabInfo = trackedPages.find( + (trackedPage) => trackedPage.page === activePage, + ); + + return createToolSuccess({ + state, + tabs: { + active: { + role: activeTabInfo?.role ?? 
'other', + url: activePage.url(), + }, + tracked: trackedPages.map((trackedPage) => ({ + role: trackedPage.role, + url: trackedPage.url, + })), + }, + }); + } catch (error) { + const errorInfo = classifyStateError(error); + return createToolError(errorInfo.code, errorInfo.message); + } +} diff --git a/src/mcp-server/test-utils/index.ts b/src/tools/test-utils/index.ts similarity index 86% rename from src/mcp-server/test-utils/index.ts rename to src/tools/test-utils/index.ts index 4959175..76200db 100644 --- a/src/mcp-server/test-utils/index.ts +++ b/src/tools/test-utils/index.ts @@ -13,5 +13,3 @@ export { type MockLocatorOptions, type MockBrowserContextOptions, } from './mock-playwright.js'; - -export { flushPromises } from './flush-promises.js'; diff --git a/src/mcp-server/test-utils/mock-factories.test.ts b/src/tools/test-utils/mock-factories.test.ts similarity index 99% rename from src/mcp-server/test-utils/mock-factories.test.ts rename to src/tools/test-utils/mock-factories.test.ts index 9d490cf..0987102 100644 --- a/src/mcp-server/test-utils/mock-factories.test.ts +++ b/src/tools/test-utils/mock-factories.test.ts @@ -136,7 +136,7 @@ describe('mock-factories', () => { }); it('allows customization via options', async () => { - const customSteps = [{ tool: 'mm_click', screen: 'home' }]; + const customSteps = [{ tool: 'click', screen: 'home' }]; const mock = createMockKnowledgeStore({ lastSteps: customSteps, }); diff --git a/src/mcp-server/test-utils/mock-factories.ts b/src/tools/test-utils/mock-factories.ts similarity index 96% rename from src/mcp-server/test-utils/mock-factories.ts rename to src/tools/test-utils/mock-factories.ts index 8540852..f19ee1c 100644 --- a/src/mcp-server/test-utils/mock-factories.ts +++ b/src/tools/test-utils/mock-factories.ts @@ -11,8 +11,11 @@ import { vi } from 'vitest'; import type { ExtensionState } from '../../capabilities/types.js'; -import type { KnowledgeStore } from '../knowledge-store.js'; -import type { TrackedPage, 
SessionLaunchResult } from '../session-manager.js'; +import type { KnowledgeStore } from '../../knowledge-store/knowledge-store.js'; +import type { + TrackedPage, + SessionLaunchResult, +} from '../../server/session-manager.js'; import type { SessionState } from '../types/session.js'; import type { SessionMetadata } from '../types/step-record.js'; @@ -130,6 +133,7 @@ export function createMockSessionManager( getStateSnapshotCapability: vi.fn().mockReturnValue(undefined), // Environment + setWorkflowContext: vi.fn(), getEnvironmentMode: vi .fn() .mockReturnValue(options.environmentMode ?? 'e2e'), diff --git a/src/mcp-server/test-utils/mock-playwright.ts b/src/tools/test-utils/mock-playwright.ts similarity index 100% rename from src/mcp-server/test-utils/mock-playwright.ts rename to src/tools/test-utils/mock-playwright.ts diff --git a/src/mcp-server/types/discovery.ts b/src/tools/types/discovery.ts similarity index 78% rename from src/mcp-server/types/discovery.ts rename to src/tools/types/discovery.ts index 397196d..4d5f35f 100644 --- a/src/mcp-server/types/discovery.ts +++ b/src/tools/types/discovery.ts @@ -9,6 +9,16 @@ export const ACTIONABLE_ROLES = [ 'menuitem', ] as const; +export const STRUCTURAL_ROLES = [ + 'menu', + 'listbox', + 'option', + 'tab', + 'tabpanel', + 'list', + 'listitem', +] as const; + export const IMPORTANT_ROLES = [ 'dialog', 'alert', @@ -18,10 +28,12 @@ export const IMPORTANT_ROLES = [ export const INCLUDED_ROLES = [ ...ACTIONABLE_ROLES, + ...STRUCTURAL_ROLES, ...IMPORTANT_ROLES, ] as const; export type ActionableRole = (typeof ACTIONABLE_ROLES)[number]; +export type StructuralRole = (typeof STRUCTURAL_ROLES)[number]; export type ImportantRole = (typeof IMPORTANT_ROLES)[number]; export type IncludedRole = (typeof INCLUDED_ROLES)[number]; @@ -40,6 +52,8 @@ export type A11yNodeTrimmed = { checked?: boolean; expanded?: boolean; path: string[]; + testId?: string; + textContent?: string; }; export type RawA11yNode = { diff --git 
a/src/mcp-server/types/errors.ts b/src/tools/types/errors.ts similarity index 100% rename from src/mcp-server/types/errors.ts rename to src/tools/types/errors.ts diff --git a/src/mcp-server/types/index.ts b/src/tools/types/index.ts similarity index 88% rename from src/mcp-server/types/index.ts rename to src/tools/types/index.ts index 7a9edd6..e4c7ce1 100644 --- a/src/mcp-server/types/index.ts +++ b/src/tools/types/index.ts @@ -1,4 +1,3 @@ -export type * from './responses.js'; export * from './errors.js'; export * from './seeding.js'; export type * from './tool-inputs.js'; diff --git a/src/mcp-server/types/knowledge.ts b/src/tools/types/knowledge.ts similarity index 100% rename from src/mcp-server/types/knowledge.ts rename to src/tools/types/knowledge.ts diff --git a/src/mcp-server/types/seeding.ts b/src/tools/types/seeding.ts similarity index 100% rename from src/mcp-server/types/seeding.ts rename to src/tools/types/seeding.ts diff --git a/src/mcp-server/types/session.ts b/src/tools/types/session.ts similarity index 100% rename from src/mcp-server/types/session.ts rename to src/tools/types/session.ts diff --git a/src/mcp-server/types/step-record.ts b/src/tools/types/step-record.ts similarity index 93% rename from src/mcp-server/types/step-record.ts rename to src/tools/types/step-record.ts index 23d220b..175ad89 100644 --- a/src/mcp-server/types/step-record.ts +++ b/src/tools/types/step-record.ts @@ -65,6 +65,12 @@ export type StepRecordObservation = { testIds: TestIdItem[]; a11y: { nodes: A11yNodeTrimmed[]; + /** Present only in diff-mode compact observations (Phase 2). 
*/ + diff?: { + added: string[]; + removed: string[]; + unchanged: number; + }; }; priorKnowledge?: PriorKnowledgeV1; }; diff --git a/src/mcp-server/types/tool-inputs.ts b/src/tools/types/tool-inputs.ts similarity index 85% rename from src/mcp-server/types/tool-inputs.ts rename to src/tools/types/tool-inputs.ts index 65bd1ac..fdf5747 100644 --- a/src/mcp-server/types/tool-inputs.ts +++ b/src/tools/types/tool-inputs.ts @@ -2,13 +2,6 @@ import type { SmartContractName } from './seeding.js'; export type TabRole = 'extension' | 'notification' | 'dapp' | 'other'; -export type ObservationPolicyOverride = 'default' | 'none' | 'failures'; - -export type HandlerOptions = { - signal?: AbortSignal; - observationPolicy?: ObservationPolicyOverride; -}; - export type BuildInput = { buildType?: 'build:test'; force?: boolean; @@ -16,6 +9,7 @@ export type BuildInput = { export type LaunchInput = { autoBuild?: boolean; + context?: 'e2e' | 'prod'; stateMode?: 'default' | 'onboarding' | 'custom'; fixturePreset?: string; fixture?: Record; @@ -29,6 +23,7 @@ export type LaunchInput = { flowTags?: string[]; tags?: string[]; seedContracts?: SmartContractName[]; + force?: boolean; }; export type CleanupInput = { @@ -59,7 +54,7 @@ export type DescribeScreenInput = { }; export type ScreenshotInput = { - name: string; + name?: string; fullPage?: boolean; selector?: string; includeBase64?: boolean; @@ -71,17 +66,31 @@ export type TargetSelection = { selector?: string; }; +export type WithinTarget = { + a11yRef?: string; + testId?: string; + selector?: string; +}; + export type ClickInput = TargetSelection & { timeoutMs?: number; + within?: WithinTarget; }; export type TypeInput = TargetSelection & { text: string; timeoutMs?: number; + within?: WithinTarget; }; export type WaitForInput = TargetSelection & { timeoutMs?: number; + within?: WithinTarget; +}; + +export type GetTextInput = TargetSelection & { + timeoutMs?: number; + within?: WithinTarget; }; export type KnowledgeScope = @@ -128,6 
+137,7 @@ export type RunStepsInput = { }[]; stopOnError?: boolean; includeObservations?: 'none' | 'failures' | 'all'; + batchTimeoutMs?: number; }; export type SwitchToTabInput = { @@ -144,3 +154,8 @@ export type ClipboardInput = { action: 'write' | 'read'; text?: string; }; + +export type SetContextInput = { + context: 'e2e' | 'prod'; + options?: Record; +}; diff --git a/src/mcp-server/types/tool-outputs.ts b/src/tools/types/tool-outputs.ts similarity index 83% rename from src/mcp-server/types/tool-outputs.ts rename to src/tools/types/tool-outputs.ts index 541bd51..05e56c5 100644 --- a/src/mcp-server/types/tool-outputs.ts +++ b/src/tools/types/tool-outputs.ts @@ -59,6 +59,7 @@ export type ScreenshotInfo = { export type DescribeScreenResult = { state: ExtensionState; + activeTab?: TabInfo; testIds: { items: TestIdItem[]; }; @@ -93,6 +94,12 @@ export type WaitForResult = { target: string; }; +export type GetTextResult = { + text: string; + target: string; + length: number; +}; + export type StepResult = { tool: string; ok: boolean; @@ -105,6 +112,7 @@ export type StepResult = { meta: { durationMs: number; timestamp: string; + skipped?: boolean; }; }; @@ -115,6 +123,7 @@ export type RunStepsResult = { total: number; succeeded: number; failed: number; + skipped: number; durationMs: number; }; }; @@ -139,3 +148,19 @@ export type ClipboardResult = { success: boolean; text?: string; }; + +export type SetContextResult = { + previousContext: 'e2e' | 'prod'; + newContext: 'e2e' | 'prod'; + availableCapabilities: string[]; +}; + +export type GetContextResult = { + currentContext: 'e2e' | 'prod'; + hasActiveSession: boolean; + sessionId: string | null; + capabilities: { + available: string[]; + }; + canSwitchContext: boolean; +}; diff --git a/src/tools/utils.ts b/src/tools/utils.ts new file mode 100644 index 0000000..6eae6a2 --- /dev/null +++ b/src/tools/utils.ts @@ -0,0 +1,53 @@ +import { ErrorCodes } from './types'; +import type { ToolContext, ToolResponse } from 
'../types/http.js'; + +/** + * Wraps a result value in a successful tool response. + * + * @param result - The result payload to return. + * @returns A successful tool response containing the result. + */ +export function createToolSuccess( + result: TResult, +): ToolResponse { + return { ok: true, result }; +} + +/** + * Wraps an error code and message in a failed tool response. + * + * @param code - The error code identifying the failure type. + * @param message - A human-readable error description. + * @returns A failed tool response containing the error. + */ +export function createToolError( + code: string, + message: string, +): ToolResponse { + return { + ok: false, + error: { + code, + message, + }, + }; +} + +/** + * Returns an error response if no active session exists. + * + * @param context - The tool execution context. + * @returns An error response when no session is active, or undefined. + */ +export function requireActiveSession( + context: ToolContext, +): ToolResponse | undefined { + if (!context.sessionManager.hasActiveSession()) { + return createToolError( + ErrorCodes.MM_NO_ACTIVE_SESSION, + 'No active session. Call launch first.', + ); + } + + return undefined; +} diff --git a/src/mcp-server/constants.ts b/src/tools/utils/constants.ts similarity index 83% rename from src/mcp-server/constants.ts rename to src/tools/utils/constants.ts index fd1404f..8c65086 100644 --- a/src/mcp-server/constants.ts +++ b/src/tools/utils/constants.ts @@ -1,5 +1,5 @@ /** - * Constants for MCP server tool operations. + * Constants for HTTP daemon tool operations. * Centralized to ensure consistency and easy tuning. 
*/ @@ -22,3 +22,6 @@ export const OBSERVATION_TESTID_LIMIT = 50; /** Maximum length for text content preview in discovery */ export const TEXT_PREVIEW_MAX_LENGTH = 100; + +/** Minimum number of option nodes under a combobox/listbox to trigger collapsing */ +export const OPTION_COLLAPSE_MIN_COUNT = 3; diff --git a/src/mcp-server/discovery.test.ts b/src/tools/utils/discovery.test.ts similarity index 60% rename from src/mcp-server/discovery.test.ts rename to src/tools/utils/discovery.test.ts index 048b43a..d738973 100644 --- a/src/mcp-server/discovery.test.ts +++ b/src/tools/utils/discovery.test.ts @@ -197,6 +197,50 @@ describe('collectTestIds', () => { expect(result[0].text?.length).toBeLessThanOrEqual(200); }); + it('handles isVisible rejection gracefully', async () => { + const mockLocators = [ + { + getAttribute: vi.fn().mockResolvedValue('btn-1'), + isVisible: vi.fn().mockRejectedValue(new Error('detached')), + textContent: vi.fn().mockResolvedValue('OK'), + }, + ]; + + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + locator: vi.fn().mockReturnValue({ + all: vi.fn().mockResolvedValue(mockLocators), + }), + } as unknown as Page; + + const result = await collectTestIds(page); + + expect(result).toHaveLength(1); + expect(result[0].visible).toBe(false); + }); + + it('handles textContent rejection gracefully', async () => { + const mockLocators = [ + { + getAttribute: vi.fn().mockResolvedValue('btn-1'), + isVisible: vi.fn().mockResolvedValue(true), + textContent: vi.fn().mockRejectedValue(new Error('detached')), + }, + ]; + + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + locator: vi.fn().mockReturnValue({ + all: vi.fn().mockResolvedValue(mockLocators), + }), + } as unknown as Page; + + const result = await collectTestIds(page); + + expect(result).toHaveLength(1); + expect(result[0].text).toBeUndefined(); + }); + it('handles page load state failure', async () => { const page = createMockPage({ testIds: [{ testId: 
'test-1', visible: true }], @@ -308,6 +352,17 @@ describe('collectTrimmedA11ySnapshot', () => { expect(result.refMap.size).toBe(0); }); + it('handles empty parsed roots from valid yaml', async () => { + const page = createMockPage({ + a11ySnapshot: '- text: just text\n- /url: https://example.com', + }); + + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes).toHaveLength(0); + expect(result.refMap.size).toBe(0); + }); + it('uses root selector when provided', async () => { const a11yTree = `- dialog:\n - button "Close"`; @@ -339,6 +394,245 @@ describe('collectTrimmedA11ySnapshot', () => { expect(result.nodes[1].name).toBe('Child'); expect(result.nodes[2].name).toBe('Grandchild'); }); + + it('collapses 3+ consecutive identical nodes into summary', async () => { + const a11yTree = [ + '- main:', + ' - button "maskicon"', + ' - button "maskicon"', + ' - button "maskicon"', + ' - button "maskicon"', + ' - button "Submit"', + ].join('\n'); + + const page = createMockPage({ a11ySnapshot: a11yTree }); + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes).toHaveLength(3); + expect(result.nodes[0]).toMatchObject({ + ref: 'e1', + role: 'button', + name: 'maskicon', + }); + expect(result.nodes[1].name).toContain('3 more'); + expect(result.nodes[1].name).toContain('maskicon'); + expect(result.nodes[2]).toMatchObject({ + ref: 'e5', + role: 'button', + name: 'Submit', + }); + expect(result.refMap.has('e1')).toBe(true); + expect(result.refMap.has('e2')).toBe(true); + expect(result.refMap.has('e3')).toBe(true); + expect(result.refMap.has('e4')).toBe(true); + }); + + it('does not collapse nodes with same role and name but different paths', async () => { + const a11yTree = [ + '- main:', + ' - dialog "A":', + ' - button "OK"', + ' - button "OK"', + ' - button "OK"', + ' - dialog "B":', + ' - button "OK"', + ' - button "OK"', + ' - button "OK"', + ].join('\n'); + + const page = createMockPage({ a11ySnapshot: a11yTree }); + const 
result = await collectTrimmedA11ySnapshot(page); + + const dialogAButtons = result.nodes.filter( + (n) => n.role === 'button' && n.path.some((p) => p.includes('dialog:A')), + ); + const dialogBButtons = result.nodes.filter( + (n) => n.role === 'button' && n.path.some((p) => p.includes('dialog:B')), + ); + expect(dialogAButtons.length).toBeGreaterThanOrEqual(1); + expect(dialogBButtons.length).toBeGreaterThanOrEqual(1); + }); + + it('does not collapse fewer than 3 identical nodes', async () => { + const a11yTree = [ + '- main:', + ' - button "maskicon"', + ' - button "maskicon"', + ' - button "Submit"', + ].join('\n'); + + const page = createMockPage({ a11ySnapshot: a11yTree }); + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes).toHaveLength(3); + expect(result.nodes[0].name).toBe('maskicon'); + expect(result.nodes[1].name).toBe('maskicon'); + expect(result.nodes[2].name).toBe('Submit'); + }); + + it('enriches nodes with short names using testId from DOM', async () => { + const a11yTree = `- main:\n - button "x"`; + const mockGetAttribute = vi.fn().mockResolvedValue('action-button'); + const mockTextContent = vi.fn().mockResolvedValue('Click me'); + const mockBodyLocator = { + ariaSnapshot: vi.fn().mockResolvedValue(a11yTree), + }; + + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + locator: vi.fn((selector: string) => { + if (selector === 'body') { + return { first: vi.fn().mockReturnValue(mockBodyLocator) }; + } + return { + first: vi.fn().mockReturnValue({ + getAttribute: mockGetAttribute, + textContent: mockTextContent, + }), + }; + }), + } as unknown as Page; + + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes.length).toBeGreaterThan(0); + expect(result.nodes[0].testId).toBe('action-button'); + expect(result.nodes[0].textContent).toBe('Click me'); + }); + + it('skips textContent enrichment when text matches the node name', async () => { + const a11yTree = `- main:\n - 
button "maskicon"`; + const mockBodyLocator = { + ariaSnapshot: vi.fn().mockResolvedValue(a11yTree), + }; + + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + locator: vi.fn((selector: string) => { + if (selector === 'body') { + return { first: vi.fn().mockReturnValue(mockBodyLocator) }; + } + return { + first: vi.fn().mockReturnValue({ + getAttribute: vi.fn().mockResolvedValue(null), + textContent: vi.fn().mockResolvedValue('maskicon'), + }), + }; + }), + } as unknown as Page; + + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes[0].textContent).toBeUndefined(); + expect(result.nodes[0].testId).toBeUndefined(); + }); + + it('skips enrichment when all node names exceed threshold', async () => { + const a11yTree = `- main:\n - button "A very long button name that exceeds threshold"`; + const page = createMockPage({ a11ySnapshot: a11yTree }); + + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes).toHaveLength(1); + expect(result.nodes[0].testId).toBeUndefined(); + expect(result.nodes[0].textContent).toBeUndefined(); + }); + + it('handles enrichment errors when getAttribute/textContent reject', async () => { + const a11yTree = `- main:\n - button "x"`; + const mockBodyLocator = { + ariaSnapshot: vi.fn().mockResolvedValue(a11yTree), + }; + + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + locator: vi.fn((selector: string) => { + if (selector === 'body') { + return { first: vi.fn().mockReturnValue(mockBodyLocator) }; + } + return { + first: vi.fn().mockReturnValue({ + getAttribute: vi.fn().mockRejectedValue(new Error('detached')), + textContent: vi.fn().mockRejectedValue(new Error('detached')), + }), + }; + }), + } as unknown as Page; + + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes).toHaveLength(1); + expect(result.nodes[0].testId).toBeUndefined(); + expect(result.nodes[0].textContent).toBeUndefined(); + }); + + it('handles 
enrichment errors when locator.first() throws', async () => { + const a11yTree = `- main:\n - button "y"`; + const mockBodyLocator = { + ariaSnapshot: vi.fn().mockResolvedValue(a11yTree), + }; + + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + locator: vi.fn((selector: string) => { + if (selector === 'body') { + return { first: vi.fn().mockReturnValue(mockBodyLocator) }; + } + return { + first: vi.fn().mockImplementation(() => { + throw new Error('locator disposed'); + }), + }; + }), + } as unknown as Page; + + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes).toHaveLength(1); + expect(result.nodes[0].testId).toBeUndefined(); + expect(result.nodes[0].textContent).toBeUndefined(); + }); + + it('does not collapse nodes with different textContent', async () => { + const a11yTree = [ + '- main:', + ' - button "maskicon"', + ' - button "maskicon"', + ' - button "maskicon"', + ' - button "maskicon"', + ].join('\n'); + + const textValues = ['Rename', 'Account details', 'Hide', 'Remove']; + let callIdx = 0; + const mockBodyLocator = { + ariaSnapshot: vi.fn().mockResolvedValue(a11yTree), + }; + + const page = { + waitForLoadState: vi.fn().mockResolvedValue(undefined), + locator: vi.fn((selector: string) => { + if (selector === 'body') { + return { first: vi.fn().mockReturnValue(mockBodyLocator) }; + } + const idx = callIdx; + callIdx += 1; + return { + first: vi.fn().mockReturnValue({ + getAttribute: vi.fn().mockResolvedValue(null), + textContent: vi + .fn() + .mockResolvedValue(textValues[idx % textValues.length]), + }), + }; + }), + } as unknown as Page; + + const result = await collectTrimmedA11ySnapshot(page); + + expect(result.nodes).toHaveLength(4); + expect(result.nodes[0].textContent).toBe('Rename'); + expect(result.nodes[1].textContent).toBe('Account details'); + }); }); describe('resolveTarget', () => { @@ -442,6 +736,41 @@ describe('waitForTarget', () => { 
expect(page.locator).toHaveBeenCalledWith('.submit-button'); }); + + it('scopes target within a parent when within is provided', async () => { + const childLocator = createMockLocator({ visible: true }); + const firstParentLocator = { + waitFor: vi.fn().mockResolvedValue(undefined), + locator: vi.fn().mockReturnValue(childLocator), + }; + const parentLocator = { + first: vi.fn().mockReturnValue(firstParentLocator), + }; + + const page = { + locator: vi.fn().mockReturnValue(parentLocator), + } as unknown as Page; + + const result = await waitForTarget( + page, + 'testId', + 'end-accessory', + new Map(), + 5000, + { type: 'testId', value: 'account-cell' }, + ); + + expect(page.locator).toHaveBeenCalledWith('[data-testid="account-cell"]'); + expect(parentLocator.first).toHaveBeenCalled(); + expect(firstParentLocator.waitFor).toHaveBeenCalledWith({ + state: 'visible', + timeout: 5000, + }); + expect(firstParentLocator.locator).toHaveBeenCalledWith( + '[data-testid="end-accessory"]', + ); + expect(result).toBe(childLocator); + }); }); describe('parseAriaSnapshotYaml', () => { diff --git a/src/mcp-server/discovery.ts b/src/tools/utils/discovery.ts similarity index 59% rename from src/mcp-server/discovery.ts rename to src/tools/utils/discovery.ts index 5df6159..b3d37e7 100644 --- a/src/mcp-server/discovery.ts +++ b/src/tools/utils/discovery.ts @@ -1,14 +1,14 @@ import type { Page, Locator } from '@playwright/test'; import { TEXT_PREVIEW_MAX_LENGTH } from './constants.js'; +import { debugWarn } from '../../utils'; import type { TestIdItem, A11yNodeTrimmed, RawA11yNode, IncludedRole, -} from './types'; -import { INCLUDED_ROLES } from './types'; -import { debugWarn } from './utils'; +} from '../types'; +import { INCLUDED_ROLES } from '../types'; const INCLUDED_ROLES_SET = new Set(INCLUDED_ROLES); @@ -302,7 +302,154 @@ export async function collectTrimmedA11ySnapshot( traverseNode(root, []); } - return { nodes: trimmedNodes, refMap }; + await enrichNodesWithDOMContext(page, 
trimmedNodes, refMap); + + const collapsedNodes = collapseIdenticalRuns(trimmedNodes); + + return { nodes: collapsedNodes, refMap }; +} + +const GENERIC_NAME_MAX_LENGTH = 20; +const ENRICHMENT_BATCH_LIMIT = 100; +const ENRICHMENT_ELEMENT_TIMEOUT_MS = 500; +const TEXT_CONTENT_MAX_LENGTH = 60; + +type EnrichmentResult = { + ref: string; + testId: string | null; + textContent: string | null; +}; + +/** + * Enriches a11y nodes that have generic or empty names with data-testid + * values and visible text content from the corresponding DOM elements. + * + * @param page - The Playwright page to query. + * @param nodes - The trimmed a11y nodes to enrich (mutated in place). + * @param refMap - Map of a11y refs to selectors for element lookup. + */ +async function enrichNodesWithDOMContext( + page: Page, + nodes: A11yNodeTrimmed[], + refMap: Map, +): Promise { + const candidates = nodes.filter( + (node) => !node.name || node.name.length <= GENERIC_NAME_MAX_LENGTH, + ); + + if (candidates.length === 0) { + return; + } + + const enrichBatch = candidates.slice(0, ENRICHMENT_BATCH_LIMIT); + + const results = await Promise.allSettled( + enrichBatch.map(async (node): Promise => { + const selector = refMap.get(node.ref); + if (!selector) { + return { ref: node.ref, testId: null, textContent: null }; + } + try { + const locator = page.locator(selector).first(); + const [testId, rawText] = await Promise.all([ + locator + .getAttribute('data-testid', { + timeout: ENRICHMENT_ELEMENT_TIMEOUT_MS, + }) + .catch(() => null), + locator + .textContent({ timeout: ENRICHMENT_ELEMENT_TIMEOUT_MS }) + .catch(() => null), + ]); + const trimmedText = rawText?.trim().slice(0, TEXT_CONTENT_MAX_LENGTH); + const textContent = + trimmedText && trimmedText !== node.name ? 
trimmedText : null; + return { ref: node.ref, testId, textContent }; + } catch { + return { ref: node.ref, testId: null, textContent: null }; + } + }), + ); + + const enrichMap = new Map(); + for (const result of results) { + if (result.status === 'fulfilled') { + enrichMap.set(result.value.ref, result.value); + } + } + + for (const node of enrichBatch) { + const data = enrichMap.get(node.ref); + if (!data) { + continue; + } + if (data.testId) { + node.testId = data.testId; + } + if (data.textContent) { + node.textContent = data.textContent; + } + } +} + +const COLLAPSE_THRESHOLD = 3; + +/** + * Checks whether two string arrays contain identical elements in order. + * + * @param left - First array to compare. + * @param right - Second array to compare. + * @returns True if both arrays are equal. + */ +function arraysEqual(left: string[], right: string[]): boolean { + return ( + left.length === right.length && left.every((val, idx) => val === right[idx]) + ); +} + +/** + * Collapses consecutive runs of identical a11y nodes into a summary entry. + * The refMap retains individual entries so refs still resolve — collapsing + * only affects the agent-facing representation to reduce token waste. + * + * @param nodes - The flat list of trimmed a11y nodes to collapse. + * @returns A new array with runs of 3+ identical nodes collapsed. 
+ */ +function collapseIdenticalRuns(nodes: A11yNodeTrimmed[]): A11yNodeTrimmed[] { + const collapsed: A11yNodeTrimmed[] = []; + let cursor = 0; + while (cursor < nodes.length) { + const current = nodes[cursor]; + let runEnd = cursor + 1; + while ( + runEnd < nodes.length && + nodes[runEnd].role === current.role && + nodes[runEnd].name === current.name && + nodes[runEnd].testId === current.testId && + nodes[runEnd].textContent === current.textContent && + arraysEqual(nodes[runEnd].path, current.path) + ) { + runEnd += 1; + } + + const runLength = runEnd - cursor; + if (runLength >= COLLAPSE_THRESHOLD) { + collapsed.push(current); + const lastInRun = nodes[runEnd - 1]; + collapsed.push({ + ref: `${current.ref}\u2013${lastInRun.ref}`, + role: current.role, + name: `\u2026 ${runLength - 1} more "${current.name || current.role}" (refs ${current.ref}\u2013${lastInRun.ref})`, + path: current.path, + }); + } else { + for (let idx = cursor; idx < runEnd; idx += 1) { + collapsed.push(nodes[idx]); + } + } + cursor = runEnd; + } + return collapsed; } /** @@ -318,20 +465,33 @@ function buildA11ySelector(role: IncludedRole, name: string): string { } /** - * Resolve a target element to a Playwright Locator. + * Target type for scoping selectors. + */ +export type TargetType = 'a11yRef' | 'testId' | 'selector'; + +/** + * Optional parent scope for chained locator resolution. + */ +export type WithinScope = { + type: TargetType; + value: string; +}; + +/** + * Resolve a target element to a Playwright Locator, optionally scoped within a parent. 
* - * @param page The Playwright page to search + * @param scope The Playwright Page or Locator to search within * @param targetType The type of target identifier (a11yRef, testId, or CSS selector) * @param targetValue The target value to resolve * @param refMap Map of a11y refs to selectors (used when targetType is 'a11yRef') * @returns Playwright Locator for the resolved element */ -export async function resolveTarget( - page: Page, - targetType: 'a11yRef' | 'testId' | 'selector', +function resolveTargetScoped( + scope: Page | Locator, + targetType: TargetType, targetValue: string, refMap: Map, -): Promise { +): Locator { switch (targetType) { case 'a11yRef': { const selector = refMap.get(targetValue); @@ -341,12 +501,12 @@ export async function resolveTarget( `Available refs: ${Array.from(refMap.keys()).join(', ')}`, ); } - return page.locator(selector); + return scope.locator(selector); } case 'testId': - return page.locator(`[data-testid="${targetValue}"]`); + return scope.locator(`[data-testid="${targetValue}"]`); case 'selector': - return page.locator(targetValue); + return scope.locator(targetValue); default: { const exhaustiveCheck: never = targetType; throw new Error(`Unknown target type: ${exhaustiveCheck as string}`); @@ -355,23 +515,60 @@ export async function resolveTarget( } /** - * Wait for a target element to become visible. + * Resolve a target element to a Playwright Locator (page-level). 
+ * + * @param page The Playwright page to search + * @param targetType The type of target identifier (a11yRef, testId, or CSS selector) + * @param targetValue The target value to resolve + * @param refMap Map of a11y refs to selectors (used when targetType is 'a11yRef') + * @returns Playwright Locator for the resolved element + */ +export async function resolveTarget( + page: Page, + targetType: TargetType, + targetValue: string, + refMap: Map, +): Promise { + return resolveTargetScoped(page, targetType, targetValue, refMap); +} + +/** + * Wait for a target element to become visible, optionally scoped within a parent. * * @param page The Playwright page to search * @param targetType The type of target identifier (a11yRef, testId, or CSS selector) * @param targetValue The target value to resolve * @param refMap Map of a11y refs to selectors (used when targetType is 'a11yRef') * @param timeoutMs Maximum time to wait in milliseconds + * @param within Optional parent scope — resolves the target within this element * @returns Playwright Locator for the visible element */ export async function waitForTarget( page: Page, - targetType: 'a11yRef' | 'testId' | 'selector', + targetType: TargetType, targetValue: string, refMap: Map, timeoutMs: number, + within?: WithinScope, ): Promise { - const locator = await resolveTarget(page, targetType, targetValue, refMap); + let scope: Page | Locator = page; + if (within) { + const parentLocator = resolveTargetScoped( + page, + within.type, + within.value, + refMap, + ); + await parentLocator + .first() + .waitFor({ state: 'visible', timeout: timeoutMs }); + // Use .first() to guarantee the child search is scoped to exactly one + // parent element. Without this, Playwright chains the child locator + // across ALL matching parents, producing phantom multi-matches + // (e.g. 63 "end-accessory" buttons across 63 account cells). 
+ scope = parentLocator.first(); + } + const locator = resolveTargetScoped(scope, targetType, targetValue, refMap); await locator.waitFor({ state: 'visible', timeout: timeoutMs }); return locator; } diff --git a/src/mcp-server/utils/targets.ts b/src/tools/utils/targets.ts similarity index 100% rename from src/mcp-server/utils/targets.ts rename to src/tools/utils/targets.ts diff --git a/src/mcp-server/utils/type-guards.test.ts b/src/tools/utils/type-guards.test.ts similarity index 100% rename from src/mcp-server/utils/type-guards.test.ts rename to src/tools/utils/type-guards.test.ts diff --git a/src/mcp-server/utils/type-guards.ts b/src/tools/utils/type-guards.ts similarity index 100% rename from src/mcp-server/utils/type-guards.ts rename to src/tools/utils/type-guards.ts diff --git a/src/types/http.ts b/src/types/http.ts new file mode 100644 index 0000000..d1cac1c --- /dev/null +++ b/src/types/http.ts @@ -0,0 +1,97 @@ +/** + * HTTP Server Type Definitions + * + * Types for standalone tool functions and HTTP response shapes. + */ + +import type { Page } from '@playwright/test'; + +import type { PortMap, WorkflowContext } from '../capabilities/context.js'; +import type { KnowledgeStore } from '../knowledge-store/knowledge-store.js'; +import type { ISessionManager } from '../server/session-manager.js'; + +/** + * Context passed to standalone tool functions. + * + * This context provides access to the session manager, current page, + * accessibility reference map, workflow capabilities, and knowledge store. 
+ */ +export type ToolContext = { + /** Session manager for browser session control */ + sessionManager: ISessionManager; + /** Current active Playwright page (lazy — throws if no session) */ + get page(): Page; + /** Accessibility reference map (lazy — returns empty map if no session) */ + get refMap(): Map; + /** Workflow context with capabilities and environment config */ + workflowContext: WorkflowContext; + /** Knowledge store for session history and prior knowledge */ + knowledgeStore: KnowledgeStore; + /** Tool registry for batch execution (run_steps) */ + toolRegistry: Map>; +}; + +/** + * Result shape for tool responses. + * + * @template T The type of the successful result + */ +// eslint-disable-next-line @typescript-eslint/naming-convention +export type ToolResponse = + | { ok: true; result: T } + | { ok: false; error: { code: string; message: string } }; + +/** + * Standalone tool function signature. + * + * Tool functions receive parameters and a context, and return a ToolResponse. + * + * @template TParams The type of parameters the tool accepts + * @template TResult The type of the successful result + */ +export type ToolFunction = ( + params: TParams, + context: ToolContext, +) => Promise>; + +/** + * Configuration for createServer(). + * + * This configuration is used to initialize the HTTP server with + * session management, context factory, and optional settings. 
+ */ +export type ServerConfig = { + /** Session manager instance */ + sessionManager: ISessionManager; + /** Factory function to create workflow context (may be sync or async) */ + contextFactory: () => WorkflowContext | Promise; + /** Shared knowledge store instance (optional — a new instance is created if omitted) */ + knowledgeStore?: KnowledgeStore; + /** Idle timeout for daemon auto-shutdown in milliseconds (default: 1_800_000 = 30 min) */ + idleShutdownMs?: number; + /** Per-request execution timeout in milliseconds (default: 30_000) */ + requestTimeoutMs?: number; + /** Path to log file (optional) */ + logFilePath?: string; +}; + +/** + * Shape of the .mm-server daemon state file. + * + * This file is created when the daemon starts and contains + * the port, PID, and port configuration for the running server. + */ +export type DaemonState = { + /** HTTP server port */ + port: number; + /** Process ID of the daemon */ + pid: number; + /** ISO 8601 timestamp when daemon started */ + startedAt: string; + /** Nonce for daemon identification */ + nonce: string; + /** Package version of the daemon process (absent in state files written before version tracking) */ + version?: string; + /** Port configuration for sub-services */ + subPorts: PortMap; +}; diff --git a/src/mcp-server/utils/errors.ts b/src/utils/errors.ts similarity index 100% rename from src/mcp-server/utils/errors.ts rename to src/utils/errors.ts diff --git a/src/utils/index.ts b/src/utils/index.ts index 6eae751..99c0548 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -7,3 +7,6 @@ export { waitForServiceReady, type WaitForServiceReadyOptions, } from './service-readiness.js'; +export { generateFilesafeTimestamp, generateSessionId } from './time.js'; +export { extractErrorMessage } from './errors.js'; +export { debugWarn } from './logger.js'; diff --git a/src/mcp-server/utils/logger.test.ts b/src/utils/logger.test.ts similarity index 82% rename from src/mcp-server/utils/logger.test.ts 
rename to src/utils/logger.test.ts index 9824771..37ce500 100644 --- a/src/mcp-server/utils/logger.test.ts +++ b/src/utils/logger.test.ts @@ -13,21 +13,21 @@ describe('debugWarn', () => { vi.resetModules(); }); - describe('when MCP_DEBUG is true', () => { + describe('when DEBUG is true', () => { it('logs warning with context and error message', async () => { - process.env.MCP_DEBUG = 'true'; + process.env.DEBUG = 'true'; vi.resetModules(); const { debugWarn } = await import('./logger.js'); debugWarn('test.context', new Error('test error')); expect(consoleWarnSpy).toHaveBeenCalledWith( - expect.stringContaining('[MCP:test.context]'), + expect.stringContaining('[Server:test.context] test error'), ); }); it('extracts error message from Error objects', async () => { - process.env.MCP_DEBUG = 'true'; + process.env.DEBUG = 'true'; vi.resetModules(); const { debugWarn } = await import('./logger.js'); @@ -40,7 +40,7 @@ describe('debugWarn', () => { }); it('handles string error messages', async () => { - process.env.MCP_DEBUG = 'true'; + process.env.DEBUG = 'true'; vi.resetModules(); const { debugWarn } = await import('./logger.js'); @@ -52,7 +52,7 @@ describe('debugWarn', () => { }); it('handles unknown error types', async () => { - process.env.MCP_DEBUG = 'true'; + process.env.DEBUG = 'true'; vi.resetModules(); const { debugWarn } = await import('./logger.js'); @@ -62,9 +62,9 @@ describe('debugWarn', () => { }); }); - describe('when MCP_DEBUG is false or unset', () => { + describe('when DEBUG is false or unset', () => { it('does not log anything', async () => { - delete process.env.MCP_DEBUG; + delete process.env.DEBUG; vi.resetModules(); const { debugWarn } = await import('./logger.js'); @@ -73,8 +73,8 @@ describe('debugWarn', () => { expect(consoleWarnSpy).not.toHaveBeenCalled(); }); - it('does not log when MCP_DEBUG is empty string', async () => { - process.env.MCP_DEBUG = ''; + it('does not log when DEBUG is empty string', async () => { + process.env.DEBUG = ''; 
vi.resetModules(); const { debugWarn } = await import('./logger.js'); diff --git a/src/mcp-server/utils/logger.ts b/src/utils/logger.ts similarity index 56% rename from src/mcp-server/utils/logger.ts rename to src/utils/logger.ts index f363068..0f1c414 100644 --- a/src/mcp-server/utils/logger.ts +++ b/src/utils/logger.ts @@ -1,16 +1,16 @@ import { extractErrorMessage } from './errors.js'; /** - * Debug logging for MCP server operations. - * Enabled via MCP_DEBUG=true environment variable. + * Debug logging for server operations. + * Enabled via DEBUG=true environment variable. * - * By default, logging is disabled to avoid polluting MCP protocol stdout. + * By default, logging is disabled to avoid noise in HTTP daemon logs. */ -const DEBUG = process.env.MCP_DEBUG === 'true'; +const DEBUG = process.env.DEBUG === 'true'; /** - * Log a debug warning message. Only outputs when MCP_DEBUG=true. + * Log a debug warning message. Only outputs when DEBUG=true. * Use this for caught errors that are intentionally suppressed. 
* * @param context - A short identifier for where the warning occurred (e.g., "discovery.collectTestIds") @@ -19,6 +19,6 @@ const DEBUG = process.env.MCP_DEBUG === 'true'; export function debugWarn(context: string, error: unknown): void { if (DEBUG) { const message = extractErrorMessage(error); - console.warn(`[MCP:${context}] ${message}`); + console.warn(`[Server:${context}] ${message}`); } } diff --git a/src/mcp-server/utils/time.test.ts b/src/utils/time.test.ts similarity index 100% rename from src/mcp-server/utils/time.test.ts rename to src/utils/time.test.ts diff --git a/src/mcp-server/utils/time.ts b/src/utils/time.ts similarity index 100% rename from src/mcp-server/utils/time.ts rename to src/utils/time.ts diff --git a/src/mcp-server/schemas.test.ts b/src/validation/schemas.test.ts similarity index 86% rename from src/mcp-server/schemas.test.ts rename to src/validation/schemas.test.ts index e2d915c..f38417a 100644 --- a/src/mcp-server/schemas.test.ts +++ b/src/validation/schemas.test.ts @@ -13,6 +13,7 @@ import { switchToTabInputSchema, closeTabInputSchema, clipboardInputSchema, + navigateInputSchema, } from './schemas.js'; describe('switchToTabInputSchema', () => { @@ -278,3 +279,47 @@ describe('clipboardInputSchema', () => { }); }); }); + +describe('navigateInputSchema', () => { + describe('refine validation: url required when screen is "url"', () => { + it('passes with screen "home"', () => { + const input = { screen: 'home' as const }; + const result = navigateInputSchema.safeParse(input); + + expect(result.success).toBe(true); + }); + + it('passes with screen "settings"', () => { + const input = { screen: 'settings' as const }; + const result = navigateInputSchema.safeParse(input); + + expect(result.success).toBe(true); + }); + + it('passes with screen "url" and url provided', () => { + const input = { screen: 'url' as const, url: 'https://example.com' }; + const result = navigateInputSchema.safeParse(input); + + expect(result.success).toBe(true); + }); 
+ + it('fails with screen "url" and no url', () => { + const input = { screen: 'url' as const }; + const result = navigateInputSchema.safeParse(input); + + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues[0].message).toBe( + 'url is required when screen is "url"', + ); + } + }); + + it('fails with screen "url" and empty url', () => { + const input = { screen: 'url' as const, url: '' }; + const result = navigateInputSchema.safeParse(input); + + expect(result.success).toBe(false); + }); + }); +}); diff --git a/src/mcp-server/schemas.ts b/src/validation/schemas.ts similarity index 88% rename from src/mcp-server/schemas.ts rename to src/validation/schemas.ts index 489e6bf..5269cd4 100644 --- a/src/mcp-server/schemas.ts +++ b/src/validation/schemas.ts @@ -1,6 +1,6 @@ import { z } from 'zod'; -import { SMART_CONTRACT_NAMES, HARDFORKS } from './types/seeding.js'; +import { SMART_CONTRACT_NAMES, HARDFORKS } from '../tools/types/seeding.js'; export const a11yRefPattern = z .string() @@ -90,6 +90,14 @@ export const launchInputSchema = z.object({ .boolean() .default(true) .describe('Automatically run build if extension is not found'), + context: z + .enum(['e2e', 'prod']) + .describe( + 'Environment context to use for this session. 
' + + 'Sets the context before launching so you can start in prod mode directly: ' + + 'mm launch --context prod --state onboarding', + ) + .optional(), stateMode: z .enum(['default', 'onboarding', 'custom']) .default('default') @@ -159,6 +167,10 @@ export const launchInputSchema = z.object({ .array(z.enum(smartContractNames)) .describe('Smart contracts to deploy on launch (before extension loads)') .optional(), + force: z + .boolean() + .default(false) + .describe('Force replace an existing active session (runs cleanup first)'), }); export const cleanupInputSchema = z.object({ @@ -241,7 +253,11 @@ export const describeScreenInputSchema = z.object({ }); export const screenshotInputSchema = z.object({ - name: z.string().min(1).describe('Screenshot filename (without extension)'), + name: z + .string() + .min(1) + .describe('Screenshot filename (without extension)') + .optional(), fullPage: z .boolean() .default(true) @@ -257,6 +273,29 @@ export const screenshotInputSchema = z.object({ .describe('Include base64-encoded image in response'), }); +export const withinTargetSchema = z + .object({ + a11yRef: a11yRefPattern.optional(), + testId: z.string().min(1).optional(), + selector: z.string().min(1).optional(), + }) + .refine( + (data) => { + const provided = [data.a11yRef, data.testId, data.selector].filter( + Boolean, + ); + return provided.length === 1; + }, + { + message: + 'Exactly one of a11yRef, testId, or selector must be provided in within', + }, + ) + .describe( + 'Scope the target search within a parent element. 
' + + 'Accepts the same targeting options (a11yRef, testId, or selector).', + ); + export const clickInputSchema = targetSelectionSchema.and( z.object({ timeoutMs: z @@ -266,6 +305,7 @@ export const clickInputSchema = targetSelectionSchema.and( .max(60000) .default(15000) .describe('Timeout to wait for element to become visible'), + within: withinTargetSchema.optional(), }), ); @@ -279,6 +319,7 @@ export const typeInputSchema = targetSelectionSchema.and( .max(60000) .default(15000) .describe('Timeout to wait for element to become visible'), + within: withinTargetSchema.optional(), }), ); @@ -291,6 +332,20 @@ export const waitForInputSchema = targetSelectionSchema.and( .max(120000) .default(15000) .describe('Timeout to wait for element'), + within: withinTargetSchema.optional(), + }), +); + +export const getTextInputSchema = targetSelectionSchema.and( + z.object({ + timeoutMs: z + .number() + .int() + .min(0) + .max(60000) + .default(15000) + .describe('Timeout to wait for element to become visible'), + within: withinTargetSchema.optional(), }), ); @@ -441,6 +496,16 @@ export const runStepsInputSchema = z.object({ 'When to include observations in results: ' + 'none = never (fastest), failures = only for failed steps, all = always', ), + batchTimeoutMs: z + .number() + .int() + .min(1000) + .max(300_000) + .describe( + 'Overall timeout for the batch in milliseconds. 
' + + 'When exceeded, remaining steps are marked as skipped and partial results are returned.', + ) + .optional(), }); export const setContextInputSchema = z.object({ @@ -500,6 +565,7 @@ export const toolSchemas = { click: clickInputSchema, type: typeInputSchema, wait_for: waitForInputSchema, + get_text: getTextInputSchema, knowledge_last: knowledgeLastInputSchema, knowledge_search: knowledgeSearchInputSchema, knowledge_summarize: knowledgeSummarizeInputSchema, @@ -533,6 +599,7 @@ export type ScreenshotInputZ = z.infer; export type ClickInputZ = z.infer; export type TypeInputZ = z.infer; export type WaitForInputZ = z.infer; +export type GetTextInputZ = z.infer; export type KnowledgeLastInputZ = z.infer; export type KnowledgeSearchInputZ = z.infer; export type KnowledgeSummarizeInputZ = z.infer< diff --git a/vitest.config.mts b/vitest.config.mts index 0470a62..004999a 100644 --- a/vitest.config.mts +++ b/vitest.config.mts @@ -27,7 +27,7 @@ export default defineConfig({ // The files to exclude from the coverage report. Vitest excludes test // files by default, but not `test-d.ts` files. - exclude: ['src/**/*.test-d.ts', 'src/mcp-server/test-utils/'], + exclude: ['src/**/*.test-d.ts', 'src/tools/test-utils/'], // Coverage thresholds. If the coverage is below these thresholds, the // test will fail. @@ -35,10 +35,10 @@ export default defineConfig({ // Auto-update the coverage thresholds when running locally. // Disabled in CI to prevent non-deterministic config changes. 
autoUpdate: !process.env.CI, - branches: 87.38, - functions: 92.98, - lines: 94.67, - statements: 94.46, + branches: 88.46, + functions: 91.66, + lines: 94.51, + statements: 94.26, }, }, diff --git a/yarn.lock b/yarn.lock index dfdade7..4ecedee 100644 --- a/yarn.lock +++ b/yarn.lock @@ -727,15 +727,6 @@ __metadata: languageName: node linkType: hard -"@hono/node-server@npm:^1.19.9": - version: 1.19.9 - resolution: "@hono/node-server@npm:1.19.9" - peerDependencies: - hono: ^4 - checksum: 10/d4915c2e736ee1e3934b5538cde92b19914dc71346340528a04e4c7219afc7367965080cd1a5291ac9cbda7b0780b89b6ca93472a9418aa105d6d1183033dc8a - languageName: node - linkType: hard - "@humanfs/core@npm:^0.19.1": version: 0.19.1 resolution: "@humanfs/core@npm:0.19.1" @@ -930,14 +921,15 @@ __metadata: "@metamask/eslint-config-nodejs": "npm:^15.0.0" "@metamask/eslint-config-typescript": "npm:^15.0.0" "@metamask/eslint-config-vitest": "npm:^15.0.0" - "@modelcontextprotocol/sdk": "npm:^1.26.0" "@playwright/test": "npm:^1.49.0" "@ts-bridge/cli": "npm:^0.6.3" + "@types/express": "npm:^5.0.6" "@types/node": "npm:^20.0.0" "@typescript-eslint/utils": "npm:^8.6.0" "@vitest/coverage-istanbul": "npm:^3.0.7" "@vitest/eslint-plugin": "npm:^1.1.4" "@yarnpkg/types": "npm:^4.0.0-rc.52" + cosmiconfig: "npm:^9.0.0" depcheck: "npm:^1.4.3" eslint: "npm:^9.11.0" eslint-config-prettier: "npm:^9.1.0" @@ -947,6 +939,7 @@ __metadata: eslint-plugin-n: "npm:^17.10.3" eslint-plugin-prettier: "npm:^5.2.1" eslint-plugin-promise: "npm:^7.1.0" + express: "npm:^5.2.1" playwright: "npm:^1.49.0" prettier: "npm:^3.3.3" prettier-plugin-packagejson: "npm:^2.5.8" @@ -960,6 +953,8 @@ __metadata: peerDependencies: "@playwright/test": ^1.49.0 playwright: ^1.49.0 + bin: + mm: ./dist/cli/mm.cjs languageName: unknown linkType: soft @@ -1026,39 +1021,6 @@ __metadata: languageName: node linkType: hard -"@modelcontextprotocol/sdk@npm:^1.26.0": - version: 1.26.0 - resolution: "@modelcontextprotocol/sdk@npm:1.26.0" - dependencies: - 
"@hono/node-server": "npm:^1.19.9" - ajv: "npm:^8.17.1" - ajv-formats: "npm:^3.0.1" - content-type: "npm:^1.0.5" - cors: "npm:^2.8.5" - cross-spawn: "npm:^7.0.5" - eventsource: "npm:^3.0.2" - eventsource-parser: "npm:^3.0.0" - express: "npm:^5.2.1" - express-rate-limit: "npm:^8.2.1" - hono: "npm:^4.11.4" - jose: "npm:^6.1.3" - json-schema-typed: "npm:^8.0.2" - pkce-challenge: "npm:^5.0.0" - raw-body: "npm:^3.0.0" - zod: "npm:^3.25 || ^4.0" - zod-to-json-schema: "npm:^3.25.1" - peerDependencies: - "@cfworker/json-schema": ^4.1.1 - zod: ^3.25 || ^4.0 - peerDependenciesMeta: - "@cfworker/json-schema": - optional: true - zod: - optional: false - checksum: 10/a206b2a4d61a23be8b8f4c886528dd9348d11b17ce36013b350edf5c082b1c1f07941d52ea098f721daf3828085b6f6276bb844c484a0e9913edbc028517a3d5 - languageName: node - linkType: hard - "@napi-rs/wasm-runtime@npm:^0.2.11": version: 0.2.12 resolution: "@napi-rs/wasm-runtime@npm:0.2.12" @@ -1623,6 +1585,16 @@ __metadata: languageName: node linkType: hard +"@types/body-parser@npm:*": + version: 1.19.6 + resolution: "@types/body-parser@npm:1.19.6" + dependencies: + "@types/connect": "npm:*" + "@types/node": "npm:*" + checksum: 10/33041e88eae00af2cfa0827e951e5f1751eafab2a8b6fce06cd89ef368a988907996436b1325180edaeddd1c0c7d0d0d4c20a6c9ff294a91e0039a9db9e9b658 + languageName: node + linkType: hard + "@types/chai@npm:^5.2.2": version: 5.2.3 resolution: "@types/chai@npm:5.2.3" @@ -1633,6 +1605,15 @@ __metadata: languageName: node linkType: hard +"@types/connect@npm:*": + version: 3.4.38 + resolution: "@types/connect@npm:3.4.38" + dependencies: + "@types/node": "npm:*" + checksum: 10/7eb1bc5342a9604facd57598a6c62621e244822442976c443efb84ff745246b10d06e8b309b6e80130026a396f19bf6793b7cecd7380169f369dac3bfc46fb99 + languageName: node + linkType: hard + "@types/deep-eql@npm:*": version: 4.0.2 resolution: "@types/deep-eql@npm:4.0.2" @@ -1647,6 +1628,29 @@ __metadata: languageName: node linkType: hard +"@types/express-serve-static-core@npm:^5.0.0": 
+ version: 5.1.1 + resolution: "@types/express-serve-static-core@npm:5.1.1" + dependencies: + "@types/node": "npm:*" + "@types/qs": "npm:*" + "@types/range-parser": "npm:*" + "@types/send": "npm:*" + checksum: 10/7f3d8cf7e68764c9f3e8f6a12825b69ccf5287347fc1c20b29803d4f08a4abc1153ae11d7258852c61aad50f62ef72d4c1b9c97092b0a90462c3dddec2f6026c + languageName: node + linkType: hard + +"@types/express@npm:^5.0.6": + version: 5.0.6 + resolution: "@types/express@npm:5.0.6" + dependencies: + "@types/body-parser": "npm:*" + "@types/express-serve-static-core": "npm:^5.0.0" + "@types/serve-static": "npm:^2" + checksum: 10/da2cc3de1b1a4d7f20ed3fb6f0a8ee08e99feb3c2eb5a8d643db77017d8d0e70fee9e95da38a73f51bcdf5eda3bb6435073c0271dc04fb16fda92e55daf911fa + languageName: node + linkType: hard + "@types/hast@npm:^3.0.0, @types/hast@npm:^3.0.4": version: 3.0.4 resolution: "@types/hast@npm:3.0.4" @@ -1656,6 +1660,13 @@ __metadata: languageName: node linkType: hard +"@types/http-errors@npm:*": + version: 2.0.5 + resolution: "@types/http-errors@npm:2.0.5" + checksum: 10/a88da669366bc483e8f3b3eb3d34ada5f8d13eeeef851b1204d77e2ba6fc42aba4566d877cca5c095204a3f4349b87fe397e3e21288837bdd945dd514120755b + languageName: node + linkType: hard + "@types/json-schema@npm:^7.0.15": version: 7.0.15 resolution: "@types/json-schema@npm:7.0.15" @@ -1679,6 +1690,15 @@ __metadata: languageName: node linkType: hard +"@types/node@npm:*": + version: 25.5.2 + resolution: "@types/node@npm:25.5.2" + dependencies: + undici-types: "npm:~7.18.0" + checksum: 10/11782030f910ecf600cd537791980bd8b68496570ecd633d512d713b5b8a16ea3740fce85c82d0593305f809a7c205d7e86c07f179063fc98f014a7f9b013166 + languageName: node + linkType: hard + "@types/node@npm:^20.0.0": version: 20.19.31 resolution: "@types/node@npm:20.19.31" @@ -1695,6 +1715,39 @@ __metadata: languageName: node linkType: hard +"@types/qs@npm:*": + version: 6.15.0 + resolution: "@types/qs@npm:6.15.0" + checksum: 
10/871162881f1c83e61d0c8c243c65549be5dddf33a6911f3324edeebd4087207b1174644da9a3afaa20cf494c5288d2a1ece09e10e4822f755339f14a05c339ea + languageName: node + linkType: hard + +"@types/range-parser@npm:*": + version: 1.2.7 + resolution: "@types/range-parser@npm:1.2.7" + checksum: 10/95640233b689dfbd85b8c6ee268812a732cf36d5affead89e806fe30da9a430767af8ef2cd661024fd97e19d61f3dec75af2df5e80ec3bea000019ab7028629a + languageName: node + linkType: hard + +"@types/send@npm:*": + version: 1.2.1 + resolution: "@types/send@npm:1.2.1" + dependencies: + "@types/node": "npm:*" + checksum: 10/81ef5790037ba1d2d458392e4241501f0f8b4838cc8797e169e179e099410e12069ec68e8dbd39211cb097c4a9b1ff1682dbcea897ab4ce21dad93438b862d27 + languageName: node + linkType: hard + +"@types/serve-static@npm:^2": + version: 2.2.0 + resolution: "@types/serve-static@npm:2.2.0" + dependencies: + "@types/http-errors": "npm:*" + "@types/node": "npm:*" + checksum: 10/f2bad1304c7d0d3b7221faff3e490c40129d3803f4fb1b2fb84f31f561071c5e6a4b876c41bbbe82d5645034eea936e946bcaaf993dac1093ce68b56effad6e0 + languageName: node + linkType: hard + "@types/unist@npm:*, @types/unist@npm:^3.0.0": version: 3.0.3 resolution: "@types/unist@npm:3.0.3" @@ -2218,20 +2271,6 @@ __metadata: languageName: node linkType: hard -"ajv-formats@npm:^3.0.1": - version: 3.0.1 - resolution: "ajv-formats@npm:3.0.1" - dependencies: - ajv: "npm:^8.0.0" - peerDependencies: - ajv: ^8.0.0 - peerDependenciesMeta: - ajv: - optional: true - checksum: 10/5679b9f9ced9d0213a202a37f3aa91efcffe59a6de1a6e3da5c873344d3c161820a1f11cc29899661fee36271fd2895dd3851b6461c902a752ad661d1c1e8722 - languageName: node - linkType: hard - "ajv@npm:^6.12.4": version: 6.12.6 resolution: "ajv@npm:6.12.6" @@ -2244,18 +2283,6 @@ __metadata: languageName: node linkType: hard -"ajv@npm:^8.0.0, ajv@npm:^8.17.1": - version: 8.17.1 - resolution: "ajv@npm:8.17.1" - dependencies: - fast-deep-equal: "npm:^3.1.3" - fast-uri: "npm:^3.0.1" - json-schema-traverse: "npm:^1.0.0" - 
require-from-string: "npm:^2.0.2" - checksum: 10/ee3c62162c953e91986c838f004132b6a253d700f1e51253b99791e2dbfdb39161bc950ebdc2f156f8568035bb5ed8be7bd78289cd9ecbf3381fe8f5b82e3f33 - languageName: node - linkType: hard - "ansi-escapes@npm:^7.0.0": version: 7.2.0 resolution: "ansi-escapes@npm:7.2.0" @@ -2773,16 +2800,6 @@ __metadata: languageName: node linkType: hard -"cors@npm:^2.8.5": - version: 2.8.6 - resolution: "cors@npm:2.8.6" - dependencies: - object-assign: "npm:^4" - vary: "npm:^1" - checksum: 10/aa7174305b21ceb90f9c84f4eaa32f04432d333addbfdc0d1eb7310393c48902e5364aada5ac2f5d054528d63b3179238444475426fcb74e1e345077de485727 - languageName: node - linkType: hard - "cosmiconfig@npm:^7.1.0": version: 7.1.0 resolution: "cosmiconfig@npm:7.1.0" @@ -2796,6 +2813,23 @@ __metadata: languageName: node linkType: hard +"cosmiconfig@npm:^9.0.0": + version: 9.0.1 + resolution: "cosmiconfig@npm:9.0.1" + dependencies: + env-paths: "npm:^2.2.1" + import-fresh: "npm:^3.3.0" + js-yaml: "npm:^4.1.0" + parse-json: "npm:^5.2.0" + peerDependencies: + typescript: ">=4.9.5" + peerDependenciesMeta: + typescript: + optional: true + checksum: 10/89fcac84d062f0710091bb2d6a6175bcde22f5448877db9c43429694408191d3d4e215193b3ac4d54f7f89ef188d55cd481c7a2295b0dc572e65b528bf6fec01 + languageName: node + linkType: hard + "create-require@npm:^1.1.0": version: 1.1.1 resolution: "create-require@npm:1.1.1" @@ -2803,7 +2837,7 @@ __metadata: languageName: node linkType: hard -"cross-spawn@npm:^7.0.3, cross-spawn@npm:^7.0.5, cross-spawn@npm:^7.0.6": +"cross-spawn@npm:^7.0.3, cross-spawn@npm:^7.0.6": version: 7.0.6 resolution: "cross-spawn@npm:7.0.6" dependencies: @@ -3045,7 +3079,7 @@ __metadata: languageName: node linkType: hard -"env-paths@npm:^2.2.0": +"env-paths@npm:^2.2.0, env-paths@npm:^2.2.1": version: 2.2.1 resolution: "env-paths@npm:2.2.1" checksum: 10/65b5df55a8bab92229ab2b40dad3b387fad24613263d103a97f91c9fe43ceb21965cd3392b1ccb5d77088021e525c4e0481adb309625d0cb94ade1d1fb8dc17e @@ -3623,22 
+3657,6 @@ __metadata: languageName: node linkType: hard -"eventsource-parser@npm:^3.0.0, eventsource-parser@npm:^3.0.1": - version: 3.0.6 - resolution: "eventsource-parser@npm:3.0.6" - checksum: 10/febf7058b9c2168ecbb33e92711a1646e06bd1568f60b6eb6a01a8bf9f8fcd29cc8320d57247059cacf657a296280159f21306d2e3ff33309a9552b2ef889387 - languageName: node - linkType: hard - -"eventsource@npm:^3.0.2": - version: 3.0.7 - resolution: "eventsource@npm:3.0.7" - dependencies: - eventsource-parser: "npm:^3.0.1" - checksum: 10/e034915bc97068d1d38617951afd798e6776d6a3a78e36a7569c235b177c7afc2625c9fe82656f7341ab72c7eeecb3fd507b7f88e9328f2448872ff9c4742bb6 - languageName: node - linkType: hard - "execa@npm:^5.1.1": version: 5.1.1 resolution: "execa@npm:5.1.1" @@ -3679,17 +3697,6 @@ __metadata: languageName: node linkType: hard -"express-rate-limit@npm:^8.2.1": - version: 8.2.1 - resolution: "express-rate-limit@npm:8.2.1" - dependencies: - ip-address: "npm:10.0.1" - peerDependencies: - express: ">= 4.11" - checksum: 10/7cbf70df2e88e590e463d2d8f93380775b2ea181d97f2c50c2ff9f2c666c247f83109a852b21d9c99ccc5762119101f281f54a27252a2f1a0a918be6d71f955b - languageName: node - linkType: hard - "express@npm:^5.2.1": version: 5.2.1 resolution: "express@npm:5.2.1" @@ -3754,13 +3761,6 @@ __metadata: languageName: node linkType: hard -"fast-uri@npm:^3.0.1": - version: 3.1.0 - resolution: "fast-uri@npm:3.1.0" - checksum: 10/818b2c96dc913bcf8511d844c3d2420e2c70b325c0653633f51821e4e29013c2015387944435cd0ef5322c36c9beecc31e44f71b257aeb8e0b333c1d62bb17c2 - languageName: node - linkType: hard - "fdir@npm:^6.4.4, fdir@npm:^6.5.0": version: 6.5.0 resolution: "fdir@npm:6.5.0" @@ -4166,13 +4166,6 @@ __metadata: languageName: node linkType: hard -"hono@npm:^4.11.4": - version: 4.11.7 - resolution: "hono@npm:4.11.7" - checksum: 10/16f5a715f70430bd4050b250207adf7c567774c1d91386d5454577fbc191fc4a50b912628845ce8392fae0e3fd9f364a947412961e3747a9f0b2f714790b738e - languageName: node - linkType: hard - 
"hosted-git-info@npm:^9.0.0": version: 9.0.2 resolution: "hosted-git-info@npm:9.0.2" @@ -4275,7 +4268,7 @@ __metadata: languageName: node linkType: hard -"import-fresh@npm:^3.2.1": +"import-fresh@npm:^3.2.1, import-fresh@npm:^3.3.0": version: 3.3.1 resolution: "import-fresh@npm:3.3.1" dependencies: @@ -4313,13 +4306,6 @@ __metadata: languageName: node linkType: hard -"ip-address@npm:10.0.1": - version: 10.0.1 - resolution: "ip-address@npm:10.0.1" - checksum: 10/09731acda32cd8e14c46830c137e7e5940f47b36d63ffb87c737331270287d631cf25aa95570907a67d3f919fdb25f4470c404eda21e62f22e0a55927f4dd0fb - languageName: node - linkType: hard - "ip-address@npm:^10.0.1": version: 10.1.0 resolution: "ip-address@npm:10.1.0" @@ -4496,13 +4482,6 @@ __metadata: languageName: node linkType: hard -"jose@npm:^6.1.3": - version: 6.1.3 - resolution: "jose@npm:6.1.3" - checksum: 10/9626c51e8c3792b505e954f3094698c182208617b62dfb27269230f31e57560b083985ed8128b8a9753aa92daf18d3a2341cc826d149503f14569abe87d42389 - languageName: node - linkType: hard - "js-tokens@npm:^4.0.0": version: 4.0.0 resolution: "js-tokens@npm:4.0.0" @@ -4529,7 +4508,7 @@ __metadata: languageName: node linkType: hard -"js-yaml@npm:^4.1.1": +"js-yaml@npm:^4.1.0, js-yaml@npm:^4.1.1": version: 4.1.1 resolution: "js-yaml@npm:4.1.1" dependencies: @@ -4584,20 +4563,6 @@ __metadata: languageName: node linkType: hard -"json-schema-traverse@npm:^1.0.0": - version: 1.0.0 - resolution: "json-schema-traverse@npm:1.0.0" - checksum: 10/02f2f466cdb0362558b2f1fd5e15cce82ef55d60cd7f8fa828cf35ba74330f8d767fcae5c5c2adb7851fa811766c694b9405810879bc4e1ddd78a7c0e03658ad - languageName: node - linkType: hard - -"json-schema-typed@npm:^8.0.2": - version: 8.0.2 - resolution: "json-schema-typed@npm:8.0.2" - checksum: 10/fa866d1fe91e3a94aa4fe007861475cd03dcaf47b719861cab171ef2f8598478007c634d29ae45de94ee34ddff4e13414c63ea5ff06c5b868b613142c699d511 - languageName: node - linkType: hard - "json-stable-stringify-without-jsonify@npm:^1.0.1": version: 1.0.1 
resolution: "json-stable-stringify-without-jsonify@npm:1.0.1" @@ -5214,7 +5179,7 @@ __metadata: languageName: node linkType: hard -"object-assign@npm:^4, object-assign@npm:^4.0.1": +"object-assign@npm:^4.0.1": version: 4.1.1 resolution: "object-assign@npm:4.1.1" checksum: 10/fcc6e4ea8c7fe48abfbb552578b1c53e0d194086e2e6bbbf59e0a536381a292f39943c6e9628af05b5528aa5e3318bb30d6b2e53cadaf5b8fe9e12c4b69af23f @@ -5330,7 +5295,7 @@ __metadata: languageName: node linkType: hard -"parse-json@npm:^5.0.0": +"parse-json@npm:^5.0.0, parse-json@npm:^5.2.0": version: 5.2.0 resolution: "parse-json@npm:5.2.0" dependencies: @@ -5476,13 +5441,6 @@ __metadata: languageName: node linkType: hard -"pkce-challenge@npm:^5.0.0": - version: 5.0.1 - resolution: "pkce-challenge@npm:5.0.1" - checksum: 10/51d11f68d5a78617cfb2e9c2706dadcc2cbe55ffb55b21d42a6ed848ac5159db2657bf6c966a5a414119aa839ceb64240afea35e9e1c06946b57606ed0b43789 - languageName: node - linkType: hard - "playwright-core@npm:1.58.1": version: 1.58.1 resolution: "playwright-core@npm:1.58.1" @@ -5630,7 +5588,7 @@ __metadata: languageName: node linkType: hard -"raw-body@npm:^3.0.0, raw-body@npm:^3.0.1": +"raw-body@npm:^3.0.1": version: 3.0.2 resolution: "raw-body@npm:3.0.2" dependencies: @@ -5691,13 +5649,6 @@ __metadata: languageName: node linkType: hard -"require-from-string@npm:^2.0.2": - version: 2.0.2 - resolution: "require-from-string@npm:2.0.2" - checksum: 10/839a3a890102a658f4cb3e7b2aa13a1f80a3a976b512020c3d1efc418491c48a886b6e481ea56afc6c4cb5eef678f23b2a4e70575e7534eccadf5e30ed2e56eb - languageName: node - linkType: hard - "require-package-name@npm:^2.0.1": version: 2.0.1 resolution: "require-package-name@npm:2.0.1" @@ -6649,6 +6600,13 @@ __metadata: languageName: node linkType: hard +"undici-types@npm:~7.18.0": + version: 7.18.2 + resolution: "undici-types@npm:7.18.2" + checksum: 10/e61a5918f624d68420c3ca9d301e9f15b61cba6e97be39fe2ce266dd6151e4afe424d679372638826cb506be33952774e0424141200111a9857e464216c009af + 
languageName: node + linkType: hard + "unicode-emoji-modifier-base@npm:^1.0.0": version: 1.0.0 resolution: "unicode-emoji-modifier-base@npm:1.0.0" @@ -6857,7 +6815,7 @@ __metadata: languageName: node linkType: hard -"vary@npm:^1, vary@npm:^1.1.2": +"vary@npm:^1.1.2": version: 1.1.2 resolution: "vary@npm:1.1.2" checksum: 10/31389debef15a480849b8331b220782230b9815a8e0dbb7b9a8369559aed2e9a7800cd904d4371ea74f4c3527db456dc8e7ac5befce5f0d289014dbdf47b2242 @@ -7258,16 +7216,7 @@ __metadata: languageName: node linkType: hard -"zod-to-json-schema@npm:^3.25.1": - version: 3.25.1 - resolution: "zod-to-json-schema@npm:3.25.1" - peerDependencies: - zod: ^3.25 || ^4 - checksum: 10/744dd370f4452c8db120de1475ea4d484a11df884c4636111d630e5e1351b8a7590d99cf14a2b9f21e7906f8b78721d958663a7973a40994e7d28770876674cc - languageName: node - linkType: hard - -"zod@npm:^3.25 || ^4.0, zod@npm:^4.3.5": +"zod@npm:^4.3.5": version: 4.3.6 resolution: "zod@npm:4.3.6" checksum: 10/25fc0f62e01b557b4644bf0b393bbaf47542ab30877c37837ea8caf314a8713d220c7d7fe51f68ffa72f0e1018ddfa34d96f1973d23033f5a2a5a9b6b9d9da01