From 938c003eb57e1c7ec25e725416c588ff719a85c9 Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 27 Apr 2026 17:54:56 +0800 Subject: [PATCH 01/16] chore(test): add shared helpers and integration scaffold --- package.json | 1 + packages/agent/__tests__/tsconfig.json | 14 ++- packages/agent/jest.config.js | 1 + packages/agentic-kit/__tests__/tsconfig.json | 14 ++- packages/agentic-kit/jest.config.js | 2 + packages/anthropic/__tests__/tsconfig.json | 14 ++- packages/anthropic/jest.config.js | 6 + packages/ollama/__tests__/tsconfig.json | 14 ++- packages/ollama/jest.config.js | 6 + packages/openai/__tests__/tsconfig.json | 14 ++- packages/openai/jest.config.js | 6 + tests/integration/README.md | 14 +++ tests/integration/jest.config.js | 24 ++++ tests/integration/tsconfig.json | 18 +++ tools/test/README.md | 24 ++++ tools/test/fixtures.ts | 40 +++++++ tools/test/index.ts | 3 + tools/test/scripted-provider.ts | 115 +++++++++++++++++++ tools/test/scripted-sse.ts | 88 ++++++++++++++ 19 files changed, 408 insertions(+), 10 deletions(-) create mode 100644 tests/integration/README.md create mode 100644 tests/integration/jest.config.js create mode 100644 tests/integration/tsconfig.json create mode 100644 tools/test/README.md create mode 100644 tools/test/fixtures.ts create mode 100644 tools/test/index.ts create mode 100644 tools/test/scripted-provider.ts create mode 100644 tools/test/scripted-sse.ts diff --git a/package.json b/package.json index c5a1ff6..ad4e2f2 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "build": "pnpm -r run build", "build:dev": "pnpm -r run build:dev", "test": "pnpm -r run test", + "test:integration": "jest --config tests/integration/jest.config.js", "typecheck": "node ./scripts/typecheck.js", "test:live:ollama": "pnpm --filter @agentic-kit/ollama run test:live:smoke", "test:live:ollama:extended": "pnpm --filter @agentic-kit/ollama run test:live:extended", diff --git a/packages/agent/__tests__/tsconfig.json 
b/packages/agent/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/agent/__tests__/tsconfig.json +++ b/packages/agent/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/agent/jest.config.js b/packages/agent/jest.config.js index 6622fd1..2069518 100644 --- a/packages/agent/jest.config.js +++ b/packages/agent/jest.config.js @@ -17,6 +17,7 @@ module.exports = { modulePathIgnorePatterns: ['dist/*'], moduleNameMapper: { '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', '^agentic-kit$': '/../agentic-kit/src', '^@agentic-kit/(.*)$': '/../$1/src', }, diff --git a/packages/agentic-kit/__tests__/tsconfig.json b/packages/agentic-kit/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/agentic-kit/__tests__/tsconfig.json +++ b/packages/agentic-kit/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/agentic-kit/jest.config.js b/packages/agentic-kit/jest.config.js index c539b86..79ccd00 100644 --- a/packages/agentic-kit/jest.config.js +++ 
b/packages/agentic-kit/jest.config.js @@ -17,6 +17,8 @@ module.exports = { modulePathIgnorePatterns: ['dist/*'], moduleNameMapper: { '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/src', '^@agentic-kit/(.*)$': '/../$1/src', }, setupFilesAfterEnv: ['/jest.setup.js'] diff --git a/packages/anthropic/__tests__/tsconfig.json b/packages/anthropic/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/anthropic/__tests__/tsconfig.json +++ b/packages/anthropic/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/anthropic/jest.config.js b/packages/anthropic/jest.config.js index e11f478..d0dfaaa 100644 --- a/packages/anthropic/jest.config.js +++ b/packages/anthropic/jest.config.js @@ -15,5 +15,11 @@ module.exports = { testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$', moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], modulePathIgnorePatterns: ['dist/*'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../agentic-kit/src', + '^@agentic-kit/(.*)$': '/../$1/src', + }, setupFilesAfterEnv: ['/jest.setup.js'] }; diff --git a/packages/ollama/__tests__/tsconfig.json b/packages/ollama/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/ollama/__tests__/tsconfig.json +++ b/packages/ollama/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": 
"../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/ollama/jest.config.js b/packages/ollama/jest.config.js index 5b89d20..061b4b9 100644 --- a/packages/ollama/jest.config.js +++ b/packages/ollama/jest.config.js @@ -16,5 +16,11 @@ module.exports = { moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], modulePathIgnorePatterns: ['dist/*'], testPathIgnorePatterns: process.env.OLLAMA_LIVE_READY === '1' ? [] : ['\\.live\\.test\\.ts$'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../agentic-kit/src', + '^@agentic-kit/(.*)$': '/../$1/src', + }, setupFilesAfterEnv: ['/jest.setup.js'] }; diff --git a/packages/openai/__tests__/tsconfig.json b/packages/openai/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/openai/__tests__/tsconfig.json +++ b/packages/openai/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/openai/jest.config.js b/packages/openai/jest.config.js index e11f478..d0dfaaa 100644 --- a/packages/openai/jest.config.js +++ b/packages/openai/jest.config.js @@ -15,5 +15,11 @@ module.exports = { testRegex: 
'(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$', moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], modulePathIgnorePatterns: ['dist/*'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../agentic-kit/src', + '^@agentic-kit/(.*)$': '/../$1/src', + }, setupFilesAfterEnv: ['/jest.setup.js'] }; diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..3ec22dd --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,14 @@ +# Integration tests + +Workspace-level lane that runs in-process integration tests against the kit's +HTTP boundary. Brings up `http.createServer`, exercises real serialization +and `fetch`, with mocked providers. + +Empty in Phase 0 — scaffolding only. First tests land with Phase 1.3 (run +serialization helpers) and 1.1 (pause/resume). Run with: + +```sh +pnpm test:integration +``` + +`passWithNoTests` is set, so the script is safe to run while empty. 
diff --git a/tests/integration/jest.config.js b/tests/integration/jest.config.js new file mode 100644 index 0000000..4507567 --- /dev/null +++ b/tests/integration/jest.config.js @@ -0,0 +1,24 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} */ +module.exports = { + preset: 'ts-jest', + testEnvironment: 'node', + rootDir: '.', + passWithNoTests: true, + transform: { + '^.+\\.tsx?$': [ + 'ts-jest', + { + babelConfig: false, + tsconfig: '/tsconfig.json', + }, + ], + }, + testRegex: '\\.test\\.(jsx?|tsx?)$', + moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../../packages/agentic-kit/src', + '^@agentic-kit/(.*)$': '/../../packages/$1/src', + }, +}; diff --git a/tests/integration/tsconfig.json b/tests/integration/tsconfig.json new file mode 100644 index 0000000..f287326 --- /dev/null +++ b/tests/integration/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "rootDir": "../..", + "baseUrl": "../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"], + "@agentic-kit/anthropic": ["packages/anthropic/src"], + "@agentic-kit/openai": ["packages/openai/src"], + "@agentic-kit/ollama": ["packages/ollama/src"] + }, + "types": ["jest", "node"] + }, + "include": ["./**/*.ts", "../../tools/test/**/*.ts"] +} diff --git a/tools/test/README.md b/tools/test/README.md new file mode 100644 index 0000000..0513eb6 --- /dev/null +++ b/tools/test/README.md @@ -0,0 +1,24 @@ +# Shared test helpers + +Repo-internal helpers for unit tests. Imported via the `@test/*` tsconfig path +alias (see each package's `__tests__/tsconfig.json` and `jest.config.js`). +Not a workspace package, not published. + +## Helpers + +- `makeFakeModel(overrides?)` — `ModelDescriptor` with sane defaults. 
+- `makeFakeAssistantMessage(overrides?)` — `AssistantMessage` with zero usage and stop reason. +- `createScriptedProvider({ responses })` — `ProviderAdapter` that emits a derived event sequence per `AssistantMessage` in `responses` on successive `stream()` calls. `stopReason` of `error` or `aborted` produces an `error` terminal event; otherwise `done`. +- `createScriptedSSEResponse(events)` — `Response` whose body serializes each `AgentEvent` as one SSE frame (`data: \n\n`). +- `parseSSEStream(stream)` — async iterable that parses `AgentEvent` SSE frames from a `ReadableStream`. Handles split chunks, multi-line `data:`, comment lines, event-type framing, trailing newlines, and mid-event abort (incomplete trailing event is dropped, per SSE spec). + +## Deferred + +`runRunStoreContractTests(makeStore)` lands with Phase 1.2 alongside the +`RunStore` interface. Adding it now would be dead scaffolding. + +## Adding a helper + +Promote a helper to `tools/test/` only when a third package needs the same +idiom. Duplication of a 30-line scripted helper across two packages is fine; +duplicating across three is the trigger for promotion. 
diff --git a/tools/test/fixtures.ts b/tools/test/fixtures.ts new file mode 100644 index 0000000..003fa2b --- /dev/null +++ b/tools/test/fixtures.ts @@ -0,0 +1,40 @@ +import type { AssistantMessage, ModelDescriptor, Usage } from 'agentic-kit'; + +const ZERO_USAGE: Usage = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, +}; + +export function makeFakeModel(overrides: Partial = {}): ModelDescriptor { + return { + id: 'demo', + name: 'Demo', + api: 'fake-api', + provider: 'fake', + baseUrl: 'http://fake.local', + input: ['text'], + reasoning: false, + tools: true, + ...overrides, + }; +} + +export function makeFakeAssistantMessage( + overrides: Partial = {} +): AssistantMessage { + return { + role: 'assistant', + api: 'fake-api', + provider: 'fake', + model: 'demo', + usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } }, + stopReason: 'stop', + timestamp: Date.now(), + content: [{ type: 'text', text: '' }], + ...overrides, + }; +} diff --git a/tools/test/index.ts b/tools/test/index.ts new file mode 100644 index 0000000..4c8ab2e --- /dev/null +++ b/tools/test/index.ts @@ -0,0 +1,3 @@ +export { makeFakeAssistantMessage, makeFakeModel } from './fixtures'; +export { createScriptedProvider, type ScriptedProviderOptions } from './scripted-provider'; +export { createScriptedSSEResponse, parseSSEStream } from './scripted-sse'; diff --git a/tools/test/scripted-provider.ts b/tools/test/scripted-provider.ts new file mode 100644 index 0000000..da43921 --- /dev/null +++ b/tools/test/scripted-provider.ts @@ -0,0 +1,115 @@ +import { + type AssistantMessage, + type AssistantMessageEvent, + createAssistantMessageEventStream, + type ModelDescriptor, + type ProviderAdapter, +} from 'agentic-kit'; + +import { makeFakeAssistantMessage, makeFakeModel } from './fixtures'; + +export interface ScriptedProviderOptions { + responses: AssistantMessage[]; + delayMs?: number; + api?: string; + 
provider?: string; +} + +export function createScriptedProvider(opts: ScriptedProviderOptions): ProviderAdapter { + const api = opts.api ?? 'fake-api'; + const provider = opts.provider ?? 'fake'; + let callIndex = 0; + + return { + api, + provider, + createModel: (modelId: string, overrides?: Partial) => + makeFakeModel({ id: modelId, api, provider, ...overrides }), + stream: () => { + const stream = createAssistantMessageEventStream(); + const message = + opts.responses[callIndex++] ?? + makeFakeAssistantMessage({ + api, + provider, + stopReason: 'error', + errorMessage: 'scripted provider: no response queued for this call', + content: [], + }); + + const events = deriveEventSequence(message); + const emit = () => { + for (const event of events) { + stream.push(event); + } + stream.end(message); + }; + + if (opts.delayMs && opts.delayMs > 0) { + setTimeout(emit, opts.delayMs); + } else { + queueMicrotask(emit); + } + + return stream; + }, + }; +} + +function deriveEventSequence(message: AssistantMessage): AssistantMessageEvent[] { + const events: AssistantMessageEvent[] = []; + events.push({ type: 'start', partial: message }); + + for (let i = 0; i < message.content.length; i++) { + const block = message.content[i]; + if (block.type === 'text') { + events.push({ type: 'text_start', contentIndex: i, partial: message }); + if (block.text.length > 0) { + events.push({ + type: 'text_delta', + contentIndex: i, + delta: block.text, + partial: message, + }); + } + events.push({ + type: 'text_end', + contentIndex: i, + content: block.text, + partial: message, + }); + } else if (block.type === 'thinking') { + events.push({ type: 'thinking_start', contentIndex: i, partial: message }); + if (block.thinking.length > 0) { + events.push({ + type: 'thinking_delta', + contentIndex: i, + delta: block.thinking, + partial: message, + }); + } + events.push({ + type: 'thinking_end', + contentIndex: i, + content: block.thinking, + partial: message, + }); + } else if (block.type === 
'toolCall') { + events.push({ type: 'toolcall_start', contentIndex: i, partial: message }); + events.push({ + type: 'toolcall_end', + contentIndex: i, + toolCall: block, + partial: message, + }); + } + } + + if (message.stopReason === 'error' || message.stopReason === 'aborted') { + events.push({ type: 'error', reason: message.stopReason, error: message }); + } else { + events.push({ type: 'done', reason: message.stopReason, message }); + } + + return events; +} diff --git a/tools/test/scripted-sse.ts b/tools/test/scripted-sse.ts new file mode 100644 index 0000000..ae7497e --- /dev/null +++ b/tools/test/scripted-sse.ts @@ -0,0 +1,88 @@ +import type { AgentEvent } from '@agentic-kit/agent'; + +export function createScriptedSSEResponse(events: AgentEvent[]): Response { + const encoder = new TextEncoder(); + const body = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`)); + } + controller.close(); + }, + }); + + return new Response(body, { + status: 200, + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + }, + }); +} + +export async function* parseSSEStream( + stream: ReadableStream +): AsyncIterable { + const reader = stream.getReader(); + const decoder = new TextDecoder('utf-8'); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + buffer = buffer.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + + let blankIdx = buffer.indexOf('\n\n'); + while (blankIdx !== -1) { + const rawEvent = buffer.slice(0, blankIdx); + buffer = buffer.slice(blankIdx + 2); + const event = parseEvent(rawEvent); + if (event) { + yield event; + } + blankIdx = buffer.indexOf('\n\n'); + } + } + } finally { + reader.releaseLock(); + } +} + +function parseEvent(raw: string): AgentEvent | null { + const dataLines: string[] 
= []; + for (const line of raw.split('\n')) { + if (line === '' || line.startsWith(':')) { + continue; + } + const colon = line.indexOf(':'); + const field = colon === -1 ? line : line.slice(0, colon); + let value = colon === -1 ? '' : line.slice(colon + 1); + if (value.startsWith(' ')) { + value = value.slice(1); + } + if (field === 'data') { + dataLines.push(value); + } + } + + if (dataLines.length === 0) { + return null; + } + + const data = dataLines.join('\n'); + if (data === '[DONE]') { + return null; + } + + try { + return JSON.parse(data) as AgentEvent; + } catch { + return null; + } +} From 0dbeba86e393f2460bcd9ac3f00784128b7c81ae Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 27 Apr 2026 17:55:06 +0800 Subject: [PATCH 02/16] test: refactor unit tests to shared helpers --- packages/agent/__tests__/agent.test.ts | 211 +++--------------- .../agentic-kit/__tests__/adapter.test.ts | 162 +++----------- 2 files changed, 69 insertions(+), 304 deletions(-) diff --git a/packages/agent/__tests__/agent.test.ts b/packages/agent/__tests__/agent.test.ts index aa6681c..c231f46 100644 --- a/packages/agent/__tests__/agent.test.ts +++ b/packages/agent/__tests__/agent.test.ts @@ -4,117 +4,35 @@ import { createAssistantMessageEventStream, type ModelDescriptor, } from 'agentic-kit'; +import { + createScriptedProvider, + makeFakeAssistantMessage, + makeFakeModel, +} from '@test/index'; import { Agent } from '../src'; -function createModel(): ModelDescriptor { - return { - id: 'demo', - name: 'Demo', - api: 'fake', - provider: 'fake', - baseUrl: 'http://fake.local', - input: ['text'], - reasoning: false, - tools: true, - }; -} - describe('@agentic-kit/agent', () => { it('runs a minimal sequential tool loop', async () => { const responses = [ - { - role: 'assistant' as const, - api: 'fake', - provider: 'fake', - model: 'demo', - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 
}, - }, - stopReason: 'toolUse' as const, - timestamp: Date.now(), + makeFakeAssistantMessage({ + usage: makeUsage(), + stopReason: 'toolUse', content: [ - { type: 'toolCall' as const, id: 'tool_1', name: 'echo', arguments: { text: 'hello' } }, + { type: 'toolCall', id: 'tool_1', name: 'echo', arguments: { text: 'hello' } }, ], - }, - { - role: 'assistant' as const, - api: 'fake', - provider: 'fake', - model: 'demo', - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: 'stop' as const, - timestamp: Date.now(), - content: [{ type: 'text' as const, text: 'done' }], - }, + }), + makeFakeAssistantMessage({ + usage: makeUsage(), + stopReason: 'stop', + content: [{ type: 'text', text: 'done' }], + }), ]; - let callIndex = 0; - const streamFn = (_model: ModelDescriptor, _context: Context) => { - const stream = createAssistantMessageEventStream(); - const response = responses[callIndex++]; - - queueMicrotask(() => { - stream.push({ type: 'start', partial: response }); - if (response.content[0].type === 'toolCall') { - stream.push({ - type: 'toolcall_start', - contentIndex: 0, - partial: response, - }); - stream.push({ - type: 'toolcall_end', - contentIndex: 0, - toolCall: response.content[0], - partial: response, - }); - } else { - stream.push({ - type: 'text_start', - contentIndex: 0, - partial: response, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: 'done', - partial: response, - }); - stream.push({ - type: 'text_end', - contentIndex: 0, - content: 'done', - partial: response, - }); - } - stream.push({ - type: 'done', - reason: response.stopReason === 'toolUse' ? 
'toolUse' : 'stop', - message: response, - }); - stream.end(response); - }); - - return stream; - }; - + const provider = createScriptedProvider({ responses }); const agent = new Agent({ - initialState: { - model: createModel(), - }, - streamFn, + initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) }, + streamFn: provider.stream, }); agent.setTools([ @@ -155,20 +73,20 @@ describe('@agentic-kit/agent', () => { it('turns tool argument validation failures into error tool results and continues', async () => { const responses = [ - createAssistantResponse({ + makeFakeAssistantMessage({ stopReason: 'toolUse', content: [{ type: 'toolCall', id: 'tool_1', name: 'echo', arguments: {} }], }), - createAssistantResponse({ + makeFakeAssistantMessage({ stopReason: 'stop', content: [{ type: 'text', text: 'recovered' }], }), ]; - let callIndex = 0; + const provider = createScriptedProvider({ responses }); const agent = new Agent({ - initialState: { model: createModel() }, - streamFn: () => streamMessage(responses[callIndex++]), + initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) }, + streamFn: provider.stream, }); const execute = jest.fn(async () => ({ @@ -211,10 +129,10 @@ describe('@agentic-kit/agent', () => { it('records aborted assistant turns when the active stream is cancelled', async () => { const agent = new Agent({ - initialState: { model: createModel() }, + initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) }, streamFn: (_model: ModelDescriptor, _context: Context, options) => { const stream = createAssistantMessageEventStream(); - const partial = createAssistantResponse({ + const partial = makeFakeAssistantMessage({ stopReason: 'stop', content: [{ type: 'text', text: '' }], }); @@ -225,7 +143,7 @@ describe('@agentic-kit/agent', () => { options?.signal?.addEventListener( 'abort', () => { - const aborted = createAssistantResponse({ + const aborted: AssistantMessage = makeFakeAssistantMessage({ stopReason: 'aborted', 
errorMessage: 'aborted by test', content: [], @@ -256,76 +174,13 @@ describe('@agentic-kit/agent', () => { }); }); -function createAssistantResponse(overrides: Partial): AssistantMessage { +function makeUsage() { return { - ...createAssistantResponseBase(), - ...overrides, + input: 1, + output: 1, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 2, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }; } - -function createAssistantResponseBase(): AssistantMessage { - return { - role: 'assistant' as const, - api: 'fake', - provider: 'fake', - model: 'demo', - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: 'stop' as const, - timestamp: Date.now(), - content: [] as AssistantMessage['content'], - }; -} - -function streamMessage(message: AssistantMessage) { - const stream = createAssistantMessageEventStream(); - - queueMicrotask(() => { - stream.push({ type: 'start', partial: message }); - if (message.content[0]?.type === 'toolCall') { - stream.push({ - type: 'toolcall_start', - contentIndex: 0, - partial: message, - }); - stream.push({ - type: 'toolcall_end', - contentIndex: 0, - toolCall: message.content[0], - partial: message, - }); - } else { - stream.push({ - type: 'text_start', - contentIndex: 0, - partial: message, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: message.content[0]?.type === 'text' ? message.content[0].text : '', - partial: message, - }); - stream.push({ - type: 'text_end', - contentIndex: 0, - content: message.content[0]?.type === 'text' ? message.content[0].text : '', - partial: message, - }); - } - stream.push({ - type: 'done', - reason: message.stopReason === 'toolUse' ? 
'toolUse' : 'stop', - message, - }); - stream.end(message); - }); - - return stream; -} diff --git a/packages/agentic-kit/__tests__/adapter.test.ts b/packages/agentic-kit/__tests__/adapter.test.ts index b186f64..60dccdd 100644 --- a/packages/agentic-kit/__tests__/adapter.test.ts +++ b/packages/agentic-kit/__tests__/adapter.test.ts @@ -1,47 +1,28 @@ +import { + createScriptedProvider, + makeFakeAssistantMessage, + makeFakeModel, +} from '@test/index'; + import { AgentKit, type AssistantMessage, - createAssistantMessageEventStream, getMessageText, type ModelDescriptor, - type ProviderAdapter, transformMessages, } from '../src'; function createFakeModel(): ModelDescriptor { - return { - id: 'demo', - name: 'Demo', - api: 'fake-api', - provider: 'fake', - baseUrl: 'http://fake.local', - input: ['text'], - reasoning: false, - tools: true, - }; + return makeFakeModel({ name: 'Demo' }); } function createAssistantMessage( overrides: Partial = {} ): AssistantMessage { - return { - role: 'assistant', - api: 'fake-api', - provider: 'fake', - model: 'demo', - usage: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 0, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: 'stop', - timestamp: Date.now(), + return makeFakeAssistantMessage({ content: [{ type: 'text', text: 'hello world' }], ...overrides, - }; + }); } describe('agentic-kit core', () => { @@ -236,43 +217,11 @@ describe('agentic-kit core', () => { }); it('keeps the legacy AgentKit generate API working through structured streams', async () => { - const provider: ProviderAdapter & { name: string } = { - api: 'fake-api', - provider: 'fake', - name: 'fake', - createModel: () => createFakeModel(), - stream: () => { - const stream = createAssistantMessageEventStream(); - const message = createAssistantMessage(); - - queueMicrotask(() => { - stream.push({ type: 'start', partial: { ...message, content: [{ type: 'text', text: '' }] } }); - stream.push({ - type: 
'text_start', - contentIndex: 0, - partial: { ...message, content: [{ type: 'text', text: '' }] }, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: 'hello world', - partial: message, - }); - stream.push({ - type: 'text_end', - contentIndex: 0, - content: 'hello world', - partial: message, - }); - stream.push({ type: 'done', reason: 'stop', message }); - stream.end(message); - }); - - return stream; - }, - }; - - const kit = new AgentKit().addProvider(provider); + const kit = new AgentKit().addProvider( + createScriptedProvider({ + responses: [createAssistantMessage(), createAssistantMessage()], + }) + ); const chunks: string[] = []; await kit.generate( { model: 'demo', prompt: 'hi', stream: true }, @@ -284,29 +233,17 @@ describe('agentic-kit core', () => { }); it('rejects legacy generate when a provider returns a terminal error in non-stream mode', async () => { - const provider: ProviderAdapter & { name: string } = { - api: 'fake-api', - provider: 'fake', - name: 'fake', - createModel: () => createFakeModel(), - stream: () => { - const stream = createAssistantMessageEventStream(); - const failure = createAssistantMessage({ - stopReason: 'error', - errorMessage: 'provider failed', - content: [{ type: 'text', text: '' }], - }); - - queueMicrotask(() => { - stream.push({ type: 'error', reason: 'error', error: failure }); - stream.end(failure); - }); - - return stream; - }, - }; - - const kit = new AgentKit().addProvider(provider); + const kit = new AgentKit().addProvider( + createScriptedProvider({ + responses: [ + createAssistantMessage({ + stopReason: 'error', + errorMessage: 'provider failed', + content: [{ type: 'text', text: '' }], + }), + ], + }) + ); const onComplete = jest.fn(); const onError = jest.fn(); const onStateChange = jest.fn(); @@ -324,44 +261,17 @@ describe('agentic-kit core', () => { }); it('rejects legacy generate when a provider returns a terminal error in stream mode', async () => { - const provider: ProviderAdapter & { 
name: string } = { - api: 'fake-api', - provider: 'fake', - name: 'fake', - createModel: () => createFakeModel(), - stream: () => { - const stream = createAssistantMessageEventStream(); - const partial = createAssistantMessage({ - content: [{ type: 'text', text: 'partial' }], - }); - const failure = createAssistantMessage({ - stopReason: 'error', - errorMessage: 'provider failed', - content: [{ type: 'text', text: 'partial' }], - }); - - queueMicrotask(() => { - stream.push({ type: 'start', partial: { ...partial, content: [{ type: 'text', text: '' }] } }); - stream.push({ - type: 'text_start', - contentIndex: 0, - partial: { ...partial, content: [{ type: 'text', text: '' }] }, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: 'partial', - partial, - }); - stream.push({ type: 'error', reason: 'error', error: failure }); - stream.end(failure); - }); - - return stream; - }, - }; - - const kit = new AgentKit().addProvider(provider); + const kit = new AgentKit().addProvider( + createScriptedProvider({ + responses: [ + createAssistantMessage({ + stopReason: 'error', + errorMessage: 'provider failed', + content: [{ type: 'text', text: 'partial' }], + }), + ], + }) + ); const chunks: string[] = []; const onComplete = jest.fn(); const onError = jest.fn(); From a1fae59755ff6698b45b6528b0c84b5154d94d45 Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 27 Apr 2026 17:55:06 +0800 Subject: [PATCH 03/16] test(agent): add sse parser tests --- packages/agent/__tests__/sse.test.ts | 114 +++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 packages/agent/__tests__/sse.test.ts diff --git a/packages/agent/__tests__/sse.test.ts b/packages/agent/__tests__/sse.test.ts new file mode 100644 index 0000000..682e3dd --- /dev/null +++ b/packages/agent/__tests__/sse.test.ts @@ -0,0 +1,114 @@ +// Exercises the `parseSSEStream` helper from tools/test/, not a production +// parser. The kit ships no SSE parser today; consumers parse on their side. 
+// These tests pin down the helper's edge-case behavior so future parser work +// has a baseline to match. +import { parseSSEStream } from '@test/index'; + +import type { AgentEvent } from '../src'; + +const encoder = new TextEncoder(); + +function streamFromChunks(chunks: string[]): ReadableStream { + return new ReadableStream({ + start(controller) { + for (const chunk of chunks) { + controller.enqueue(encoder.encode(chunk)); + } + controller.close(); + }, + }); +} + +async function collect(stream: ReadableStream): Promise { + const out: AgentEvent[] = []; + for await (const event of parseSSEStream(stream)) { + out.push(event); + } + return out; +} + +describe('parseSSEStream', () => { + it('parses a single complete event', async () => { + const events = await collect(streamFromChunks(['data: {"type":"agent_start"}\n\n'])); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('reassembles a payload split across chunks', async () => { + const events = await collect( + streamFromChunks(['data: {"type":"agen', 't_start"}\n', '\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('joins multiple data: lines with newlines into a single payload', async () => { + const events = await collect( + streamFromChunks(['data: {"type":\ndata: "agent_start"}\n\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('ignores comment lines starting with `:`', async () => { + const events = await collect( + streamFromChunks([': keepalive\ndata: {"type":"turn_start"}\n\n']) + ); + expect(events).toEqual([{ type: 'turn_start' }]); + }); + + it('ignores event:, id:, and retry: framing fields', async () => { + const events = await collect( + streamFromChunks([ + 'event: turn_start\nid: 1\nretry: 1000\ndata: {"type":"turn_start"}\n\n', + ]) + ); + expect(events).toEqual([{ type: 'turn_start' }]); + }); + + it('skips a [DONE] marker without yielding an event', async () => { + const events = await collect( + streamFromChunks([ + 
'data: {"type":"agent_start"}\n\ndata: [DONE]\n\n', + ]) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('handles trailing newlines without emitting a spurious event', async () => { + const events = await collect( + streamFromChunks(['data: {"type":"agent_start"}\n\n\n\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('handles CRLF line endings', async () => { + const events = await collect( + streamFromChunks(['data: {"type":"agent_start"}\r\n\r\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('drops a final incomplete event when the stream ends mid-event', async () => { + const events = await collect( + streamFromChunks([ + 'data: {"type":"agent_start"}\n\n', + 'data: {"type":"turn_start"}', + ]) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('yields multiple complete events in order', async () => { + const events = await collect( + streamFromChunks([ + 'data: {"type":"agent_start"}\n\ndata: {"type":"turn_start"}\n\n', + ]) + ); + expect(events).toEqual([{ type: 'agent_start' }, { type: 'turn_start' }]); + }); + + it('honors an optional space after the `data:` field name', async () => { + const events = await collect( + streamFromChunks(['data:{"type":"agent_start"}\n\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); +}); From 536d30eefd04e1454a1f9bd426731ea150131291 Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 27 Apr 2026 17:55:07 +0800 Subject: [PATCH 04/16] docs: add roadmap --- ROADMAP.md | 588 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 588 insertions(+) create mode 100644 ROADMAP.md diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..adbf0f4 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,588 @@ +# Agentic Kit Roadmap + +This document plans the next phases of work for `agentic-kit`. It supersedes +neither `REDESIGN_DECISIONS.md` nor `README.md` — those describe what exists. 
+This describes what will exist next, why, and what is explicitly out of scope. + +## Current State (snapshot) + +| Package | Status | +|---|---| +| `agentic-kit` | Core portability layer. Streaming, message model, providers registry, cross-provider transforms, usage/cost. | +| `@agentic-kit/agent` | Sequential agent loop. Tool execution, lifecycle events, abort/continue, JSON Schema validation. | +| `@agentic-kit/anthropic` | Provider adapter. Streaming, thinking, tool calls, multimodal, abort. | +| `@agentic-kit/openai` | Provider adapter. Streaming, reasoning, tool calls, multimodal, abort. OpenAI-compatible endpoints. | +| `@agentic-kit/ollama` | Provider adapter. Local inference, embeddings. **Tool execution in streaming is a stub.** | + +The agent loop today runs to completion in-process: it does not pause for +out-of-band input and has no transport layer above it. Consumers wire it into +their own HTTP layer and supply their own React bindings. + +## Design Principles (carried forward) + +- Provider-agnostic core; OpenAI-compatible is a compatibility class, not a brand. +- No schema-library coupling at the core (JSON Schema only). +- Normalize provider differences inward; do not leak them. +- Runtime-agnostic; consume standard Web platform primitives (`Response`, + `ReadableStream`, `AbortSignal`, `fetch`). +- Headless. The kit ships no opinionated UI. +- Composable. Core stays minimal; extensions are opt-in packages. +- Storage is pluggable. Defaults work for development; production swaps in. + +## Phase 0 — Test Infrastructure (do first) + +Phase 1 cannot land cleanly without a small set of shared test helpers. Build +these first; everything afterward inherits the same testing idiom. + +### 0.1 Test Conventions + +Three rules the kit follows: + +1. **Deterministic by default.** Every package's default `pnpm test` runs only + unit tests against scripted mocks. No network, no API keys, no flakes. +2. 
**Live tests are gated and opt-in.** Files named `*.live.test.ts` and + workspace scripts like `test:live:*` exist for exercising real provider + APIs. Never required in CI by default. +3. **One environment per package.** Most packages run `testEnvironment: 'node'`. + The single exception is `@agentic-kit/react`, which runs `jsdom`. There is + no workspace-wide jsdom; the asymmetry is intentional. + +### 0.2 Shared Test Helpers (repo-internal, not a package) + +The kit needs a small set of reusable test helpers — scripted providers, SSE +stubs, parsers, contract suites. These live as a **repo-internal directory**, +not a published package and not a workspace package. + +Layout: `tools/test/` at the repo root, plain `.ts` files, imported via a +tsconfig `paths` alias (e.g., `@test/scripted-provider`) from each package's +test config. No `package.json`, no version, no public API surface, no +publishing concerns. + +Why not a package: +- Dev-only code in a `"private": true` workspace package is a publishing + ceremony with no upside; the alternative is a directory. +- Promotes test code to a load-bearing public API the moment a consumer + installs it. +- Reference: AI SDK keeps its test helpers in-package, not as a separate + workspace package. + +Helpers live wherever they are simplest to maintain: shared idioms in +`tools/test/`, package-specific helpers co-located in that package's +`__tests__/`. Duplication of a 30-line scripted provider across packages is +acceptable; promotion to `tools/test/` happens when a third package needs the +same helper. 
+```ts
+// scripted mock provider — replaces inline streamFn boilerplate
+function createScriptedProvider(opts: {
+  responses: AssistantMessageResponse[]
+  delayMs?: number
+}): ProviderAdapter
+
+// SSE response stub for serialization tests and useChat fetch mocks
+function createScriptedSSEResponse(events: AgentEvent[]): Response
+
+// SSE parser for assertions on emitted bytes
+function parseSSEStream(stream: ReadableStream<Uint8Array>): AsyncIterable<AgentEvent>
+
+// portable contract suite for any RunStore implementation
+function runRunStoreContractTests(makeStore: () => RunStore | Promise<RunStore>): void
+
+// small fixtures
+function makeFakeModel(overrides?: Partial<ModelDescriptor>): ModelDescriptor
+```
Tools may declare a `decision` schema; when the loop hits +such a tool, it persists run state, emits a structured event, and waits for a +matching decision payload before continuing. Everything else in Phase 1 follows +from this. + +### 1.1 Pausable Tools + +#### Problem + +Many real agent flows need structured input from outside the loop before a +tool can be considered safe or actionable: human approval on destructive +operations, multi-choice routing on a generated proposal, signed authorization, +delayed completion of a long-running external job. Today the loop has no way +to express this — tools must either run unconditionally or be elided. + +#### Design + +Extend `AgentTool` with an optional `decision` JSON Schema. The agent loop: + +1. When the LLM emits a call to a tool that declares `decision`: + - Validate the LLM's input against `parameters` as today. + - Emit a `tool_decision_pending` event with the input and the schema. + - Persist the run via the configured `RunStore` (see 1.2). + - Halt the loop and return. +2. The host invokes `agent.resume(runId, decision)`: + - Load run state from the `RunStore`. + - Validate `decision` against the tool's `decision` schema. + - Call `tool.execute(input, decision, ctx)`. + - Continue the loop with the result. + +Tools without a `decision` schema run as today — synchronously inside the loop. + +#### API + +```ts +interface AgentTool extends ToolDefinition { + label: string + decision?: JsonSchema // optional; declares structured outside-input + execute( + toolCallId: string, + input: Record, + decision: unknown, // undefined for non-pausable tools + signal?: AbortSignal, + onUpdate?: (partial: AgentToolResult) => void, + ): Promise +} + +class Agent { + // existing + prompt(input: string | Message): Promise + abort(): void + + // new + resume(runId: string, decision: unknown): Promise +} + +type AgentEvent = + // ... 
existing events + | { type: 'tool_decision_pending' + runId: string + toolCallId: string + toolName: string + input: Record + schema: JsonSchema } +``` + +A pausable tool with no `decision` is invalid — the field's presence is the +mechanism. Validation runs before `execute` is called; a malformed decision +rejects with a typed error and does not consume the run. + +#### Naming + +The field is named **`decision`** because the dominant case is a user or +upstream system choosing how the tool should proceed. The variable inside +`execute` is also `decision`; React surfaces it as `respondWithDecision`. If +later phases introduce a categorically different out-of-band input (e.g., raw +results from a client-executed tool), it gets a sibling field with its own +shape — the kit does not over-generalize now. + +#### Testing + +Unit tests in `@agentic-kit/agent`. Uses `createScriptedProvider` from 0.2. + +- Scripted provider emits a tool call to a `decision`-bearing tool. Assert: + `tool_decision_pending` event emitted, `runStore.save` called, loop halted. +- `agent.resume(runId, valid)` with a fresh scripted response. Assert: + `tool.execute` invoked with the decision argument, loop continues, final + event emitted. +- Resume with a decision that fails schema validation. Assert: typed + validation error, run not consumed, retry permitted. +- Resume with non-existent `runId`. Assert: typed `RunNotFound` error. +- `agent.abort()` while paused. Assert: clean cancellation, run cleaned up. +- Tool without `decision` still runs synchronously (regression guard). + +### 1.2 RunStore + +#### Problem + +Pause/resume across HTTP requests requires the loop's state to survive between +the pause and the resume call. The kit must define where that state lives +without forcing a specific backend on consumers. + +#### Design + +A small interface plus a default implementation. The kit owns the schema of +what gets persisted (the run record); the consumer owns where it lands. 
+ +```ts +interface AgentRun { + id: string + model: string + systemPrompt?: string + tools: ToolDefinition[] + messages: Message[] + pending?: { + toolCallId: string + toolName: string + input: Record + } + createdAt: number + updatedAt: number +} + +interface RunStore { + save(run: AgentRun): Promise + load(id: string): Promise + delete(id: string): Promise +} + +class MemoryRunStore implements RunStore { /* default, ephemeral */ } +``` + +`@agentic-kit/agent` ships `MemoryRunStore` for development and single-process +deployments. Production users supply a Redis-, KV-, or DB-backed implementation. +The kit ships no production backend. + +The kit deliberately does **not** persist final conversation history. That is a +consumer concern. See 1.4 for lifecycle hooks. + +#### Testing + +Unit tests in `@agentic-kit/agent`. + +- `MemoryRunStore`: save → load round-trip; `load` of missing id returns + `undefined`; `delete` is idempotent; `delete` then `load` returns `undefined`. +- `runRunStoreContractTests(makeMemoryStore)` from 0.2 runs the portable + contract suite against `MemoryRunStore`. The same export is consumed by + any third-party `RunStore` implementation. +- Concurrent save/load on the same id (last write wins, no torn reads). + +### 1.3 Run Serialization Helpers + +#### Problem + +The agent emits a stream of typed events. To use it across an HTTP boundary — +or any boundary that requires bytes — the consumer needs to serialize. The +kit should ship the canonical form so consumers do not reinvent it. + +#### Design + +Standard Web primitives only. No framework helpers. The agent run object +exposes both pull-based and push-based access. + +```ts +interface AgentRunHandle { + events(): AsyncIterable + toReadableStream(): ReadableStream + toResponse(init?: ResponseInit): Response // SSE-shaped body +} + +const handle = agent.start({ messages, ... 
}) +return handle.toResponse() +``` + +`toResponse` returns a `Response` with `Content-Type: text/event-stream`, each +`AgentEvent` serialized as one SSE frame. Compatible with any runtime that +speaks standard `Response` and `ReadableStream`: Next.js App Router, Hono, +Bun, Deno, Cloudflare Workers, raw Node 18+. + +A symmetric pair handles resume: + +```ts +const handle = agent.resumeRun({ runId, decision, runStore }) +return handle.toResponse() +``` + +The wire format is the kit's `AgentEvent` discriminated union, serialized as +JSON in SSE `data:` lines. No translation to any third-party protocol; if a +consumer wants to bridge to one, they write the bridge. + +#### Testing + +Unit tests in `@agentic-kit/agent`. + +- `events()`: scripted provider events come out of the async iterable in + emission order with correct shapes. +- `toReadableStream()`: bytes parsed back via `parseSSEStream` (from 0.2) + reproduce the original event sequence. +- `toResponse()`: assert `Content-Type: text/event-stream`, no caching headers, + body parses as above. +- Wire-format edge cases live in `__tests__/sse.test.ts` (0.4): split chunks, + multi-line `data:`, comments, trailing newlines, mid-event abort. +- Backpressure: stream consumer pauses; producer respects it (no unbounded + buffer). + +### 1.4 `@agentic-kit/react` + +#### Problem + +The dominant consumer surface is browser UIs that stream from an agent endpoint. +A canonical React hook avoids every consumer reimplementing the same fetch / +parse / state-update / abort / resume loop. + +#### Design + +One hook. Headless — returns state and actions; renders nothing. Persistence +is delegated to the consumer via lifecycle callbacks. 
+ +```ts +import { useChat } from '@agentic-kit/react' + +const chat = useChat({ + api: '/api/chat', + body: () => ({ /* extra request body fields */ }), + initialMessages: storedMessages, + onMessage: (m) => {}, // streaming partial state + onFinish: (m) => {}, // turn complete; consumer may persist + onDecisionPending: (event) => {}, // tool paused; consumer renders UI +}) + +chat.send('hello') +chat.respondWithDecision(value) // delivers decision to /resume +chat.abort() +chat.messages // Message[] +chat.isStreaming // boolean +chat.pendingDecision // event | undefined +chat.error // unknown | undefined +``` + +Behaviors the hook is responsible for: + +- POSTing to `api` with `messages` plus any consumer-supplied body fields. +- Parsing the SSE response into `AgentEvent`s and folding them into `messages`. +- Emitting `onMessage` per partial update, `onFinish` per turn end. +- Surfacing `tool_decision_pending` events as `chat.pendingDecision` and via + `onDecisionPending`. +- Rebroadcasting `respondWithDecision(value)` as a POST to `/resume` (path + configurable) with `{ runId, decision }`, and resuming stream consumption + from the response. +- Plumbing an `AbortSignal` through `chat.abort()`. + +The hook does not own persistence, modes, system prompts, or any UI shape. + +#### Testing + +The only package using `testEnvironment: 'jsdom'`. Adds devDeps: +`jest-environment-jsdom`, `@testing-library/react`, `react`, `react-dom`. Adds +peerDeps: `react`, `react-dom`. `globalThis.fetch` is stubbed per-test to +return `createScriptedSSEResponse(events)` from 0.2. + +- Send → stream → finish: messages assemble in order; `isStreaming` transitions; + `onMessage` and `onFinish` fire with correct payloads. +- `body()` callback's fields appear in the POST body. +- `chat.abort()` reaches the fetch mock's `AbortSignal`; state cleans up; no + late updates after abort. 
+- Decision-pending: `onDecisionPending` fires; `chat.pendingDecision` set; + `respondWithDecision(value)` POSTs to `/resume` with `{ runId, decision }`; + the resumed stream folds into `messages`. +- Network error / non-200 response: `chat.error` set; `messages` not corrupted. +- Malformed SSE bytes: hook surfaces an error rather than crashing. +- `initialMessages` hydrates state on mount. + +--- + +## Phase 2 — Production Polish (should) + +### 2.1 Prompt Caching API + +The kit currently reads `cacheRead` and `cacheWrite` from `Usage` but exposes +no API to *set* cache control on outgoing messages. Both Anthropic and OpenAI +(via Anthropic-compatible providers and recent OpenAI features) support +prompt caching, and the cost savings are material at scale. + +Design sketch: add an optional `cache?: 'short' | 'long'` flag at the message +level (or at content-block level). Each provider adapter translates to its +native control mechanism (Anthropic `cache_control: { type: 'ephemeral' }`, +OpenAI cache strategy hints). The flag is advisory; providers without support +ignore it. + +#### Testing + +Unit tests per provider adapter, matching the existing +`anthropic.test.ts` / `openai.test.ts` idiom. + +- Mock HTTP intercepts the outgoing request body and headers. +- Build a `Context` whose messages carry `cache: 'short' | 'long'`. +- Anthropic: assert `cache_control: { type: 'ephemeral' }` on flagged blocks. +- OpenAI: assert the corresponding native cache hint. +- Ollama and other no-support providers: assert the flag is silently ignored, + no error. +- `Usage.cacheRead` / `cacheWrite` are populated correctly on the assistant + response (existing usage assertion pattern). + +### 2.2 Telemetry / Middleware Hooks + +The agent loop today has no insertion points for observability or +interception. Production consumers need at minimum: + +- A `before/after` provider call hook (latency, errors, token counts). 
+- A `before/after` tool call hook (arguments, results, durations). +- Stream event tap (without buffering the stream). + +Design as middleware composition over the run, akin to a small async +interceptor chain. Standard error type for transient vs. terminal failures +to support upstream retry logic. + +#### Testing + +Unit tests in `@agentic-kit/agent`. + +- Register middleware, run a scripted loop, assert hook invocation order and + arguments (provider request, response, tool call, tool result). +- Multiple middlewares compose left-to-right with predictable ordering. +- A throwing middleware does not crash the loop; the error surfaces via the + defined channel. +- `before/after` pairs see matching correlation IDs (request ↔ response). +- Stream-event tap does not buffer or reorder events. + +--- + +## Phase 3 — Optional Extensions (could) + +### 3.1 Full Ollama Tool Support + +The Ollama adapter currently does not parse tool calls in streaming responses. +Bring it to feature parity with the Anthropic and OpenAI adapters: tool call +deltas, tool result round-trips, and live tests covering the full loop. + +#### Testing + +- Unit: parse scripted Ollama NDJSON tool-call chunks; assert the canonical + `AssistantMessageEvent` sequence is emitted. +- Live (gated, in `ollama.live.test.ts`): tool-using smoke test against a + known-good local Ollama model. Skipped when `OLLAMA_LIVE_MODEL` is unset. + +### 3.2 Retry / Backoff + +A small built-in retry policy for transient provider failures (HTTP 408, 425, +429, 500, 502, 503, 504; aborted-not-by-user network errors). Configurable +attempt count, jittered exponential backoff. Disabled by default — consumers +opt in. Layered above provider adapters, below the agent loop. + +#### Testing + +Unit tests using an injectable clock. + +- Provider mock returns scripted transient errors then success; assert retry + count and final outcome. 
+- Backoff timings match the configured curve (use a fake clock; never sleep + for real in tests). +- Non-retriable errors (400, 401, 403) fail immediately; no retries attempted. +- Abort during a retry wait cancels promptly; no further attempts. +- Retries respect a global deadline; total time bounded. + +### 3.3 Stream Resume on Disconnect + +If the agent loop is mid-run when the SSE connection drops, the client should +be able to reconnect with the run ID and pick up where it left off. The +machinery is largely a free side-effect of `RunStore` — the run survives; +only stream-position tracking and an event replay endpoint are new. Useful +for flaky-network and long-running flows. + +#### Testing + +- Unit: abort an in-flight `events()` iterator. Reload the run by id and call + `resumeRun`. Assert: events continue from the last-emitted checkpoint, no + duplicate side effects. +- Integration (lane from 0.3): same flow over real HTTP — drop the connection + mid-stream, reconnect with `runId`, assert event continuity and correct + `Last-Event-ID` semantics. + +### 3.4 Client-Side Tool Execution + +For tools that genuinely require browser-only capabilities (DOM access, +WebRTC, File System Access API, locally-running services, hardware bridges, +wallet signing), introduce a `runs: 'client'` flag. The mechanism reuses the +pause/resume rails: such tools emit a `tool_client_execute_pending` event, +the browser-side dispatcher runs the registered local executor, and the +result returns via the same resume endpoint shape. + +This is deferred until a real use case appears. Most agent applications do +not need it, and shipping it prematurely would constrain the design. + +#### Testing + +- Unit (in `@agentic-kit/agent`): protocol layer only. Scripted provider + emits a `runs: 'client'` tool call. Assert: `tool_client_execute_pending` + event fires, loop halts. `agent.resume(runId, { result })` continues with + the supplied result as the tool result. 
+- Unit (in `@agentic-kit/react`, jsdom): client dispatcher. Register a local + executor, fire a synthetic pending event, assert: executor runs with the + tool input, resulting POST to `/resume` includes the correct payload, the + resumed stream folds into `messages`. + +--- + +## Non-Goals + +The kit will not ship the following. They belong in consumer applications, +companion packages, or other ecosystems entirely. + +- **Conversation history persistence.** Lifecycle hooks expose what is needed; + storage is consumer-owned. Browser, server, sync model — none of it is the + kit's call. +- **Structured output / `generateObject` analog.** Tool calls already provide + typed structured outputs via JSON Schema. A second mechanism is redundant. +- **Schema library coupling.** No `@agentic-kit/zod`, no `@agentic-kit/typebox`. + Consumers convert their schema library of choice to JSON Schema at the + boundary; this is a one-line operation for every popular library. +- **Framework-specific helpers.** No Next.js, Hono, Express, Fastify packages. + Standard `Response` and `ReadableStream` cover all of them. +- **UI rendering / component library.** The kit is headless. React hook + exposes state and actions; consumers render however they want. +- **Embeddings as a primary capability.** Per `REDESIGN_DECISIONS.md` #14, + embeddings live behind an optional capability interface or companion + package, not in the conversational core. +- **System prompt construction utilities.** Prompt design is consumer-owned. +- **Conversation modes / agent personas.** Application concern. +- **Built-in production storage backends.** `MemoryRunStore` is the only + implementation the kit ships; Redis, KV, DB backends are for consumers. 
+ +## Package Layout After Phase 1 + +| Package | Change | +|---|---| +| `agentic-kit` | unchanged | +| `@agentic-kit/agent` | extended: pausable tools, `RunStore`, run serialization helpers, middleware hooks (Phase 2) | +| `@agentic-kit/anthropic` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/openai` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/ollama` | unchanged in Phase 1; tool support in Phase 3 | +| `@agentic-kit/react` | **new** — `useChat` hook | + +Shared test helpers live in `tools/test/` (repo-internal directory, not a +package). Phase 2 and 3 add no new packages; everything extends in place. + +## Open Questions + +- **Run record schema versioning.** Once `RunStore` is shipped, the on-disk + `AgentRun` shape becomes a compatibility surface. Decide on an explicit + version field and migration story before 1.0. +- **Decision schema validator scope.** The agent already validates tool inputs + against JSON Schema (`packages/agent/src/validation.ts`). The decision + validator should reuse that same code path. Confirm coverage of the + features needed (discriminated unions in particular). +- **SSE vs. NDJSON.** SSE is the proposed default. NDJSON is simpler but lacks + reconnection semantics and event-type framing. Revisit if real-world + consumers report SSE problems behind specific proxies. +- **`onDecisionPending` ergonomics.** Whether the React hook should auto-route + the next stream from `/resume` or require the consumer to call a follow-up + method explicitly. Default to auto for ergonomics; expose an opt-out. +- **Live test policy for paid providers.** Anthropic/OpenAI live tests would + burn API credits. Default position: gated `*.live.test.ts` files with + env-var keys, manually triggered, never required by per-PR CI. 
From 92f701b7f556ed7a5898d6269ae0d1c97e72fc6c Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 27 Apr 2026 18:30:11 +0800 Subject: [PATCH 05/16] feat(agent): add pausable tools and run store --- packages/agent/__tests__/agent.test.ts | 212 ++++++++++++++- packages/agent/src/agent.ts | 358 ++++++++++++++++++++----- packages/agent/src/index.ts | 1 + packages/agent/src/run-store.ts | 81 ++++++ packages/agent/src/types.ts | 14 + packages/agent/src/validation.ts | 2 +- 6 files changed, 597 insertions(+), 71 deletions(-) create mode 100644 packages/agent/src/run-store.ts diff --git a/packages/agent/__tests__/agent.test.ts b/packages/agent/__tests__/agent.test.ts index c231f46..d724c8f 100644 --- a/packages/agent/__tests__/agent.test.ts +++ b/packages/agent/__tests__/agent.test.ts @@ -10,7 +10,15 @@ import { makeFakeModel, } from '@test/index'; -import { Agent } from '../src'; +import { + Agent, + type AgentEvent, + type AgentTool, + DecisionValidationError, + MemoryRunStore, + RunNotFoundError, + ToolNotRegisteredError, +} from '../src'; describe('@agentic-kit/agent', () => { it('runs a minimal sequential tool loop', async () => { @@ -47,7 +55,7 @@ describe('@agentic-kit/agent', () => { }, required: ['text'], }, - execute: async (_toolCallId, params) => ({ + execute: async (_toolCallId, params, _decision) => ({ content: [{ type: 'text', text: String(params.text) }], }), }, @@ -184,3 +192,203 @@ function makeUsage() { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }; } + +describe('@agentic-kit/agent — pausable tools', () => { + function makeApprovalTool(execute: AgentTool['execute']): AgentTool { + return { + name: 'approve', + label: 'Approve', + description: 'Tool that requires explicit approval', + parameters: { + type: 'object', + properties: { target: { type: 'string' } }, + required: ['target'], + }, + decision: { + type: 'object', + properties: { approved: { type: 'boolean' } }, + required: ['approved'], + }, + execute, + }; + } + + function 
pauseResponse() { + return makeFakeAssistantMessage({ + stopReason: 'toolUse', + content: [ + { type: 'toolCall', id: 'tool_1', name: 'approve', arguments: { target: 'thing' } }, + ], + }); + } + + function finalResponse() { + return makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'finalized' }], + }); + } + + it('pauses on a decision-bearing tool, persists the run, and emits tool_decision_pending', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse()] }); + const runStore = new MemoryRunStore(); + const saveSpy = jest.spyOn(runStore, 'save'); + const execute = jest.fn(); + const events: AgentEvent[] = []; + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + runStore, + }); + agent.subscribe((event) => events.push(event)); + agent.setTools([makeApprovalTool(execute)]); + + await agent.prompt('approve thing'); + + expect(execute).not.toHaveBeenCalled(); + expect(saveSpy).toHaveBeenCalledTimes(1); + + const pendingEvent = events.find((e) => e.type === 'tool_decision_pending'); + expect(pendingEvent).toMatchObject({ + type: 'tool_decision_pending', + toolCallId: 'tool_1', + toolName: 'approve', + input: { target: 'thing' }, + schema: expect.objectContaining({ type: 'object' }), + }); + + const runId = (pendingEvent as { runId: string }).runId; + expect(runId).toBeTruthy(); + expect(agent.pendingRunId).toBe(runId); + expect(agent.state.isStreaming).toBe(false); + + expect(events.some((e) => e.type === 'agent_end')).toBe(false); + + const stored = await runStore.load(runId); + expect(stored).toMatchObject({ + id: runId, + pending: { toolCallId: 'tool_1', toolName: 'approve', input: { target: 'thing' } }, + }); + expect(stored?.tools[0]).not.toHaveProperty('execute'); + }); + + it('resume invokes execute with the decision argument and continues the loop', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse(), 
finalResponse()] }); + const execute = jest.fn( + async (_id: string, _params: Record, decision: unknown) => ({ + content: [{ type: 'text' as const, text: `decision=${JSON.stringify(decision)}` }], + }) + ); + const events: AgentEvent[] = []; + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + agent.subscribe((event) => events.push(event)); + agent.setTools([makeApprovalTool(execute)]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + expect(runId).toBeTruthy(); + + await agent.resume(runId, { approved: true }); + + expect(execute).toHaveBeenCalledTimes(1); + expect(execute.mock.calls[0]?.[2]).toEqual({ approved: true }); + expect(agent.pendingRunId).toBeUndefined(); + + expect(agent.state.messages.at(-1)).toMatchObject({ + role: 'assistant', + content: [{ type: 'text', text: 'finalized' }], + }); + expect(events.some((e) => e.type === 'agent_end')).toBe(true); + }); + + it('rejects a malformed decision and leaves the run resumable', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); + const runStore = new MemoryRunStore(); + const execute = jest.fn( + async (_id: string, _params: Record, decision: unknown) => ({ + content: [{ type: 'text' as const, text: `decision=${JSON.stringify(decision)}` }], + }) + ); + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + runStore, + }); + agent.setTools([makeApprovalTool(execute)]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + + await expect(agent.resume(runId, { approved: 'yes' })).rejects.toBeInstanceOf( + DecisionValidationError + ); + expect(execute).not.toHaveBeenCalled(); + expect(agent.pendingRunId).toBe(runId); + expect(await runStore.load(runId)).toBeDefined(); + + await agent.resume(runId, { approved: true }); + + expect(execute).toHaveBeenCalledTimes(1); + 
expect(agent.pendingRunId).toBeUndefined(); + expect(await runStore.load(runId)).toBeUndefined(); + }); + + it('throws RunNotFoundError when resuming an unknown run', async () => { + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: createScriptedProvider({ responses: [] }).stream, + }); + + await expect(agent.resume('does-not-exist', { approved: true })).rejects.toBeInstanceOf( + RunNotFoundError + ); + }); + + it('cleans up the persisted run when abort() is called while paused', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse()] }); + const runStore = new MemoryRunStore(); + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + runStore, + }); + agent.setTools([makeApprovalTool(jest.fn())]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + expect(await runStore.load(runId)).toBeDefined(); + + agent.abort(); + await new Promise((resolve) => setImmediate(resolve)); + + expect(agent.pendingRunId).toBeUndefined(); + expect(await runStore.load(runId)).toBeUndefined(); + }); + + it('throws ToolNotRegisteredError when resuming after the tool has been removed', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); + const tool = makeApprovalTool(jest.fn()); + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + agent.setTools([tool]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + + agent.setTools([]); + + await expect(agent.resume(runId, { approved: true })).rejects.toBeInstanceOf( + ToolNotRegisteredError + ); + }); +}); diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index 8fc8503..a7ff4fd 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -1,3 +1,5 @@ +import { randomUUID } from 'node:crypto'; + import { type AssistantMessage, type 
Context, @@ -6,8 +8,19 @@ import { type Message, stream, type StreamOptions, + type ToolCallContent, + type ToolDefinition, } from 'agentic-kit'; +import { + type AgentRun, + type AgentRunPending, + DecisionValidationError, + MemoryRunStore, + RunNotFoundError, + type RunStore, + ToolNotRegisteredError, +} from './run-store.js'; import type { AgentEvent, AgentOptions, @@ -15,15 +28,22 @@ import type { AgentTool, AgentToolResult, } from './types.js'; -import { validateToolArguments as defaultValidateToolArguments } from './validation.js'; +import { + validateSchema, + validateToolArguments as defaultValidateToolArguments, +} from './validation.js'; export class Agent { private readonly listeners = new Set<(event: AgentEvent) => void>(); private readonly transformContext?: AgentOptions['transformContext']; private readonly streamFn: NonNullable; private readonly validateToolArguments: NonNullable; + private readonly runStore: RunStore; + private readonly generateRunId: () => string; private abortController?: AbortController; private running?: Promise; + private currentRunId?: string; + private pausedRunId?: string; private _state: AgentState; @@ -40,6 +60,8 @@ export class Agent { this.streamFn = options.streamFn ?? stream; this.transformContext = options.transformContext; this.validateToolArguments = options.validateToolArguments ?? defaultValidateToolArguments; + this.runStore = options.runStore ?? new MemoryRunStore(); + this.generateRunId = options.generateRunId ?? 
randomUUID; } get state(): AgentState { @@ -88,6 +110,12 @@ export class Agent { } abort(): void { + if (this.pausedRunId) { + const runId = this.pausedRunId; + this.pausedRunId = undefined; + void this.runStore.delete(runId); + return; + } this.abortController?.abort(); } @@ -99,15 +127,21 @@ export class Agent { if (this._state.isStreaming) { throw new Error('Agent is already processing a prompt'); } + if (this.pausedRunId) { + throw new Error('Agent is paused awaiting a decision; call resume() or abort() first'); + } const message = typeof input === 'string' ? createUserMessage(input) : input; - await this.runLoop([message]); + await this.runLoop({ runId: this.generateRunId(), initialMessages: [message] }); } async continue(): Promise { if (this._state.isStreaming) { throw new Error('Agent is already processing'); } + if (this.pausedRunId) { + throw new Error('Agent is paused awaiting a decision; call resume() or abort() first'); + } const lastMessage = this._state.messages[this._state.messages.length - 1]; if (!lastMessage) { @@ -117,62 +151,131 @@ export class Agent { throw new Error('Cannot continue from message role: assistant'); } - await this.runLoop(); + await this.runLoop({ runId: this.generateRunId() }); + } + + get pendingRunId(): string | undefined { + return this.pausedRunId; } - private async runLoop(initialMessages?: Message[]): Promise { + async resume(runId: string, decision: unknown): Promise { + if (this._state.isStreaming) { + throw new Error('Agent is already processing'); + } + + const run = await this.runStore.load(runId); + if (!run) { + throw new RunNotFoundError(runId); + } + if (!run.pending) { + throw new Error(`Run ${runId} is not paused`); + } + + const tool = this._state.tools.find((t) => t.name === run.pending!.toolName); + if (!tool) { + throw new ToolNotRegisteredError(runId, run.pending.toolName); + } + if (!tool.decision) { + throw new Error( + `Tool '${tool.name}' has no decision schema; cannot resume run ${runId}` + ); + } + 
+ const errors = validateSchema(tool.decision, decision, 'root'); + if (errors.length > 0) { + throw new DecisionValidationError(runId, tool.name, errors); + } + + this._state.model = run.model; + if (run.systemPrompt !== undefined) { + this._state.systemPrompt = run.systemPrompt; + } + this._state.messages = [...run.messages]; + this.pausedRunId = undefined; + + await this.runLoop({ + runId, + resumeDecision: { toolCallId: run.pending.toolCallId, decision }, + }); + } + + private async runLoop(opts: { + runId: string; + initialMessages?: Message[]; + resumeDecision?: { toolCallId: string; decision: unknown }; + }): Promise { this.running = (async () => { this.abortController = new AbortController(); this._state.isStreaming = true; this._state.streamMessage = null; this._state.error = undefined; + this.currentRunId = opts.runId; try { this.emit({ type: 'agent_start' }); - if (initialMessages && initialMessages.length > 0) { - for (const message of initialMessages) { + if (opts.initialMessages && opts.initialMessages.length > 0) { + for (const message of opts.initialMessages) { this.emit({ type: 'message_start', message }); this.appendMessage(message); this.emit({ type: 'message_end', message }); } } - while (true) { - this.emit({ type: 'turn_start' }); - - const assistantMessage = await this.generateAssistantMessage(this.abortController.signal); - this.appendMessage(assistantMessage); - this.emit({ type: 'message_end', message: assistantMessage }); + let resumeDecision = opts.resumeDecision; - if (assistantMessage.stopReason === 'error' || assistantMessage.stopReason === 'aborted') { - this._state.error = assistantMessage.errorMessage; - this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); - break; + while (true) { + let assistantMessage: AssistantMessage; + + if (resumeDecision) { + const last = this._state.messages[this._state.messages.length - 1]; + if (!last || last.role !== 'assistant') { + throw new Error('Cannot resume: last message 
is not an assistant message'); + } + assistantMessage = last; + } else { + this.emit({ type: 'turn_start' }); + assistantMessage = await this.generateAssistantMessage(this.abortController.signal); + this.appendMessage(assistantMessage); + this.emit({ type: 'message_end', message: assistantMessage }); + + if (assistantMessage.stopReason === 'error' || assistantMessage.stopReason === 'aborted') { + this._state.error = assistantMessage.errorMessage; + this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); + break; + } } - const toolCalls = assistantMessage.content.filter((block) => block.type === 'toolCall'); + const toolCalls = assistantMessage.content.filter( + (block): block is ToolCallContent => block.type === 'toolCall' + ); if (toolCalls.length === 0) { this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); break; } - const toolResults = await this.executeToolCalls(toolCalls, this.abortController.signal); - for (const toolResult of toolResults) { - this.emit({ type: 'message_start', message: toolResult }); - this.appendMessage(toolResult); - this.emit({ type: 'message_end', message: toolResult }); + const outcome = await this.executeToolCalls( + toolCalls, + this.abortController.signal, + resumeDecision + ); + resumeDecision = undefined; + + if (outcome.status === 'paused') { + return; } - this.emit({ type: 'turn_end', message: assistantMessage, toolResults }); + this.emit({ type: 'turn_end', message: assistantMessage, toolResults: outcome.results }); } this.emit({ type: 'agent_end', messages: [...this._state.messages] }); + await this.runStore.delete(opts.runId); } finally { this._state.isStreaming = false; this._state.streamMessage = null; this.abortController = undefined; this.running = undefined; + this.currentRunId = undefined; } })(); @@ -228,34 +331,143 @@ export class Agent { } private async executeToolCalls( - toolCalls: Array>, - signal: AbortSignal - ) { - const results = []; + toolCalls: ToolCallContent[], 
+ signal: AbortSignal, + resumeDecision?: { toolCallId: string; decision: unknown } + ): Promise< + | { status: 'completed'; results: ReturnType[] } + | { status: 'paused' } + > { + const completedToolCallIds = new Set( + this._state.messages + .filter((m): m is Extract => m.role === 'toolResult') + .map((m) => m.toolCallId) + ); + + const results: ReturnType[] = []; for (const toolCall of toolCalls) { - const tool = this._state.tools.find((candidate) => candidate.name === toolCall.name); - this.emit({ - type: 'tool_execution_start', - toolCallId: toolCall.id, - toolName: toolCall.name, - args: toolCall.arguments as Record, - }); + if (completedToolCallIds.has(toolCall.id)) { + continue; + } - let result: AgentToolResult; - let isError = false; + const tool = this._state.tools.find((candidate) => candidate.name === toolCall.name); + const args = toolCall.arguments as Record; + const isResumeTarget = resumeDecision?.toolCallId === toolCall.id; + + if (tool?.decision && !isResumeTarget) { + let validatedArgs: Record; + try { + validatedArgs = this.validateToolArguments(tool.parameters, args); + } catch (error) { + const result: AgentToolResult = { + content: [ + { + type: 'text', + text: error instanceof Error ? 
error.message : String(error), + }, + ], + }; + this.emit({ + type: 'tool_execution_start', + toolCallId: toolCall.id, + toolName: toolCall.name, + args, + }); + this.emit({ + type: 'tool_execution_end', + toolCallId: toolCall.id, + toolName: toolCall.name, + result, + isError: true, + }); + const toolResult = createToolResultMessage(toolCall.id, toolCall.name, result.content, true); + this.appendMessageWithEvents(toolResult); + continue; + } - try { - if (!tool) { - throw new Error(`Tool '${toolCall.name}' not found`); + for (const toolResult of results) { + this.appendMessageWithEvents(toolResult); } - const validatedArgs = this.validateToolArguments( - tool.parameters, - toolCall.arguments as Record - ); + const runId = this.currentRunId!; + const pending: AgentRunPending = { + toolCallId: toolCall.id, + toolName: toolCall.name, + input: validatedArgs, + }; + const now = Date.now(); + const run: AgentRun = { + id: runId, + model: this._state.model, + systemPrompt: this._state.systemPrompt, + tools: this._state.tools.map(toToolDefinition), + messages: [...this._state.messages], + pending, + createdAt: now, + updatedAt: now, + }; + await this.runStore.save(run); + this.pausedRunId = runId; + this.emit({ + type: 'tool_decision_pending', + runId, + toolCallId: toolCall.id, + toolName: toolCall.name, + input: validatedArgs, + schema: tool.decision, + }); + return { status: 'paused' }; + } + + const decisionForExecute = isResumeTarget ? 
resumeDecision!.decision : undefined; + const toolResult = await this.executeOneTool( + tool, + toolCall, + args, + decisionForExecute, + signal + ); + results.push(toolResult); + } + + for (const toolResult of results) { + this.appendMessageWithEvents(toolResult); + } + + return { status: 'completed', results }; + } + + private async executeOneTool( + tool: AgentTool | undefined, + toolCall: ToolCallContent, + args: Record, + decision: unknown, + signal: AbortSignal + ): Promise> { + this.emit({ + type: 'tool_execution_start', + toolCallId: toolCall.id, + toolName: toolCall.name, + args, + }); + + let result: AgentToolResult; + let isError = false; + + try { + if (!tool) { + throw new Error(`Tool '${toolCall.name}' not found`); + } + + const validatedArgs = this.validateToolArguments(tool.parameters, args); - result = await tool.execute(toolCall.id, validatedArgs, signal, (partialResult) => { + result = await tool.execute( + toolCall.id, + validatedArgs, + decision, + signal, + (partialResult) => { this.emit({ type: 'tool_execution_update', toolCallId: toolCall.id, @@ -263,33 +475,35 @@ export class Agent { args: validatedArgs, partialResult, }); - }); - } catch (error) { - result = { - content: [ - { - type: 'text', - text: error instanceof Error ? error.message : String(error), - }, - ], - }; - isError = true; - } - - this.emit({ - type: 'tool_execution_end', - toolCallId: toolCall.id, - toolName: toolCall.name, - result, - isError, - }); - - results.push( - createToolResultMessage(toolCall.id, toolCall.name, result.content, isError) + } ); + } catch (error) { + result = { + content: [ + { + type: 'text', + text: error instanceof Error ? 
error.message : String(error), + }, + ], + }; + isError = true; } - return results; + this.emit({ + type: 'tool_execution_end', + toolCallId: toolCall.id, + toolName: toolCall.name, + result, + isError, + }); + + return createToolResultMessage(toolCall.id, toolCall.name, result.content, isError); + } + + private appendMessageWithEvents(message: Message): void { + this.emit({ type: 'message_start', message }); + this.appendMessage(message); + this.emit({ type: 'message_end', message }); } private emit(event: AgentEvent): void { @@ -298,3 +512,11 @@ export class Agent { } } } + +function toToolDefinition(tool: AgentTool): ToolDefinition { + return { + name: tool.name, + description: tool.description, + parameters: tool.parameters, + }; +} diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index b8b99bb..bad2c86 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -1,3 +1,4 @@ export * from './agent.js'; +export * from './run-store.js'; export * from './types.js'; export * from './validation.js'; diff --git a/packages/agent/src/run-store.ts b/packages/agent/src/run-store.ts new file mode 100644 index 0000000..a658d69 --- /dev/null +++ b/packages/agent/src/run-store.ts @@ -0,0 +1,81 @@ +import type { Message, ModelDescriptor, ToolDefinition } from 'agentic-kit'; + +export interface AgentRunPending { + toolCallId: string; + toolName: string; + input: Record; +} + +export interface AgentRun { + id: string; + model: ModelDescriptor; + systemPrompt?: string; + tools: ToolDefinition[]; + messages: Message[]; + pending?: AgentRunPending; + createdAt: number; + updatedAt: number; +} + +export interface RunStore { + save(run: AgentRun): Promise; + load(id: string): Promise; + delete(id: string): Promise; +} + +export class MemoryRunStore implements RunStore { + private readonly runs = new Map(); + + async save(run: AgentRun): Promise { + this.runs.set(run.id, cloneRun(run)); + } + + async load(id: string): Promise { + const run = 
this.runs.get(id); + return run ? cloneRun(run) : undefined; + } + + async delete(id: string): Promise { + this.runs.delete(id); + } +} + +export class RunNotFoundError extends Error { + readonly runId: string; + + constructor(runId: string) { + super(`Run not found: ${runId}`); + this.name = 'RunNotFoundError'; + this.runId = runId; + } +} + +export class DecisionValidationError extends Error { + readonly runId: string; + readonly toolName: string; + readonly errors: string[]; + + constructor(runId: string, toolName: string, errors: string[]) { + super(`Decision validation failed for tool '${toolName}':\n${errors.map((e) => `- ${e}`).join('\n')}`); + this.name = 'DecisionValidationError'; + this.runId = runId; + this.toolName = toolName; + this.errors = errors; + } +} + +export class ToolNotRegisteredError extends Error { + readonly runId: string; + readonly toolName: string; + + constructor(runId: string, toolName: string) { + super(`Tool '${toolName}' is not registered on the agent resuming run ${runId}`); + this.name = 'ToolNotRegisteredError'; + this.runId = runId; + this.toolName = toolName; + } +} + +function cloneRun(run: AgentRun): AgentRun { + return JSON.parse(JSON.stringify(run)) as AgentRun; +} diff --git a/packages/agent/src/types.ts b/packages/agent/src/types.ts index 1486987..9e724d9 100644 --- a/packages/agent/src/types.ts +++ b/packages/agent/src/types.ts @@ -10,6 +10,8 @@ import type { ToolResultMessage, } from 'agentic-kit'; +import type { RunStore } from './run-store.js'; + export interface AgentToolResult { content: ToolResultMessage['content']; details?: TDetails; @@ -21,9 +23,11 @@ export type AgentToolUpdateCallback = ( export interface AgentTool extends ToolDefinition { label: string; + decision?: JsonSchema; execute: ( toolCallId: string, params: Record, + decision: unknown, signal?: AbortSignal, onUpdate?: AgentToolUpdateCallback ) => Promise>; @@ -66,6 +70,14 @@ export type AgentEvent = toolName: string; result: AgentToolResult; 
isError: boolean; + } + | { + type: 'tool_decision_pending'; + runId: string; + toolCallId: string; + toolName: string; + input: Record; + schema: JsonSchema; }; export interface AgentOptions { @@ -80,4 +92,6 @@ export interface AgentOptions { schema: JsonSchema, args: Record ) => Record; + runStore?: RunStore; + generateRunId?: () => string; } diff --git a/packages/agent/src/validation.ts b/packages/agent/src/validation.ts index 51634c7..3fb227e 100644 --- a/packages/agent/src/validation.ts +++ b/packages/agent/src/validation.ts @@ -12,7 +12,7 @@ export function validateToolArguments( throw new Error(`Tool argument validation failed:\n${errors.map((error) => `- ${error}`).join('\n')}`); } -function validateSchema(schema: JsonSchema, value: unknown, path: string): string[] { +export function validateSchema(schema: JsonSchema, value: unknown, path: string): string[] { if (!schema || Object.keys(schema).length === 0) { return []; } From 1adfadb5b9f3e04012e72d08165c5dcc9b03919e Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 27 Apr 2026 18:30:20 +0800 Subject: [PATCH 06/16] docs: update roadmap for phase 1.1 progress --- ROADMAP.md | 56 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index adbf0f4..507b319 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -6,13 +6,13 @@ This describes what will exist next, why, and what is explicitly out of scope. ## Current State (snapshot) -| Package | Status | -|---|---| -| `agentic-kit` | Core portability layer. Streaming, message model, providers registry, cross-provider transforms, usage/cost. | -| `@agentic-kit/agent` | Sequential agent loop. Tool execution, lifecycle events, abort/continue, JSON Schema validation. | -| `@agentic-kit/anthropic` | Provider adapter. Streaming, thinking, tool calls, multimodal, abort. | -| `@agentic-kit/openai` | Provider adapter. Streaming, reasoning, tool calls, multimodal, abort. OpenAI-compatible endpoints. 
| -| `@agentic-kit/ollama` | Provider adapter. Local inference, embeddings. **Tool execution in streaming is a stub.** | +| Package | Status | +| ------------------------ | ------------------------------------------------------------------------------------------------------------ | +| `agentic-kit` | Core portability layer. Streaming, message model, providers registry, cross-provider transforms, usage/cost. | +| `@agentic-kit/agent` | Sequential agent loop. Tool execution, lifecycle events, abort/continue, JSON Schema validation. | +| `@agentic-kit/anthropic` | Provider adapter. Streaming, thinking, tool calls, multimodal, abort. | +| `@agentic-kit/openai` | Provider adapter. Streaming, reasoning, tool calls, multimodal, abort. OpenAI-compatible endpoints. | +| `@agentic-kit/ollama` | Provider adapter. Local inference, embeddings. **Tool execution in streaming is a stub.** | The agent loop today runs to completion in-process: it does not pause for out-of-band input and has no transport layer above it. Consumers wire it into @@ -272,6 +272,16 @@ Unit tests in `@agentic-kit/agent`. contract suite against `MemoryRunStore`. The same export is consumed by any third-party `RunStore` implementation. - Concurrent save/load on the same id (last write wins, no torn reads). +- Re-pause `createdAt` preservation: a second `save()` of the same run id keeps + the original `createdAt`; only `updatedAt` advances. (1.1 does not yet + enforce this — fold into the contract suite.) +- Abort-during-save race: `agent.abort()` while a `runStore.save()` is + in-flight resolves without orphaning the persisted record or surfacing a + rejected save promise. +- Mixed-batch tool ordering: when an assistant turn contains a regular tool + call followed by a decision-bearing tool whose arguments fail validation, + the persisted `messages` order matches the LLM's tool-call order. (Latent + in 1.1's invalid-args branch; surfaces only via the contract suite.) 
### 1.3 Run Serialization Helpers @@ -556,14 +566,14 @@ companion packages, or other ecosystems entirely. ## Package Layout After Phase 1 -| Package | Change | -|---|---| -| `agentic-kit` | unchanged | -| `@agentic-kit/agent` | extended: pausable tools, `RunStore`, run serialization helpers, middleware hooks (Phase 2) | -| `@agentic-kit/anthropic` | unchanged in Phase 1; caching API in Phase 2 | -| `@agentic-kit/openai` | unchanged in Phase 1; caching API in Phase 2 | -| `@agentic-kit/ollama` | unchanged in Phase 1; tool support in Phase 3 | -| `@agentic-kit/react` | **new** — `useChat` hook | +| Package | Change | +| ------------------------ | ------------------------------------------------------------------------------------------- | +| `agentic-kit` | unchanged | +| `@agentic-kit/agent` | extended: pausable tools, `RunStore`, run serialization helpers, middleware hooks (Phase 2) | +| `@agentic-kit/anthropic` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/openai` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/ollama` | unchanged in Phase 1; tool support in Phase 3 | +| `@agentic-kit/react` | **new** — `useChat` hook | Shared test helpers live in `tools/test/` (repo-internal directory, not a package). Phase 2 and 3 add no new packages; everything extends in place. @@ -573,10 +583,18 @@ package). Phase 2 and 3 add no new packages; everything extends in place. - **Run record schema versioning.** Once `RunStore` is shipped, the on-disk `AgentRun` shape becomes a compatibility surface. Decide on an explicit version field and migration story before 1.0. -- **Decision schema validator scope.** The agent already validates tool inputs - against JSON Schema (`packages/agent/src/validation.ts`). The decision - validator should reuse that same code path. Confirm coverage of the - features needed (discriminated unions in particular). 
+- **Decision schema validator scope.** Resolved (1.1): the decision validator + reuses `validateSchema` from `packages/agent/src/validation.ts` — same code + path as tool inputs. Discriminated-union and `oneOf` / `anyOf` coverage is + still untested; fold into the 1.2 contract suite. +- **Lifecycle events across pause boundaries.** On resume, `agent_start` + re-fires (each `runLoop` entry is a fresh start) but `turn_start` does not + (the persisted assistant message is reused, not regenerated). This + asymmetry is invisible to a single-prompt consumer but matters for any + listener that tracks turn vs. run lifecycle. Decide before 1.4 whether to + introduce a distinct `agent_resume` event or to redocument `agent_start` + with explicit "loop entry" semantics — the `@agentic-kit/react` hook will + codify whichever choice externally. - **SSE vs. NDJSON.** SSE is the proposed default. NDJSON is simpler but lacks reconnection semantics and event-type framing. Revisit if real-world consumers report SSE problems behind specific proxies. 
From 08fc6c1dcbaa3633ae284e868dc227a4f9dd82b3 Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 4 May 2026 16:52:08 +0800 Subject: [PATCH 07/16] feat(agent): message-log pause/resume + run handle --- packages/agent/__tests__/agent.test.ts | 224 +++++++--- packages/agent/__tests__/run-handle.test.ts | 444 ++++++++++++++++++++ packages/agent/src/agent.ts | 267 ++++++------ packages/agent/src/index.ts | 2 +- packages/agent/src/run-handle.ts | 198 +++++++++ packages/agent/src/run-store.ts | 81 ---- packages/agent/src/types.ts | 5 - packages/agent/src/validation.ts | 11 + packages/agentic-kit/src/types.ts | 1 + 9 files changed, 932 insertions(+), 301 deletions(-) create mode 100644 packages/agent/__tests__/run-handle.test.ts create mode 100644 packages/agent/src/run-handle.ts delete mode 100644 packages/agent/src/run-store.ts diff --git a/packages/agent/__tests__/agent.test.ts b/packages/agent/__tests__/agent.test.ts index d724c8f..3f66e71 100644 --- a/packages/agent/__tests__/agent.test.ts +++ b/packages/agent/__tests__/agent.test.ts @@ -3,6 +3,7 @@ import { type Context, createAssistantMessageEventStream, type ModelDescriptor, + type ToolCallContent, } from 'agentic-kit'; import { createScriptedProvider, @@ -15,9 +16,6 @@ import { type AgentEvent, type AgentTool, DecisionValidationError, - MemoryRunStore, - RunNotFoundError, - ToolNotRegisteredError, } from '../src'; describe('@agentic-kit/agent', () => { @@ -229,17 +227,26 @@ describe('@agentic-kit/agent — pausable tools', () => { }); } - it('pauses on a decision-bearing tool, persists the run, and emits tool_decision_pending', async () => { + function attachDecision(agent: Agent, toolCallId: string, decision: unknown): void { + const messages = agent.state.messages; + const last = messages[messages.length - 1] as AssistantMessage; + const updatedContent = last.content.map((block) => + block.type === 'toolCall' && block.id === toolCallId + ? 
({ ...block, decision } as ToolCallContent) + : block + ); + const updated: AssistantMessage = { ...last, content: updatedContent }; + agent.replaceMessages([...messages.slice(0, -1), updated]); + } + + it('pauses on a decision-bearing tool and emits tool_decision_pending without runId', async () => { const provider = createScriptedProvider({ responses: [pauseResponse()] }); - const runStore = new MemoryRunStore(); - const saveSpy = jest.spyOn(runStore, 'save'); const execute = jest.fn(); const events: AgentEvent[] = []; const agent = new Agent({ initialState: { model: makeFakeModel() }, streamFn: provider.stream, - runStore, }); agent.subscribe((event) => events.push(event)); agent.setTools([makeApprovalTool(execute)]); @@ -247,33 +254,26 @@ describe('@agentic-kit/agent — pausable tools', () => { await agent.prompt('approve thing'); expect(execute).not.toHaveBeenCalled(); - expect(saveSpy).toHaveBeenCalledTimes(1); + expect(agent.state.isStreaming).toBe(false); + expect(events.some((e) => e.type === 'agent_end')).toBe(false); const pendingEvent = events.find((e) => e.type === 'tool_decision_pending'); - expect(pendingEvent).toMatchObject({ + expect(pendingEvent).toEqual({ type: 'tool_decision_pending', toolCallId: 'tool_1', toolName: 'approve', input: { target: 'thing' }, schema: expect.objectContaining({ type: 'object' }), }); + expect(pendingEvent).not.toHaveProperty('runId'); - const runId = (pendingEvent as { runId: string }).runId; - expect(runId).toBeTruthy(); - expect(agent.pendingRunId).toBe(runId); - expect(agent.state.isStreaming).toBe(false); - - expect(events.some((e) => e.type === 'agent_end')).toBe(false); - - const stored = await runStore.load(runId); - expect(stored).toMatchObject({ - id: runId, - pending: { toolCallId: 'tool_1', toolName: 'approve', input: { target: 'thing' } }, - }); - expect(stored?.tools[0]).not.toHaveProperty('execute'); + const lastMessage = agent.state.messages.at(-1); + expect(lastMessage).toMatchObject({ role: 'assistant', 
stopReason: 'toolUse' }); + const toolResults = agent.state.messages.filter((m) => m.role === 'toolResult'); + expect(toolResults).toHaveLength(0); }); - it('resume invokes execute with the decision argument and continues the loop', async () => { + it('continue() invokes execute with the decision attached to the tool call and continues the loop', async () => { const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); const execute = jest.fn( async (_id: string, _params: Record, decision: unknown) => ({ @@ -290,14 +290,13 @@ describe('@agentic-kit/agent — pausable tools', () => { agent.setTools([makeApprovalTool(execute)]); await agent.prompt('approve thing'); - const runId = agent.pendingRunId!; - expect(runId).toBeTruthy(); - await agent.resume(runId, { approved: true }); + attachDecision(agent, 'tool_1', { approved: true }); + + await agent.continue(); expect(execute).toHaveBeenCalledTimes(1); expect(execute.mock.calls[0]?.[2]).toEqual({ approved: true }); - expect(agent.pendingRunId).toBeUndefined(); expect(agent.state.messages.at(-1)).toMatchObject({ role: 'assistant', @@ -306,89 +305,178 @@ describe('@agentic-kit/agent — pausable tools', () => { expect(events.some((e) => e.type === 'agent_end')).toBe(true); }); - it('rejects a malformed decision and leaves the run resumable', async () => { + it('continue() throws DecisionValidationError synchronously on a malformed decision', async () => { const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); - const runStore = new MemoryRunStore(); - const execute = jest.fn( - async (_id: string, _params: Record, decision: unknown) => ({ - content: [{ type: 'text' as const, text: `decision=${JSON.stringify(decision)}` }], - }) - ); + const execute = jest.fn(async () => ({ + content: [{ type: 'text' as const, text: 'ok' }], + })); const agent = new Agent({ initialState: { model: makeFakeModel() }, streamFn: provider.stream, - runStore, }); 
agent.setTools([makeApprovalTool(execute)]); await agent.prompt('approve thing'); - const runId = agent.pendingRunId!; - await expect(agent.resume(runId, { approved: 'yes' })).rejects.toBeInstanceOf( - DecisionValidationError - ); + attachDecision(agent, 'tool_1', { approved: 'yes' }); + + expect(() => agent.continue()).toThrow(DecisionValidationError); expect(execute).not.toHaveBeenCalled(); - expect(agent.pendingRunId).toBe(runId); - expect(await runStore.load(runId)).toBeDefined(); + const toolResults = agent.state.messages.filter((m) => m.role === 'toolResult'); + expect(toolResults).toHaveLength(0); - await agent.resume(runId, { approved: true }); + attachDecision(agent, 'tool_1', { approved: true }); + await agent.continue(); expect(execute).toHaveBeenCalledTimes(1); - expect(agent.pendingRunId).toBeUndefined(); - expect(await runStore.load(runId)).toBeUndefined(); }); - it('throws RunNotFoundError when resuming an unknown run', async () => { + it('continue() rejects when the trailing assistant has tool calls but no decisions attached', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse()] }); + const agent = new Agent({ initialState: { model: makeFakeModel() }, - streamFn: createScriptedProvider({ responses: [] }).stream, + streamFn: provider.stream, }); + agent.setTools([makeApprovalTool(jest.fn())]); - await expect(agent.resume('does-not-exist', { approved: true })).rejects.toBeInstanceOf( - RunNotFoundError - ); + await agent.prompt('approve thing'); + + expect(() => agent.continue()).toThrow(/no tool calls awaiting a decision/); }); - it('cleans up the persisted run when abort() is called while paused', async () => { + it('abort() while paused stops further work without throwing', async () => { const provider = createScriptedProvider({ responses: [pauseResponse()] }); - const runStore = new MemoryRunStore(); const agent = new Agent({ initialState: { model: makeFakeModel() }, streamFn: provider.stream, - runStore, }); 
agent.setTools([makeApprovalTool(jest.fn())]); await agent.prompt('approve thing'); - const runId = agent.pendingRunId!; - expect(await runStore.load(runId)).toBeDefined(); - agent.abort(); - await new Promise((resolve) => setImmediate(resolve)); + expect(() => agent.abort()).not.toThrow(); + expect(agent.state.isStreaming).toBe(false); + }); + + it('flushes prior tool results before the args-validation error tool_result on a mixed batch', async () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'toolUse', + content: [ + { type: 'toolCall', id: 'tool_regular', name: 'echo', arguments: { text: 'first' } }, + { type: 'toolCall', id: 'tool_approve', name: 'approve', arguments: {} }, + ], + }), + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'recovered' }], + }), + ], + }); + + const regularExecute = jest.fn(async () => ({ + content: [{ type: 'text' as const, text: 'first-result' }], + })); + const approveExecute = jest.fn(async () => ({ + content: [{ type: 'text' as const, text: 'should not run' }], + })); - expect(agent.pendingRunId).toBeUndefined(); - expect(await runStore.load(runId)).toBeUndefined(); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + agent.setTools([ + { + name: 'echo', + label: 'Echo', + description: 'Echo text', + parameters: { + type: 'object', + properties: { text: { type: 'string' } }, + required: ['text'], + }, + execute: regularExecute, + }, + makeApprovalTool(approveExecute), + ]); + + await agent.prompt('go'); + + expect(regularExecute).toHaveBeenCalledTimes(1); + expect(approveExecute).not.toHaveBeenCalled(); + + const messages = agent.state.messages; + expect(messages[1]).toMatchObject({ role: 'assistant', stopReason: 'toolUse' }); + expect(messages[2]).toMatchObject({ + role: 'toolResult', + toolCallId: 'tool_regular', + toolName: 'echo', + content: [{ type: 'text', text: 
'first-result' }], + }); + expect(messages[3]).toMatchObject({ + role: 'toolResult', + toolCallId: 'tool_approve', + toolName: 'approve', + isError: true, + }); + expect(messages[3].content[0]).toMatchObject({ + type: 'text', + text: expect.stringContaining('Tool argument validation failed'), + }); + expect(messages[4]).toMatchObject({ + role: 'assistant', + content: [{ type: 'text', text: 'recovered' }], + }); }); - it('throws ToolNotRegisteredError when resuming after the tool has been removed', async () => { - const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); - const tool = makeApprovalTool(jest.fn()); + it('regression: a tool without a decision schema runs without pausing', async () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'toolUse', + content: [ + { type: 'toolCall', id: 'tool_1', name: 'echo', arguments: { text: 'hi' } }, + ], + }), + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'done' }], + }), + ], + }); + const execute = jest.fn(async () => ({ + content: [{ type: 'text' as const, text: 'hi' }], + })); const agent = new Agent({ initialState: { model: makeFakeModel() }, streamFn: provider.stream, }); - agent.setTools([tool]); + agent.setTools([ + { + name: 'echo', + label: 'Echo', + description: 'Echo text', + parameters: { + type: 'object', + properties: { text: { type: 'string' } }, + required: ['text'], + }, + execute, + }, + ]); - await agent.prompt('approve thing'); - const runId = agent.pendingRunId!; + const events: AgentEvent[] = []; + agent.subscribe((e) => events.push(e)); - agent.setTools([]); + await agent.prompt('go'); - await expect(agent.resume(runId, { approved: true })).rejects.toBeInstanceOf( - ToolNotRegisteredError - ); + expect(execute).toHaveBeenCalledTimes(1); + expect(events.some((e) => e.type === 'tool_decision_pending')).toBe(false); + expect(events.some((e) => e.type === 
'agent_end')).toBe(true); }); }); diff --git a/packages/agent/__tests__/run-handle.test.ts b/packages/agent/__tests__/run-handle.test.ts new file mode 100644 index 0000000..cb8fe2b --- /dev/null +++ b/packages/agent/__tests__/run-handle.test.ts @@ -0,0 +1,444 @@ +import { + type AssistantMessageEvent, + type Context, + createAssistantMessageEventStream, + type ModelDescriptor, + type StreamOptions, +} from 'agentic-kit'; +import { + createScriptedProvider, + makeFakeAssistantMessage, + makeFakeModel, + parseSSEStream, +} from '@test/index'; + +import { Agent, type AgentEvent, type AgentTool } from '../src'; + +describe('AgentRunHandle', () => { + describe('events()', () => { + it('yields scripted events in emission order with correct shapes', async () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'hello' }], + }), + ], + }); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + + const handle = agent.prompt('hi'); + const collected: AgentEvent[] = []; + for await (const event of handle.events()) { + collected.push(event); + } + + expect(collected[0]).toEqual({ type: 'agent_start' }); + const types = collected.map((e) => e.type); + expect(types).toContain('message_start'); + expect(types).toContain('turn_start'); + expect(types).toContain('turn_end'); + expect(types[types.length - 1]).toBe('agent_end'); + + const subscribeEvents: AgentEvent[] = []; + const agent2 = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'hello' }], + }), + ], + }).stream, + }); + agent2.subscribe((e) => subscribeEvents.push(e)); + await agent2.prompt('hi'); + expect(collected.map((e) => e.type)).toEqual(subscribeEvents.map((e) => e.type)); + }); + }); + + describe('toReadableStream()', () => { 
+ it('produces a ReadableStream whose events match the subscribe channel exactly', async () => { + const subscribeEvents: AgentEvent[] = []; + const subscribeAgent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'world' }], + }), + ], + }).stream, + }); + subscribeAgent.subscribe((e) => subscribeEvents.push(e)); + await subscribeAgent.prompt('hi'); + + const streamAgent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'world' }], + }), + ], + }).stream, + }); + + const stream = streamAgent.prompt('hi').toReadableStream(); + const reader = stream.getReader(); + const events: AgentEvent[] = []; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + events.push(value); + } + + expect(events.map((e) => e.type)).toEqual(subscribeEvents.map((e) => e.type)); + }); + }); + + describe('toResponse()', () => { + it('sets SSE headers and emits a body parseable by parseSSEStream', async () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'sse' }], + }), + ], + }); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + + const response = agent.prompt('hi').toResponse(); + expect(response.headers.get('Content-Type')).toBe('text/event-stream'); + const cacheControl = response.headers.get('Cache-Control') ?? 
''; + expect(cacheControl).toMatch(/no-cache/); + expect(response.headers.get('Connection')).toBe('keep-alive'); + + expect(response.body).toBeInstanceOf(ReadableStream); + const events: AgentEvent[] = []; + for await (const event of parseSSEStream(response.body!)) { + events.push(event); + } + + expect(events[0]).toEqual({ type: 'agent_start' }); + expect(events.at(-1)?.type).toBe('agent_end'); + }); + + it('respects user-supplied headers without clobbering them', () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'sse' }], + }), + ], + }); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + + const response = agent.prompt('hi').toResponse({ + status: 202, + headers: { 'X-Custom': 'yes', 'Cache-Control': 'private' }, + }); + expect(response.status).toBe(202); + expect(response.headers.get('X-Custom')).toBe('yes'); + expect(response.headers.get('Cache-Control')).toBe('private'); + // Drain to avoid leaking + void response.body?.cancel(); + }); + }); + + describe('backpressure', () => { + it('throttles the producer when the consumer stops reading', async () => { + const target = makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'x'.repeat(200) }], + }); + const TOTAL_DELTAS = 200; + + const burstStreamFn = ( + _model: ModelDescriptor, + _context: Context, + _options?: StreamOptions + ) => { + const stream = createAssistantMessageEventStream(); + queueMicrotask(() => { + stream.push({ type: 'start', partial: target }); + stream.push({ type: 'text_start', contentIndex: 0, partial: target }); + for (let i = 0; i < TOTAL_DELTAS; i++) { + const delta: AssistantMessageEvent = { + type: 'text_delta', + contentIndex: 0, + delta: 'x', + partial: target, + }; + stream.push(delta); + } + stream.push({ + type: 'text_end', + contentIndex: 0, + content: target.content[0].type === 'text' ? 
target.content[0].text : '', + partial: target, + }); + stream.push({ type: 'done', reason: 'stop', message: target }); + stream.end(target); + }); + return stream; + }; + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: burstStreamFn, + }); + + let emitCount = 0; + agent.subscribe(() => { + emitCount++; + }); + + const stream = agent.prompt('go').toReadableStream(); + const reader = stream.getReader(); + + // Read just enough to start, then stop. The producer should stall well + // before reaching TOTAL_DELTAS. + await reader.read(); + + // Yield repeatedly so the agent loop has every chance to push more. + for (let i = 0; i < 50; i++) { + await new Promise((resolve) => setImmediate(resolve)); + } + + // hwm=8 by default; one read frees one slot. Allow plenty of headroom + // for events the agent emits before each text_delta begins streaming + // and for any in-flight push. + expect(emitCount).toBeLessThan(50); + expect(emitCount).toBeLessThan(TOTAL_DELTAS); + + // Drain the stream so the run can finish cleanly. 
+ while (true) { + const { done } = await reader.read(); + if (done) break; + } + + expect(emitCount).toBeGreaterThan(TOTAL_DELTAS); + }); + }); + + describe('cancel propagation', () => { + function makeAbortableStreamFn(): { + streamFn: (model: ModelDescriptor, context: Context, options?: StreamOptions) => ReturnType; + getSignal: () => AbortSignal | undefined; + } { + let capturedSignal: AbortSignal | undefined; + const streamFn = ( + _model: ModelDescriptor, + _context: Context, + options?: StreamOptions + ) => { + capturedSignal = options?.signal; + const stream = createAssistantMessageEventStream(); + const finishAborted = () => { + const aborted = makeFakeAssistantMessage({ + stopReason: 'aborted', + errorMessage: 'cancelled by consumer', + content: [], + }); + stream.push({ type: 'error', reason: 'aborted', error: aborted }); + stream.end(aborted); + }; + if (options?.signal?.aborted) { + queueMicrotask(finishAborted); + } else { + options?.signal?.addEventListener('abort', finishAborted, { once: true }); + } + return stream; + }; + return { streamFn, getSignal: () => capturedSignal }; + } + + it('aborts streamFn and clears isStreaming when reader.cancel() is called', async () => { + const { streamFn, getSignal } = makeAbortableStreamFn(); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn, + }); + + const handle = agent.prompt('go'); + const stream = handle.toReadableStream(); + const reader = stream.getReader(); + + await reader.read(); + await reader.cancel(); + await handle; + + expect(getSignal()?.aborted).toBe(true); + expect(agent.state.isStreaming).toBe(false); + }); + + it('aborts streamFn and clears isStreaming when response.body.cancel() is called', async () => { + const { streamFn, getSignal } = makeAbortableStreamFn(); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn, + }); + + const handle = agent.prompt('go'); + const response = handle.toResponse(); + const reader = 
response.body!.getReader(); + + await reader.read(); + await reader.cancel(); + await handle; + + expect(getSignal()?.aborted).toBe(true); + expect(agent.state.isStreaming).toBe(false); + }); + }); + + describe('single-use enforcement', () => { + it('throws when a second consumer is attached', async () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'x' }], + }), + ], + }); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + + const handle = agent.prompt('hi'); + handle.events(); + expect(() => handle.toReadableStream()).toThrow(/already consumed/); + expect(() => handle.toResponse()).toThrow(/already consumed/); + expect(() => handle.events()).toThrow(/already consumed/); + }); + + it('throws when toResponse() is called twice', () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'x' }], + }), + ], + }); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + + const handle = agent.prompt('hi'); + const first = handle.toResponse(); + expect(() => handle.toResponse()).toThrow(/already consumed/); + void first.body?.cancel(); + }); + }); + + describe('PromiseLike auto-sink', () => { + it('await on the handle drives the run to completion without an explicit consumer', async () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'done' }], + }), + ], + }); + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + + await agent.prompt('hi'); + + expect(agent.state.messages.at(-1)).toMatchObject({ + role: 'assistant', + content: [{ type: 'text', text: 'done' }], + }); + expect(agent.state.isStreaming).toBe(false); + }); 
+ + it('rejects the awaited handle when the binder rejects (e.g. streamFn throws)', async () => { + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: () => { + throw new Error('binder failure'); + }, + }); + + await expect(agent.prompt('hi')).rejects.toThrow(/binder failure/); + expect(agent.state.isStreaming).toBe(false); + }); + + it('resolves when the run pauses on a decision-bearing tool', async () => { + const provider = createScriptedProvider({ + responses: [ + makeFakeAssistantMessage({ + stopReason: 'toolUse', + content: [ + { + type: 'toolCall', + id: 'tool_1', + name: 'approve', + arguments: { target: 'thing' }, + }, + ], + }), + ], + }); + const execute = jest.fn(); + const tool: AgentTool = { + name: 'approve', + label: 'Approve', + description: 'Tool that requires explicit approval', + parameters: { + type: 'object', + properties: { target: { type: 'string' } }, + required: ['target'], + }, + decision: { + type: 'object', + properties: { approved: { type: 'boolean' } }, + required: ['approved'], + }, + execute, + }; + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + agent.setTools([tool]); + + const events: AgentEvent[] = []; + agent.subscribe((e) => events.push(e)); + + await agent.prompt('approve thing'); + + expect(execute).not.toHaveBeenCalled(); + expect(agent.state.isStreaming).toBe(false); + const pending = events.find((e) => e.type === 'tool_decision_pending'); + expect(pending).toMatchObject({ + type: 'tool_decision_pending', + toolCallId: 'tool_1', + toolName: 'approve', + }); + }); + }); +}); diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index a7ff4fd..714a371 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -1,5 +1,3 @@ -import { randomUUID } from 'node:crypto'; - import { type AssistantMessage, type Context, @@ -9,18 +7,13 @@ import { stream, type StreamOptions, type ToolCallContent, - type 
ToolDefinition, } from 'agentic-kit'; import { - type AgentRun, - type AgentRunPending, - DecisionValidationError, - MemoryRunStore, - RunNotFoundError, - type RunStore, - ToolNotRegisteredError, -} from './run-store.js'; + type AgentRunHandle, + DefaultAgentRunHandle, + type RunChannelPush, +} from './run-handle.js'; import type { AgentEvent, AgentOptions, @@ -29,6 +22,7 @@ import type { AgentToolResult, } from './types.js'; import { + DecisionValidationError, validateSchema, validateToolArguments as defaultValidateToolArguments, } from './validation.js'; @@ -38,12 +32,9 @@ export class Agent { private readonly transformContext?: AgentOptions['transformContext']; private readonly streamFn: NonNullable; private readonly validateToolArguments: NonNullable; - private readonly runStore: RunStore; - private readonly generateRunId: () => string; private abortController?: AbortController; private running?: Promise; - private currentRunId?: string; - private pausedRunId?: string; + private runChannel?: { push: RunChannelPush }; private _state: AgentState; @@ -60,8 +51,6 @@ export class Agent { this.streamFn = options.streamFn ?? stream; this.transformContext = options.transformContext; this.validateToolArguments = options.validateToolArguments ?? defaultValidateToolArguments; - this.runStore = options.runStore ?? new MemoryRunStore(); - this.generateRunId = options.generateRunId ?? randomUUID; } get state(): AgentState { @@ -110,12 +99,6 @@ export class Agent { } abort(): void { - if (this.pausedRunId) { - const runId = this.pausedRunId; - this.pausedRunId = undefined; - void this.runStore.delete(runId); - return; - } this.abortController?.abort(); } @@ -123,125 +106,141 @@ export class Agent { return this.running ?? 
Promise.resolve(); } - async prompt(input: string | Message): Promise { + prompt(input: string | Message): AgentRunHandle { if (this._state.isStreaming) { throw new Error('Agent is already processing a prompt'); } - if (this.pausedRunId) { - throw new Error('Agent is paused awaiting a decision; call resume() or abort() first'); - } const message = typeof input === 'string' ? createUserMessage(input) : input; - await this.runLoop({ runId: this.generateRunId(), initialMessages: [message] }); + + return new DefaultAgentRunHandle(async (push, signal) => + this.runLoop({ + initialMessages: [message], + externalPush: push ?? undefined, + externalAbortSignal: signal, + }) + ); } - async continue(): Promise { + continue(): AgentRunHandle { if (this._state.isStreaming) { throw new Error('Agent is already processing'); } - if (this.pausedRunId) { - throw new Error('Agent is paused awaiting a decision; call resume() or abort() first'); - } const lastMessage = this._state.messages[this._state.messages.length - 1]; if (!lastMessage) { throw new Error('No messages to continue from'); } + if (lastMessage.role === 'assistant') { - throw new Error('Cannot continue from message role: assistant'); + const pendingDecisions = this.findPendingDecisions(lastMessage); + if (pendingDecisions.length === 0) { + throw new Error( + 'Cannot continue from trailing assistant message: no tool calls awaiting a decision' + ); + } + for (const { tool, decision } of pendingDecisions) { + const errors = validateSchema(tool.decision!, decision, 'root'); + if (errors.length > 0) { + throw new DecisionValidationError(tool.name, errors); + } + } } - await this.runLoop({ runId: this.generateRunId() }); - } - - get pendingRunId(): string | undefined { - return this.pausedRunId; + return new DefaultAgentRunHandle(async (push, signal) => + this.runLoop({ + externalPush: push ?? 
undefined, + externalAbortSignal: signal, + }) + ); } - async resume(runId: string, decision: unknown): Promise { - if (this._state.isStreaming) { - throw new Error('Agent is already processing'); - } - - const run = await this.runStore.load(runId); - if (!run) { - throw new RunNotFoundError(runId); - } - if (!run.pending) { - throw new Error(`Run ${runId} is not paused`); - } - - const tool = this._state.tools.find((t) => t.name === run.pending!.toolName); - if (!tool) { - throw new ToolNotRegisteredError(runId, run.pending.toolName); - } - if (!tool.decision) { - throw new Error( - `Tool '${tool.name}' has no decision schema; cannot resume run ${runId}` - ); - } - - const errors = validateSchema(tool.decision, decision, 'root'); - if (errors.length > 0) { - throw new DecisionValidationError(runId, tool.name, errors); - } + private findPendingDecisions( + message: AssistantMessage + ): Array<{ toolCall: ToolCallContent; tool: AgentTool; decision: unknown }> { + const completedToolCallIds = new Set( + this._state.messages + .filter((m): m is Extract => m.role === 'toolResult') + .map((m) => m.toolCallId) + ); - this._state.model = run.model; - if (run.systemPrompt !== undefined) { - this._state.systemPrompt = run.systemPrompt; + const pending: Array<{ toolCall: ToolCallContent; tool: AgentTool; decision: unknown }> = []; + for (const block of message.content) { + if (block.type !== 'toolCall') { + continue; + } + if (completedToolCallIds.has(block.id)) { + continue; + } + if (!('decision' in block) || block.decision === undefined) { + continue; + } + const tool = this._state.tools.find((t) => t.name === block.name); + if (!tool || !tool.decision) { + continue; + } + pending.push({ toolCall: block, tool, decision: block.decision }); } - this._state.messages = [...run.messages]; - this.pausedRunId = undefined; - - await this.runLoop({ - runId, - resumeDecision: { toolCallId: run.pending.toolCallId, decision }, - }); + return pending; } private async runLoop(opts: { - 
runId: string; initialMessages?: Message[]; - resumeDecision?: { toolCallId: string; decision: unknown }; + externalPush?: RunChannelPush; + externalAbortSignal?: AbortSignal; }): Promise { this.running = (async () => { this.abortController = new AbortController(); + const localAbortController = this.abortController; this._state.isStreaming = true; this._state.streamMessage = null; this._state.error = undefined; - this.currentRunId = opts.runId; + if (opts.externalPush) { + this.runChannel = { push: opts.externalPush }; + } + + const onExternalAbort = () => localAbortController.abort(); + if (opts.externalAbortSignal) { + if (opts.externalAbortSignal.aborted) { + localAbortController.abort(); + } else { + opts.externalAbortSignal.addEventListener('abort', onExternalAbort, { once: true }); + } + } try { - this.emit({ type: 'agent_start' }); + await this.emit({ type: 'agent_start' }); if (opts.initialMessages && opts.initialMessages.length > 0) { for (const message of opts.initialMessages) { - this.emit({ type: 'message_start', message }); + await this.emit({ type: 'message_start', message }); this.appendMessage(message); - this.emit({ type: 'message_end', message }); + await this.emit({ type: 'message_end', message }); } } - let resumeDecision = opts.resumeDecision; + let resumingFromTrailingAssistant = + this._state.messages[this._state.messages.length - 1]?.role === 'assistant'; while (true) { let assistantMessage: AssistantMessage; - if (resumeDecision) { + if (resumingFromTrailingAssistant) { const last = this._state.messages[this._state.messages.length - 1]; if (!last || last.role !== 'assistant') { throw new Error('Cannot resume: last message is not an assistant message'); } assistantMessage = last; + resumingFromTrailingAssistant = false; } else { - this.emit({ type: 'turn_start' }); - assistantMessage = await this.generateAssistantMessage(this.abortController.signal); + await this.emit({ type: 'turn_start' }); + assistantMessage = await 
this.generateAssistantMessage(localAbortController.signal); this.appendMessage(assistantMessage); - this.emit({ type: 'message_end', message: assistantMessage }); + await this.emit({ type: 'message_end', message: assistantMessage }); if (assistantMessage.stopReason === 'error' || assistantMessage.stopReason === 'aborted') { this._state.error = assistantMessage.errorMessage; - this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); + await this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); break; } } @@ -250,32 +249,29 @@ export class Agent { (block): block is ToolCallContent => block.type === 'toolCall' ); if (toolCalls.length === 0) { - this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); + await this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); break; } - const outcome = await this.executeToolCalls( - toolCalls, - this.abortController.signal, - resumeDecision - ); - resumeDecision = undefined; + const outcome = await this.executeToolCalls(toolCalls, localAbortController.signal); if (outcome.status === 'paused') { return; } - this.emit({ type: 'turn_end', message: assistantMessage, toolResults: outcome.results }); + await this.emit({ type: 'turn_end', message: assistantMessage, toolResults: outcome.results }); } - this.emit({ type: 'agent_end', messages: [...this._state.messages] }); - await this.runStore.delete(opts.runId); + await this.emit({ type: 'agent_end', messages: [...this._state.messages] }); } finally { + if (opts.externalAbortSignal) { + opts.externalAbortSignal.removeEventListener('abort', onExternalAbort); + } this._state.isStreaming = false; this._state.streamMessage = null; this.abortController = undefined; this.running = undefined; - this.currentRunId = undefined; + this.runChannel = undefined; } })(); @@ -302,7 +298,7 @@ export class Agent { switch (event.type) { case 'start': this._state.streamMessage = event.partial; - this.emit({ type: 
'message_start', message: event.partial }); + await this.emit({ type: 'message_start', message: event.partial }); break; case 'text_start': case 'text_delta': @@ -314,7 +310,7 @@ export class Agent { case 'toolcall_delta': case 'toolcall_end': this._state.streamMessage = event.partial; - this.emit({ + await this.emit({ type: 'message_update', message: event.partial, assistantMessageEvent: event, @@ -332,8 +328,7 @@ export class Agent { private async executeToolCalls( toolCalls: ToolCallContent[], - signal: AbortSignal, - resumeDecision?: { toolCallId: string; decision: unknown } + signal: AbortSignal ): Promise< | { status: 'completed'; results: ReturnType[] } | { status: 'paused' } @@ -353,13 +348,18 @@ export class Agent { const tool = this._state.tools.find((candidate) => candidate.name === toolCall.name); const args = toolCall.arguments as Record; - const isResumeTarget = resumeDecision?.toolCallId === toolCall.id; + const decisionAttached = 'decision' in toolCall && toolCall.decision !== undefined; - if (tool?.decision && !isResumeTarget) { + if (tool?.decision && !decisionAttached) { let validatedArgs: Record; try { validatedArgs = this.validateToolArguments(tool.parameters, args); } catch (error) { + for (const prior of results) { + await this.appendMessageWithEvents(prior); + } + results.length = 0; + const result: AgentToolResult = { content: [ { @@ -368,13 +368,13 @@ export class Agent { }, ], }; - this.emit({ + await this.emit({ type: 'tool_execution_start', toolCallId: toolCall.id, toolName: toolCall.name, args, }); - this.emit({ + await this.emit({ type: 'tool_execution_end', toolCallId: toolCall.id, toolName: toolCall.name, @@ -382,36 +382,16 @@ export class Agent { isError: true, }); const toolResult = createToolResultMessage(toolCall.id, toolCall.name, result.content, true); - this.appendMessageWithEvents(toolResult); + await this.appendMessageWithEvents(toolResult); continue; } for (const toolResult of results) { - 
this.appendMessageWithEvents(toolResult); + await this.appendMessageWithEvents(toolResult); } - const runId = this.currentRunId!; - const pending: AgentRunPending = { - toolCallId: toolCall.id, - toolName: toolCall.name, - input: validatedArgs, - }; - const now = Date.now(); - const run: AgentRun = { - id: runId, - model: this._state.model, - systemPrompt: this._state.systemPrompt, - tools: this._state.tools.map(toToolDefinition), - messages: [...this._state.messages], - pending, - createdAt: now, - updatedAt: now, - }; - await this.runStore.save(run); - this.pausedRunId = runId; - this.emit({ + await this.emit({ type: 'tool_decision_pending', - runId, toolCallId: toolCall.id, toolName: toolCall.name, input: validatedArgs, @@ -420,7 +400,7 @@ export class Agent { return { status: 'paused' }; } - const decisionForExecute = isResumeTarget ? resumeDecision!.decision : undefined; + const decisionForExecute = decisionAttached ? toolCall.decision : undefined; const toolResult = await this.executeOneTool( tool, toolCall, @@ -432,7 +412,7 @@ export class Agent { } for (const toolResult of results) { - this.appendMessageWithEvents(toolResult); + await this.appendMessageWithEvents(toolResult); } return { status: 'completed', results }; @@ -445,7 +425,7 @@ export class Agent { decision: unknown, signal: AbortSignal ): Promise> { - this.emit({ + await this.emit({ type: 'tool_execution_start', toolCallId: toolCall.id, toolName: toolCall.name, @@ -468,7 +448,7 @@ export class Agent { decision, signal, (partialResult) => { - this.emit({ + void this.emit({ type: 'tool_execution_update', toolCallId: toolCall.id, toolName: toolCall.name, @@ -489,7 +469,7 @@ export class Agent { isError = true; } - this.emit({ + await this.emit({ type: 'tool_execution_end', toolCallId: toolCall.id, toolName: toolCall.name, @@ -500,23 +480,18 @@ export class Agent { return createToolResultMessage(toolCall.id, toolCall.name, result.content, isError); } - private appendMessageWithEvents(message: 
Message): void { - this.emit({ type: 'message_start', message }); + private async appendMessageWithEvents(message: Message): Promise { + await this.emit({ type: 'message_start', message }); this.appendMessage(message); - this.emit({ type: 'message_end', message }); + await this.emit({ type: 'message_end', message }); } - private emit(event: AgentEvent): void { + private async emit(event: AgentEvent): Promise { for (const listener of this.listeners) { listener(event); } + if (this.runChannel) { + await this.runChannel.push(event); + } } } - -function toToolDefinition(tool: AgentTool): ToolDefinition { - return { - name: tool.name, - description: tool.description, - parameters: tool.parameters, - }; -} diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index bad2c86..220b76b 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -1,4 +1,4 @@ export * from './agent.js'; -export * from './run-store.js'; +export * from './run-handle.js'; export * from './types.js'; export * from './validation.js'; diff --git a/packages/agent/src/run-handle.ts b/packages/agent/src/run-handle.ts new file mode 100644 index 0000000..038ad20 --- /dev/null +++ b/packages/agent/src/run-handle.ts @@ -0,0 +1,198 @@ +import type { AgentEvent } from './types.js'; + +export type RunChannelPush = (event: AgentEvent) => Promise; + +export type AgentRunBinder = ( + push: RunChannelPush | null, + signal: AbortSignal +) => Promise; + +export interface AgentRunHandle extends PromiseLike { + events(): AsyncIterable; + toReadableStream(): ReadableStream; + toResponse(init?: ResponseInit): Response; +} + +const DEFAULT_HIGH_WATER_MARK = 8; + +export interface AgentRunHandleOptions { + highWaterMark?: number; +} + +export class DefaultAgentRunHandle implements AgentRunHandle { + private startedAs: 'events' | 'stream' | 'response' | 'sink' | null = null; + private completion: Promise | null = null; + private readonly highWaterMark: number; + + constructor( + private 
readonly bind: AgentRunBinder, + options: AgentRunHandleOptions = {} + ) { + this.highWaterMark = options.highWaterMark ?? DEFAULT_HIGH_WATER_MARK; + } + + events(): AsyncIterable { + const stream = this.startStream('events'); + return readableStreamToAsyncIterable(stream); + } + + toReadableStream(): ReadableStream { + return this.startStream('stream'); + } + + toResponse(init?: ResponseInit): Response { + const stream = this.startStream('response'); + const sse = stream.pipeThrough(createSSETransform()); + + const headers = new Headers(init?.headers); + if (!headers.has('Content-Type')) { + headers.set('Content-Type', 'text/event-stream'); + } + if (!headers.has('Cache-Control')) { + headers.set('Cache-Control', 'no-cache, no-transform'); + } + if (!headers.has('Connection')) { + headers.set('Connection', 'keep-alive'); + } + + const responseInit: ResponseInit = { ...init, headers }; + return new Response(sse, responseInit); + } + + then( + onfulfilled?: ((value: void) => TResult1 | PromiseLike) | null, + onrejected?: ((reason: unknown) => TResult2 | PromiseLike) | null + ): Promise { + if (!this.startedAs) { + this.startSink(); + } + return this.completion!.then(onfulfilled, onrejected); + } + + private ensureNotStarted(via: NonNullable): void { + if (this.startedAs && this.startedAs !== via) { + throw new Error( + `AgentRunHandle already consumed via ${this.startedAs}; cannot also call ${via}()` + ); + } + if (this.startedAs === via) { + throw new Error(`AgentRunHandle already consumed via ${via}()`); + } + } + + private startStream(via: 'events' | 'stream' | 'response'): ReadableStream { + this.ensureNotStarted(via); + this.startedAs = via; + + const abortController = new AbortController(); + let cancelled = false; + const pullWaiters = new Set<() => void>(); + + const releasePullWaiters = () => { + if (pullWaiters.size === 0) { + return; + } + const waiters = Array.from(pullWaiters); + pullWaiters.clear(); + for (const resolve of waiters) { + resolve(); + } + 
}; + + let runPromise: Promise; + + const stream = new ReadableStream( + { + start: (controller) => { + const push: RunChannelPush = async (event) => { + if (cancelled) { + return; + } + try { + controller.enqueue(event); + } catch { + cancelled = true; + return; + } + while (!cancelled && (controller.desiredSize ?? 1) <= 0) { + await new Promise((resolve) => { + pullWaiters.add(resolve); + }); + } + }; + + runPromise = (async () => { + try { + await this.bind(push, abortController.signal); + if (!cancelled) { + try { + controller.close(); + } catch { + // already closed + } + } + } catch (err) { + if (!cancelled) { + try { + controller.error(err); + } catch { + // already closed + } + } + throw err; + } + })(); + + this.completion = runPromise; + this.completion.catch(() => {}); + }, + pull: () => { + releasePullWaiters(); + }, + cancel: () => { + cancelled = true; + abortController.abort(); + releasePullWaiters(); + }, + }, + { highWaterMark: this.highWaterMark } + ); + + return stream; + } + + private startSink(): void { + this.ensureNotStarted('sink'); + this.startedAs = 'sink'; + + const abortController = new AbortController(); + this.completion = this.bind(null, abortController.signal); + this.completion.catch(() => {}); + } +} + +async function* readableStreamToAsyncIterable( + stream: ReadableStream +): AsyncIterableIterator { + const reader = stream.getReader(); + try { + while (true) { + const { done, value } = await reader.read(); + if (done) { + return; + } + yield value; + } + } finally { + reader.releaseLock(); + } +} + +function createSSETransform(): TransformStream { + const encoder = new TextEncoder(); + return new TransformStream({ + transform(event, controller) { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`)); + }, + }); +} diff --git a/packages/agent/src/run-store.ts b/packages/agent/src/run-store.ts deleted file mode 100644 index a658d69..0000000 --- a/packages/agent/src/run-store.ts +++ /dev/null @@ -1,81 +0,0 @@ 
-import type { Message, ModelDescriptor, ToolDefinition } from 'agentic-kit'; - -export interface AgentRunPending { - toolCallId: string; - toolName: string; - input: Record; -} - -export interface AgentRun { - id: string; - model: ModelDescriptor; - systemPrompt?: string; - tools: ToolDefinition[]; - messages: Message[]; - pending?: AgentRunPending; - createdAt: number; - updatedAt: number; -} - -export interface RunStore { - save(run: AgentRun): Promise; - load(id: string): Promise; - delete(id: string): Promise; -} - -export class MemoryRunStore implements RunStore { - private readonly runs = new Map(); - - async save(run: AgentRun): Promise { - this.runs.set(run.id, cloneRun(run)); - } - - async load(id: string): Promise { - const run = this.runs.get(id); - return run ? cloneRun(run) : undefined; - } - - async delete(id: string): Promise { - this.runs.delete(id); - } -} - -export class RunNotFoundError extends Error { - readonly runId: string; - - constructor(runId: string) { - super(`Run not found: ${runId}`); - this.name = 'RunNotFoundError'; - this.runId = runId; - } -} - -export class DecisionValidationError extends Error { - readonly runId: string; - readonly toolName: string; - readonly errors: string[]; - - constructor(runId: string, toolName: string, errors: string[]) { - super(`Decision validation failed for tool '${toolName}':\n${errors.map((e) => `- ${e}`).join('\n')}`); - this.name = 'DecisionValidationError'; - this.runId = runId; - this.toolName = toolName; - this.errors = errors; - } -} - -export class ToolNotRegisteredError extends Error { - readonly runId: string; - readonly toolName: string; - - constructor(runId: string, toolName: string) { - super(`Tool '${toolName}' is not registered on the agent resuming run ${runId}`); - this.name = 'ToolNotRegisteredError'; - this.runId = runId; - this.toolName = toolName; - } -} - -function cloneRun(run: AgentRun): AgentRun { - return JSON.parse(JSON.stringify(run)) as AgentRun; -} diff --git 
a/packages/agent/src/types.ts b/packages/agent/src/types.ts index 9e724d9..be1b6e2 100644 --- a/packages/agent/src/types.ts +++ b/packages/agent/src/types.ts @@ -10,8 +10,6 @@ import type { ToolResultMessage, } from 'agentic-kit'; -import type { RunStore } from './run-store.js'; - export interface AgentToolResult { content: ToolResultMessage['content']; details?: TDetails; @@ -73,7 +71,6 @@ export type AgentEvent = } | { type: 'tool_decision_pending'; - runId: string; toolCallId: string; toolName: string; input: Record; @@ -92,6 +89,4 @@ export interface AgentOptions { schema: JsonSchema, args: Record ) => Record; - runStore?: RunStore; - generateRunId?: () => string; } diff --git a/packages/agent/src/validation.ts b/packages/agent/src/validation.ts index 3fb227e..0e3d64c 100644 --- a/packages/agent/src/validation.ts +++ b/packages/agent/src/validation.ts @@ -1,5 +1,16 @@ import type { JsonSchema } from 'agentic-kit'; +export class DecisionValidationError extends Error { + readonly toolName: string; + readonly errors: string[]; + constructor(toolName: string, errors: string[]) { + super(`Decision validation failed for tool '${toolName}':\n${errors.map((e) => `- ${e}`).join('\n')}`); + this.name = 'DecisionValidationError'; + this.toolName = toolName; + this.errors = errors; + } +} + export function validateToolArguments( schema: JsonSchema, args: Record diff --git a/packages/agentic-kit/src/types.ts b/packages/agentic-kit/src/types.ts index 00b432c..d4812a8 100644 --- a/packages/agentic-kit/src/types.ts +++ b/packages/agentic-kit/src/types.ts @@ -89,6 +89,7 @@ export interface ToolCallContent { name: string; arguments: Record; rawArguments?: string; + decision?: unknown; } export interface Usage { From 9ad23557f04ddcb30339a4f14cd78e4a79eba51f Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 4 May 2026 16:52:11 +0800 Subject: [PATCH 08/16] docs: align with message-log resume design --- ROADMAP.md | 294 ++++++++++++++++++++----------------------- tools/test/README.md | 
5 - 2 files changed, 137 insertions(+), 162 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 507b319..24f3981 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -85,9 +85,6 @@ function createScriptedSSEResponse(events: AgentEvent[]): Response // SSE parser for assertions on emitted bytes function parseSSEStream(stream: ReadableStream): AsyncIterable -// portable contract suite for any RunStore implementation -function runRunStoreContractTests(makeStore: () => RunStore | Promise): void - // small fixtures function makeFakeModel(overrides?: Partial): ModelDescriptor ``` @@ -102,7 +99,7 @@ dep. That is intentional. ### 0.3 Integration Test Lane A workspace-level `pnpm test:integration` script. Brings up -`http.createServer` in-process, runs `agent.start(...).toResponse()` against +`http.createServer` in-process, runs `agent.prompt(...).toResponse()` against it, exercises pause/resume across a real HTTP boundary via `fetch`. Mock providers, real HTTP, real serialization. Catches wire-format and abort regressions that pure unit tests miss. @@ -122,11 +119,12 @@ the loop. ## Phase 1 — Pause/Resume + React Bindings (must) -The single architectural change behind Phase 1: the agent loop becomes -**checkpoint-able**. Tools may declare a `decision` schema; when the loop hits -such a tool, it persists run state, emits a structured event, and waits for a -matching decision payload before continuing. Everything else in Phase 1 follows -from this. +The single architectural change behind Phase 1: tools may declare a `decision` +schema. When the loop hits such a tool, it emits a structured event and ends +the run cleanly. The host writes the decision into the message log; the next +loop entry executes the tool with that decision. **State lives in the messages +— there is no separate run store.** Same idea as Vercel AI SDK: a stateless +server, a stateful client (or persistent message log). 
### 1.1 Pausable Tools @@ -144,14 +142,22 @@ Extend `AgentTool` with an optional `decision` JSON Schema. The agent loop: 1. When the LLM emits a call to a tool that declares `decision`: - Validate the LLM's input against `parameters` as today. - - Emit a `tool_decision_pending` event with the input and the schema. - - Persist the run via the configured `RunStore` (see 1.2). - - Halt the loop and return. -2. The host invokes `agent.resume(runId, decision)`: - - Load run state from the `RunStore`. - - Validate `decision` against the tool's `decision` schema. - - Call `tool.execute(input, decision, ctx)`. - - Continue the loop with the result. + - Emit a `tool_decision_pending` event with the toolCallId, input, and schema. + - Stop the loop. End the run handle (close the stream). + - **No persistence.** The agent's in-memory state is fine for in-process + consumers; for cross-process consumers, state survives via the message + log the host re-POSTs on the next request. +2. The host attaches the decision to the trailing tool call in the message + log (representation: a `decision` payload on the tool-call content block — + see Open Questions for the exact shape). +3. The host calls `agent.continue()` (or constructs a fresh `Agent` over the + augmented messages — both work; the message log is the source of truth). +4. The loop's first action: walk the trailing assistant message's tool calls. + For any call that declares `decision`, has a decision attached in the log, + and has no matching `tool_result`, validate the decision against the tool's + schema and call `tool.execute(toolCallId, input, decision, ctx)`. Append + the result to the message log. +5. Continue the loop normally. Tools without a `decision` schema run as today — synchronously inside the loop. 
@@ -171,27 +177,28 @@ interface AgentTool extends ToolDefinition { } class Agent { - // existing - prompt(input: string | Message): Promise + prompt(input: string | Message): AgentRunHandle + continue(): AgentRunHandle // entry point for resume after decision attached abort(): void - - // new - resume(runId: string, decision: unknown): Promise } type AgentEvent = // ... existing events | { type: 'tool_decision_pending' - runId: string toolCallId: string toolName: string input: Record schema: JsonSchema } ``` +There is **no `agent.resume(runId, decision)` and no `RunStore`.** The host +mutates `agent.state.messages` (or the equivalent input to a fresh `Agent`) +to attach the decision, then re-enters via `continue()` or `prompt()`. The +loop figures out what to do from the message state. + A pausable tool with no `decision` is invalid — the field's presence is the -mechanism. Validation runs before `execute` is called; a malformed decision -rejects with a typed error and does not consume the run. +mechanism. Decision validation runs before `execute` is called; a malformed +decision rejects with a typed error and the loop does not advance. #### Naming @@ -207,83 +214,25 @@ shape — the kit does not over-generalize now. Unit tests in `@agentic-kit/agent`. Uses `createScriptedProvider` from 0.2. - Scripted provider emits a tool call to a `decision`-bearing tool. Assert: - `tool_decision_pending` event emitted, `runStore.save` called, loop halted. -- `agent.resume(runId, valid)` with a fresh scripted response. Assert: - `tool.execute` invoked with the decision argument, loop continues, final - event emitted. -- Resume with a decision that fails schema validation. Assert: typed - validation error, run not consumed, retry permitted. -- Resume with non-existent `runId`. Assert: typed `RunNotFound` error. -- `agent.abort()` while paused. Assert: clean cancellation, run cleaned up. 
+ `tool_decision_pending` event fires (with toolCallId, input, schema), the + run handle closes cleanly, no `tool.execute` invocation yet, no `tool_result` + appended to messages. +- Attach a valid decision to the trailing tool call in `agent.state.messages`, + call `agent.continue()` against a fresh scripted response. Assert: + `tool.execute` called with `(toolCallId, input, decision, ...)`, tool result + appended, loop continues, final event emitted. +- Attach a decision that fails schema validation, call `continue()`. Assert: + typed validation error surfaced via the event channel, no `execute` call, + no `tool_result` appended (the host can fix and retry). +- `agent.abort()` after the decision-pending event closed the handle: state + goes idle, no leaked listeners or in-flight promises. - Tool without `decision` still runs synchronously (regression guard). +- Mixed-batch tool ordering: an assistant turn with a regular tool call + followed by a decision-bearing tool whose arguments fail validation. The + arg-validation error appends a tool_result before the loop pauses, so + message order matches the LLM's tool-call order. -### 1.2 RunStore - -#### Problem - -Pause/resume across HTTP requests requires the loop's state to survive between -the pause and the resume call. The kit must define where that state lives -without forcing a specific backend on consumers. - -#### Design - -A small interface plus a default implementation. The kit owns the schema of -what gets persisted (the run record); the consumer owns where it lands. 
- -```ts -interface AgentRun { - id: string - model: string - systemPrompt?: string - tools: ToolDefinition[] - messages: Message[] - pending?: { - toolCallId: string - toolName: string - input: Record - } - createdAt: number - updatedAt: number -} - -interface RunStore { - save(run: AgentRun): Promise - load(id: string): Promise - delete(id: string): Promise -} - -class MemoryRunStore implements RunStore { /* default, ephemeral */ } -``` - -`@agentic-kit/agent` ships `MemoryRunStore` for development and single-process -deployments. Production users supply a Redis-, KV-, or DB-backed implementation. -The kit ships no production backend. - -The kit deliberately does **not** persist final conversation history. That is a -consumer concern. See 1.4 for lifecycle hooks. - -#### Testing - -Unit tests in `@agentic-kit/agent`. - -- `MemoryRunStore`: save → load round-trip; `load` of missing id returns - `undefined`; `delete` is idempotent; `delete` then `load` returns `undefined`. -- `runRunStoreContractTests(makeMemoryStore)` from 0.2 runs the portable - contract suite against `MemoryRunStore`. The same export is consumed by - any third-party `RunStore` implementation. -- Concurrent save/load on the same id (last write wins, no torn reads). -- Re-pause `createdAt` preservation: a second `save()` of the same run id keeps - the original `createdAt`; only `updatedAt` advances. (1.1 does not yet - enforce this — fold into the contract suite.) -- Abort-during-save race: `agent.abort()` while a `runStore.save()` is - in-flight resolves without orphaning the persisted record or surfacing a - rejected save promise. -- Mixed-batch tool ordering: when an assistant turn contains a regular tool - call followed by a decision-bearing tool whose arguments fail validation, - the persisted `messages` order matches the LLM's tool-call order. (Latent - in 1.1's invalid-args branch; surfaces only via the contract suite.) 
- -### 1.3 Run Serialization Helpers +### 1.2 Run Serialization Helpers #### Problem @@ -293,8 +242,8 @@ kit should ship the canonical form so consumers do not reinvent it. #### Design -Standard Web primitives only. No framework helpers. The agent run object -exposes both pull-based and push-based access. +Standard Web primitives only. No framework helpers. A run handle exposes both +pull-based and push-based access. ```ts interface AgentRunHandle { @@ -302,8 +251,16 @@ interface AgentRunHandle { toReadableStream(): ReadableStream toResponse(init?: ResponseInit): Response // SSE-shaped body } +``` -const handle = agent.start({ messages, ... }) +The handle is returned by whichever entry point starts a loop iteration — +`agent.prompt(input)` or `agent.continue()`. Both return a handle; both +produce the same `AgentEvent` stream. (Today these methods return +`Promise` and require subscribing first; Phase 1 reshapes them to return +an `AgentRunHandle` so SSE serialization is a one-liner.) + +```ts +const handle = agent.prompt(userMessage) return handle.toResponse() ``` @@ -312,12 +269,16 @@ return handle.toResponse() speaks standard `Response` and `ReadableStream`: Next.js App Router, Hono, Bun, Deno, Cloudflare Workers, raw Node 18+. -A symmetric pair handles resume: +There is no separate "resume" entry point. The server handler builds an +`Agent` from the request body, inspects the trailing message, and chooses: -```ts -const handle = agent.resumeRun({ runId, decision, runStore }) -return handle.toResponse() -``` +- Last message is a user turn → `agent.prompt(lastMessage)`. +- Last assistant message has a tool call with a decision attached and no + matching tool_result → `agent.continue()`. +- Otherwise the request is malformed; reject. + +This mirrors AI SDK: the same `/api/chat` endpoint handles both initial sends +and post-approval continuations, because state lives in `messages`. 
The wire format is the kit's `AgentEvent` discriminated union, serialized as JSON in SSE `data:` lines. No translation to any third-party protocol; if a @@ -338,7 +299,7 @@ Unit tests in `@agentic-kit/agent`. - Backpressure: stream consumer pauses; producer respects it (no unbounded buffer). -### 1.4 `@agentic-kit/react` +### 1.3 `@agentic-kit/react` #### Problem @@ -364,7 +325,7 @@ const chat = useChat({ }) chat.send('hello') -chat.respondWithDecision(value) // delivers decision to /resume +chat.respondWithDecision(toolCallId, value) // mutates messages, re-POSTs same endpoint chat.abort() chat.messages // Message[] chat.isStreaming // boolean @@ -379,9 +340,10 @@ Behaviors the hook is responsible for: - Emitting `onMessage` per partial update, `onFinish` per turn end. - Surfacing `tool_decision_pending` events as `chat.pendingDecision` and via `onDecisionPending`. -- Rebroadcasting `respondWithDecision(value)` as a POST to `/resume` (path - configurable) with `{ runId, decision }`, and resuming stream consumption - from the response. +- `respondWithDecision(toolCallId, value)`: write the decision into the + matching tool-call content block in `messages`, then POST the augmented + `messages` back to the **same `api` endpoint**. No separate `/resume` route, + no `runId` plumbing — the message log carries everything the server needs. - Plumbing an `AbortSignal` through `chat.abort()`. The hook does not own persistence, modes, system prompts, or any UI shape. @@ -399,8 +361,10 @@ return `createScriptedSSEResponse(events)` from 0.2. - `chat.abort()` reaches the fetch mock's `AbortSignal`; state cleans up; no late updates after abort. - Decision-pending: `onDecisionPending` fires; `chat.pendingDecision` set; - `respondWithDecision(value)` POSTs to `/resume` with `{ runId, decision }`; - the resumed stream folds into `messages`. 
+ `respondWithDecision(toolCallId, value)` mutates the matching tool-call + block in `messages`, POSTs to the same `api` endpoint, and the resumed + stream folds into `messages`. Assert the POSTed body contains the decision + on the right tool call. - Network error / non-200 response: `chat.error` set; `messages` not corrupted. - Malformed SSE bytes: hook surfaces an error rather than crashing. - `initialMessages` hydrates state on mount. @@ -497,21 +461,31 @@ Unit tests using an injectable clock. - Abort during a retry wait cancels promptly; no further attempts. - Retries respect a global deadline; total time bounded. -### 3.3 Stream Resume on Disconnect +### 3.3 Stream Resume on Disconnect (the only feature that needs a RunStore) + +If the agent loop is mid-run when the SSE connection drops — between turns, +mid-tool, mid-LLM-stream — the client cannot pick up where it left off, only +re-POST `messages` and start a fresh continuation. The previous in-flight +events are lost. + +This is the **one** Phase 1+ feature the message-log model cannot deliver, +because the in-flight events are not yet committed to the message log. +Implementing it requires the agent loop to outlive the HTTP request, and that +in turn requires persistent state — i.e., the `RunStore` concept the rest of +the kit deliberately avoids. -If the agent loop is mid-run when the SSE connection drops, the client should -be able to reconnect with the run ID and pick up where it left off. The -machinery is largely a free side-effect of `RunStore` — the run survives; -only stream-position tracking and an event replay endpoint are new. Useful -for flaky-network and long-running flows. +Defer until a real consumer asks for it. When that happens, introduce +`RunStore` here (interface + `MemoryRunStore` default + replay endpoint) as +an opt-in capability, not a baseline requirement. Phase 1's pause/resume +will keep working without it. #### Testing -- Unit: abort an in-flight `events()` iterator. 
Reload the run by id and call - `resumeRun`. Assert: events continue from the last-emitted checkpoint, no - duplicate side effects. +- Unit: abort an in-flight `events()` iterator. With a configured `RunStore`, + reload by run id and resume. Assert: events continue from the last-emitted + checkpoint, no duplicate side effects. - Integration (lane from 0.3): same flow over real HTTP — drop the connection - mid-stream, reconnect with `runId`, assert event continuity and correct + mid-stream, reconnect with run id, assert event continuity and correct `Last-Event-ID` semantics. ### 3.4 Client-Side Tool Execution @@ -519,9 +493,11 @@ for flaky-network and long-running flows. For tools that genuinely require browser-only capabilities (DOM access, WebRTC, File System Access API, locally-running services, hardware bridges, wallet signing), introduce a `runs: 'client'` flag. The mechanism reuses the -pause/resume rails: such tools emit a `tool_client_execute_pending` event, -the browser-side dispatcher runs the registered local executor, and the -result returns via the same resume endpoint shape. +same message-log rails as `decision`: such tools emit a +`tool_client_execute_pending` event with the toolCallId, the browser-side +dispatcher runs the registered local executor, the result is written into +the message log as a `tool_result`, and the client re-POSTs the same +`/api/chat` endpoint to continue the loop. This is deferred until a real use case appears. Most agent applications do not need it, and shipping it prematurely would constrain the design. @@ -530,12 +506,14 @@ not need it, and shipping it prematurely would constrain the design. - Unit (in `@agentic-kit/agent`): protocol layer only. Scripted provider emits a `runs: 'client'` tool call. Assert: `tool_client_execute_pending` - event fires, loop halts. `agent.resume(runId, { result })` continues with - the supplied result as the tool result. + event fires, loop closes the run handle, no execute call. 
After a + `tool_result` is appended to messages and `continue()` is called, the loop + proceeds with the supplied result. - Unit (in `@agentic-kit/react`, jsdom): client dispatcher. Register a local executor, fire a synthetic pending event, assert: executor runs with the - tool input, resulting POST to `/resume` includes the correct payload, the - resumed stream folds into `messages`. + tool input, the result is written into the message log, the next POST + goes to the same endpoint with the augmented messages, the resumed stream + folds into `messages`. --- @@ -561,46 +539,48 @@ companion packages, or other ecosystems entirely. package, not in the conversational core. - **System prompt construction utilities.** Prompt design is consumer-owned. - **Conversation modes / agent personas.** Application concern. -- **Built-in production storage backends.** `MemoryRunStore` is the only - implementation the kit ships; Redis, KV, DB backends are for consumers. +- **A separate run store.** Pause/resume rides the message log; there is no + `RunStore` in Phase 1. If 3.3 (stream resume on disconnect) ever ships, it + introduces an opt-in `RunStore` then. 
## Package Layout After Phase 1 -| Package | Change | -| ------------------------ | ------------------------------------------------------------------------------------------- | -| `agentic-kit` | unchanged | -| `@agentic-kit/agent` | extended: pausable tools, `RunStore`, run serialization helpers, middleware hooks (Phase 2) | -| `@agentic-kit/anthropic` | unchanged in Phase 1; caching API in Phase 2 | -| `@agentic-kit/openai` | unchanged in Phase 1; caching API in Phase 2 | -| `@agentic-kit/ollama` | unchanged in Phase 1; tool support in Phase 3 | -| `@agentic-kit/react` | **new** — `useChat` hook | +| Package | Change | +| ------------------------ | ------------------------------------------------------------------------------------- | +| `agentic-kit` | unchanged | +| `@agentic-kit/agent` | extended: pausable tools (`decision` schema), run serialization helpers; no RunStore | +| `@agentic-kit/anthropic` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/openai` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/ollama` | unchanged in Phase 1; tool support in Phase 3 | +| `@agentic-kit/react` | **new** — `useChat` hook | Shared test helpers live in `tools/test/` (repo-internal directory, not a package). Phase 2 and 3 add no new packages; everything extends in place. ## Open Questions -- **Run record schema versioning.** Once `RunStore` is shipped, the on-disk - `AgentRun` shape becomes a compatibility surface. Decide on an explicit - version field and migration story before 1.0. +- **Decision representation in the message log.** Resolved (1.1): the decision + lives as an optional `decision: unknown` field on the tool-call content + block. Existing cross-provider `transformMessages` preserves it via object + spread. - **Decision schema validator scope.** Resolved (1.1): the decision validator reuses `validateSchema` from `packages/agent/src/validation.ts` — same code path as tool inputs. 
Discriminated-union and `oneOf` / `anyOf` coverage is - still untested; fold into the 1.2 contract suite. -- **Lifecycle events across pause boundaries.** On resume, `agent_start` - re-fires (each `runLoop` entry is a fresh start) but `turn_start` does not - (the persisted assistant message is reused, not regenerated). This - asymmetry is invisible to a single-prompt consumer but matters for any - listener that tracks turn vs. run lifecycle. Decide before 1.4 whether to - introduce a distinct `agent_resume` event or to redocument `agent_start` - with explicit "loop entry" semantics — the `@agentic-kit/react` hook will - codify whichever choice externally. + still untested; fold into the 1.1 test matrix. +- **Lifecycle events across pause boundaries.** Each entry into the loop + (whether via `prompt()` or `continue()` after a decision) re-fires + `agent_start`. Consumers that distinguish "fresh prompt" from "resumed + loop" need a hint. Decide before 1.3 whether to add a distinct + `agent_resume` event or to redocument `agent_start` with explicit + "loop entry" semantics — the `@agentic-kit/react` hook codifies the + choice externally. - **SSE vs. NDJSON.** SSE is the proposed default. NDJSON is simpler but lacks reconnection semantics and event-type framing. Revisit if real-world consumers report SSE problems behind specific proxies. -- **`onDecisionPending` ergonomics.** Whether the React hook should auto-route - the next stream from `/resume` or require the consumer to call a follow-up - method explicitly. Default to auto for ergonomics; expose an opt-out. +- **`respondWithDecision` auto-fire vs. explicit send.** Whether the React + hook should auto-POST the augmented messages immediately or expose a + separate `send()` step. Default to auto for ergonomics (matches AI SDK's + `addToolApprovalResponse` → `sendAutomaticallyWhen` flow); expose an opt-out. - **Live test policy for paid providers.** Anthropic/OpenAI live tests would burn API credits. 
Default position: gated `*.live.test.ts` files with env-var keys, manually triggered, never required by per-PR CI. diff --git a/tools/test/README.md b/tools/test/README.md index 0513eb6..58871fe 100644 --- a/tools/test/README.md +++ b/tools/test/README.md @@ -12,11 +12,6 @@ Not a workspace package, not published. - `createScriptedSSEResponse(events)` — `Response` whose body serializes each `AgentEvent` as one SSE frame (`data: \n\n`). - `parseSSEStream(stream)` — async iterable that parses `AgentEvent` SSE frames from a `ReadableStream`. Handles split chunks, multi-line `data:`, comment lines, event-type framing, trailing newlines, and mid-event abort (incomplete trailing event is dropped, per SSE spec). -## Deferred - -`runRunStoreContractTests(makeStore)` lands with Phase 1.2 alongside the -`RunStore` interface. Adding it now would be dead scaffolding. - ## Adding a helper Promote a helper to `tools/test/` only when a third package needs the same From 75706cf780b7ceb86dfb82000e1a072c356ac0fd Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 4 May 2026 22:26:25 +0800 Subject: [PATCH 09/16] refactor(agent): export parseSSEStream --- packages/agent/__tests__/run-handle.test.ts | 3 +- packages/agent/__tests__/sse.test.ts | 11 ++-- packages/agent/src/index.ts | 1 + packages/agent/src/sse.ts | 67 +++++++++++++++++++++ tools/test/index.ts | 2 +- tools/test/scripted-sse.ts | 66 -------------------- 6 files changed, 74 insertions(+), 76 deletions(-) create mode 100644 packages/agent/src/sse.ts diff --git a/packages/agent/__tests__/run-handle.test.ts b/packages/agent/__tests__/run-handle.test.ts index cb8fe2b..9aca76b 100644 --- a/packages/agent/__tests__/run-handle.test.ts +++ b/packages/agent/__tests__/run-handle.test.ts @@ -9,10 +9,9 @@ import { createScriptedProvider, makeFakeAssistantMessage, makeFakeModel, - parseSSEStream, } from '@test/index'; -import { Agent, type AgentEvent, type AgentTool } from '../src'; +import { Agent, type AgentEvent, type AgentTool, 
parseSSEStream } from '../src'; describe('AgentRunHandle', () => { describe('events()', () => { diff --git a/packages/agent/__tests__/sse.test.ts b/packages/agent/__tests__/sse.test.ts index 682e3dd..4cbc0b9 100644 --- a/packages/agent/__tests__/sse.test.ts +++ b/packages/agent/__tests__/sse.test.ts @@ -1,10 +1,7 @@ -// Exercises the `parseSSEStream` helper from tools/test/, not a production -// parser. The kit ships no SSE parser today; consumers parse on their side. -// These tests pin down the helper's edge-case behavior so future parser work -// has a baseline to match. -import { parseSSEStream } from '@test/index'; - -import type { AgentEvent } from '../src'; +// Exercises `parseSSEStream` exported from `@agentic-kit/agent`. Symmetric to +// the SSE producer in `toResponse()` — these tests pin down the parser's +// edge-case behavior so the wire-format contract has a baseline. +import { type AgentEvent, parseSSEStream } from '../src'; const encoder = new TextEncoder(); diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 220b76b..46e2f2a 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -1,4 +1,5 @@ export * from './agent.js'; export * from './run-handle.js'; +export * from './sse.js'; export * from './types.js'; export * from './validation.js'; diff --git a/packages/agent/src/sse.ts b/packages/agent/src/sse.ts new file mode 100644 index 0000000..f8002d5 --- /dev/null +++ b/packages/agent/src/sse.ts @@ -0,0 +1,67 @@ +import type { AgentEvent } from './types.js'; + +export async function* parseSSEStream( + stream: ReadableStream +): AsyncIterable { + const reader = stream.getReader(); + const decoder = new TextDecoder('utf-8'); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + buffer = buffer.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + + let blankIdx = buffer.indexOf('\n\n'); + 
while (blankIdx !== -1) { + const rawEvent = buffer.slice(0, blankIdx); + buffer = buffer.slice(blankIdx + 2); + const event = parseEvent(rawEvent); + if (event) { + yield event; + } + blankIdx = buffer.indexOf('\n\n'); + } + } + } finally { + reader.releaseLock(); + } +} + +function parseEvent(raw: string): AgentEvent | null { + const dataLines: string[] = []; + for (const line of raw.split('\n')) { + if (line === '' || line.startsWith(':')) { + continue; + } + const colon = line.indexOf(':'); + const field = colon === -1 ? line : line.slice(0, colon); + let value = colon === -1 ? '' : line.slice(colon + 1); + if (value.startsWith(' ')) { + value = value.slice(1); + } + if (field === 'data') { + dataLines.push(value); + } + } + + if (dataLines.length === 0) { + return null; + } + + const data = dataLines.join('\n'); + if (data === '[DONE]') { + return null; + } + + try { + return JSON.parse(data) as AgentEvent; + } catch { + return null; + } +} diff --git a/tools/test/index.ts b/tools/test/index.ts index 4c8ab2e..a798b59 100644 --- a/tools/test/index.ts +++ b/tools/test/index.ts @@ -1,3 +1,3 @@ export { makeFakeAssistantMessage, makeFakeModel } from './fixtures'; export { createScriptedProvider, type ScriptedProviderOptions } from './scripted-provider'; -export { createScriptedSSEResponse, parseSSEStream } from './scripted-sse'; +export { createScriptedSSEResponse } from './scripted-sse'; diff --git a/tools/test/scripted-sse.ts b/tools/test/scripted-sse.ts index ae7497e..6267084 100644 --- a/tools/test/scripted-sse.ts +++ b/tools/test/scripted-sse.ts @@ -20,69 +20,3 @@ export function createScriptedSSEResponse(events: AgentEvent[]): Response { }, }); } - -export async function* parseSSEStream( - stream: ReadableStream -): AsyncIterable { - const reader = stream.getReader(); - const decoder = new TextDecoder('utf-8'); - let buffer = ''; - - try { - while (true) { - const { done, value } = await reader.read(); - if (done) { - break; - } - - buffer += 
decoder.decode(value, { stream: true }); - buffer = buffer.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); - - let blankIdx = buffer.indexOf('\n\n'); - while (blankIdx !== -1) { - const rawEvent = buffer.slice(0, blankIdx); - buffer = buffer.slice(blankIdx + 2); - const event = parseEvent(rawEvent); - if (event) { - yield event; - } - blankIdx = buffer.indexOf('\n\n'); - } - } - } finally { - reader.releaseLock(); - } -} - -function parseEvent(raw: string): AgentEvent | null { - const dataLines: string[] = []; - for (const line of raw.split('\n')) { - if (line === '' || line.startsWith(':')) { - continue; - } - const colon = line.indexOf(':'); - const field = colon === -1 ? line : line.slice(0, colon); - let value = colon === -1 ? '' : line.slice(colon + 1); - if (value.startsWith(' ')) { - value = value.slice(1); - } - if (field === 'data') { - dataLines.push(value); - } - } - - if (dataLines.length === 0) { - return null; - } - - const data = dataLines.join('\n'); - if (data === '[DONE]') { - return null; - } - - try { - return JSON.parse(data) as AgentEvent; - } catch { - return null; - } -} From fac47ee69aafed172073bdab6212d648ea84a098 Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 4 May 2026 22:26:28 +0800 Subject: [PATCH 10/16] fix(openai): prefer native fetch over cross-fetch --- packages/openai/src/index.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/openai/src/index.ts b/packages/openai/src/index.ts index 365a76d..33452e1 100644 --- a/packages/openai/src/index.ts +++ b/packages/openai/src/index.ts @@ -1,4 +1,11 @@ -import fetch from 'cross-fetch'; +import crossFetch from 'cross-fetch'; + +// Prefer the runtime's native fetch when available. cross-fetch's Node ponyfill +// returns a node-fetch Response whose body is a Node Readable stream — that +// lacks `.getReader()`, which this adapter needs for SSE parsing. Native fetch +// in Node 18+ / browsers / Bun returns a Web ReadableStream. 
+const fetch: typeof globalThis.fetch = + typeof globalThis.fetch === 'function' ? globalThis.fetch.bind(globalThis) : crossFetch; type JsonPrimitive = string | number | boolean | null; type JsonValue = JsonPrimitive | JsonObject | JsonValue[]; From 3f5edbd7a0c0d6ccb707101b12f0a8c0a588d7d7 Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 4 May 2026 22:26:31 +0800 Subject: [PATCH 11/16] feat(react): add useChat hook package --- packages/react/README.md | 14 + packages/react/__tests__/tsconfig.json | 23 ++ packages/react/__tests__/use-chat.test.ts | 372 ++++++++++++++++++++++ packages/react/jest.config.js | 24 ++ packages/react/jest.environment.js | 27 ++ packages/react/package.json | 49 +++ packages/react/src/index.ts | 6 + packages/react/src/use-chat.ts | 239 ++++++++++++++ packages/react/tsconfig.esm.json | 7 + packages/react/tsconfig.json | 10 + 10 files changed, 771 insertions(+) create mode 100644 packages/react/README.md create mode 100644 packages/react/__tests__/tsconfig.json create mode 100644 packages/react/__tests__/use-chat.test.ts create mode 100644 packages/react/jest.config.js create mode 100644 packages/react/jest.environment.js create mode 100644 packages/react/package.json create mode 100644 packages/react/src/index.ts create mode 100644 packages/react/src/use-chat.ts create mode 100644 packages/react/tsconfig.esm.json create mode 100644 packages/react/tsconfig.json diff --git a/packages/react/README.md b/packages/react/README.md new file mode 100644 index 0000000..2bf993a --- /dev/null +++ b/packages/react/README.md @@ -0,0 +1,14 @@ +## @agentic-kit/react + +Headless React bindings for `agentic-kit`. + +Currently exposes a single hook: + +- `useChat({ api, body, initialMessages, onMessage, onFinish, onDecisionPending })` + POSTs `messages` to `api`, parses the SSE response into `AgentEvent`s, and + folds them into a `Message[]`. Surfaces pause/resume via `pendingDecision` + + `respondWithDecision(toolCallId, value)`. + +The hook ships no UI. 
State lives in messages — there is no separate run +store, no `runId`. Resumption after a tool decision re-POSTs to the same `api` +endpoint with the augmented message log. diff --git a/packages/react/__tests__/tsconfig.json b/packages/react/__tests__/tsconfig.json new file mode 100644 index 0000000..e4f9a38 --- /dev/null +++ b/packages/react/__tests__/tsconfig.json @@ -0,0 +1,23 @@ +{ + "extends": "../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"], + "@agentic-kit/react": ["packages/react/src"] + }, + "types": ["jest", "node"] + }, + "include": [ + "./**/*.ts", + "./**/*.tsx", + "../src/**/*.ts", + "../src/**/*.tsx", + "../../../tools/test/**/*.ts" + ], + "exclude": ["../dist", "../node_modules"] +} diff --git a/packages/react/__tests__/use-chat.test.ts b/packages/react/__tests__/use-chat.test.ts new file mode 100644 index 0000000..dbceb17 --- /dev/null +++ b/packages/react/__tests__/use-chat.test.ts @@ -0,0 +1,372 @@ +import { act, renderHook, waitFor } from '@testing-library/react'; + +import type { AgentEvent } from '@agentic-kit/agent'; +import type { AssistantMessage, Message, UserMessage } from 'agentic-kit'; +import { createScriptedSSEResponse, makeFakeAssistantMessage } from '@test/index'; + +import { useChat } from '../src'; + +function streamFromEvents(events: AgentEvent[]): Response { + return createScriptedSSEResponse(events); +} + +function makeUser(content: string, timestamp = 1): UserMessage { + return { role: 'user', content, timestamp }; +} + +function makeFinalAssistant(text: string): AssistantMessage { + return makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text }], + }); +} + +function makePartialAssistant(text: string): AssistantMessage { + return makeFakeAssistantMessage({ + content: [{ type: 'text', text }], + }); +} 
+ +function makeAssistantWithToolCall(): AssistantMessage { + return makeFakeAssistantMessage({ + stopReason: 'toolUse', + content: [ + { + type: 'toolCall', + id: 'call_1', + name: 'echo', + arguments: { text: 'hi' }, + rawArguments: '{"text":"hi"}', + }, + ], + }); +} + +describe('useChat', () => { + it('hydrates messages from initialMessages', () => { + const initial: Message[] = [makeUser('hi')]; + const { result } = renderHook(() => useChat({ api: '/chat', initialMessages: initial })); + expect(result.current.messages).toEqual(initial); + }); + + it('sends, streams, and folds messages into the log', async () => { + const final = makeFinalAssistant('world'); + const userEcho = makeUser('hello'); + const fetchFn = jest.fn( + async (): Promise => + streamFromEvents([ + { type: 'agent_start' }, + { type: 'message_start', message: userEcho }, + { type: 'message_end', message: userEcho }, + { type: 'message_start', message: makePartialAssistant('') }, + { + type: 'message_update', + message: makePartialAssistant('wo'), + assistantMessageEvent: { + type: 'text_delta', + contentIndex: 0, + delta: 'wo', + partial: makePartialAssistant('wo'), + }, + }, + { type: 'message_end', message: final }, + { type: 'agent_end', messages: [userEcho, final] }, + ]) + ); + const onMessage = jest.fn(); + const onFinish = jest.fn(); + + const { result } = renderHook(() => + useChat({ api: '/chat', fetch: fetchFn, onMessage, onFinish }) + ); + + await act(async () => { + await result.current.send('hello'); + }); + + expect(result.current.messages).toMatchObject([ + { role: 'user', content: 'hello' }, + { role: 'assistant', content: [{ type: 'text', text: 'world' }] }, + ]); + expect(result.current.isStreaming).toBe(false); + expect(result.current.error).toBeUndefined(); + expect(onMessage).toHaveBeenCalledTimes(2); + expect(onFinish).toHaveBeenCalledWith( + expect.objectContaining({ role: 'assistant', content: [{ type: 'text', text: 'world' }] }) + ); + }); + + it('forwards body() 
fields and current messages in the POST body', async () => { + const final = makeFinalAssistant('ok'); + const fetchFn = jest.fn( + async (_url: RequestInfo | URL, _init?: RequestInit): Promise => + streamFromEvents([ + { type: 'agent_start' }, + { type: 'agent_end', messages: [makeUser('hi'), final] }, + ]) + ); + const body = jest.fn(() => ({ model: 'demo', sessionId: 'abc' })); + + const { result } = renderHook(() => useChat({ api: '/chat', fetch: fetchFn, body })); + + await act(async () => { + await result.current.send('hi'); + }); + + expect(fetchFn).toHaveBeenCalledTimes(1); + expect(body).toHaveBeenCalledTimes(1); + const init = fetchFn.mock.calls[0][1] as RequestInit; + expect(init.method).toBe('POST'); + expect(init.headers).toMatchObject({ 'Content-Type': 'application/json' }); + const sent = JSON.parse(init.body as string); + expect(sent).toMatchObject({ + model: 'demo', + sessionId: 'abc', + messages: [{ role: 'user', content: 'hi' }], + }); + }); + + it('drops a malformed SSE event and continues processing valid ones', async () => { + // parseSSEStream silently ignores malformed JSON, so the hook never sees a + // bogus event but valid events on either side still flow through. 
+ const final = makeFinalAssistant('survived'); + const encoder = new TextEncoder(); + const body = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode('data: {"type":"agent_start"}\n\n')); + controller.enqueue(encoder.encode('data: {garbage not json\n\n')); + controller.enqueue( + encoder.encode( + `data: ${JSON.stringify({ type: 'agent_end', messages: [makeUser('hi'), final] })}\n\n` + ) + ); + controller.close(); + }, + }); + const response = new Response(body, { + status: 200, + headers: { 'Content-Type': 'text/event-stream' }, + }); + const fetchFn = jest.fn(async (): Promise => response); + + const { result } = renderHook(() => useChat({ api: '/chat', fetch: fetchFn })); + + await act(async () => { + await result.current.send('hi'); + }); + + expect(result.current.error).toBeUndefined(); + expect(result.current.messages).toMatchObject([ + { role: 'user', content: 'hi' }, + { role: 'assistant', content: [{ type: 'text', text: 'survived' }] }, + ]); + }); + + describe('abort', () => { + it('cancels the in-flight request and clears isStreaming', async () => { + let signalCaptured: AbortSignal | undefined; + const fetchFn = jest.fn( + (_url: RequestInfo | URL, init?: RequestInit): Promise => { + signalCaptured = init?.signal ?? 
undefined; + return new Promise((_resolve, reject) => { + init?.signal?.addEventListener('abort', () => { + const err = new Error('aborted'); + err.name = 'AbortError'; + reject(err); + }); + }); + } + ); + + const { result } = renderHook(() => useChat({ api: '/chat', fetch: fetchFn })); + + act(() => { + void result.current.send('hi'); + }); + + await waitFor(() => expect(fetchFn).toHaveBeenCalled()); + expect(result.current.isStreaming).toBe(true); + + act(() => { + result.current.abort(); + }); + + await waitFor(() => expect(result.current.isStreaming).toBe(false)); + expect(signalCaptured?.aborted).toBe(true); + expect(result.current.error).toBeUndefined(); + }); + + it('drops events that arrive after abort', async () => { + let pushFn!: (event: AgentEvent) => void; + let closeFn!: () => void; + const encoder = new TextEncoder(); + const body = new ReadableStream({ + start(controller) { + pushFn = (event) => + controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`)); + closeFn = () => controller.close(); + }, + }); + const fetchFn = jest.fn( + async (): Promise => + new Response(body, { + status: 200, + headers: { 'Content-Type': 'text/event-stream' }, + }) + ); + + const { result } = renderHook(() => useChat({ api: '/chat', fetch: fetchFn })); + + let sendPromise!: Promise; + act(() => { + sendPromise = result.current.send('hi'); + }); + await waitFor(() => expect(fetchFn).toHaveBeenCalled()); + + act(() => { + result.current.abort(); + }); + expect(result.current.isStreaming).toBe(false); + + // Push a late event after abort. The for-await loop should hit the + // `if (!isCurrent()) return;` guard and exit without folding it into + // state. Awaiting sendPromise is the real synchronization barrier: + // runStream resolves via the early return. 
+ const lateAssistant = makeFinalAssistant('late'); + pushFn({ type: 'agent_end', messages: [makeUser('hi'), lateAssistant] }); + closeFn(); + await act(async () => { + await sendPromise; + }); + + expect(result.current.messages).toMatchObject([{ role: 'user', content: 'hi' }]); + expect(result.current.messages).toHaveLength(1); + }); + }); + + describe('respondWithDecision', () => { + it('attaches the decision and re-POSTs with the augmented log', async () => { + const assistantWithToolCall = makeAssistantWithToolCall(); + const final = makeFinalAssistant('done'); + const userEcho = makeUser('hi'); + + const fetchFn = jest.fn( + async (_url: RequestInfo | URL, _init?: RequestInit): Promise => + streamFromEvents([ + { type: 'agent_start' }, + { type: 'message_start', message: userEcho }, + { type: 'message_end', message: userEcho }, + { type: 'message_start', message: assistantWithToolCall }, + { type: 'message_end', message: assistantWithToolCall }, + { + type: 'tool_decision_pending', + toolCallId: 'call_1', + toolName: 'echo', + input: { text: 'hi' }, + schema: { type: 'object' }, + }, + ]) + ); + + const onDecisionPending = jest.fn(); + const { result } = renderHook(() => + useChat({ api: '/chat', fetch: fetchFn, onDecisionPending }) + ); + + await act(async () => { + await result.current.send('hi'); + }); + + expect(onDecisionPending).toHaveBeenCalledWith( + expect.objectContaining({ toolCallId: 'call_1', toolName: 'echo' }) + ); + expect(result.current.pendingDecision).toMatchObject({ toolCallId: 'call_1' }); + // Pause = stream ended, hook idle, awaiting a decision. 
+ expect(result.current.isStreaming).toBe(false); + + const resumedAssistant: AssistantMessage = { + ...assistantWithToolCall, + content: [ + { + type: 'toolCall', + id: 'call_1', + name: 'echo', + arguments: { text: 'hi' }, + rawArguments: '{"text":"hi"}', + decision: 'allow', + }, + ], + }; + const toolResult: Message = { + role: 'toolResult', + toolCallId: 'call_1', + toolName: 'echo', + content: [{ type: 'text', text: 'hi' }], + isError: false, + timestamp: 2, + }; + + fetchFn.mockImplementationOnce( + async (_url: RequestInfo | URL, _init?: RequestInit): Promise => + streamFromEvents([ + { type: 'agent_start' }, + { + type: 'agent_end', + messages: [userEcho, resumedAssistant, toolResult, final], + }, + ]) + ); + + await act(async () => { + await result.current.respondWithDecision('call_1', 'allow'); + }); + + expect(fetchFn).toHaveBeenCalledTimes(2); + const secondInit = fetchFn.mock.calls[1][1] as RequestInit; + const sent = JSON.parse(secondInit.body as string); + expect(sent.messages).toHaveLength(2); + expect(sent.messages[1].content[0]).toMatchObject({ + type: 'toolCall', + id: 'call_1', + decision: 'allow', + }); + + expect(result.current.messages).toHaveLength(4); + expect(result.current.pendingDecision).toBeUndefined(); + expect(result.current.isStreaming).toBe(false); + }); + }); + + describe('error handling', () => { + it('sets error on a non-200 response', async () => { + const fetchFn = jest.fn( + async (): Promise => + new Response('boom', { status: 500, statusText: 'Internal Server Error' }) + ); + const { result } = renderHook(() => useChat({ api: '/chat', fetch: fetchFn })); + + await act(async () => { + await result.current.send('hi'); + }); + + expect(result.current.error).toEqual(new Error('HTTP 500: Internal Server Error')); + expect(result.current.isStreaming).toBe(false); + expect(result.current.messages).toMatchObject([{ role: 'user', content: 'hi' }]); + }); + + it('sets error on a network failure', async () => { + const fetchFn = 
jest.fn(async (): Promise => { + throw new Error('network down'); + }); + const { result } = renderHook(() => useChat({ api: '/chat', fetch: fetchFn })); + + await act(async () => { + await result.current.send('hi'); + }); + + expect(result.current.error).toEqual(new Error('network down')); + expect(result.current.isStreaming).toBe(false); + expect(result.current.messages).toMatchObject([{ role: 'user', content: 'hi' }]); + }); + }); +}); diff --git a/packages/react/jest.config.js b/packages/react/jest.config.js new file mode 100644 index 0000000..2f030f4 --- /dev/null +++ b/packages/react/jest.config.js @@ -0,0 +1,24 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} */ +module.exports = { + preset: 'ts-jest', + testEnvironment: '/jest.environment.js', + transform: { + '^.+\\.tsx?$': [ + 'ts-jest', + { + babelConfig: false, + tsconfig: '__tests__/tsconfig.json', + }, + ], + }, + transformIgnorePatterns: ['/node_modules/*'], + testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$', + moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], + modulePathIgnorePatterns: ['dist/*'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../agentic-kit/src', + '^@agentic-kit/(.*)$': '/../$1/src', + }, +}; diff --git a/packages/react/jest.environment.js b/packages/react/jest.environment.js new file mode 100644 index 0000000..5f4e56e --- /dev/null +++ b/packages/react/jest.environment.js @@ -0,0 +1,27 @@ +/* eslint-disable @typescript-eslint/no-require-imports */ +const JSDOMEnvironment = require('jest-environment-jsdom').default; +const { ReadableStream, TransformStream, WritableStream } = require('node:stream/web'); +const { TextEncoder, TextDecoder } = require('node:util'); + +class WebJSDOMEnvironment extends JSDOMEnvironment { + constructor(config, context) { + super(config, context); + Object.assign(this.global, { + TextEncoder, + TextDecoder, + ReadableStream, + TransformStream, + 
WritableStream, + fetch, + Response, + Request, + Headers, + FormData, + Blob, + AbortController, + AbortSignal, + }); + } +} + +module.exports = WebJSDOMEnvironment; diff --git a/packages/react/package.json b/packages/react/package.json new file mode 100644 index 0000000..39963c1 --- /dev/null +++ b/packages/react/package.json @@ -0,0 +1,49 @@ +{ + "name": "@agentic-kit/react", + "version": "0.1.0", + "author": "Dan Lynch ", + "description": "React bindings for agentic-kit (useChat hook)", + "main": "index.js", + "module": "esm/index.js", + "types": "index.d.ts", + "homepage": "https://github.com/constructive-io/agentic-kit", + "license": "SEE LICENSE IN LICENSE", + "publishConfig": { + "access": "public", + "directory": "dist" + }, + "repository": { + "type": "git", + "url": "https://github.com/constructive-io/agentic-kit" + }, + "bugs": { + "url": "https://github.com/constructive-io/agentic-kit/issues" + }, + "scripts": { + "clean": "makage clean", + "prepack": "npm run build", + "build": "makage build", + "build:dev": "makage build --dev", + "lint": "eslint . 
--fix", + "test": "jest", + "test:watch": "jest --watch" + }, + "dependencies": { + "@agentic-kit/agent": "workspace:*", + "agentic-kit": "workspace:*" + }, + "peerDependencies": { + "react": ">=18", + "react-dom": ">=18" + }, + "devDependencies": { + "@testing-library/dom": "10.4.1", + "@testing-library/react": "16.3.2", + "@types/react": "19.2.14", + "@types/react-dom": "19.2.3", + "jest-environment-jsdom": "^29.7.0", + "react": "19.2.5", + "react-dom": "19.2.5" + }, + "keywords": [] +} diff --git a/packages/react/src/index.ts b/packages/react/src/index.ts new file mode 100644 index 0000000..525368e --- /dev/null +++ b/packages/react/src/index.ts @@ -0,0 +1,6 @@ +export { + type ToolDecisionPendingEvent, + type UseChatOptions, + type UseChatResult, + useChat, +} from './use-chat.js'; diff --git a/packages/react/src/use-chat.ts b/packages/react/src/use-chat.ts new file mode 100644 index 0000000..1c8a632 --- /dev/null +++ b/packages/react/src/use-chat.ts @@ -0,0 +1,239 @@ +import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; + +import type { AgentEvent } from '@agentic-kit/agent'; +import { parseSSEStream } from '@agentic-kit/agent'; +import type { AssistantMessage, Message } from 'agentic-kit'; +import { createUserMessage } from 'agentic-kit'; + +export type ToolDecisionPendingEvent = Extract; + +export interface UseChatOptions { + api: string; + body?: () => Record; + initialMessages?: Message[]; + onMessage?: (message: Message) => void; + onFinish?: (message: AssistantMessage) => void; + onDecisionPending?: (event: ToolDecisionPendingEvent) => void; + fetch?: typeof globalThis.fetch; +} + +export interface UseChatResult { + messages: Message[]; + isStreaming: boolean; + pendingDecision: ToolDecisionPendingEvent | undefined; + error: unknown; + send: (input: string | Message) => Promise; + respondWithDecision: (toolCallId: string, value: unknown) => Promise; + abort: () => void; +} + +export function useChat(options: UseChatOptions): 
UseChatResult { + const [messages, setMessages] = useState(() => options.initialMessages ?? []); + const [isStreaming, setIsStreaming] = useState(false); + const [pendingDecision, setPendingDecision] = useState( + undefined + ); + const [error, setError] = useState(undefined); + + const messagesRef = useRef(messages); + useEffect(() => { + messagesRef.current = messages; + }, [messages]); + + const optionsRef = useRef(options); + useEffect(() => { + optionsRef.current = options; + }, [options]); + + const runIdRef = useRef(0); + const abortControllerRef = useRef(null); + + const runStream = useCallback( + async (requestMessages: Message[], optimisticUserMessage: Message | null): Promise => { + const opts = optionsRef.current; + const myRun = ++runIdRef.current; + + abortControllerRef.current?.abort(); + const controller = new AbortController(); + abortControllerRef.current = controller; + + const isCurrent = () => runIdRef.current === myRun; + + setIsStreaming(true); + setError(undefined); + setPendingDecision(undefined); + if (optimisticUserMessage) { + setMessages((prev) => [...prev, optimisticUserMessage]); + } + + let skipUserEcho = optimisticUserMessage !== null; + + const fetchFn = opts.fetch ?? globalThis.fetch; + const extraBody = opts.body?.() ?? 
{}; + + let response: Response; + try { + response = await fetchFn(opts.api, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ messages: requestMessages, ...extraBody }), + signal: controller.signal, + }); + } catch (err) { + if (!isCurrent()) return; + if (controller.signal.aborted) { + if (isCurrent()) setIsStreaming(false); + return; + } + setError(err); + setIsStreaming(false); + return; + } + + if (!isCurrent()) return; + + if (!response.ok) { + setError(new Error(`HTTP ${response.status}: ${response.statusText}`)); + setIsStreaming(false); + return; + } + + if (!response.body) { + setError(new Error('Response has no body')); + setIsStreaming(false); + return; + } + + try { + for await (const event of parseSSEStream(response.body)) { + if (!isCurrent()) return; + + switch (event.type) { + case 'message_start': { + if (skipUserEcho && event.message.role === 'user') { + skipUserEcho = false; + break; + } + setMessages((prev) => { + if (!isCurrent()) return prev; + return [...prev, event.message]; + }); + break; + } + case 'message_update': { + setMessages((prev) => { + if (!isCurrent()) return prev; + if (prev.length === 0) return prev; + const last = prev[prev.length - 1]; + if (last.role !== 'assistant') return prev; + return [...prev.slice(0, -1), event.message]; + }); + break; + } + case 'message_end': { + if (event.message.role === 'assistant') { + setMessages((prev) => { + if (!isCurrent()) return prev; + if (prev.length === 0) return [event.message]; + const last = prev[prev.length - 1]; + if (last.role === 'assistant') { + return [...prev.slice(0, -1), event.message]; + } + return [...prev, event.message]; + }); + } + opts.onMessage?.(event.message); + break; + } + case 'tool_decision_pending': { + setPendingDecision(event); + opts.onDecisionPending?.(event); + break; + } + case 'agent_end': { + setMessages(() => { + if (!isCurrent()) return messagesRef.current; + return event.messages; + }); + const 
lastAssistant = [...event.messages] + .reverse() + .find((m): m is AssistantMessage => m.role === 'assistant'); + if (lastAssistant) { + opts.onFinish?.(lastAssistant); + } + break; + } + } + } + } catch (err) { + if (!isCurrent()) return; + if (controller.signal.aborted) return; + setError(err); + } finally { + if (isCurrent()) { + setIsStreaming(false); + abortControllerRef.current = null; + } + } + }, + [] + ); + + const send = useCallback( + async (input: string | Message): Promise => { + const userMessage: Message = typeof input === 'string' ? createUserMessage(input) : input; + const requestMessages = [...messagesRef.current, userMessage]; + await runStream(requestMessages, userMessage); + }, + [runStream] + ); + + const respondWithDecision = useCallback( + async (toolCallId: string, value: unknown): Promise => { + const current = messagesRef.current; + if (current.length === 0) { + throw new Error('No messages to attach a decision to'); + } + const lastIdx = current.length - 1; + const last = current[lastIdx]; + if (last.role !== 'assistant') { + throw new Error('Last message is not an assistant message; cannot attach a decision'); + } + const updatedAssistant: AssistantMessage = { + ...last, + content: last.content.map((block) => { + if (block.type !== 'toolCall' || block.id !== toolCallId) { + return block; + } + return { ...block, decision: value }; + }), + }; + const requestMessages = [...current.slice(0, lastIdx), updatedAssistant]; + setMessages(requestMessages); + messagesRef.current = requestMessages; + setPendingDecision(undefined); + await runStream(requestMessages, null); + }, + [runStream] + ); + + const abort = useCallback(() => { + abortControllerRef.current?.abort(); + abortControllerRef.current = null; + runIdRef.current++; + setIsStreaming(false); + }, []); + + return useMemo( + () => ({ + messages, + isStreaming, + pendingDecision, + error, + send, + respondWithDecision, + abort, + }), + [messages, isStreaming, pendingDecision, error, send, 
respondWithDecision, abort] + ); +} diff --git a/packages/react/tsconfig.esm.json b/packages/react/tsconfig.esm.json new file mode 100644 index 0000000..624ab17 --- /dev/null +++ b/packages/react/tsconfig.esm.json @@ -0,0 +1,7 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "module": "es2022", + "outDir": "dist/esm" + } +} diff --git a/packages/react/tsconfig.json b/packages/react/tsconfig.json new file mode 100644 index 0000000..2391b80 --- /dev/null +++ b/packages/react/tsconfig.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src", + "jsx": "react-jsx", + "lib": ["es2022", "dom"] + }, + "include": ["src/**/*.ts", "src/**/*.tsx"] +} From c6605dc67add38b328c1b640aa13cb60f34e069f Mon Sep 17 00:00:00 2001 From: luca Date: Mon, 4 May 2026 22:26:38 +0800 Subject: [PATCH 12/16] feat(app): add nextjs chat demo --- apps/nextjs-chat-demo/.env.example | 9 + apps/nextjs-chat-demo/.gitignore | 38 + apps/nextjs-chat-demo/README.md | 59 + apps/nextjs-chat-demo/next.config.mjs | 28 + apps/nextjs-chat-demo/package.json | 30 + apps/nextjs-chat-demo/postcss.config.mjs | 5 + .../src/app/api/chat/route.ts | 93 ++ apps/nextjs-chat-demo/src/app/globals.css | 9 + apps/nextjs-chat-demo/src/app/layout.tsx | 18 + apps/nextjs-chat-demo/src/app/page.tsx | 9 + .../src/components/chat-input.tsx | 59 + .../src/components/chat-messages.tsx | 154 +++ .../src/components/chat-panel.tsx | 72 ++ .../src/components/tool-approval-card.tsx | 37 + .../src/components/tool-call-card.tsx | 54 + apps/nextjs-chat-demo/src/lib/cn.ts | 6 + apps/nextjs-chat-demo/src/lib/tools.ts | 74 ++ apps/nextjs-chat-demo/tsconfig.json | 59 + pnpm-lock.yaml | 1099 ++++++++++++++++- 19 files changed, 1864 insertions(+), 48 deletions(-) create mode 100644 apps/nextjs-chat-demo/.env.example create mode 100644 apps/nextjs-chat-demo/.gitignore create mode 100644 apps/nextjs-chat-demo/README.md create mode 100644 
apps/nextjs-chat-demo/next.config.mjs create mode 100644 apps/nextjs-chat-demo/package.json create mode 100644 apps/nextjs-chat-demo/postcss.config.mjs create mode 100644 apps/nextjs-chat-demo/src/app/api/chat/route.ts create mode 100644 apps/nextjs-chat-demo/src/app/globals.css create mode 100644 apps/nextjs-chat-demo/src/app/layout.tsx create mode 100644 apps/nextjs-chat-demo/src/app/page.tsx create mode 100644 apps/nextjs-chat-demo/src/components/chat-input.tsx create mode 100644 apps/nextjs-chat-demo/src/components/chat-messages.tsx create mode 100644 apps/nextjs-chat-demo/src/components/chat-panel.tsx create mode 100644 apps/nextjs-chat-demo/src/components/tool-approval-card.tsx create mode 100644 apps/nextjs-chat-demo/src/components/tool-call-card.tsx create mode 100644 apps/nextjs-chat-demo/src/lib/cn.ts create mode 100644 apps/nextjs-chat-demo/src/lib/tools.ts create mode 100644 apps/nextjs-chat-demo/tsconfig.json diff --git a/apps/nextjs-chat-demo/.env.example b/apps/nextjs-chat-demo/.env.example new file mode 100644 index 0000000..40082f5 --- /dev/null +++ b/apps/nextjs-chat-demo/.env.example @@ -0,0 +1,9 @@ +# Either OPENAI_* or LLM_* (the LLM_* convention is shared with the dashboard). +# OPENAI_* takes precedence if both are set. +OPENAI_API_KEY=sk-... +# OPENAI_BASE_URL=https://api.openai.com/v1 +# OPENAI_MODEL=gpt-5.4-mini + +# LLM_API_KEY=... 
+# LLM_BASE_URL=https://api.deepseek.com/v1 +# LLM_MODEL=deepseek-chat diff --git a/apps/nextjs-chat-demo/.gitignore b/apps/nextjs-chat-demo/.gitignore new file mode 100644 index 0000000..6bee4d1 --- /dev/null +++ b/apps/nextjs-chat-demo/.gitignore @@ -0,0 +1,38 @@ +# dependencies +node_modules +.pnp +.pnp.* +.yarn/* +!.yarn/patches +!.yarn/plugins +!.yarn/releases +!.yarn/versions + +# testing +coverage + +# next.js +.next/ +out/ +build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# env files +.env +.env*.local + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/apps/nextjs-chat-demo/README.md b/apps/nextjs-chat-demo/README.md new file mode 100644 index 0000000..54a25bf --- /dev/null +++ b/apps/nextjs-chat-demo/README.md @@ -0,0 +1,59 @@ +# nextjs-chat-demo + +A Next.js 15 demo proving `agentic-kit` can replace `@ai-sdk/react` for the +dashboard chatbot. Demonstrates: + +- streaming chat via `useChat` from `@agentic-kit/react` +- a plain server tool (`get_current_time`) +- a **pausable** server tool (`send_email`) — model proposes args, the UI shows + Allow / Deny, the answer is fed back in via `respondWithDecision`, and the + agent resumes server-side. + +## Run + +```bash +# from monorepo root +pnpm install + +# point the demo at OpenAI +export OPENAI_API_KEY=sk-... 
+ +pnpm --filter nextjs-chat-demo dev +# open http://localhost:3001 +``` + +## AI SDK → agentic-kit migration map + +| Dashboard (AI SDK) | This demo (agentic-kit) | +| -------------------------------------------------- | -------------------------------------------------------- | +| `streamText` + `convertToModelMessages` | `Agent.prompt()` / `continue()` + `handle.toResponse()` | +| `tool({ needsApproval: true })` | `AgentTool.decision` JSON Schema | +| `addToolApprovalResponse({ id, approved })` | `respondWithDecision(toolCallId, value)` (auto re-POST) | +| `result.toUIMessageStreamResponse()` | `handle.toResponse()` | +| `useChat` from `@ai-sdk/react` | `useChat` from `@agentic-kit/react` | + +## Out of scope + +This demo deliberately does not port: + +- mentions / @-suggestions +- multi-slot queue (`messageQueue`, `isFullySettled`, `sendAutomaticallyWhen`) +- task queue UI (`plan_tasks`, `complete_task`, `approve_previous_tool`) +- ask vs agent modes, settings menu +- FAB + portal placement +- history dropdown + +These are dashboard UI sugar that sits on top of the SDK, not in it. + +## Workspace dep wiring + +`@agentic-kit/react`, `@agentic-kit/agent`, and `agentic-kit` packages declare +build outputs (`main: index.js`, `module: esm/index.js`) that don't exist on +disk in development. To consume them without a build step the demo combines: + +- `tsconfig.json` `paths` map to `../../packages/*/src/index.ts` +- `next.config.mjs` `transpilePackages` so SWC compiles the TS source +- `experimental.externalDir` so Next is happy reading from outside the app dir + +See [`PLAN.md`](./PLAN.md) for the full implementation plan and +[`GAPS.md`](./GAPS.md) for everything that felt rough to wire up. 
diff --git a/apps/nextjs-chat-demo/next.config.mjs b/apps/nextjs-chat-demo/next.config.mjs new file mode 100644 index 0000000..28ef9da --- /dev/null +++ b/apps/nextjs-chat-demo/next.config.mjs @@ -0,0 +1,28 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = { + reactStrictMode: true, + transpilePackages: [ + 'agentic-kit', + '@agentic-kit/agent', + '@agentic-kit/react', + '@agentic-kit/openai', + '@agentic-kit/anthropic', + '@agentic-kit/ollama', + ], + experimental: { + externalDir: true, + }, + webpack: (config) => { + // The agentic-kit packages are TS source with .js extension imports + // (`from './foo.js'`). webpack doesn't auto-rewrite those to .ts; we + // teach it to fall back to the .ts source. + config.resolve.extensionAlias = { + ...(config.resolve.extensionAlias ?? {}), + '.js': ['.ts', '.tsx', '.js'], + '.mjs': ['.mts', '.mjs'], + }; + return config; + }, +}; + +export default nextConfig; diff --git a/apps/nextjs-chat-demo/package.json b/apps/nextjs-chat-demo/package.json new file mode 100644 index 0000000..529dc21 --- /dev/null +++ b/apps/nextjs-chat-demo/package.json @@ -0,0 +1,30 @@ +{ + "name": "nextjs-chat-demo", + "version": "0.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "next dev --port 3001", + "start": "next start --port 3001", + "lint": "next lint" + }, + "dependencies": { + "@agentic-kit/agent": "workspace:*", + "@agentic-kit/openai": "workspace:*", + "@agentic-kit/react": "workspace:*", + "agentic-kit": "workspace:*", + "clsx": "^2.1.1", + "next": "15.0.4", + "react": "19.0.0", + "react-dom": "19.0.0", + "tailwind-merge": "^3.5.0" + }, + "devDependencies": { + "@tailwindcss/postcss": "^4.1.18", + "@types/node": "^22.10.2", + "@types/react": "19.0.0", + "@types/react-dom": "19.0.0", + "tailwindcss": "^4.1.18", + "typescript": "^5.7.2" + } +} diff --git a/apps/nextjs-chat-demo/postcss.config.mjs b/apps/nextjs-chat-demo/postcss.config.mjs new file mode 100644 index 0000000..a34a3d5 --- /dev/null +++ 
b/apps/nextjs-chat-demo/postcss.config.mjs @@ -0,0 +1,5 @@ +export default { + plugins: { + '@tailwindcss/postcss': {}, + }, +}; diff --git a/apps/nextjs-chat-demo/src/app/api/chat/route.ts b/apps/nextjs-chat-demo/src/app/api/chat/route.ts new file mode 100644 index 0000000..bf65f13 --- /dev/null +++ b/apps/nextjs-chat-demo/src/app/api/chat/route.ts @@ -0,0 +1,93 @@ +import { Agent } from '@agentic-kit/agent'; +import { OpenAIAdapter } from '@agentic-kit/openai'; +import type { Message } from 'agentic-kit'; + +import { tools } from '@/lib/tools'; + +export const runtime = 'nodejs'; +export const dynamic = 'force-dynamic'; + +const SYSTEM_PROMPT = [ + 'You are a friendly assistant in a chat-app demo.', + 'You have two tools available:', + '- get_current_time(timezone?): returns the current time in the requested IANA timezone.', + '- send_email(to, subject, body): drafts an email. The user must approve before it is sent.', + 'When the user asks for the current time anywhere, call get_current_time.', + 'When the user asks you to send an email, call send_email exactly once and wait for the user decision.', + 'Keep replies short.', +].join('\n'); + +interface RequestBody { + messages: Message[]; +} + +function lastMessageHasPendingDecision(messages: Message[]): boolean { + const last = messages[messages.length - 1]; + if (!last || last.role !== 'assistant') return false; + const completedToolCallIds = new Set( + messages + .filter((m): m is Extract => m.role === 'toolResult') + .map((m) => m.toolCallId) + ); + return last.content.some( + (block) => + block.type === 'toolCall' && + !completedToolCallIds.has(block.id) && + 'decision' in block && + block.decision !== undefined + ); +} + +export async function POST(req: Request): Promise { + const apiKey = process.env.OPENAI_API_KEY ?? process.env.LLM_API_KEY; + const baseUrl = + process.env.OPENAI_BASE_URL ?? process.env.LLM_BASE_URL ?? 'https://api.openai.com/v1'; + const modelId = process.env.OPENAI_MODEL ?? 
process.env.LLM_MODEL ?? 'gpt-5.4-mini'; + + if (!apiKey) { + return new Response('OPENAI_API_KEY (or LLM_API_KEY) is not set on the server', { + status: 500, + }); + } + + let body: RequestBody; + try { + body = (await req.json()) as RequestBody; + } catch { + return new Response('Invalid JSON body', { status: 400 }); + } + + const messages = Array.isArray(body.messages) ? body.messages : []; + if (messages.length === 0) { + return new Response('Empty messages', { status: 400 }); + } + + const adapter = new OpenAIAdapter({ apiKey, baseUrl }); + const model = adapter.createModel(modelId); + + const agent = new Agent({ + initialState: { model, tools, systemPrompt: SYSTEM_PROMPT }, + streamFn: (m, ctx, opts) => adapter.stream(m, ctx, opts), + }); + + const isResume = lastMessageHasPendingDecision(messages); + + if (isResume) { + agent.replaceMessages(messages); + try { + const handle = agent.continue(); + return handle.toResponse(); + } catch (err) { + return new Response(`continue() failed: ${(err as Error).message}`, { status: 400 }); + } + } + + const last = messages[messages.length - 1]; + if (last.role !== 'user') { + return new Response('Last message must be a user message when not resuming', { status: 400 }); + } + + agent.replaceMessages(messages.slice(0, -1)); + const handle = agent.prompt(last); + return handle.toResponse(); +} diff --git a/apps/nextjs-chat-demo/src/app/globals.css b/apps/nextjs-chat-demo/src/app/globals.css new file mode 100644 index 0000000..ba8bba4 --- /dev/null +++ b/apps/nextjs-chat-demo/src/app/globals.css @@ -0,0 +1,9 @@ +@import "tailwindcss"; + +:root { + color-scheme: light dark; +} + +html, body { + height: 100%; +} diff --git a/apps/nextjs-chat-demo/src/app/layout.tsx b/apps/nextjs-chat-demo/src/app/layout.tsx new file mode 100644 index 0000000..60114c0 --- /dev/null +++ b/apps/nextjs-chat-demo/src/app/layout.tsx @@ -0,0 +1,18 @@ +import type { ReactNode } from 'react'; + +import './globals.css'; + +export const metadata = { + 
title: 'agentic-kit chat demo', + description: 'Next.js demo proving agentic-kit can replace AI SDK for the dashboard chatbot.', +}; + +export default function RootLayout({ children }: { children: ReactNode }) { + return ( + + + {children} + + + ); +} diff --git a/apps/nextjs-chat-demo/src/app/page.tsx b/apps/nextjs-chat-demo/src/app/page.tsx new file mode 100644 index 0000000..8d2be67 --- /dev/null +++ b/apps/nextjs-chat-demo/src/app/page.tsx @@ -0,0 +1,9 @@ +import { ChatPanel } from '@/components/chat-panel'; + +export default function Page() { + return ( +
+ +
+ ); +} diff --git a/apps/nextjs-chat-demo/src/components/chat-input.tsx b/apps/nextjs-chat-demo/src/components/chat-input.tsx new file mode 100644 index 0000000..89ebfb3 --- /dev/null +++ b/apps/nextjs-chat-demo/src/components/chat-input.tsx @@ -0,0 +1,59 @@ +'use client'; + +import { type KeyboardEvent, useState } from 'react'; + +import { cn } from '@/lib/cn'; + +interface ChatInputProps { + disabled?: boolean; + placeholder?: string; + onSend: (text: string) => void; +} + +export function ChatInput({ disabled, placeholder, onSend }: ChatInputProps) { + const [value, setValue] = useState(''); + + function submit() { + const text = value.trim(); + if (!text || disabled) return; + onSend(text); + setValue(''); + } + + function onKeyDown(e: KeyboardEvent) { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + submit(); + } + } + + return ( +
+