From 9f446057db6b22200aa0dfe0623b085b4c460da6 Mon Sep 17 00:00:00 2001 From: Liz <91279165+lizradway@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:59:56 -0400 Subject: [PATCH] feat(context): add context manager class --- package-lock.json | 1 - strands-ts/src/agent/agent.ts | 37 +++- .../compression/__tests__/protection.test.ts | 83 ++++++++ .../compression/context-compression.ts | 185 +++++++++++++++++ .../context-manager/compression/protection.ts | 138 +++++++++++++ .../compression/strategies/summarize.ts | 140 +++++++++++++ .../compression/strategies/truncate.ts | 85 ++++++++ .../src/context-manager/context-manager.ts | 190 ++++++++++++++++++ .../token-estimation/token-estimation.ts | 31 +++ .../tool-result-cache/tool-result-cache.ts | 7 + strands-ts/src/index.ts | 16 ++ 11 files changed, 908 insertions(+), 5 deletions(-) create mode 100644 strands-ts/src/context-manager/compression/__tests__/protection.test.ts create mode 100644 strands-ts/src/context-manager/compression/context-compression.ts create mode 100644 strands-ts/src/context-manager/compression/protection.ts create mode 100644 strands-ts/src/context-manager/compression/strategies/summarize.ts create mode 100644 strands-ts/src/context-manager/compression/strategies/truncate.ts create mode 100644 strands-ts/src/context-manager/context-manager.ts create mode 100644 strands-ts/src/context-manager/token-estimation/token-estimation.ts create mode 100644 strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts diff --git a/package-lock.json b/package-lock.json index ab84750f0..db73996d8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8120,7 +8120,6 @@ "os": [ "darwin" ], - "peer": true, "engines": { "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } diff --git a/strands-ts/src/agent/agent.ts b/strands-ts/src/agent/agent.ts index 7fb3d6484..c3ccfecca 100644 --- a/strands-ts/src/agent/agent.ts +++ b/strands-ts/src/agent/agent.ts @@ -44,6 +44,8 @@ import { PluginRegistry } from '../plugins/registry.js' import { SlidingWindowConversationManager } from '../conversation-manager/sliding-window-conversation-manager.js' import { NullConversationManager } from '../conversation-manager/null-conversation-manager.js' import { ConversationManager } from '../conversation-manager/conversation-manager.js' +import type { ContextManagerParam } from '../context-manager/context-manager.js' +import { resolveContextManager } from '../context-manager/context-manager.js' import { HookRegistryImplementation } from '../hooks/registry.js' import type { HookableEventConstructor, HookCallback, HookCallbackOptions, HookCleanup } from '../hooks/types.js' import { @@ -167,9 +169,24 @@ export type AgentConfig = { * Defaults to true. */ printer?: boolean + /** + * Pre-composed context management strategy. + * + * - `"auto"`: enables tool result caching and proactive compression with defaults. + * - Object: fine-grained control over strategy, storage, caching, and compression settings. + * - `undefined` (default): no context management facade; use `conversationManager` + * and `plugins` directly. + * + * When set, takes priority over `conversationManager` — `NullConversationManager` is used. + */ + contextManager?: ContextManagerParam /** * Conversation manager for handling message history and context overflow. * Defaults to SlidingWindowConversationManager with windowSize of 40. + * + * @remarks Pending deprecation — use `contextManager` instead. The `contextManager` parameter + * composes compression, tool result caching, and token estimation into a single + * configuration surface. This field will be deprecated in a future version. */ conversationManager?: ConversationManager /** @@ -331,14 +348,22 @@ export class Agent implements LocalAgent, InvokableAgent { this.model = config?.model ?? new BedrockModel() } - // Validate and assign conversation manager + let contextManagerPlugin: Plugin | undefined + if (config?.contextManager) { + contextManagerPlugin = resolveContextManager(config.contextManager, config.plugins) + } + + // Validate and assign conversation manager. + // When contextManager is set, ContextCompression owns compression — use NullConversationManager. if (this.model.stateful) { - if (config?.conversationManager) { + if (config?.conversationManager || config?.contextManager) { throw new Error( - 'Cannot use a conversationManager with a stateful model. The model manages conversation state server-side.' + 'Cannot use a conversationManager or contextManager with a stateful model. The model manages conversation state server-side.' ) } this._conversationManager = new NullConversationManager() + } else if (contextManagerPlugin) { + this._conversationManager = new NullConversationManager() } else { this._conversationManager = config?.conversationManager ?? new SlidingWindowConversationManager({ windowSize: 40 }) @@ -372,9 +397,12 @@ export class Agent implements LocalAgent, InvokableAgent { // - Retry-strategy ordering is not load-bearing for correctness: `DefaultModelRetryStrategy` // guards on `event.retry`, so a user hook that already set it short-circuits // the strategy regardless of registration order. + // - contextManager plugin goes before user plugins so the offloader's AfterToolCallEvent + // hook fires first, ensuring large results are cached before user hooks see the event. this._pluginRegistry = new PluginRegistry([ this._conversationManager, ...retryStrategies, + ...(contextManagerPlugin ? [contextManagerPlugin] : []), ...(config?.plugins ?? []), ...(config?.sessionManager ? [config.sessionManager] : []), new ModelPlugin(this.model), @@ -1397,7 +1425,8 @@ export class Agent implements LocalAgent, InvokableAgent { let attemptCount = 1 while (true) { - // Estimate input tokens for the upcoming model call (non-fatal if estimation fails) + // Pending deprecation: token estimation will move fully to ContextManager. + // This remains for backward compat with standalone ConversationManager.proactiveCompression. let projectedInputTokens: number | undefined try { projectedInputTokens = await this._estimateInputTokens(streamOptions) diff --git a/strands-ts/src/context-manager/compression/__tests__/protection.test.ts b/strands-ts/src/context-manager/compression/__tests__/protection.test.ts new file mode 100644 index 000000000..311b5cce6 --- /dev/null +++ b/strands-ts/src/context-manager/compression/__tests__/protection.test.ts @@ -0,0 +1,83 @@ +import { describe, it, expect } from 'vitest' +import { isProtected, pinMessage } from '../protection.js' +import { Message, TextBlock, ToolUseBlock, ToolResultBlock } from '../../../types/messages.js' + +function userMsg(text: string): Message { + return new Message({ role: 'user', content: [new TextBlock(text)] }) +} + +function assistantMsg(text: string): Message { + return new Message({ role: 'assistant', content: [new TextBlock(text)] }) +} + +function toolUseMsg(toolUseId: string): Message { + return new Message({ role: 'assistant', content: [new ToolUseBlock({ toolUseId, name: 'test', input: {} })] }) +} + +function toolResultMsg(toolUseId: string): Message { + return new Message({ + role: 'user', + content: [new ToolResultBlock({ toolUseId, content: [new TextBlock('result')], status: 'success' })], + }) +} + +describe('isProtected', () => { + describe('no range, no pin', () => { + it('returns false for unprotected message', () => { + const messages = [userMsg('a'), assistantMsg('b')] + expect(isProtected(messages, 0)).toBe(false) + expect(isProtected(messages, 1)).toBe(false) + }) + }) + + describe('positive range (protect first N)', () => { + it('protects messages within range', () => { + const messages = [userMsg('a'), assistantMsg('b'), userMsg('c')] + expect(isProtected(messages, 0, 2)).toBe(true) + expect(isProtected(messages, 1, 2)).toBe(true) + expect(isProtected(messages, 2, 2)).toBe(false) + }) + + it('protects toolUse outside range if its toolResult is inside range', () => { + const messages = [userMsg('task'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('next')] + // range=2 protects [0] and [1]. [2] is toolResult — check if toolUse at [1] being in range protects [2] + // Actually [2] is outside range. But [1] (toolUse) is in range, so [2] (toolResult, partner) should be protected. + expect(isProtected(messages, 2, 2)).toBe(true) + }) + + it('protects toolResult outside range if its toolUse is inside range', () => { + const messages = [toolUseMsg('t1'), toolResultMsg('t1'), userMsg('a'), assistantMsg('b')] + // range=1 protects [0] (toolUse). [1] (toolResult) is outside but partner is protected. + expect(isProtected(messages, 1, 1)).toBe(true) + }) + }) + + describe('negative range (protect last N)', () => { + it('protects messages within range', () => { + const messages = [userMsg('a'), assistantMsg('b'), userMsg('c'), assistantMsg('d'), userMsg('e')] + expect(isProtected(messages, 0, -2)).toBe(false) + expect(isProtected(messages, 2, -2)).toBe(false) + expect(isProtected(messages, 3, -2)).toBe(true) + expect(isProtected(messages, 4, -2)).toBe(true) + }) + + it('protects toolUse outside range if its toolResult is inside range', () => { + const messages = [userMsg('a'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('b'), assistantMsg('c')] + // range=-3: protects [2], [3], [4]. toolUse at [1] is outside, but [2] (its toolResult) is in range. + expect(isProtected(messages, 1, -3)).toBe(true) + }) + }) + + describe('pinned messages', () => { + it('protects pinned message regardless of range', () => { + const messages = [userMsg('a'), pinMessage(assistantMsg('pinned')), userMsg('c')] + expect(isProtected(messages, 1)).toBe(true) + expect(isProtected(messages, 1, 0)).toBe(true) + }) + + it('protects tool-pair partner of pinned message', () => { + const messages = [pinMessage(toolUseMsg('t1')), toolResultMsg('t1'), userMsg('a')] + expect(isProtected(messages, 1)).toBe(true) // toolResult partner of pinned toolUse + }) + }) +}) diff --git a/strands-ts/src/context-manager/compression/context-compression.ts b/strands-ts/src/context-manager/compression/context-compression.ts new file mode 100644 index 000000000..4533d6206 --- /dev/null +++ b/strands-ts/src/context-manager/compression/context-compression.ts @@ -0,0 +1,185 @@ +import type { Plugin } from '../../plugins/plugin.js' +import type { LocalAgent } from '../../types/agent.js' +import type { Tool } from '../../tools/tool.js' +import type { Message } from '../../types/messages.js' +import type { Model } from '../../models/model.js' +import { AfterInvocationEvent, AfterModelCallEvent, BeforeModelCallEvent } from '../../hooks/events.js' +import { ContextWindowOverflowError } from '../../errors.js' +import { truncate } from './strategies/truncate.js' +import { summarize, type SummarizeOptions } from './strategies/summarize.js' +import { estimateInputTokens } from '../token-estimation/token-estimation.js' +import { logger } from '../../logging/logger.js' +import { warnOnce } from '../../logging/warn-once.js' + +const DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000 +const DEFAULT_PROACTIVE_THRESHOLD = 0.7 +const DEFAULT_WINDOW_SIZE = 40 + +export type CompressionMethod = 'truncate' | 'summarize' + +type SharedCompressionOptions = { + /** + * Proactive compression before the model call. + * - `true`: compress when 70% of the context window is used (default threshold). + * - `{ threshold: number }`: compress at the specified ratio (0, 1]. + * - `false`: disable proactive compression; only reactive overflow recovery is used. + * - Omitted: defaults to `true`. + */ + proactive?: boolean | { threshold: number } + /** + * Protect messages from eviction during reduction. + * Positive values protect the first N messages; negative values protect the last N. + * + * For agent-controlled pinning, use the `pinMessageTool` (agentic mode). + */ + protectedMessageRange?: number +} + +export type TruncateCompressionConfig = SharedCompressionOptions & { + method?: 'truncate' + /** Maximum messages to keep after trimming. Defaults to 40. */ + windowSize?: number +} + +export type SummarizeCompressionConfig = SharedCompressionOptions & { + method: 'summarize' + /** Ratio of messages to summarize (0.1–0.8). Defaults to 0.3. */ + summaryRatio?: number + /** Minimum recent messages to preserve during summarization. Defaults to 10. */ + preserveRecentMessages?: number +} + +/** + * Compression configuration (discriminated union on `method`). + * + * @example + * ```typescript + * contextManager: { compression: true } // defaults (truncate) + * contextManager: { compression: 'summarize' } // strategy shorthand + * contextManager: { compression: { method: 'truncate', windowSize: 30 } } // full config + * contextManager: { compression: { method: 'summarize', summaryRatio: 0.5 } } // full config + * ``` + */ +export type CompressionOptions = TruncateCompressionConfig | SummarizeCompressionConfig + +/** + * Plugin that handles context compression — both proactive (before model call when + * threshold is exceeded) and reactive (after model call on overflow error). + * + * Delegates reduction to strategy functions (truncate or summarize). + */ +export class ContextCompression implements Plugin { + readonly name = 'strands:context-compression' + + private readonly _proactiveThreshold: number | undefined + private readonly _method: CompressionMethod + private readonly _windowSize: number + private readonly _protectedMessageRange: number | undefined + private readonly _summarizeOptions: SummarizeOptions | undefined + + constructor(config?: CompressionOptions) { + const proactive = config?.proactive ?? true + if (proactive === false) { + this._proactiveThreshold = undefined + } else if (proactive === true) { + this._proactiveThreshold = DEFAULT_PROACTIVE_THRESHOLD + } else { + if (proactive.threshold <= 0 || proactive.threshold > 1) { + throw new Error( + `proactive compression threshold must be between 0 (exclusive) and 1 (inclusive), got ${proactive.threshold}` + ) + } + this._proactiveThreshold = proactive.threshold + } + + this._method = config?.method ?? 'truncate' + this._protectedMessageRange = config?.protectedMessageRange + + if (config?.method === 'summarize') { + this._windowSize = DEFAULT_WINDOW_SIZE + this._summarizeOptions = { + ...(config.summaryRatio !== undefined && { summaryRatio: config.summaryRatio }), + ...(config.preserveRecentMessages !== undefined && { preserveRecentMessages: config.preserveRecentMessages }), + } + } else { + this._windowSize = (config as TruncateCompressionConfig | undefined)?.windowSize ?? DEFAULT_WINDOW_SIZE + this._summarizeOptions = undefined + } + } + + getTools(): Tool[] { + return [] + } + + initAgent(agent: LocalAgent): void { + // Reactive overflow recovery + agent.addHook(AfterModelCallEvent, async (event) => { + if (event.error instanceof ContextWindowOverflowError) { + if (await this._reduce(event.agent.messages, event.model)) { + event.retry = true + } + } + }) + + // Proactive compression + agent.addHook(BeforeModelCallEvent, async (event) => { + if (this._proactiveThreshold === undefined) { + return + } + + let contextWindowLimit = event.model.getConfig().contextWindowLimit + if (contextWindowLimit === undefined) { + contextWindowLimit = DEFAULT_CONTEXT_WINDOW_LIMIT + warnOnce( + logger, + `context_compression | contextWindowLimit is not set on the model, using default of ${DEFAULT_CONTEXT_WINDOW_LIMIT} | set contextWindowLimit in your model config for accurate proactive compression` + ) + } + + const projectedInputTokens = + event.projectedInputTokens ?? (await estimateInputTokens(event.agent.messages, event.model)) + + if (projectedInputTokens === undefined) { + return + } + + const ratio = projectedInputTokens / contextWindowLimit + if (ratio >= this._proactiveThreshold) { + logger.debug( + `projected_tokens=<${projectedInputTokens}>, limit=<${contextWindowLimit}>, ratio=<${ratio.toFixed(2)}>, threshold=<${this._proactiveThreshold}> | compression threshold exceeded, reducing context` + ) + try { + await this._reduce(event.agent.messages, event.model) + } catch (e) { + logger.warn(`context_compression | proactive compression failed, continuing | error=<${e}>`) + } + } + }) + + // Sliding window enforcement after each invocation (truncate method only) + if (this._method === 'truncate') { + agent.addHook(AfterInvocationEvent, (event) => { + if (event.agent.messages.length > this._windowSize) { + truncate(event.agent.messages, this._windowSize, { + ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }), + }) + } + }) + } + } + + private async _reduce(messages: Message[], model: Model): Promise { + switch (this._method) { + case 'summarize': + return summarize(messages, model, { + ...this._summarizeOptions, + ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }), + }) + case 'truncate': + default: + return truncate(messages, this._windowSize, { + ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }), + }) + } + } +} diff --git a/strands-ts/src/context-manager/compression/protection.ts b/strands-ts/src/context-manager/compression/protection.ts new file mode 100644 index 000000000..c736c49c3 --- /dev/null +++ b/strands-ts/src/context-manager/compression/protection.ts @@ -0,0 +1,138 @@ +import { z } from 'zod' +import { Message, type ToolUseBlock, type ToolResultBlock } from '../../types/messages.js' +import { tool } from '../../tools/tool-factory.js' + +// --- Pin utilities --- + +/** + * Check if a single message is pinned. + * + * @param message - The message to check + * @returns `true` if the message has `metadata.custom.pinned === true` + */ +export function isPinned(message: Message): boolean +/** + * Check if a message is pinned, including tool-pair partner protection. + * Returns `true` if the message at `index` is pinned, or if it is the + * adjacent tool-pair partner (toolUse/toolResult) of a pinned message, + * matched by toolUseId. + * + * @param messages - The full messages array + * @param index - The index to check + * @returns `true` if the message or its tool-pair partner is pinned + */ +export function isPinned(messages: Message[], index: number): boolean +export function isPinned(messageOrMessages: Message | Message[], index?: number): boolean { + if (index === undefined) { + return (messageOrMessages as Message).metadata?.custom?.pinned === true + } + + const messages = messageOrMessages as Message[] + const msg = messages[index]! + if (msg.metadata?.custom?.pinned === true) return true + + const toolResultBlocks = msg.content.filter((b): b is ToolResultBlock => b.type === 'toolResultBlock') + if (toolResultBlocks.length > 0 && index > 0) { + const prev = messages[index - 1]! + if (prev.metadata?.custom?.pinned === true) { + const resultIds = new Set(toolResultBlocks.map((b) => b.toolUseId)) + if (prev.content.some((b) => b.type === 'toolUseBlock' && resultIds.has((b as ToolUseBlock).toolUseId))) { + return true + } + } + } + + const toolUseBlocks = msg.content.filter((b): b is ToolUseBlock => b.type === 'toolUseBlock') + if (toolUseBlocks.length > 0 && index + 1 < messages.length) { + const next = messages[index + 1]! + if (next.metadata?.custom?.pinned === true) { + const useIds = new Set(toolUseBlocks.map((b) => b.toolUseId)) + if (next.content.some((b) => b.type === 'toolResultBlock' && useIds.has((b as ToolResultBlock).toolUseId))) { + return true + } + } + } + + return false +} + +/** + * Returns a new Message marked as pinned (protected from eviction during context reduction). + */ +export function pinMessage(message: Message): Message { + return new Message({ + role: message.role, + content: message.content, + metadata: { + ...message.metadata, + custom: { ...message.metadata?.custom, pinned: true }, + }, + }) +} + +/** + * Returns a new Message with pinning removed. + */ +export function unpinMessage(message: Message): Message { + const { pinned: _, ...restCustom } = message.metadata?.custom ?? {} + const { custom: __, ...restMetadata } = message.metadata ?? {} + const hasCustom = Object.keys(restCustom).length > 0 + const hasMetadata = hasCustom || Object.keys(restMetadata).length > 0 + const metadata = hasMetadata ? { ...restMetadata, ...(hasCustom ? { custom: restCustom } : {}) } : undefined + + return new Message({ + role: message.role, + content: message.content, + ...(metadata !== undefined ? { metadata } : {}), + }) +} + +/** + * Agent-invokable tool that pins or unpins a message in the conversation history. + */ +export const pinMessageTool = tool({ + name: 'pin_message', + description: + 'Pin or unpin a message in the conversation history. ' + + 'Pinned messages are protected from eviction during context reduction. ' + + 'Use this to preserve important context that should not be summarized or trimmed away.', + inputSchema: z.object({ + index: z.number().int().min(0).describe('The zero-based index of the message in the conversation history.'), + action: z.enum(['pin', 'unpin']).default('pin').describe('Whether to pin or unpin the message.'), + }), + callback: ({ index, action }, context) => { + const messages = context!.agent.messages + if (index >= messages.length) { + return `Invalid index ${index}. Conversation has ${messages.length} messages (indices 0-${messages.length - 1}).` + } + messages[index] = action === 'pin' ? pinMessage(messages[index]!) : unpinMessage(messages[index]!) + return `${action === 'pin' ? 'Pinned' : 'Unpinned'} message at index ${index}.` + }, +}) + +// --- Range + pin protection --- + +/** + * Check if a message at the given index is protected from eviction. + * A message is protected if it is pinned, within the protected range, + * or is a tool-pair partner of a range-protected message. + */ +export function isProtected(messages: Message[], index: number, range?: number): boolean { + if (isPinned(messages, index)) return true + if (range === undefined || range === 0) return false + if (inRange(messages.length, index, range)) return true + + const msg = messages[index]! + const hasToolResult = msg.content.some((b) => b.type === 'toolResultBlock') + if (hasToolResult && index > 0 && inRange(messages.length, index - 1, range)) return true + + const hasToolUse = msg.content.some((b) => b.type === 'toolUseBlock') + if (hasToolUse && index + 1 < messages.length && inRange(messages.length, index + 1, range)) return true + + return false +} + +function inRange(length: number, index: number, range: number): boolean { + if (range > 0) return index < range + return index >= length + range +} diff --git a/strands-ts/src/context-manager/compression/strategies/summarize.ts b/strands-ts/src/context-manager/compression/strategies/summarize.ts new file mode 100644 index 000000000..7aef57cc8 --- /dev/null +++ b/strands-ts/src/context-manager/compression/strategies/summarize.ts @@ -0,0 +1,140 @@ +import { Message, TextBlock } from '../../../types/messages.js' +import type { Model } from '../../../models/model.js' +import { isProtected } from '../protection.js' +import { logger } from '../../../logging/logger.js' + +const SUMMARIZATION_PROMPT = `You are a conversation summarizer. Provide a concise summary of the conversation history. + +Format Requirements: +- You MUST create a structured and concise summary in bullet-point format. +- You MUST NOT respond conversationally. +- You MUST NOT address the user directly. +- You MUST NOT comment on tool availability. + +Assumptions: +- You MUST NOT assume tool executions failed unless otherwise stated. + +Task: +Your task is to create a structured summary document: +- It MUST contain bullet points with key topics and questions covered +- It MUST contain bullet points for all significant tools executed and their results +- It MUST contain bullet points for any code or technical information shared +- It MUST contain a section of key insights gained +- It MUST format the summary in the third person + +Example format: + +## Conversation Summary +* Topic 1: Key information +* Topic 2: Key information + +## Tools Executed +* Tool X: Result Y` + +export type SummarizeOptions = { + /** Ratio of messages to summarize (0.1–0.8). Defaults to 0.3. */ + summaryRatio?: number + /** Minimum recent messages to preserve. Defaults to 10. */ + preserveRecentMessages?: number + /** Positive: protect first N messages. Negative: protect last N messages. */ + protectedMessageRange?: number +} + +/** + * Summarize the oldest messages and replace them with a model-generated summary. + * + * @param messages - The messages array to mutate in place + * @param model - The model to use for generating the summary + * @param options - Summarization options + * @returns `true` if messages were summarized, `false` if not enough to summarize + */ +export async function summarize(messages: Message[], model: Model, options?: SummarizeOptions): Promise { + const summaryRatio = Math.max(0.1, Math.min(0.8, options?.summaryRatio ?? 0.3)) + const preserveRecent = options?.preserveRecentMessages ?? 10 + const protectedRange = options?.protectedMessageRange + + let count = Math.max(1, Math.floor(messages.length * summaryRatio)) + count = Math.min(count, messages.length - preserveRecent) + + if (count <= 0) { + logger.warn( + `preserve_recent=<${preserveRecent}>, messages=<${messages.length}> | insufficient messages for summarization` + ) + return false + } + + count = adjustSplitForToolPairs(messages, count) + + // Partition [0, count) into protected (preserve) and non-protected (summarize) + const protectedToPreserve: Message[] = [] + const toSummarize: Message[] = [] + for (let i = 0; i < count; i++) { + if (isProtected(messages, i, protectedRange)) { + protectedToPreserve.push(messages[i]!) + } else { + toSummarize.push(messages[i]!) + } + } + + if (toSummarize.length === 0) { + logger.warn(`messages=<${messages.length}> | all messages in summarize range are protected, unable to reduce`) + return false + } + + const summary = await generateSummary(toSummarize, model) + + // Replace summarized range with protected messages + summary + messages.splice(0, count, ...protectedToPreserve, summary) + return true +} + +async function generateSummary(messagesToSummarize: Message[], model: Model): Promise { + const input = [ + ...messagesToSummarize, + new Message({ role: 'user', content: [new TextBlock('Please summarize this conversation.')] }), + ] + + const stream = model.streamAggregated(input, { systemPrompt: SUMMARIZATION_PROMPT }) + + let result: Awaited> | undefined + for (;;) { + result = await stream.next() + if (result.done) break + } + + if (!result?.done || !result.value) { + throw new Error('Failed to generate summary: no response from model') + } + + return new Message({ role: 'user', content: result.value.message.content }) +} + +/** + * Adjust split point forward to avoid breaking tool use/result pairs. + */ +function adjustSplitForToolPairs(messages: Message[], splitPoint: number): number { + if (splitPoint >= messages.length) return splitPoint + + let idx = splitPoint + while (idx < messages.length) { + const msg = messages[idx]! + + if (msg.content.some((b) => b.type === 'toolResultBlock')) { + idx++ + continue + } + + const hasToolUse = msg.content.some((b) => b.type === 'toolUseBlock') + if (hasToolUse) { + const next = messages[idx + 1] + if (!next?.content.some((b) => b.type === 'toolResultBlock')) { + idx++ + continue + } + } + + break + } + + return idx >= messages.length ? splitPoint : idx +} diff --git a/strands-ts/src/context-manager/compression/strategies/truncate.ts b/strands-ts/src/context-manager/compression/strategies/truncate.ts new file mode 100644 index 000000000..ec082e60f --- /dev/null +++ b/strands-ts/src/context-manager/compression/strategies/truncate.ts @@ -0,0 +1,85 @@ +import type { Message } from '../../../types/messages.js' +import { isProtected } from '../protection.js' +import { logger } from '../../../logging/logger.js' + +export type TruncateOptions = { + /** Positive: protect first N messages. Negative: protect last N messages. */ + protectedMessageRange?: number +} + +/** + * Truncate oldest messages from the conversation, preserving tool use/result pairs. + * Protected messages (by range) are never removed. + * + * @param messages - The messages array to mutate in place + * @param windowSize - Maximum messages to keep + * @param options - Options including protectedMessageRange + * @returns `true` if messages were removed, `false` if no valid trim point found + */ +export function truncate(messages: Message[], windowSize: number, options?: TruncateOptions): boolean { + if (messages.length <= 2) return false + + const protectedRange = options?.protectedMessageRange + + let trimIndex = messages.length <= windowSize ? 2 : messages.length - windowSize + trimIndex = findValidTrimPoint(messages, trimIndex) + + if (trimIndex >= messages.length) { + logger.warn(`window_size=<${windowSize}>, messages=<${messages.length}> | unable to trim, no valid trim point`) + return false + } + + // Collect non-protected indices in [0, trimIndex) to remove + const indicesToRemove: number[] = [] + for (let i = 0; i < trimIndex; i++) { + if (isProtected(messages, i, protectedRange)) continue + indicesToRemove.push(i) + } + + if (indicesToRemove.length === 0) { + logger.warn( + `window_size=<${windowSize}>, messages=<${messages.length}> | all messages in trim range are protected, unable to reduce` + ) + return false + } + + // Remove in reverse order to keep indices stable + for (let i = indicesToRemove.length - 1; i >= 0; i--) { + messages.splice(indicesToRemove[i]!, 1) + } + return true +} + +/** + * Find a valid trim point starting from the given index. + * Skips positions that would leave orphaned toolResults or toolUse without a following toolResult. + */ +function findValidTrimPoint(messages: Message[], startIndex: number): number { + let idx = startIndex + while (idx < messages.length) { + const msg = messages[idx] + if (!msg) break + + if (msg.role !== 'user') { + idx++ + continue + } + + if (msg.content.some((b) => b.type === 'toolResultBlock')) { + idx++ + continue + } + + const hasToolUse = msg.content.some((b) => b.type === 'toolUseBlock') + if (hasToolUse) { + const next = messages[idx + 1] + if (!next || !next.content.some((b) => b.type === 'toolResultBlock')) { + idx++ + continue + } + } + + break + } + return idx +} diff --git a/strands-ts/src/context-manager/context-manager.ts b/strands-ts/src/context-manager/context-manager.ts new file mode 100644 index 000000000..c7c3a4c1e --- /dev/null +++ b/strands-ts/src/context-manager/context-manager.ts @@ -0,0 +1,190 @@ +import type { Storage } from '../vended-plugins/context-offloader/storage.js' +import type { Plugin } from '../plugins/plugin.js' +import type { Tool } from '../tools/tool.js' +import type { LocalAgent } from '../types/agent.js' +import { ContextCompression } from './compression/context-compression.js' +import { ContextOffloader } from '../vended-plugins/context-offloader/plugin.js' +import { InMemoryStorage } from '../vended-plugins/context-offloader/storage.js' + +export type ContextStrategyValue = 'auto' + +/** + * Configuration for the offloader component. + */ +export type OffloaderConfig = { + /** Token threshold above which tool results are offloaded. Defaults to 2500. */ + threshold?: number + /** Number of tokens to keep as an inline preview. Defaults to 500. */ + previewTokens?: number +} + +/** + * Compression configuration accepted by contextManager. + * - `true`: enable with defaults (truncate, proactive at 0.7). + * - `'truncate'` / `'summarize'`: enable specific strategy with defaults. + * - Object: full config with strategy and options. + * - Omitted: disabled. + */ +export type CompressionConfig = + | true + | import('./compression/context-compression.js').CompressionMethod + | import('./compression/context-compression.js').CompressionOptions + +/** + * Configuration accepted by the {@link ContextManager} constructor. + * + * Config objects are additive — only features you explicitly set are enabled. + * Use `"auto"` to enable everything with defaults. + */ +export type ContextManagerConfig = { + /** Strategy name. Only "auto" is supported currently. */ + strategy?: ContextStrategyValue + /** Storage backend for cached tool results. Defaults to InMemoryStorage. */ + storage?: Storage + /** + * Context offloader configuration. + * - `true`: enable with defaults (threshold=2500, previewTokens=500). + * - Object: enable with custom settings. + * - Omitted: disabled. + */ + offloader?: true | OffloaderConfig + /** + * Compression configuration. + * - `true`: enable with defaults (truncate, proactive at 0.7). + * - `'truncate'` / `'summarize'`: enable specific strategy with defaults. + * - `CompressionStrategy.Truncate(...)` / `CompressionStrategy.Summarize(...)`: full config. + * - Omitted: disabled. + */ + compression?: CompressionConfig +} + +/** + * The `contextManager` parameter type accepted by AgentConfig. + * + * - `"auto"`: enables everything with defaults. + * - `{ strategy: 'auto', ... }`: auto with overrides (omitted features stay enabled). + * - `{ compression: true }`: additive — only what you set is enabled. + * - `undefined` (default): no context management facade. + */ +export type ContextManagerParam = ContextStrategyValue | ContextManagerConfig + +/** + * Pre-composed context management for agents. + * + * Implements {@link Plugin} — registers hooks for token estimation and composes + * sub-plugins (ContextCompression, ContextOffloader) that handle the actual + * compression and caching behavior. + * + * @example + * ```typescript + * // Config shorthand (most users) + * const agent = new Agent({ contextManager: "auto" }) + * + * // Class instance (power users who need a handle) + * const cm = new ContextManager({ storage: new S3Storage("bucket") }) + * const agent = new Agent({ contextManager: cm }) + * cm.storage // direct access + * cm.budget // { used, limit, ratio } + * ``` + */ +export class ContextManager implements Plugin { + readonly name = 'strands:context-manager' + readonly storage: Storage + + private readonly _config: ContextManagerConfig + private _subPlugins: Plugin[] | undefined + + constructor(config?: ContextManagerConfig) { + this._config = config ?? {} + this.storage = this._config.storage ?? new InMemoryStorage() + } + + /** + * Resolve sub-plugins, skipping any that the user already provides. + * Called once before plugin initialization. + * @internal + */ + _resolveSubPlugins(userPlugins?: Plugin[]): void { + this._subPlugins = this._buildSubPlugins(userPlugins) + } + + getTools(): Tool[] { + const plugins = this._subPlugins ?? [] + const tools: Tool[] = [] + for (const plugin of plugins) { + if (plugin.getTools) { + tools.push(...plugin.getTools()) + } + } + return tools + } + + initAgent(agent: LocalAgent): void { + if (!this._subPlugins) { + this._subPlugins = this._buildSubPlugins() + } + + for (const plugin of this._subPlugins) { + plugin.initAgent(agent) + } + } + + private _buildSubPlugins(userPlugins?: Plugin[]): Plugin[] { + const config = this._config + const plugins: Plugin[] = [] + + if (config.compression) { + const userProvided = userPlugins?.some((p) => p.name === 'strands:context-compression') + if (!userProvided) { + let compressionConfig: import('./compression/context-compression.js').CompressionOptions | undefined + if (config.compression === true) { + compressionConfig = undefined + } else if (typeof config.compression === 'string') { + compressionConfig = { method: config.compression } + } else { + compressionConfig = config.compression + } + plugins.push(new ContextCompression(compressionConfig)) + } + } + + if (config.offloader) { + const userProvided = userPlugins?.some((p) => p.name === 'strands:context-offloader') + if (!userProvided) { + const offloaderConfig = config.offloader === true ? {} : config.offloader + plugins.push( + new ContextOffloader({ + storage: this.storage, + maxResultTokens: offloaderConfig.threshold ?? 2500, + previewTokens: offloaderConfig.previewTokens ?? 500, + includeRetrievalTool: true, + }) + ) + } + } + + return plugins + } +} + +/** + * Resolve a `contextManager` parameter into a ContextManager plugin instance. + * User-provided plugins that overlap with sub-plugins take precedence. + * + * @param param - The contextManager config (strategy string, config object, or class instance) + * @param userPlugins - User-provided plugins array, used for dedup checking + * @internal + */ +const STRATEGY_DEFAULTS = { + auto: { compression: true, offloader: true }, +} satisfies Record> + +export function resolveContextManager(param: ContextManagerParam, userPlugins?: Plugin[]): ContextManager { + const base = typeof param === 'string' ? { strategy: param } : param + const defaults = base.strategy ? STRATEGY_DEFAULTS[base.strategy] : undefined + const config = defaults ? { ...defaults, ...base } : base + + const instance = new ContextManager(config) + instance._resolveSubPlugins(userPlugins) + return instance +} diff --git a/strands-ts/src/context-manager/token-estimation/token-estimation.ts b/strands-ts/src/context-manager/token-estimation/token-estimation.ts new file mode 100644 index 000000000..05b3e9944 --- /dev/null +++ b/strands-ts/src/context-manager/token-estimation/token-estimation.ts @@ -0,0 +1,31 @@ +import type { Message } from '../../types/messages.js' +import type { Model } from '../../models/model.js' +import { logger } from '../../logging/logger.js' + +/** + * Estimate input tokens for a conversation. + * + * Uses an incremental strategy: if the last assistant message has usage metadata, + * uses (inputTokens + outputTokens) as a baseline and only counts new messages + * added after it. Falls back to full model estimation otherwise. + * + * @param messages - The conversation messages + * @param model - The model to use for token counting + * @returns Estimated token count, or undefined if estimation fails + */ +export async function estimateInputTokens(messages: Message[], model: Model): Promise { + try { + for (let i = messages.length - 1; i >= 0; i--) { + const usage = messages[i]!.metadata?.usage + if (messages[i]!.role === 'assistant' && usage) { + const baseline = usage.inputTokens + usage.outputTokens + const newMessages = messages.slice(i + 1) + return newMessages.length === 0 ? baseline : baseline + (await model.countTokens(newMessages)) + } + } + return await model.countTokens(messages) + } catch (e) { + logger.debug(`error=<${e}> | token estimation failed`) + return undefined + } +} diff --git a/strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts b/strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts new file mode 100644 index 000000000..90d1a33f2 --- /dev/null +++ b/strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts @@ -0,0 +1,7 @@ +/** + * Re-exports from the canonical ContextOffloader location. + * The ContextManager composes ContextOffloader as its tool-result-cache sub-plugin. + */ +export { ContextOffloader, type ContextOffloaderConfig } from '../../vended-plugins/context-offloader/plugin.js' +export type { Storage } from '../../vended-plugins/context-offloader/storage.js' +export { InMemoryStorage, FileStorage, S3Storage } from '../../vended-plugins/context-offloader/storage.js' diff --git a/strands-ts/src/index.ts b/strands-ts/src/index.ts index 46b1022c0..b532fb549 100644 --- a/strands-ts/src/index.ts +++ b/strands-ts/src/index.ts @@ -19,6 +19,22 @@ export type { ToolCaller, ToolCallerProxy, ToolHandle, DirectToolCallOptions } f export type { InvocationState, InvokeArgs, InvokeOptions, LocalAgent } from './types/agent.js' export type { LifecycleObserver } from './types/lifecycle-observer.js' +// Context Manager +export type { + ContextManagerParam, + ContextManagerConfig, + ContextStrategyValue, + OffloaderConfig, + CompressionConfig, +} from './context-manager/context-manager.js' +export type { + CompressionOptions, + TruncateCompressionConfig, + SummarizeCompressionConfig, + CompressionMethod, +} from './context-manager/compression/context-compression.js' +export { pinMessageTool } from './context-manager/compression/protection.js' + // Snapshot types export { SNAPSHOT_SCHEMA_VERSION } from './types/snapshot.js' export type { Scope, Snapshot } from './types/snapshot.js'