Skip to content
This repository was archived by the owner on Jun 3, 2026. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 33 additions & 4 deletions strands-ts/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ import { PluginRegistry } from '../plugins/registry.js'
import { SlidingWindowConversationManager } from '../conversation-manager/sliding-window-conversation-manager.js'
import { NullConversationManager } from '../conversation-manager/null-conversation-manager.js'
import { ConversationManager } from '../conversation-manager/conversation-manager.js'
import type { ContextManagerParam } from '../context-manager/context-manager.js'
import { resolveContextManager } from '../context-manager/context-manager.js'
import { HookRegistryImplementation } from '../hooks/registry.js'
import type { HookableEventConstructor, HookCallback, HookCallbackOptions, HookCleanup } from '../hooks/types.js'
import {
Expand Down Expand Up @@ -167,9 +169,24 @@ export type AgentConfig = {
* Defaults to true.
*/
printer?: boolean
/**
* Pre-composed context management strategy.
*
* - `"auto"`: enables tool result caching and proactive compression with defaults.
* - Object: fine-grained control over strategy, storage, caching, and compression settings.
* - `undefined` (default): no context management facade; use `conversationManager`
* and `plugins` directly.
*
* When set, takes priority over `conversationManager` — `NullConversationManager` is used.
*/
contextManager?: ContextManagerParam
/**
* Conversation manager for handling message history and context overflow.
* Defaults to SlidingWindowConversationManager with windowSize of 40.
*
* @remarks Pending deprecation — use `contextManager` instead. The `contextManager` parameter
* composes compression, tool result caching, and token estimation into a single
* configuration surface. This field will be deprecated in a future version.
*/
conversationManager?: ConversationManager
/**
Expand Down Expand Up @@ -331,14 +348,22 @@ export class Agent implements LocalAgent, InvokableAgent {
this.model = config?.model ?? new BedrockModel()
}

// Validate and assign conversation manager
let contextManagerPlugin: Plugin | undefined
if (config?.contextManager) {
contextManagerPlugin = resolveContextManager(config.contextManager, config.plugins)
}

// Validate and assign conversation manager.
// When contextManager is set, ContextCompression owns compression — use NullConversationManager.
if (this.model.stateful) {
if (config?.conversationManager) {
if (config?.conversationManager || config?.contextManager) {
throw new Error(
'Cannot use a conversationManager with a stateful model. The model manages conversation state server-side.'
'Cannot use a conversationManager or contextManager with a stateful model. The model manages conversation state server-side.'
)
}
this._conversationManager = new NullConversationManager()
} else if (contextManagerPlugin) {
Comment thread
lizradway marked this conversation as resolved.
this._conversationManager = new NullConversationManager()
} else {
this._conversationManager =
config?.conversationManager ?? new SlidingWindowConversationManager({ windowSize: 40 })
Expand Down Expand Up @@ -372,9 +397,12 @@ export class Agent implements LocalAgent, InvokableAgent {
// - Retry-strategy ordering is not load-bearing for correctness: `DefaultModelRetryStrategy`
// guards on `event.retry`, so a user hook that already set it short-circuits
// the strategy regardless of registration order.
// - contextManager plugin goes before user plugins so the offloader's AfterToolCallEvent
// hook fires first, ensuring large results are cached before user hooks see the event.
this._pluginRegistry = new PluginRegistry([
this._conversationManager,
...retryStrategies,
...(contextManagerPlugin ? [contextManagerPlugin] : []),
...(config?.plugins ?? []),
...(config?.sessionManager ? [config.sessionManager] : []),
new ModelPlugin(this.model),
Expand Down Expand Up @@ -1397,7 +1425,8 @@ export class Agent implements LocalAgent, InvokableAgent {

let attemptCount = 1
while (true) {
// Estimate input tokens for the upcoming model call (non-fatal if estimation fails)
// Pending deprecation: token estimation will move fully to ContextManager.
// This remains for backward compat with standalone ConversationManager.proactiveCompression.
let projectedInputTokens: number | undefined
try {
projectedInputTokens = await this._estimateInputTokens(streamOptions)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import { describe, it, expect } from 'vitest'
import { isProtected, pinMessage } from '../protection.js'
import { Message, TextBlock, ToolUseBlock, ToolResultBlock } from '../../../types/messages.js'

/** Builds a user-role message whose only content is a text block with `text`. */
function userMsg(text: string): Message {
  const body = new TextBlock(text)
  return new Message({ role: 'user', content: [body] })
}

/** Builds an assistant-role message whose only content is a text block with `text`. */
function assistantMsg(text: string): Message {
  const body = new TextBlock(text)
  return new Message({ role: 'assistant', content: [body] })
}

/** Builds an assistant-role message carrying a single tool-use block for `toolUseId`. */
function toolUseMsg(toolUseId: string): Message {
  const call = new ToolUseBlock({ toolUseId, name: 'test', input: {} })
  return new Message({ role: 'assistant', content: [call] })
}

/** Builds a user-role message carrying a single successful tool-result block for `toolUseId`. */
function toolResultMsg(toolUseId: string): Message {
  const outcome = new ToolResultBlock({ toolUseId, content: [new TextBlock('result')], status: 'success' })
  return new Message({ role: 'user', content: [outcome] })
}

// Tests for `isProtected(messages, index, range?)`. The cases below pin three sources of
// protection: a positive range (first N messages), a negative range (last N messages), and
// pinned messages — plus the rule that a toolUse/toolResult pair is protected together.
describe('isProtected', () => {
  describe('no range, no pin', () => {
    it('returns false for unprotected message', () => {
      const messages = [userMsg('a'), assistantMsg('b')]
      expect(isProtected(messages, 0)).toBe(false)
      expect(isProtected(messages, 1)).toBe(false)
    })
  })

  describe('positive range (protect first N)', () => {
    it('protects messages within range', () => {
      const messages = [userMsg('a'), assistantMsg('b'), userMsg('c')]
      expect(isProtected(messages, 0, 2)).toBe(true)
      expect(isProtected(messages, 1, 2)).toBe(true)
      expect(isProtected(messages, 2, 2)).toBe(false)
    })

    it('protects toolResult just past the range boundary when its toolUse is the last message in range', () => {
      const messages = [userMsg('task'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('next')]
      // range=2 covers [0] and [1]. The toolResult at [2] is outside the range, but its
      // toolUse partner at [1] is inside, so [2] must be protected as well.
      expect(isProtected(messages, 2, 2)).toBe(true)
    })

    it('protects toolResult outside range if its toolUse is inside range', () => {
      const messages = [toolUseMsg('t1'), toolResultMsg('t1'), userMsg('a'), assistantMsg('b')]
      // range=1 protects [0] (toolUse). [1] (toolResult) is outside but partner is protected.
      expect(isProtected(messages, 1, 1)).toBe(true)
    })
  })

  describe('negative range (protect last N)', () => {
    it('protects messages within range', () => {
      const messages = [userMsg('a'), assistantMsg('b'), userMsg('c'), assistantMsg('d'), userMsg('e')]
      expect(isProtected(messages, 0, -2)).toBe(false)
      expect(isProtected(messages, 2, -2)).toBe(false)
      expect(isProtected(messages, 3, -2)).toBe(true)
      expect(isProtected(messages, 4, -2)).toBe(true)
    })

    it('protects toolUse outside range if its toolResult is inside range', () => {
      const messages = [userMsg('a'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('b'), assistantMsg('c')]
      // range=-3: protects [2], [3], [4]. toolUse at [1] is outside, but [2] (its toolResult) is in range.
      expect(isProtected(messages, 1, -3)).toBe(true)
    })
  })

  describe('pinned messages', () => {
    it('protects pinned message regardless of range', () => {
      const messages = [userMsg('a'), pinMessage(assistantMsg('pinned')), userMsg('c')]
      expect(isProtected(messages, 1)).toBe(true)
      expect(isProtected(messages, 1, 0)).toBe(true)
    })

    it('protects tool-pair partner of pinned message', () => {
      const messages = [pinMessage(toolUseMsg('t1')), toolResultMsg('t1'), userMsg('a')]
      expect(isProtected(messages, 1)).toBe(true) // toolResult partner of pinned toolUse
    })
  })
})
185 changes: 185 additions & 0 deletions strands-ts/src/context-manager/compression/context-compression.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import type { Plugin } from '../../plugins/plugin.js'
import type { LocalAgent } from '../../types/agent.js'
import type { Tool } from '../../tools/tool.js'
import type { Message } from '../../types/messages.js'
import type { Model } from '../../models/model.js'
import { AfterInvocationEvent, AfterModelCallEvent, BeforeModelCallEvent } from '../../hooks/events.js'
import { ContextWindowOverflowError } from '../../errors.js'
import { truncate } from './strategies/truncate.js'
import { summarize, type SummarizeOptions } from './strategies/summarize.js'
import { estimateInputTokens } from '../token-estimation/token-estimation.js'
import { logger } from '../../logging/logger.js'
import { warnOnce } from '../../logging/warn-once.js'

/** Context window size assumed for proactive compression when the model config leaves `contextWindowLimit` unset. */
const DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
/** Ratio of projected input tokens to the context window limit that triggers proactive compression when `proactive` is `true` or omitted. */
const DEFAULT_PROACTIVE_THRESHOLD = 0.7
/** Window size passed to the truncate strategy when `windowSize` is not configured. */
const DEFAULT_WINDOW_SIZE = 40

/** Name of the strategy used to reduce the message history. */
export type CompressionMethod = 'truncate' | 'summarize'

/** Options common to every compression method; intersected into each method-specific config below. */
type SharedCompressionOptions = {
/**
* Proactive compression before the model call.
* - `true`: compress when 70% of the context window is used (default threshold).
* - `{ threshold: number }`: compress at the specified ratio (0, 1].
* - `false`: disable proactive compression; only reactive overflow recovery is used.
* - Omitted: defaults to `true`.
*/
proactive?: boolean | { threshold: number }
/**
* Protect messages from eviction during reduction.
* Positive values protect the first N messages; negative values protect the last N.
*
* For agent-controlled pinning, use the `pinMessageTool` (agentic mode).
*/
protectedMessageRange?: number
}

/** Configuration for the truncate method. `method` may be omitted, so this is the variant used by default. */
export type TruncateCompressionConfig = SharedCompressionOptions & {
method?: 'truncate'
/** Maximum messages to keep after trimming. Defaults to 40. */
windowSize?: number
}

/** Configuration for the summarize method; `method: 'summarize'` is required to select this variant. */
export type SummarizeCompressionConfig = SharedCompressionOptions & {
method: 'summarize'
/** Ratio of messages to summarize (0.1–0.8). Defaults to 0.3. */
summaryRatio?: number
/** Minimum recent messages to preserve during summarization. Defaults to 10. */
preserveRecentMessages?: number
}

/**
* Compression configuration (discriminated union on `method`).
*
* @example
* ```typescript
* contextManager: { compression: true } // defaults (truncate)
* contextManager: { compression: 'summarize' } // strategy shorthand
* contextManager: { compression: { method: 'truncate', windowSize: 30 } } // full config
* contextManager: { compression: { method: 'summarize', summaryRatio: 0.5 } } // full config
* ```
*/
export type CompressionOptions = TruncateCompressionConfig | SummarizeCompressionConfig

/**
 * Plugin that handles context compression — both proactive (before a model call, once the
 * projected input size reaches the configured share of the context window) and reactive
 * (after a model call that failed with a `ContextWindowOverflowError`).
 *
 * Reduction is delegated to the strategy functions `truncate` and `summarize`. With the
 * truncate method the plugin also calls `truncate` after every invocation whose message
 * history is longer than `windowSize`.
 */
export class ContextCompression implements Plugin {
  readonly name = 'strands:context-compression'

  /** Ratio at which proactive compression triggers; `undefined` means proactive compression is off. */
  private readonly _proactiveThreshold: number | undefined
  /** Strategy selected by `config.method` (defaults to `'truncate'`). */
  private readonly _method: CompressionMethod
  /** Window size handed to `truncate`. */
  private readonly _windowSize: number
  /** Optional protected range forwarded to whichever strategy runs. */
  private readonly _protectedMessageRange: number | undefined
  /** Summarize-specific options; only set when the summarize method is selected. */
  private readonly _summarizeOptions: SummarizeOptions | undefined

  /**
   * @param config - Compression settings. When omitted, the truncate method is used with
   *   the default window size and the default proactive threshold.
   * @throws Error when `proactive.threshold` is not a number in the range (0, 1]. `NaN` is
   *   rejected as well.
   */
  constructor(config?: CompressionOptions) {
    const proactive = config?.proactive ?? true
    if (proactive === false) {
      this._proactiveThreshold = undefined
    } else if (proactive === true) {
      this._proactiveThreshold = DEFAULT_PROACTIVE_THRESHOLD
    } else {
      const { threshold } = proactive
      // Expressed as a negated in-range check so that NaN (for which every comparison is
      // false) is rejected. An accepted NaN would make `ratio >= threshold` permanently
      // false and silently turn proactive compression off.
      if (!(threshold > 0 && threshold <= 1)) {
        throw new Error(
          `proactive compression threshold must be between 0 (exclusive) and 1 (inclusive), got ${threshold}`
        )
      }
      this._proactiveThreshold = threshold
    }

    this._method = config?.method ?? 'truncate'
    this._protectedMessageRange = config?.protectedMessageRange

    if (config?.method === 'summarize') {
      this._windowSize = DEFAULT_WINDOW_SIZE
      // Only forward the options the caller actually set, so the strategy's own defaults apply.
      this._summarizeOptions = {
        ...(config.summaryRatio !== undefined && { summaryRatio: config.summaryRatio }),
        ...(config.preserveRecentMessages !== undefined && { preserveRecentMessages: config.preserveRecentMessages }),
      }
    } else {
      this._windowSize = (config as TruncateCompressionConfig | undefined)?.windowSize ?? DEFAULT_WINDOW_SIZE
      this._summarizeOptions = undefined
    }
  }

  /** This plugin contributes no tools. */
  getTools(): Tool[] {
    return []
  }

  /**
   * Registers the compression hooks on `agent`:
   * - `AfterModelCallEvent`: on a context-window overflow error, reduce and request a retry.
   * - `BeforeModelCallEvent`: reduce when the projected token ratio reaches the threshold.
   * - `AfterInvocationEvent` (truncate method only): enforce the message window.
   */
  initAgent(agent: LocalAgent): void {
    // Reactive overflow recovery: only ask for a retry when the reduction reports success.
    agent.addHook(AfterModelCallEvent, async (event) => {
      if (event.error instanceof ContextWindowOverflowError) {
        if (await this._reduce(event.agent.messages, event.model)) {
          event.retry = true
        }
      }
    })

    // Proactive compression
    agent.addHook(BeforeModelCallEvent, async (event) => {
      if (this._proactiveThreshold === undefined) {
        return
      }

      let contextWindowLimit = event.model.getConfig().contextWindowLimit
      if (contextWindowLimit === undefined) {
        contextWindowLimit = DEFAULT_CONTEXT_WINDOW_LIMIT
        warnOnce(
          logger,
          `context_compression | contextWindowLimit is not set on the model, using default of ${DEFAULT_CONTEXT_WINDOW_LIMIT} | set contextWindowLimit in your model config for accurate proactive compression`
        )
      }

      // Prefer the estimate already attached to the event; otherwise compute one here.
      const projectedInputTokens =
        event.projectedInputTokens ?? (await estimateInputTokens(event.agent.messages, event.model))

      // No estimate available: skip proactive compression for this call.
      if (projectedInputTokens === undefined) {
        return
      }

      const ratio = projectedInputTokens / contextWindowLimit
      if (ratio >= this._proactiveThreshold) {
        logger.debug(
          `projected_tokens=<${projectedInputTokens}>, limit=<${contextWindowLimit}>, ratio=<${ratio.toFixed(2)}>, threshold=<${this._proactiveThreshold}> | compression threshold exceeded, reducing context`
        )
        // Best effort: a failed proactive reduction is logged and the model call proceeds.
        try {
          await this._reduce(event.agent.messages, event.model)
        } catch (e) {
          logger.warn(`context_compression | proactive compression failed, continuing | error=<${e}>`)
        }
      }
    })

    // Sliding window enforcement after each invocation (truncate method only)
    if (this._method === 'truncate') {
      agent.addHook(AfterInvocationEvent, (event) => {
        if (event.agent.messages.length > this._windowSize) {
          truncate(event.agent.messages, this._windowSize, this._protectionOptions())
        }
      })
    }
  }

  /** Builds the protection options object, omitting `protectedMessageRange` when it was not configured. */
  private _protectionOptions(): { protectedMessageRange?: number } {
    return this._protectedMessageRange === undefined ? {} : { protectedMessageRange: this._protectedMessageRange }
  }

  /**
   * Runs the configured reduction strategy against `messages`.
   *
   * @param messages - The agent's message history, passed straight to the strategy.
   * @param model - Model forwarded to the summarize strategy.
   * @returns The strategy's boolean result; the reactive hook retries only when it is `true`.
   */
  private async _reduce(messages: Message[], model: Model): Promise<boolean> {
    switch (this._method) {
      case 'summarize':
        return summarize(messages, model, {
          ...this._summarizeOptions,
          ...this._protectionOptions(),
        })
      case 'truncate':
      default:
        return truncate(messages, this._windowSize, this._protectionOptions())
    }
  }
}
Loading
Loading