strands-agents · lizradway · Jun 1, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/strands-ts/src/agent/agent.ts b/strands-ts/src/agent/agent.ts
@@ -44,6 +44,8 @@ import { PluginRegistry } from '../plugins/registry.js'
 import { SlidingWindowConversationManager } from '../conversation-manager/sliding-window-conversation-manager.js'
 import { NullConversationManager } from '../conversation-manager/null-conversation-manager.js'
 import { ConversationManager } from '../conversation-manager/conversation-manager.js'
+import type { ContextManagerParam } from '../context-manager/context-manager.js'
+import { resolveContextManager } from '../context-manager/context-manager.js'
 import { HookRegistryImplementation } from '../hooks/registry.js'
 import type { HookableEventConstructor, HookCallback, HookCallbackOptions, HookCleanup } from '../hooks/types.js'
 import {
@@ -167,9 +169,24 @@ export type AgentConfig = {
    * Defaults to true.
    */
   printer?: boolean
+  /**
+   * Pre-composed context management strategy.
+   *
+   * - `"auto"`: enables tool result caching and proactive compression with defaults.
+   * - Object: fine-grained control over strategy, storage, caching, and compression settings.
+   * - `undefined` (default): no context management facade; use `conversationManager`
+   *   and `plugins` directly.
+   *
+   * When set, takes priority over `conversationManager` — `NullConversationManager` is used.
+   */
+  contextManager?: ContextManagerParam
   /**
    * Conversation manager for handling message history and context overflow.
    * Defaults to SlidingWindowConversationManager with windowSize of 40.
+   *
+   * @remarks Pending deprecation — use `contextManager` instead. The `contextManager` parameter
+   * composes compression, tool result caching, and token estimation into a single
+   * configuration surface. This field will be deprecated in a future version.
    */
   conversationManager?: ConversationManager
   /**
@@ -331,14 +348,22 @@ export class Agent implements LocalAgent, InvokableAgent {
       this.model = config?.model ?? new BedrockModel()
     }
 
-    // Validate and assign conversation manager
+    let contextManagerPlugin: Plugin | undefined
+    if (config?.contextManager) {
+      contextManagerPlugin = resolveContextManager(config.contextManager, config.plugins)
+    }
+
+    // Validate and assign conversation manager.
+    // When contextManager is set, ContextCompression owns compression — use NullConversationManager.
     if (this.model.stateful) {
-      if (config?.conversationManager) {
+      if (config?.conversationManager || config?.contextManager) {
         throw new Error(
-          'Cannot use a conversationManager with a stateful model. The model manages conversation state server-side.'
+          'Cannot use a conversationManager or contextManager with a stateful model. The model manages conversation state server-side.'
         )
       }
       this._conversationManager = new NullConversationManager()
+    } else if (contextManagerPlugin) {
+      this._conversationManager = new NullConversationManager()
     } else {
       this._conversationManager =
         config?.conversationManager ?? new SlidingWindowConversationManager({ windowSize: 40 })
@@ -372,9 +397,12 @@ export class Agent implements LocalAgent, InvokableAgent {
     // - Retry-strategy ordering is not load-bearing for correctness: `DefaultModelRetryStrategy`
     //   guards on `event.retry`, so a user hook that already set it short-circuits
     //   the strategy regardless of registration order.
+    // - contextManager plugin goes before user plugins so the offloader's AfterToolCallEvent
+    //   hook fires first, ensuring large results are cached before user hooks see the event.
     this._pluginRegistry = new PluginRegistry([
       this._conversationManager,
       ...retryStrategies,
+      ...(contextManagerPlugin ? [contextManagerPlugin] : []),
       ...(config?.plugins ?? []),
       ...(config?.sessionManager ? [config.sessionManager] : []),
       new ModelPlugin(this.model),
@@ -1397,7 +1425,8 @@ export class Agent implements LocalAgent, InvokableAgent {
 
     let attemptCount = 1
     while (true) {
-      // Estimate input tokens for the upcoming model call (non-fatal if estimation fails)
+      // Pending deprecation: token estimation will move fully to ContextManager.
+      // This remains for backward compat with standalone ConversationManager.proactiveCompression.
       let projectedInputTokens: number | undefined
       try {
         projectedInputTokens = await this._estimateInputTokens(streamOptions)

diff --git a/strands-ts/src/context-manager/compression/__tests__/protection.test.ts b/strands-ts/src/context-manager/compression/__tests__/protection.test.ts
@@ -0,0 +1,83 @@
+import { describe, it, expect } from 'vitest'
+import { isProtected, pinMessage } from '../protection.js'
+import { Message, TextBlock, ToolUseBlock, ToolResultBlock } from '../../../types/messages.js'
+
+/** Builds a user-role message whose only content is one text block holding `text`. */
+function userMsg(text: string): Message {
+  const content = [new TextBlock(text)]
+  return new Message({ role: 'user', content })
+}
+
+/** Builds an assistant-role message whose only content is one text block holding `text`. */
+function assistantMsg(text: string): Message {
+  const content = [new TextBlock(text)]
+  return new Message({ role: 'assistant', content })
+}
+
+/** Builds an assistant-role message containing a single tool-use block (tool `test`, empty input) with the given id. */
+function toolUseMsg(toolUseId: string): Message {
+  const toolUse = new ToolUseBlock({ toolUseId, name: 'test', input: {} })
+  return new Message({ role: 'assistant', content: [toolUse] })
+}
+
+/** Builds a user-role message containing a single successful tool-result block for the given tool-use id. */
+function toolResultMsg(toolUseId: string): Message {
+  const toolResult = new ToolResultBlock({ toolUseId, content: [new TextBlock('result')], status: 'success' })
+  return new Message({ role: 'user', content: [toolResult] })
+}
+
+// Exercises isProtected(messages, index, range?) for three sources of protection:
+// a positive range (first N messages), a negative range (last N messages), and pinned
+// messages — plus the rule that the other half of a toolUse/toolResult pair is protected too.
+describe('isProtected', () => {
+  describe('no range, no pin', () => {
+    it('returns false for unprotected message', () => {
+      const messages = [userMsg('a'), assistantMsg('b')]
+      expect(isProtected(messages, 0)).toBe(false)
+      expect(isProtected(messages, 1)).toBe(false)
+    })
+  })
+
+  describe('positive range (protect first N)', () => {
+    it('protects messages within range', () => {
+      const messages = [userMsg('a'), assistantMsg('b'), userMsg('c')]
+      expect(isProtected(messages, 0, 2)).toBe(true)
+      expect(isProtected(messages, 1, 2)).toBe(true)
+      expect(isProtected(messages, 2, 2)).toBe(false)
+    })
+
+    it('protects toolResult outside range if its toolUse is the last message inside range', () => {
+      const messages = [userMsg('task'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('next')]
+      // range=2 protects [0] and [1]. [2] (toolResult) is outside the range, but its partner
+      // toolUse at [1] is inside it, so [2] must be protected as well.
+      expect(isProtected(messages, 2, 2)).toBe(true)
+    })
+
+    it('protects toolResult outside range if its toolUse is inside range', () => {
+      const messages = [toolUseMsg('t1'), toolResultMsg('t1'), userMsg('a'), assistantMsg('b')]
+      // range=1 protects [0] (toolUse). [1] (toolResult) is outside but partner is protected.
+      expect(isProtected(messages, 1, 1)).toBe(true)
+    })
+  })
+
+  describe('negative range (protect last N)', () => {
+    it('protects messages within range', () => {
+      const messages = [userMsg('a'), assistantMsg('b'), userMsg('c'), assistantMsg('d'), userMsg('e')]
+      expect(isProtected(messages, 0, -2)).toBe(false)
+      expect(isProtected(messages, 2, -2)).toBe(false)
+      expect(isProtected(messages, 3, -2)).toBe(true)
+      expect(isProtected(messages, 4, -2)).toBe(true)
+    })
+
+    it('protects toolUse outside range if its toolResult is inside range', () => {
+      const messages = [userMsg('a'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('b'), assistantMsg('c')]
+      // range=-3: protects [2], [3], [4]. toolUse at [1] is outside, but [2] (its toolResult) is in range.
+      expect(isProtected(messages, 1, -3)).toBe(true)
+    })
+  })
+
+  describe('pinned messages', () => {
+    it('protects pinned message regardless of range', () => {
+      const messages = [userMsg('a'), pinMessage(assistantMsg('pinned')), userMsg('c')]
+      expect(isProtected(messages, 1)).toBe(true)
+      expect(isProtected(messages, 1, 0)).toBe(true)
+    })
+
+    it('protects tool-pair partner of pinned message', () => {
+      const messages = [pinMessage(toolUseMsg('t1')), toolResultMsg('t1'), userMsg('a')]
+      expect(isProtected(messages, 1)).toBe(true) // toolResult partner of pinned toolUse
+    })
+  })
+})
diff --git a/strands-ts/src/context-manager/compression/context-compression.ts b/strands-ts/src/context-manager/compression/context-compression.ts
@@ -0,0 +1,185 @@
+import type { Plugin } from '../../plugins/plugin.js'
+import type { LocalAgent } from '../../types/agent.js'
+import type { Tool } from '../../tools/tool.js'
+import type { Message } from '../../types/messages.js'
+import type { Model } from '../../models/model.js'
+import { AfterInvocationEvent, AfterModelCallEvent, BeforeModelCallEvent } from '../../hooks/events.js'
+import { ContextWindowOverflowError } from '../../errors.js'
+import { truncate } from './strategies/truncate.js'
+import { summarize, type SummarizeOptions } from './strategies/summarize.js'
+import { estimateInputTokens } from '../token-estimation/token-estimation.js'
+import { logger } from '../../logging/logger.js'
+import { warnOnce } from '../../logging/warn-once.js'
+
+/** Fallback context window size, in tokens, used when the model config does not set `contextWindowLimit`. */
+const DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
+/** Projected-tokens / context-window ratio that triggers proactive compression when `proactive` is `true` or omitted. */
+const DEFAULT_PROACTIVE_THRESHOLD = 0.7
+/** Message window size used when `windowSize` is not configured. */
+const DEFAULT_WINDOW_SIZE = 40
+
+/** Name of the reduction strategy used to shrink the message history. */
+export type CompressionMethod = 'truncate' | 'summarize'
+
+/** Options accepted by every compression method (intersected into each method-specific config). */
+type SharedCompressionOptions = {
+  /**
+   * Proactive compression before the model call.
+   * - `true`: compress when 70% of the context window is used (default threshold).
+   * - `{ threshold: number }`: compress at the specified ratio (0, 1].
+   * - `false`: disable proactive compression; only reactive overflow recovery is used.
+   * - Omitted: defaults to `true`.
+   */
+  proactive?: boolean | { threshold: number }
+  /**
+   * Protect messages from eviction during reduction.
+   * Positive values protect the first N messages; negative values protect the last N.
+   *
+   * For agent-controlled pinning, use the `pinMessageTool` (agentic mode).
+   */
+  protectedMessageRange?: number
+}
+
+/**
+ * Configuration for the `truncate` method.
+ * `method` is optional here: a config that omits it is handled as `truncate`.
+ */
+export type TruncateCompressionConfig = SharedCompressionOptions & {
+  method?: 'truncate'
+  /** Maximum messages to keep after trimming. Defaults to 40. */
+  windowSize?: number
+}
+
+/**
+ * Configuration for the `summarize` method.
+ * `method: 'summarize'` is required and serves as the discriminant of `CompressionOptions`.
+ */
+export type SummarizeCompressionConfig = SharedCompressionOptions & {
+  method: 'summarize'
+  /** Ratio of messages to summarize (0.1–0.8). Defaults to 0.3. */
+  summaryRatio?: number
+  /** Minimum recent messages to preserve during summarization. Defaults to 10. */
+  preserveRecentMessages?: number
+}
+
+/**
+ * Compression configuration (discriminated union on `method`).
+ *
+ * A config without `method` is a truncate config; `method: 'summarize'` selects the
+ * summarize-specific options.
+ *
+ * @example
+ * ```typescript
+ * contextManager: { compression: true }                                         // defaults (truncate)
+ * contextManager: { compression: 'summarize' }                                  // strategy shorthand
+ * contextManager: { compression: { method: 'truncate', windowSize: 30 } }     // full config
+ * contextManager: { compression: { method: 'summarize', summaryRatio: 0.5 } } // full config
+ * ```
+ */
+export type CompressionOptions = TruncateCompressionConfig | SummarizeCompressionConfig
+
+/**
+ * Plugin that handles context compression — both proactive (before model call when
+ * threshold is exceeded) and reactive (after model call on overflow error).
+ *
+ * Delegates reduction to strategy functions (truncate or summarize).
+ */
+export class ContextCompression implements Plugin {
+  /** Plugin identifier. */
+  readonly name = 'strands:context-compression'
+
+  /** Projected-tokens / context-window ratio that triggers proactive compression; `undefined` disables it. */
+  private readonly _proactiveThreshold: number | undefined
+  /** Reduction strategy selected by the config (`'truncate'` when not specified). */
+  private readonly _method: CompressionMethod
+  /** Window size handed to `truncate`; only configurable through a truncate config. */
+  private readonly _windowSize: number
+  /** Forwarded to the strategies as `protectedMessageRange` when set. */
+  private readonly _protectedMessageRange: number | undefined
+  /** Summarize-specific options; `undefined` unless the method is `'summarize'`. */
+  private readonly _summarizeOptions: SummarizeOptions | undefined
+
+  /**
+   * @param config - Compression settings. When omitted, the truncate method is used with
+   *   the default window size and proactive compression at the default threshold.
+   * @throws Error when `proactive.threshold` is not a number in the range (0, 1].
+   */
+  constructor(config?: CompressionOptions) {
+    const proactive = config?.proactive ?? true
+    if (proactive === false) {
+      this._proactiveThreshold = undefined
+    } else if (proactive === true) {
+      this._proactiveThreshold = DEFAULT_PROACTIVE_THRESHOLD
+    } else {
+      // Expressed as a negated in-range test so that NaN (or a missing threshold from an
+      // untyped caller) is rejected. `NaN <= 0 || NaN > 1` is false, which would accept the
+      // value and leave proactive compression silently inert because `ratio >= NaN` never holds.
+      const { threshold } = proactive
+      if (!(threshold > 0 && threshold <= 1)) {
+        throw new Error(
+          `proactive compression threshold must be between 0 (exclusive) and 1 (inclusive), got ${threshold}`
+        )
+      }
+      this._proactiveThreshold = threshold
+    }
+
+    this._method = config?.method ?? 'truncate'
+    this._protectedMessageRange = config?.protectedMessageRange
+
+    if (config?.method === 'summarize') {
+      this._windowSize = DEFAULT_WINDOW_SIZE
+      // Only copy options the caller actually provided so the strategy's own defaults apply otherwise.
+      this._summarizeOptions = {
+        ...(config.summaryRatio !== undefined && { summaryRatio: config.summaryRatio }),
+        ...(config.preserveRecentMessages !== undefined && { preserveRecentMessages: config.preserveRecentMessages }),
+      }
+    } else {
+      this._windowSize = (config as TruncateCompressionConfig | undefined)?.windowSize ?? DEFAULT_WINDOW_SIZE
+      this._summarizeOptions = undefined
+    }
+  }
+
+  /** This plugin contributes no tools. */
+  getTools(): Tool[] {
+    return []
+  }
+
+  /**
+   * Registers the compression hooks on the agent:
+   * - `AfterModelCallEvent`: on a `ContextWindowOverflowError`, reduce the history and request a retry.
+   * - `BeforeModelCallEvent`: reduce the history when projected input tokens reach the proactive threshold.
+   * - `AfterInvocationEvent` (truncate method only): trim the history back to the window size.
+   *
+   * @param agent - Agent to attach the hooks to.
+   */
+  initAgent(agent: LocalAgent): void {
+    // Reactive overflow recovery
+    agent.addHook(AfterModelCallEvent, async (event) => {
+      if (event.error instanceof ContextWindowOverflowError) {
+        // Retry only when the strategy reports that it reduced something.
+        if (await this._reduce(event.agent.messages, event.model)) {
+          event.retry = true
+        }
+      }
+    })
+
+    // Proactive compression
+    agent.addHook(BeforeModelCallEvent, async (event) => {
+      if (this._proactiveThreshold === undefined) {
+        return
+      }
+
+      let contextWindowLimit = event.model.getConfig().contextWindowLimit
+      if (contextWindowLimit === undefined) {
+        contextWindowLimit = DEFAULT_CONTEXT_WINDOW_LIMIT
+        warnOnce(
+          logger,
+          `context_compression | contextWindowLimit is not set on the model, using default of ${DEFAULT_CONTEXT_WINDOW_LIMIT} | set contextWindowLimit in your model config for accurate proactive compression`
+        )
+      }
+
+      // Prefer the estimate already attached to the event; compute one only when it is absent.
+      const projectedInputTokens =
+        event.projectedInputTokens ?? (await estimateInputTokens(event.agent.messages, event.model))
+
+      if (projectedInputTokens === undefined) {
+        return
+      }
+
+      const ratio = projectedInputTokens / contextWindowLimit
+      if (ratio >= this._proactiveThreshold) {
+        logger.debug(
+          `projected_tokens=<${projectedInputTokens}>, limit=<${contextWindowLimit}>, ratio=<${ratio.toFixed(2)}>, threshold=<${this._proactiveThreshold}> | compression threshold exceeded, reducing context`
+        )
+        // Proactive compression is best-effort: a failure is logged and the model call proceeds.
+        try {
+          await this._reduce(event.agent.messages, event.model)
+        } catch (e) {
+          logger.warn(`context_compression | proactive compression failed, continuing | error=<${e}>`)
+        }
+      }
+    })
+
+    // Sliding window enforcement after each invocation (truncate method only)
+    if (this._method === 'truncate') {
+      agent.addHook(AfterInvocationEvent, (event) => {
+        if (event.agent.messages.length > this._windowSize) {
+          truncate(event.agent.messages, this._windowSize, {
+            ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }),
+          })
+        }
+      })
+    }
+  }
+
+  /**
+   * Runs the configured reduction strategy on `messages`.
+   *
+   * @param messages - The agent's message array, passed straight to the strategy.
+   * @param model - Model passed to `summarize`; unused by `truncate`.
+   * @returns The strategy's result; the overflow hook requests a retry only when it is truthy.
+   */
+  private async _reduce(messages: Message[], model: Model): Promise<boolean> {
+    switch (this._method) {
+      case 'summarize':
+        return summarize(messages, model, {
+          ...this._summarizeOptions,
+          ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }),
+        })
+      case 'truncate':
+      default:
+        return truncate(messages, this._windowSize, {
+          ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }),
+        })
+    }
+  }
+}