From 9f446057db6b22200aa0dfe0623b085b4c460da6 Mon Sep 17 00:00:00 2001
From: Liz <91279165+lizradway@users.noreply.github.com>
Date: Mon, 1 Jun 2026 10:59:56 -0400
Subject: [PATCH] feat(context): add context manager class

---
 package-lock.json                             |   1 -
 strands-ts/src/agent/agent.ts                 |  37 +++-
 .../compression/__tests__/protection.test.ts  |  83 ++++++++
 .../compression/context-compression.ts        | 185 +++++++++++++++++
 .../context-manager/compression/protection.ts | 138 +++++++++++++
 .../compression/strategies/summarize.ts       | 140 +++++++++++++
 .../compression/strategies/truncate.ts        |  85 ++++++++
 .../src/context-manager/context-manager.ts    | 190 ++++++++++++++++++
 .../token-estimation/token-estimation.ts      |  31 +++
 .../tool-result-cache/tool-result-cache.ts    |   7 +
 strands-ts/src/index.ts                       |  16 ++
 11 files changed, 908 insertions(+), 5 deletions(-)
 create mode 100644 strands-ts/src/context-manager/compression/__tests__/protection.test.ts
 create mode 100644 strands-ts/src/context-manager/compression/context-compression.ts
 create mode 100644 strands-ts/src/context-manager/compression/protection.ts
 create mode 100644 strands-ts/src/context-manager/compression/strategies/summarize.ts
 create mode 100644 strands-ts/src/context-manager/compression/strategies/truncate.ts
 create mode 100644 strands-ts/src/context-manager/context-manager.ts
 create mode 100644 strands-ts/src/context-manager/token-estimation/token-estimation.ts
 create mode 100644 strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts

diff --git a/package-lock.json b/package-lock.json
index ab84750f0..db73996d8 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8120,7 +8120,6 @@
       "os": [
         "darwin"
       ],
-      "peer": true,
       "engines": {
         "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
       }
diff --git a/strands-ts/src/agent/agent.ts b/strands-ts/src/agent/agent.ts
index 7fb3d6484..c3ccfecca 100644
--- a/strands-ts/src/agent/agent.ts
+++ b/strands-ts/src/agent/agent.ts
@@ -44,6 +44,8 @@ import { PluginRegistry } from '../plugins/registry.js'
 import { SlidingWindowConversationManager } from '../conversation-manager/sliding-window-conversation-manager.js'
 import { NullConversationManager } from '../conversation-manager/null-conversation-manager.js'
 import { ConversationManager } from '../conversation-manager/conversation-manager.js'
+import type { ContextManagerParam } from '../context-manager/context-manager.js'
+import { resolveContextManager } from '../context-manager/context-manager.js'
 import { HookRegistryImplementation } from '../hooks/registry.js'
 import type { HookableEventConstructor, HookCallback, HookCallbackOptions, HookCleanup } from '../hooks/types.js'
 import {
@@ -167,9 +169,24 @@ export type AgentConfig = {
    * Defaults to true.
    */
   printer?: boolean
+  /**
+   * Pre-composed context management strategy.
+   *
+   * - `"auto"`: enables tool result caching and proactive compression with defaults.
+   * - Object: fine-grained control over strategy, storage, caching, and compression settings.
+   * - `undefined` (default): no context management facade; use `conversationManager`
+   *   and `plugins` directly.
+   *
+   * When set, takes priority over `conversationManager` — `NullConversationManager` is used.
+   */
+  contextManager?: ContextManagerParam
   /**
    * Conversation manager for handling message history and context overflow.
    * Defaults to SlidingWindowConversationManager with windowSize of 40.
+   *
+   * @remarks Pending deprecation — use `contextManager` instead. The `contextManager` parameter
+   * composes compression, tool result caching, and token estimation into a single
+   * configuration surface. This field will be deprecated in a future version.
    */
   conversationManager?: ConversationManager
   /**
@@ -331,14 +348,22 @@ export class Agent implements LocalAgent, InvokableAgent {
       this.model = config?.model ?? new BedrockModel()
     }
 
-    // Validate and assign conversation manager
+    let contextManagerPlugin: Plugin | undefined
+    if (config?.contextManager) {
+      contextManagerPlugin = resolveContextManager(config.contextManager, config.plugins)
+    }
+
+    // Validate and assign conversation manager.
+    // When contextManager is set, ContextCompression owns compression — use NullConversationManager.
     if (this.model.stateful) {
-      if (config?.conversationManager) {
+      if (config?.conversationManager || config?.contextManager) {
         throw new Error(
-          'Cannot use a conversationManager with a stateful model. The model manages conversation state server-side.'
+          'Cannot use a conversationManager or contextManager with a stateful model. The model manages conversation state server-side.'
         )
       }
       this._conversationManager = new NullConversationManager()
+    } else if (contextManagerPlugin) {
+      this._conversationManager = new NullConversationManager()
     } else {
       this._conversationManager =
         config?.conversationManager ?? new SlidingWindowConversationManager({ windowSize: 40 })
@@ -372,9 +397,12 @@ export class Agent implements LocalAgent, InvokableAgent {
     // - Retry-strategy ordering is not load-bearing for correctness: `DefaultModelRetryStrategy`
     //   guards on `event.retry`, so a user hook that already set it short-circuits
     //   the strategy regardless of registration order.
+    // - contextManager plugin goes before user plugins so the offloader's AfterToolCallEvent
+    //   hook fires first, ensuring large results are cached before user hooks see the event.
     this._pluginRegistry = new PluginRegistry([
       this._conversationManager,
       ...retryStrategies,
+      ...(contextManagerPlugin ? [contextManagerPlugin] : []),
       ...(config?.plugins ?? []),
       ...(config?.sessionManager ? [config.sessionManager] : []),
       new ModelPlugin(this.model),
@@ -1397,7 +1425,8 @@ export class Agent implements LocalAgent, InvokableAgent {
 
     let attemptCount = 1
     while (true) {
-      // Estimate input tokens for the upcoming model call (non-fatal if estimation fails)
+      // Pending deprecation: token estimation will move fully to ContextManager.
+      // This remains for backward compat with standalone ConversationManager.proactiveCompression.
       let projectedInputTokens: number | undefined
       try {
         projectedInputTokens = await this._estimateInputTokens(streamOptions)
diff --git a/strands-ts/src/context-manager/compression/__tests__/protection.test.ts b/strands-ts/src/context-manager/compression/__tests__/protection.test.ts
new file mode 100644
index 000000000..311b5cce6
--- /dev/null
+++ b/strands-ts/src/context-manager/compression/__tests__/protection.test.ts
@@ -0,0 +1,83 @@
+import { describe, it, expect } from 'vitest'
+import { isProtected, pinMessage } from '../protection.js'
+import { Message, TextBlock, ToolUseBlock, ToolResultBlock } from '../../../types/messages.js'
+
+function userMsg(text: string): Message {
+  return new Message({ role: 'user', content: [new TextBlock(text)] })
+}
+
+function assistantMsg(text: string): Message {
+  return new Message({ role: 'assistant', content: [new TextBlock(text)] })
+}
+
+function toolUseMsg(toolUseId: string): Message {
+  return new Message({ role: 'assistant', content: [new ToolUseBlock({ toolUseId, name: 'test', input: {} })] })
+}
+
+function toolResultMsg(toolUseId: string): Message {
+  return new Message({
+    role: 'user',
+    content: [new ToolResultBlock({ toolUseId, content: [new TextBlock('result')], status: 'success' })],
+  })
+}
+
+describe('isProtected', () => {
+  describe('no range, no pin', () => {
+    it('returns false for unprotected message', () => {
+      const messages = [userMsg('a'), assistantMsg('b')]
+      expect(isProtected(messages, 0)).toBe(false)
+      expect(isProtected(messages, 1)).toBe(false)
+    })
+  })
+
+  describe('positive range (protect first N)', () => {
+    it('protects messages within range', () => {
+      const messages = [userMsg('a'), assistantMsg('b'), userMsg('c')]
+      expect(isProtected(messages, 0, 2)).toBe(true)
+      expect(isProtected(messages, 1, 2)).toBe(true)
+      expect(isProtected(messages, 2, 2)).toBe(false)
+    })
+
+    it('protects toolUse outside range if its toolResult is inside range', () => {
+      const messages = [userMsg('task'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('next')]
+      // range=2 protects [0] and [1]. [2] is toolResult — check if toolUse at [1] being in range protects [2]
+      // Actually [2] is outside range. But [1] (toolUse) is in range, so [2] (toolResult, partner) should be protected.
+      expect(isProtected(messages, 2, 2)).toBe(true)
+    })
+
+    it('protects toolResult outside range if its toolUse is inside range', () => {
+      const messages = [toolUseMsg('t1'), toolResultMsg('t1'), userMsg('a'), assistantMsg('b')]
+      // range=1 protects [0] (toolUse). [1] (toolResult) is outside but partner is protected.
+      expect(isProtected(messages, 1, 1)).toBe(true)
+    })
+  })
+
+  describe('negative range (protect last N)', () => {
+    it('protects messages within range', () => {
+      const messages = [userMsg('a'), assistantMsg('b'), userMsg('c'), assistantMsg('d'), userMsg('e')]
+      expect(isProtected(messages, 0, -2)).toBe(false)
+      expect(isProtected(messages, 2, -2)).toBe(false)
+      expect(isProtected(messages, 3, -2)).toBe(true)
+      expect(isProtected(messages, 4, -2)).toBe(true)
+    })
+
+    it('protects toolUse outside range if its toolResult is inside range', () => {
+      const messages = [userMsg('a'), toolUseMsg('t1'), toolResultMsg('t1'), userMsg('b'), assistantMsg('c')]
+      // range=-3: protects [2], [3], [4]. toolUse at [1] is outside, but [2] (its toolResult) is in range.
+      expect(isProtected(messages, 1, -3)).toBe(true)
+    })
+  })
+
+  describe('pinned messages', () => {
+    it('protects pinned message regardless of range', () => {
+      const messages = [userMsg('a'), pinMessage(assistantMsg('pinned')), userMsg('c')]
+      expect(isProtected(messages, 1)).toBe(true)
+      expect(isProtected(messages, 1, 0)).toBe(true)
+    })
+
+    it('protects tool-pair partner of pinned message', () => {
+      const messages = [pinMessage(toolUseMsg('t1')), toolResultMsg('t1'), userMsg('a')]
+      expect(isProtected(messages, 1)).toBe(true) // toolResult partner of pinned toolUse
+    })
+  })
+})
diff --git a/strands-ts/src/context-manager/compression/context-compression.ts b/strands-ts/src/context-manager/compression/context-compression.ts
new file mode 100644
index 000000000..4533d6206
--- /dev/null
+++ b/strands-ts/src/context-manager/compression/context-compression.ts
@@ -0,0 +1,185 @@
+import type { Plugin } from '../../plugins/plugin.js'
+import type { LocalAgent } from '../../types/agent.js'
+import type { Tool } from '../../tools/tool.js'
+import type { Message } from '../../types/messages.js'
+import type { Model } from '../../models/model.js'
+import { AfterInvocationEvent, AfterModelCallEvent, BeforeModelCallEvent } from '../../hooks/events.js'
+import { ContextWindowOverflowError } from '../../errors.js'
+import { truncate } from './strategies/truncate.js'
+import { summarize, type SummarizeOptions } from './strategies/summarize.js'
+import { estimateInputTokens } from '../token-estimation/token-estimation.js'
+import { logger } from '../../logging/logger.js'
+import { warnOnce } from '../../logging/warn-once.js'
+
+const DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
+const DEFAULT_PROACTIVE_THRESHOLD = 0.7
+const DEFAULT_WINDOW_SIZE = 40
+
+export type CompressionMethod = 'truncate' | 'summarize'
+
+type SharedCompressionOptions = {
+  /**
+   * Proactive compression before the model call.
+   * - `true`: compress when 70% of the context window is used (default threshold).
+   * - `{ threshold: number }`: compress at the specified ratio (0, 1].
+   * - `false`: disable proactive compression; only reactive overflow recovery is used.
+   * - Omitted: defaults to `true`.
+   */
+  proactive?: boolean | { threshold: number }
+  /**
+   * Protect messages from eviction during reduction.
+   * Positive values protect the first N messages; negative values protect the last N.
+   *
+   * For agent-controlled pinning, use the `pinMessageTool` (agentic mode).
+   */
+  protectedMessageRange?: number
+}
+
+export type TruncateCompressionConfig = SharedCompressionOptions & {
+  method?: 'truncate'
+  /** Maximum messages to keep after trimming. Defaults to 40. */
+  windowSize?: number
+}
+
+export type SummarizeCompressionConfig = SharedCompressionOptions & {
+  method: 'summarize'
+  /** Ratio of messages to summarize (0.1–0.8). Defaults to 0.3. */
+  summaryRatio?: number
+  /** Minimum recent messages to preserve during summarization. Defaults to 10. */
+  preserveRecentMessages?: number
+}
+
+/**
+ * Compression configuration (discriminated union on `method`).
+ *
+ * @example
+ * ```typescript
+ * contextManager: { compression: true }                                         // defaults (truncate)
+ * contextManager: { compression: 'summarize' }                                  // strategy shorthand
+ * contextManager: { compression: { method: 'truncate', windowSize: 30 } }     // full config
+ * contextManager: { compression: { method: 'summarize', summaryRatio: 0.5 } } // full config
+ * ```
+ */
+export type CompressionOptions = TruncateCompressionConfig | SummarizeCompressionConfig
+
+/**
+ * Plugin that handles context compression — both proactive (before model call when
+ * threshold is exceeded) and reactive (after model call on overflow error).
+ *
+ * Delegates reduction to strategy functions (truncate or summarize).
+ */
+export class ContextCompression implements Plugin {
+  readonly name = 'strands:context-compression'
+
+  private readonly _proactiveThreshold: number | undefined
+  private readonly _method: CompressionMethod
+  private readonly _windowSize: number
+  private readonly _protectedMessageRange: number | undefined
+  private readonly _summarizeOptions: SummarizeOptions | undefined
+
+  constructor(config?: CompressionOptions) {
+    const proactive = config?.proactive ?? true
+    if (proactive === false) {
+      this._proactiveThreshold = undefined
+    } else if (proactive === true) {
+      this._proactiveThreshold = DEFAULT_PROACTIVE_THRESHOLD
+    } else {
+      if (proactive.threshold <= 0 || proactive.threshold > 1) {
+        throw new Error(
+          `proactive compression threshold must be between 0 (exclusive) and 1 (inclusive), got ${proactive.threshold}`
+        )
+      }
+      this._proactiveThreshold = proactive.threshold
+    }
+
+    this._method = config?.method ?? 'truncate'
+    this._protectedMessageRange = config?.protectedMessageRange
+
+    if (config?.method === 'summarize') {
+      this._windowSize = DEFAULT_WINDOW_SIZE
+      this._summarizeOptions = {
+        ...(config.summaryRatio !== undefined && { summaryRatio: config.summaryRatio }),
+        ...(config.preserveRecentMessages !== undefined && { preserveRecentMessages: config.preserveRecentMessages }),
+      }
+    } else {
+      this._windowSize = (config as TruncateCompressionConfig | undefined)?.windowSize ?? DEFAULT_WINDOW_SIZE
+      this._summarizeOptions = undefined
+    }
+  }
+
+  getTools(): Tool[] {
+    return []
+  }
+
+  initAgent(agent: LocalAgent): void {
+    // Reactive overflow recovery
+    agent.addHook(AfterModelCallEvent, async (event) => {
+      if (event.error instanceof ContextWindowOverflowError) {
+        if (await this._reduce(event.agent.messages, event.model)) {
+          event.retry = true
+        }
+      }
+    })
+
+    // Proactive compression
+    agent.addHook(BeforeModelCallEvent, async (event) => {
+      if (this._proactiveThreshold === undefined) {
+        return
+      }
+
+      let contextWindowLimit = event.model.getConfig().contextWindowLimit
+      if (contextWindowLimit === undefined) {
+        contextWindowLimit = DEFAULT_CONTEXT_WINDOW_LIMIT
+        warnOnce(
+          logger,
+          `context_compression | contextWindowLimit is not set on the model, using default of ${DEFAULT_CONTEXT_WINDOW_LIMIT} | set contextWindowLimit in your model config for accurate proactive compression`
+        )
+      }
+
+      const projectedInputTokens =
+        event.projectedInputTokens ?? (await estimateInputTokens(event.agent.messages, event.model))
+
+      if (projectedInputTokens === undefined) {
+        return
+      }
+
+      const ratio = projectedInputTokens / contextWindowLimit
+      if (ratio >= this._proactiveThreshold) {
+        logger.debug(
+          `projected_tokens=<${projectedInputTokens}>, limit=<${contextWindowLimit}>, ratio=<${ratio.toFixed(2)}>, threshold=<${this._proactiveThreshold}> | compression threshold exceeded, reducing context`
+        )
+        try {
+          await this._reduce(event.agent.messages, event.model)
+        } catch (e) {
+          logger.warn(`context_compression | proactive compression failed, continuing | error=<${e}>`)
+        }
+      }
+    })
+
+    // Sliding window enforcement after each invocation (truncate method only)
+    if (this._method === 'truncate') {
+      agent.addHook(AfterInvocationEvent, (event) => {
+        if (event.agent.messages.length > this._windowSize) {
+          truncate(event.agent.messages, this._windowSize, {
+            ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }),
+          })
+        }
+      })
+    }
+  }
+
+  private async _reduce(messages: Message[], model: Model): Promise<boolean> {
+    switch (this._method) {
+      case 'summarize':
+        return summarize(messages, model, {
+          ...this._summarizeOptions,
+          ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }),
+        })
+      case 'truncate':
+      default:
+        return truncate(messages, this._windowSize, {
+          ...(this._protectedMessageRange !== undefined && { protectedMessageRange: this._protectedMessageRange }),
+        })
+    }
+  }
+}
diff --git a/strands-ts/src/context-manager/compression/protection.ts b/strands-ts/src/context-manager/compression/protection.ts
new file mode 100644
index 000000000..c736c49c3
--- /dev/null
+++ b/strands-ts/src/context-manager/compression/protection.ts
@@ -0,0 +1,138 @@
+import { z } from 'zod'
+import { Message, type ToolUseBlock, type ToolResultBlock } from '../../types/messages.js'
+import { tool } from '../../tools/tool-factory.js'
+
+// --- Pin utilities ---
+
+/**
+ * Check if a single message is pinned.
+ *
+ * @param message - The message to check
+ * @returns `true` if the message has `metadata.custom.pinned === true`
+ */
+export function isPinned(message: Message): boolean
+/**
+ * Check if a message is pinned, including tool-pair partner protection.
+ * Returns `true` if the message at `index` is pinned, or if it is the
+ * adjacent tool-pair partner (toolUse/toolResult) of a pinned message,
+ * matched by toolUseId.
+ *
+ * @param messages - The full messages array
+ * @param index - The index to check
+ * @returns `true` if the message or its tool-pair partner is pinned
+ */
+export function isPinned(messages: Message[], index: number): boolean
+export function isPinned(messageOrMessages: Message | Message[], index?: number): boolean {
+  if (index === undefined) {
+    return (messageOrMessages as Message).metadata?.custom?.pinned === true
+  }
+
+  const messages = messageOrMessages as Message[]
+  const msg = messages[index]!
+  if (msg.metadata?.custom?.pinned === true) return true
+
+  const toolResultBlocks = msg.content.filter((b): b is ToolResultBlock => b.type === 'toolResultBlock')
+  if (toolResultBlocks.length > 0 && index > 0) {
+    const prev = messages[index - 1]!
+    if (prev.metadata?.custom?.pinned === true) {
+      const resultIds = new Set(toolResultBlocks.map((b) => b.toolUseId))
+      if (prev.content.some((b) => b.type === 'toolUseBlock' && resultIds.has((b as ToolUseBlock).toolUseId))) {
+        return true
+      }
+    }
+  }
+
+  const toolUseBlocks = msg.content.filter((b): b is ToolUseBlock => b.type === 'toolUseBlock')
+  if (toolUseBlocks.length > 0 && index + 1 < messages.length) {
+    const next = messages[index + 1]!
+    if (next.metadata?.custom?.pinned === true) {
+      const useIds = new Set(toolUseBlocks.map((b) => b.toolUseId))
+      if (next.content.some((b) => b.type === 'toolResultBlock' && useIds.has((b as ToolResultBlock).toolUseId))) {
+        return true
+      }
+    }
+  }
+
+  return false
+}
+
+/**
+ * Returns a new Message marked as pinned (protected from eviction during context reduction).
+ */
+export function pinMessage(message: Message): Message {
+  return new Message({
+    role: message.role,
+    content: message.content,
+    metadata: {
+      ...message.metadata,
+      custom: { ...message.metadata?.custom, pinned: true },
+    },
+  })
+}
+
+/**
+ * Returns a new Message with pinning removed.
+ */
+export function unpinMessage(message: Message): Message {
+  const { pinned: _, ...restCustom } = message.metadata?.custom ?? {}
+  const { custom: __, ...restMetadata } = message.metadata ?? {}
+  const hasCustom = Object.keys(restCustom).length > 0
+  const hasMetadata = hasCustom || Object.keys(restMetadata).length > 0
+  const metadata = hasMetadata ? { ...restMetadata, ...(hasCustom ? { custom: restCustom } : {}) } : undefined
+
+  return new Message({
+    role: message.role,
+    content: message.content,
+    ...(metadata !== undefined ? { metadata } : {}),
+  })
+}
+
+/**
+ * Agent-invokable tool that pins or unpins a message in the conversation history.
+ */
+export const pinMessageTool = tool({
+  name: 'pin_message',
+  description:
+    'Pin or unpin a message in the conversation history. ' +
+    'Pinned messages are protected from eviction during context reduction. ' +
+    'Use this to preserve important context that should not be summarized or trimmed away.',
+  inputSchema: z.object({
+    index: z.number().int().min(0).describe('The zero-based index of the message in the conversation history.'),
+    action: z.enum(['pin', 'unpin']).default('pin').describe('Whether to pin or unpin the message.'),
+  }),
+  callback: ({ index, action }, context) => {
+    const messages = context!.agent.messages
+    if (index >= messages.length) {
+      return `Invalid index ${index}. Conversation has ${messages.length} messages (indices 0-${messages.length - 1}).`
+    }
+    messages[index] = action === 'pin' ? pinMessage(messages[index]!) : unpinMessage(messages[index]!)
+    return `${action === 'pin' ? 'Pinned' : 'Unpinned'} message at index ${index}.`
+  },
+})
+
+// --- Range + pin protection ---
+
+/**
+ * Check if a message at the given index is protected from eviction.
+ * A message is protected if it is pinned, within the protected range,
+ * or is a tool-pair partner of a range-protected message.
+ */
+export function isProtected(messages: Message[], index: number, range?: number): boolean {
+  if (isPinned(messages, index)) return true
+  if (range === undefined || range === 0) return false
+  if (inRange(messages.length, index, range)) return true
+
+  const msg = messages[index]!
+  const hasToolResult = msg.content.some((b) => b.type === 'toolResultBlock')
+  if (hasToolResult && index > 0 && inRange(messages.length, index - 1, range)) return true
+
+  const hasToolUse = msg.content.some((b) => b.type === 'toolUseBlock')
+  if (hasToolUse && index + 1 < messages.length && inRange(messages.length, index + 1, range)) return true
+
+  return false
+}
+
+function inRange(length: number, index: number, range: number): boolean {
+  if (range > 0) return index < range
+  return index >= length + range
+}
diff --git a/strands-ts/src/context-manager/compression/strategies/summarize.ts b/strands-ts/src/context-manager/compression/strategies/summarize.ts
new file mode 100644
index 000000000..7aef57cc8
--- /dev/null
+++ b/strands-ts/src/context-manager/compression/strategies/summarize.ts
@@ -0,0 +1,140 @@
+import { Message, TextBlock } from '../../../types/messages.js'
+import type { Model } from '../../../models/model.js'
+import { isProtected } from '../protection.js'
+import { logger } from '../../../logging/logger.js'
+
+const SUMMARIZATION_PROMPT = `You are a conversation summarizer. Provide a concise summary of the conversation history.
+
+Format Requirements:
+- You MUST create a structured and concise summary in bullet-point format.
+- You MUST NOT respond conversationally.
+- You MUST NOT address the user directly.
+- You MUST NOT comment on tool availability.
+
+Assumptions:
+- You MUST NOT assume tool executions failed unless otherwise stated.
+
+Task:
+Your task is to create a structured summary document:
+- It MUST contain bullet points with key topics and questions covered
+- It MUST contain bullet points for all significant tools executed and their results
+- It MUST contain bullet points for any code or technical information shared
+- It MUST contain a section of key insights gained
+- It MUST format the summary in the third person
+
+Example format:
+
+## Conversation Summary
+* Topic 1: Key information
+* Topic 2: Key information
+
+## Tools Executed
+* Tool X: Result Y`
+
+export type SummarizeOptions = {
+  /** Ratio of messages to summarize (0.1–0.8). Defaults to 0.3. */
+  summaryRatio?: number
+  /** Minimum recent messages to preserve. Defaults to 10. */
+  preserveRecentMessages?: number
+  /** Positive: protect first N messages. Negative: protect last N messages. */
+  protectedMessageRange?: number
+}
+
+/**
+ * Summarize the oldest messages and replace them with a model-generated summary.
+ *
+ * @param messages - The messages array to mutate in place
+ * @param model - The model to use for generating the summary
+ * @param options - Summarization options
+ * @returns `true` if messages were summarized, `false` if not enough to summarize
+ */
+export async function summarize(messages: Message[], model: Model, options?: SummarizeOptions): Promise<boolean> {
+  const summaryRatio = Math.max(0.1, Math.min(0.8, options?.summaryRatio ?? 0.3))
+  const preserveRecent = options?.preserveRecentMessages ?? 10
+  const protectedRange = options?.protectedMessageRange
+
+  let count = Math.max(1, Math.floor(messages.length * summaryRatio))
+  count = Math.min(count, messages.length - preserveRecent)
+
+  if (count <= 0) {
+    logger.warn(
+      `preserve_recent=<${preserveRecent}>, messages=<${messages.length}> | insufficient messages for summarization`
+    )
+    return false
+  }
+
+  count = adjustSplitForToolPairs(messages, count)
+
+  // Partition [0, count) into protected (preserve) and non-protected (summarize)
+  const protectedToPreserve: Message[] = []
+  const toSummarize: Message[] = []
+  for (let i = 0; i < count; i++) {
+    if (isProtected(messages, i, protectedRange)) {
+      protectedToPreserve.push(messages[i]!)
+    } else {
+      toSummarize.push(messages[i]!)
+    }
+  }
+
+  if (toSummarize.length === 0) {
+    logger.warn(`messages=<${messages.length}> | all messages in summarize range are protected, unable to reduce`)
+    return false
+  }
+
+  const summary = await generateSummary(toSummarize, model)
+
+  // Replace summarized range with protected messages + summary
+  messages.splice(0, count, ...protectedToPreserve, summary)
+  return true
+}
+
+async function generateSummary(messagesToSummarize: Message[], model: Model): Promise<Message> {
+  const input = [
+    ...messagesToSummarize,
+    new Message({ role: 'user', content: [new TextBlock('Please summarize this conversation.')] }),
+  ]
+
+  const stream = model.streamAggregated(input, { systemPrompt: SUMMARIZATION_PROMPT })
+
+  let result: Awaited<ReturnType<typeof stream.next>> | undefined
+  for (;;) {
+    result = await stream.next()
+    if (result.done) break
+  }
+
+  if (!result?.done || !result.value) {
+    throw new Error('Failed to generate summary: no response from model')
+  }
+
+  return new Message({ role: 'user', content: result.value.message.content })
+}
+
+/**
+ * Adjust split point forward to avoid breaking tool use/result pairs.
+ */
+function adjustSplitForToolPairs(messages: Message[], splitPoint: number): number {
+  if (splitPoint >= messages.length) return splitPoint
+
+  let idx = splitPoint
+  while (idx < messages.length) {
+    const msg = messages[idx]!
+
+    if (msg.content.some((b) => b.type === 'toolResultBlock')) {
+      idx++
+      continue
+    }
+
+    const hasToolUse = msg.content.some((b) => b.type === 'toolUseBlock')
+    if (hasToolUse) {
+      const next = messages[idx + 1]
+      if (!next?.content.some((b) => b.type === 'toolResultBlock')) {
+        idx++
+        continue
+      }
+    }
+
+    break
+  }
+
+  return idx >= messages.length ? splitPoint : idx
+}
diff --git a/strands-ts/src/context-manager/compression/strategies/truncate.ts b/strands-ts/src/context-manager/compression/strategies/truncate.ts
new file mode 100644
index 000000000..ec082e60f
--- /dev/null
+++ b/strands-ts/src/context-manager/compression/strategies/truncate.ts
@@ -0,0 +1,85 @@
+import type { Message } from '../../../types/messages.js'
+import { isProtected } from '../protection.js'
+import { logger } from '../../../logging/logger.js'
+
+export type TruncateOptions = {
+  /** Positive: protect first N messages. Negative: protect last N messages. */
+  protectedMessageRange?: number
+}
+
+/**
+ * Truncate oldest messages from the conversation, preserving tool use/result pairs.
+ * Protected messages (by range) are never removed.
+ *
+ * @param messages - The messages array to mutate in place
+ * @param windowSize - Maximum messages to keep
+ * @param options - Options including protectedMessageRange
+ * @returns `true` if messages were removed, `false` if no valid trim point found
+ */
+export function truncate(messages: Message[], windowSize: number, options?: TruncateOptions): boolean {
+  if (messages.length <= 2) return false
+
+  const protectedRange = options?.protectedMessageRange
+
+  let trimIndex = messages.length <= windowSize ? 2 : messages.length - windowSize
+  trimIndex = findValidTrimPoint(messages, trimIndex)
+
+  if (trimIndex >= messages.length) {
+    logger.warn(`window_size=<${windowSize}>, messages=<${messages.length}> | unable to trim, no valid trim point`)
+    return false
+  }
+
+  // Collect non-protected indices in [0, trimIndex) to remove
+  const indicesToRemove: number[] = []
+  for (let i = 0; i < trimIndex; i++) {
+    if (isProtected(messages, i, protectedRange)) continue
+    indicesToRemove.push(i)
+  }
+
+  if (indicesToRemove.length === 0) {
+    logger.warn(
+      `window_size=<${windowSize}>, messages=<${messages.length}> | all messages in trim range are protected, unable to reduce`
+    )
+    return false
+  }
+
+  // Remove in reverse order to keep indices stable
+  for (let i = indicesToRemove.length - 1; i >= 0; i--) {
+    messages.splice(indicesToRemove[i]!, 1)
+  }
+  return true
+}
+
+/**
+ * Find a valid trim point starting from the given index.
+ * Skips positions that would leave orphaned toolResults or toolUse without a following toolResult.
+ */
+function findValidTrimPoint(messages: Message[], startIndex: number): number {
+  let idx = startIndex
+  while (idx < messages.length) {
+    const msg = messages[idx]
+    if (!msg) break
+
+    if (msg.role !== 'user') {
+      idx++
+      continue
+    }
+
+    if (msg.content.some((b) => b.type === 'toolResultBlock')) {
+      idx++
+      continue
+    }
+
+    const hasToolUse = msg.content.some((b) => b.type === 'toolUseBlock')
+    if (hasToolUse) {
+      const next = messages[idx + 1]
+      if (!next || !next.content.some((b) => b.type === 'toolResultBlock')) {
+        idx++
+        continue
+      }
+    }
+
+    break
+  }
+  return idx
+}
diff --git a/strands-ts/src/context-manager/context-manager.ts b/strands-ts/src/context-manager/context-manager.ts
new file mode 100644
index 000000000..c7c3a4c1e
--- /dev/null
+++ b/strands-ts/src/context-manager/context-manager.ts
@@ -0,0 +1,190 @@
+import type { Storage } from '../vended-plugins/context-offloader/storage.js'
+import type { Plugin } from '../plugins/plugin.js'
+import type { Tool } from '../tools/tool.js'
+import type { LocalAgent } from '../types/agent.js'
+import { ContextCompression } from './compression/context-compression.js'
+import { ContextOffloader } from '../vended-plugins/context-offloader/plugin.js'
+import { InMemoryStorage } from '../vended-plugins/context-offloader/storage.js'
+
+export type ContextStrategyValue = 'auto'
+
+/**
+ * Configuration for the offloader component.
+ */
+export type OffloaderConfig = {
+  /** Token threshold above which tool results are offloaded. Defaults to 2500. */
+  threshold?: number
+  /** Number of tokens to keep as an inline preview. Defaults to 500. */
+  previewTokens?: number
+}
+
+/**
+ * Compression configuration accepted by contextManager.
+ * - `true`: enable with defaults (truncate, proactive at 0.7).
+ * - `'truncate'` / `'summarize'`: enable specific strategy with defaults.
+ * - Object: full config with strategy and options.
+ * - Omitted: disabled.
+ */
+export type CompressionConfig =
+  | true
+  | import('./compression/context-compression.js').CompressionMethod
+  | import('./compression/context-compression.js').CompressionOptions
+
+/**
+ * Configuration accepted by the {@link ContextManager} constructor.
+ *
+ * Config objects are additive — only features you explicitly set are enabled.
+ * Use `"auto"` to enable everything with defaults.
+ */
+export type ContextManagerConfig = {
+  /** Strategy name. Only "auto" is supported currently. */
+  strategy?: ContextStrategyValue
+  /** Storage backend for cached tool results. Defaults to InMemoryStorage. */
+  storage?: Storage
+  /**
+   * Context offloader configuration.
+   * - `true`: enable with defaults (threshold=2500, previewTokens=500).
+   * - Object: enable with custom settings.
+   * - Omitted: disabled.
+   */
+  offloader?: true | OffloaderConfig
+  /**
+   * Compression configuration.
+   * - `true`: enable with defaults (truncate, proactive at 0.7).
+   * - `'truncate'` / `'summarize'`: enable specific strategy with defaults.
+   * - `CompressionStrategy.Truncate(...)` / `CompressionStrategy.Summarize(...)`: full config.
+   * - Omitted: disabled.
+   */
+  compression?: CompressionConfig
+}
+
+/**
+ * The `contextManager` parameter type accepted by AgentConfig.
+ *
+ * - `"auto"`: enables everything with defaults.
+ * - `{ strategy: 'auto', ... }`: auto with overrides (omitted features stay enabled).
+ * - `{ compression: true }`: additive — only what you set is enabled.
+ * - `undefined` (default): no context management facade.
+ */
+export type ContextManagerParam = ContextStrategyValue | ContextManagerConfig
+
+/**
+ * Pre-composed context management for agents.
+ *
+ * Implements {@link Plugin} — registers hooks for token estimation and composes
+ * sub-plugins (ContextCompression, ContextOffloader) that handle the actual
+ * compression and caching behavior.
+ *
+ * @example
+ * ```typescript
+ * // Config shorthand (most users)
+ * const agent = new Agent({ contextManager: "auto" })
+ *
+ * // Class instance (power users who need a handle)
+ * const cm = new ContextManager({ storage: new S3Storage("bucket") })
+ * const agent = new Agent({ contextManager: cm })
+ * cm.storage // direct access
+ * cm.budget  // { used, limit, ratio }
+ * ```
+ */
+export class ContextManager implements Plugin {
+  readonly name = 'strands:context-manager'
+  readonly storage: Storage
+
+  private readonly _config: ContextManagerConfig
+  private _subPlugins: Plugin[] | undefined
+
+  constructor(config?: ContextManagerConfig) {
+    this._config = config ?? {}
+    this.storage = this._config.storage ?? new InMemoryStorage()
+  }
+
+  /**
+   * Resolve sub-plugins, skipping any that the user already provides.
+   * Called once before plugin initialization.
+   * @internal
+   */
+  _resolveSubPlugins(userPlugins?: Plugin[]): void {
+    this._subPlugins = this._buildSubPlugins(userPlugins)
+  }
+
+  getTools(): Tool[] {
+    const plugins = this._subPlugins ?? []
+    const tools: Tool[] = []
+    for (const plugin of plugins) {
+      if (plugin.getTools) {
+        tools.push(...plugin.getTools())
+      }
+    }
+    return tools
+  }
+
+  initAgent(agent: LocalAgent): void {
+    if (!this._subPlugins) {
+      this._subPlugins = this._buildSubPlugins()
+    }
+
+    for (const plugin of this._subPlugins) {
+      plugin.initAgent(agent)
+    }
+  }
+
+  private _buildSubPlugins(userPlugins?: Plugin[]): Plugin[] {
+    const config = this._config
+    const plugins: Plugin[] = []
+
+    if (config.compression) {
+      const userProvided = userPlugins?.some((p) => p.name === 'strands:context-compression')
+      if (!userProvided) {
+        let compressionConfig: import('./compression/context-compression.js').CompressionOptions | undefined
+        if (config.compression === true) {
+          compressionConfig = undefined
+        } else if (typeof config.compression === 'string') {
+          compressionConfig = { method: config.compression }
+        } else {
+          compressionConfig = config.compression
+        }
+        plugins.push(new ContextCompression(compressionConfig))
+      }
+    }
+
+    if (config.offloader) {
+      const userProvided = userPlugins?.some((p) => p.name === 'strands:context-offloader')
+      if (!userProvided) {
+        const offloaderConfig = config.offloader === true ? {} : config.offloader
+        plugins.push(
+          new ContextOffloader({
+            storage: this.storage,
+            maxResultTokens: offloaderConfig.threshold ?? 2500,
+            previewTokens: offloaderConfig.previewTokens ?? 500,
+            includeRetrievalTool: true,
+          })
+        )
+      }
+    }
+
+    return plugins
+  }
+}
+
+/**
+ * Resolve a `contextManager` parameter into a ContextManager plugin instance.
+ * User-provided plugins that overlap with sub-plugins take precedence.
+ *
+ * @param param - The contextManager config (strategy string, config object, or class instance)
+ * @param userPlugins - User-provided plugins array, used for dedup checking
+ * @internal
+ */
+const STRATEGY_DEFAULTS = {
+  auto: { compression: true, offloader: true },
+} satisfies Record<string, Partial<ContextManagerConfig>>
+
+export function resolveContextManager(param: ContextManagerParam, userPlugins?: Plugin[]): ContextManager {
+  const base = typeof param === 'string' ? { strategy: param } : param
+  const defaults = base.strategy ? STRATEGY_DEFAULTS[base.strategy] : undefined
+  const config = defaults ? { ...defaults, ...base } : base
+
+  const instance = new ContextManager(config)
+  instance._resolveSubPlugins(userPlugins)
+  return instance
+}
diff --git a/strands-ts/src/context-manager/token-estimation/token-estimation.ts b/strands-ts/src/context-manager/token-estimation/token-estimation.ts
new file mode 100644
index 000000000..05b3e9944
--- /dev/null
+++ b/strands-ts/src/context-manager/token-estimation/token-estimation.ts
@@ -0,0 +1,31 @@
+import type { Message } from '../../types/messages.js'
+import type { Model } from '../../models/model.js'
+import { logger } from '../../logging/logger.js'
+
+/**
+ * Estimate input tokens for a conversation.
+ *
+ * Uses an incremental strategy: if the last assistant message has usage metadata,
+ * uses (inputTokens + outputTokens) as a baseline and only counts new messages
+ * added after it. Falls back to full model estimation otherwise.
+ *
+ * @param messages - The conversation messages
+ * @param model - The model to use for token counting
+ * @returns Estimated token count, or undefined if estimation fails
+ */
+export async function estimateInputTokens(messages: Message[], model: Model): Promise<number | undefined> {
+  try {
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const usage = messages[i]!.metadata?.usage
+      if (messages[i]!.role === 'assistant' && usage) {
+        const baseline = usage.inputTokens + usage.outputTokens
+        const newMessages = messages.slice(i + 1)
+        return newMessages.length === 0 ? baseline : baseline + (await model.countTokens(newMessages))
+      }
+    }
+    return await model.countTokens(messages)
+  } catch (e) {
+    logger.debug(`error=<${e}> | token estimation failed`)
+    return undefined
+  }
+}
diff --git a/strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts b/strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts
new file mode 100644
index 000000000..90d1a33f2
--- /dev/null
+++ b/strands-ts/src/context-manager/tool-result-cache/tool-result-cache.ts
@@ -0,0 +1,7 @@
+/**
+ * Re-exports from the canonical ContextOffloader location.
+ * The ContextManager composes ContextOffloader as its tool-result-cache sub-plugin.
+ */
+export { ContextOffloader, type ContextOffloaderConfig } from '../../vended-plugins/context-offloader/plugin.js'
+export type { Storage } from '../../vended-plugins/context-offloader/storage.js'
+export { InMemoryStorage, FileStorage, S3Storage } from '../../vended-plugins/context-offloader/storage.js'
diff --git a/strands-ts/src/index.ts b/strands-ts/src/index.ts
index 46b1022c0..b532fb549 100644
--- a/strands-ts/src/index.ts
+++ b/strands-ts/src/index.ts
@@ -19,6 +19,22 @@ export type { ToolCaller, ToolCallerProxy, ToolHandle, DirectToolCallOptions } f
 export type { InvocationState, InvokeArgs, InvokeOptions, LocalAgent } from './types/agent.js'
 export type { LifecycleObserver } from './types/lifecycle-observer.js'
 
+// Context Manager
+export type {
+  ContextManagerParam,
+  ContextManagerConfig,
+  ContextStrategyValue,
+  OffloaderConfig,
+  CompressionConfig,
+} from './context-manager/context-manager.js'
+export type {
+  CompressionOptions,
+  TruncateCompressionConfig,
+  SummarizeCompressionConfig,
+  CompressionMethod,
+} from './context-manager/compression/context-compression.js'
+export { pinMessageTool } from './context-manager/compression/protection.js'
+
 // Snapshot types
 export { SNAPSHOT_SCHEMA_VERSION } from './types/snapshot.js'
 export type { Scope, Snapshot } from './types/snapshot.js'