6 changes: 6 additions & 0 deletions helm/librechat/values.yaml
@@ -69,6 +69,12 @@ librechat:
# registration:
# socialLogins: ["discord", "facebook", "github", "google", "openid"]
# endpoints:
# bedrock:
# models:
# - "anthropic.claude-sonnet-4-5-20250929-v1:0"
# # Optional. Use "1h" only with Bedrock models that support 1-hour prompt cache TTL.
# # Omit this field to keep Bedrock's default 5-minute prompt cache TTL.
# promptCacheTtl: "1h"
# azureOpenAI:
# # Endpoint-level configuration
# titleModel: "gpt-4o"
6 changes: 6 additions & 0 deletions librechat.example.yaml
@@ -534,6 +534,12 @@ endpoints:
# - "anthropic.claude-3-7-sonnet-20250219-v1:0"
# - "anthropic.claude-3-5-sonnet-20241022-v2:0"
#
# # Prompt Cache TTL
# # Optional. Bedrock supports 5-minute cache checkpoints, and 1-hour checkpoints
# # for Claude 4.5 models. When omitted, Bedrock uses its default 5-minute TTL.
# # Reference: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html
# promptCacheTtl: "1h"
#
# # Inference Profiles Configuration
# # Maps model IDs to their inference profile ARNs
# # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID that you've added to the models list above
42 changes: 42 additions & 0 deletions packages/api/src/endpoints/bedrock/initialize.spec.ts
@@ -95,6 +95,48 @@ describe('initializeBedrock', () => {
expect(result.llmConfig).toHaveProperty('region', 'us-east-1');
});

it('should include promptCacheTtl from Bedrock endpoint config', async () => {
const params = createMockParams({
model_parameters: {
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
},
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
promptCacheTtl: '1h',
},
},
},
});
const result = await initializeBedrock(params);

expect(result.llmConfig).toHaveProperty('promptCacheTtl', '1h');
expect(result.llmConfig).toHaveProperty('promptCache', true);
});

it('should omit one-hour promptCacheTtl for models that only support 5 minutes', async () => {
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
promptCacheTtl: '1h',
},
},
},
});
const result = await initializeBedrock(params);

expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
expect(result.llmConfig).toHaveProperty('promptCache', true);
});

it('should not include promptCacheTtl when not configured', async () => {
const params = createMockParams();
const result = await initializeBedrock(params);

expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
});

it('should handle model_parameters', async () => {
const params = createMockParams({
model_parameters: {
4 changes: 4 additions & 0 deletions packages/api/src/endpoints/bedrock/initialize.ts
@@ -13,6 +13,7 @@ import type {
BaseInitializeParams,
InitializeResultBase,
BedrockCredentials,
BedrockPromptCacheTtl,
GuardrailConfiguration,
InferenceProfileConfig,
} from '~/types';
@@ -54,6 +55,7 @@ export async function initializeBedrock({
| ({
guardrailConfig?: GuardrailConfiguration;
inferenceProfiles?: InferenceProfileConfig;
promptCacheTtl?: BedrockPromptCacheTtl;
} & Record<string, unknown>)
| undefined;

@@ -98,6 +100,7 @@
const requestOptions: Record<string, unknown> = {
model: model_parameters?.model as string | undefined,
region: BEDROCK_AWS_DEFAULT_REGION,
promptCacheTtl: bedrockConfig?.promptCacheTtl,
};

const configOptions: Record<string, unknown> = {};
@@ -117,6 +120,7 @@
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
promptCacheTtl?: BedrockPromptCacheTtl;
};

if (bedrockConfig?.guardrailConfig) {
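
Note: a minimal standalone sketch of the wiring above, assuming only the types shown in this PR; buildRequestOptions is a hypothetical helper used here for illustration, not a function in the codebase.

type BedrockPromptCacheTtl = '5m' | '1h';

interface BedrockEndpointConfig {
  promptCacheTtl?: BedrockPromptCacheTtl;
  // guardrailConfig, inferenceProfiles, etc. omitted
}

// Hypothetical helper mirroring how initializeBedrock copies the endpoint-level TTL
// onto the per-request options; leaving it undefined keeps Bedrock's 5-minute default.
function buildRequestOptions(
  model: string | undefined,
  region: string,
  bedrockConfig?: BedrockEndpointConfig,
): Record<string, unknown> {
  return {
    model,
    region,
    promptCacheTtl: bedrockConfig?.promptCacheTtl,
  };
}

const opts = buildRequestOptions(
  'anthropic.claude-sonnet-4-5-20250929-v1:0',
  'us-east-1',
  { promptCacheTtl: '1h' },
);
// opts.promptCacheTtl === '1h'; with no TTL configured the field is simply undefined.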
4 changes: 4 additions & 0 deletions packages/api/src/types/bedrock.ts
@@ -27,6 +27,7 @@ export interface GuardrailConfiguration {
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
*/
export type InferenceProfileConfig = Record<string, string>;
export type BedrockPromptCacheTtl = '5m' | '1h';

/**
* Configuration options for Bedrock LLM
@@ -45,6 +46,8 @@ export interface BedrockConfigOptions {
guardrailConfig?: GuardrailConfiguration;
/** Inference profile ARNs keyed by model ID / friendly name */
inferenceProfiles?: InferenceProfileConfig;
/** Bedrock prompt cache checkpoint TTL. Defaults to Bedrock's 5-minute TTL when unset. */
promptCacheTtl?: BedrockPromptCacheTtl;
}

/**
@@ -58,6 +61,7 @@ export interface BedrockLLMConfigResult {
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
promptCacheTtl?: BedrockPromptCacheTtl;
};
configOptions: Record<string, unknown>;
}
62 changes: 62 additions & 0 deletions packages/data-provider/specs/bedrock.spec.ts
@@ -921,6 +921,68 @@ describe('bedrockInputParser', () => {
expect(result.promptCache).toBe(true);
});

test('should preserve one-hour promptCacheTtl for Claude 4.5 models', () => {
const input = {
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
promptCache: true,
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBe('1h');
});

test('should strip one-hour promptCacheTtl for models that only support 5 minutes', () => {
const result = bedrockInputParser.parse({
model: 'amazon.nova-pro-v1:0',
promptCache: true,
promptCacheTtl: '1h',
}) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBeUndefined();
});

test('should preserve explicit 5-minute promptCacheTtl for Nova models', () => {
const input = {
model: 'amazon.nova-pro-v1:0',
promptCache: true,
promptCacheTtl: '5m',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBe('5m');
});

test('should strip promptCacheTtl when promptCache is disabled', () => {
const input = {
model: 'anthropic.claude-sonnet-4-20250514-v1:0',
promptCache: false,
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(false);
expect(result.promptCacheTtl).toBeUndefined();
});

test('should strip stale promptCacheTtl when switching to non-Claude/Nova model', () => {
const staleConversationData = {
model: 'deepseek.deepseek-r1',
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(staleConversationData) as Record<string, unknown>;
expect(result.promptCacheTtl).toBeUndefined();
});

test('bedrockInputSchema should strip stale promptCacheTtl when promptCache is disabled', () => {
const result = bedrockInputSchema.parse({
model: 'anthropic.claude-sonnet-4-20250514-v1:0',
promptCache: false,
promptCacheTtl: '1h',
}) as Record<string, unknown>;
expect(result.promptCache).toBe(false);
expect(result.promptCacheTtl).toBeUndefined();
});

test('should strip stale thinking config from additionalModelRequestFields for non-Anthropic models', () => {
const staleConversationData = {
model: 'moonshot.kimi-k2-0711-thinking',
51 changes: 41 additions & 10 deletions packages/data-provider/src/bedrock.ts
@@ -40,6 +40,42 @@ function extractPersistedDisplay(amrf: unknown): string | undefined {
return typeof display === 'string' ? display : undefined;
}

function supportsBedrockPromptCache(model: unknown): boolean {
return typeof model === 'string' && (model.includes('claude') || model.includes('nova'));
}

function supportsOneHourBedrockPromptCache(model: unknown): boolean {
if (typeof model !== 'string') {
return false;
}

return (
model.includes('anthropic.claude-opus-4-5') ||
model.includes('anthropic.claude-sonnet-4-5') ||
model.includes('anthropic.claude-haiku-4-5')
);
}

function normalizeBedrockPromptCache(data: Record<string, unknown>) {
if (supportsBedrockPromptCache(data.model)) {
if (data.promptCache === undefined) {
data.promptCache = true;
}
if (data.promptCacheTtl === '1h' && !supportsOneHourBedrockPromptCache(data.model)) {
data.promptCacheTtl = undefined;
}
} else {
if (data.promptCache === true) {
data.promptCache = undefined;
}
data.promptCacheTtl = undefined;
}

if (data.promptCache === false) {
data.promptCacheTtl = undefined;
}
}

export function resolveThinkingDisplay(
model: string,
explicit?: s.ThinkingDisplay | string | null,
@@ -226,6 +262,7 @@ export const bedrockInputSchema = s.tConversationSchema
thinkingDisplay: true,
reasoning_effort: true,
promptCache: true,
promptCacheTtl: true,
/* Catch-all fields */
topK: true,
additionalModelRequestFields: true,
Expand All @@ -250,6 +287,7 @@ export const bedrockInputSchema = s.tConversationSchema
}
delete obj.additionalModelRequestFields;
}
normalizeBedrockPromptCache(obj as Record<string, unknown>);
return s.removeNullishValues(obj);
})
.catch(() => ({}));
@@ -281,6 +319,7 @@ export const bedrockInputParser = s.tConversationSchema
thinkingDisplay: true,
reasoning_effort: true,
promptCache: true,
promptCacheTtl: true,
/* Catch-all fields */
topK: true,
additionalModelRequestFields: true,
@@ -304,6 +343,7 @@
'topP',
'stop',
'promptCache',
'promptCacheTtl',
];

const additionalFields: Record<string, unknown> = {};
@@ -439,16 +479,7 @@
}

/** Default promptCache for claude and nova models, if not defined */
if (
typeof typedData.model === 'string' &&
(typedData.model.includes('claude') || typedData.model.includes('nova'))
) {
if (typedData.promptCache === undefined) {
typedData.promptCache = true;
}
} else if (typedData.promptCache === true) {
typedData.promptCache = undefined;
}
normalizeBedrockPromptCache(typedData);

if (Object.keys(additionalFields).length > 0) {
typedData.additionalModelRequestFields = {
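
Note: to make the normalization rules concrete, a self-contained sketch that mirrors normalizeBedrockPromptCache from the hunk above (the helpers are re-declared here only so the snippet runs on its own) and shows the expected outcomes for a few representative models.

type Settings = Record<string, unknown>;

const supportsCache = (m: unknown) =>
  typeof m === 'string' && (m.includes('claude') || m.includes('nova'));

const supportsOneHour = (m: unknown) =>
  typeof m === 'string' &&
  ['anthropic.claude-opus-4-5', 'anthropic.claude-sonnet-4-5', 'anthropic.claude-haiku-4-5'].some(
    (id) => m.includes(id),
  );

function normalize(data: Settings): Settings {
  if (supportsCache(data.model)) {
    if (data.promptCache === undefined) {
      data.promptCache = true;
    }
    if (data.promptCacheTtl === '1h' && !supportsOneHour(data.model)) {
      data.promptCacheTtl = undefined; // fall back to Bedrock's 5-minute default
    }
  } else {
    if (data.promptCache === true) {
      data.promptCache = undefined;
    }
    data.promptCacheTtl = undefined;
  }
  if (data.promptCache === false) {
    data.promptCacheTtl = undefined;
  }
  return data;
}

// Claude 4.5 keeps the one-hour TTL:
normalize({ model: 'anthropic.claude-sonnet-4-5-20250929-v1:0', promptCacheTtl: '1h' });
// -> promptCache: true, promptCacheTtl: '1h'

// Nova supports caching but not the one-hour TTL, so the TTL is stripped:
normalize({ model: 'amazon.nova-pro-v1:0', promptCacheTtl: '1h' });
// -> promptCache: true, promptCacheTtl: undefined

// Non-Claude/Nova models drop both fields:
normalize({ model: 'deepseek.deepseek-r1', promptCache: true, promptCacheTtl: '1h' });
// -> promptCache: undefined, promptCacheTtl: undefined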
12 changes: 12 additions & 0 deletions packages/data-provider/src/config.spec.ts
@@ -3,6 +3,7 @@ import { EModelEndpoint, isDocumentSupportedProvider } from './schemas';
import { getEndpointFileConfig, mergeFileConfig } from './file-config';
import {
allowedAddressesSchema,
bedrockEndpointSchema,
configSchema,
excludedKeys,
resolveEndpointType,
@@ -29,6 +30,17 @@
});
});

describe('bedrockEndpointSchema', () => {
it('accepts supported Bedrock prompt cache TTL values', () => {
expect(bedrockEndpointSchema.parse({ promptCacheTtl: '5m' }).promptCacheTtl).toBe('5m');
expect(bedrockEndpointSchema.parse({ promptCacheTtl: '1h' }).promptCacheTtl).toBe('1h');
});

it('rejects unsupported Bedrock prompt cache TTL values', () => {
expect(() => bedrockEndpointSchema.parse({ promptCacheTtl: '30m' })).toThrow();
});
});

describe('resolveEndpointType', () => {
describe('non-agents endpoints', () => {
it('returns the config type for a custom endpoint', () => {
1 change: 1 addition & 0 deletions packages/data-provider/src/config.ts
@@ -391,6 +391,7 @@ export const bedrockEndpointSchema = baseEndpointSchema.merge(
availableRegions: z.array(z.string()).optional(),
models: z.array(z.string()).optional(),
inferenceProfiles: z.record(z.string(), z.string()).optional(),
promptCacheTtl: z.enum(['5m', '1h']).optional(),
}),
);

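
Note: a quick Zod sketch of the new validation, mirroring the config.spec.ts tests above; only the relevant slice of bedrockEndpointSchema is re-declared here for illustration.

import { z } from 'zod';

const bedrockEndpointSlice = z.object({
  models: z.array(z.string()).optional(),
  promptCacheTtl: z.enum(['5m', '1h']).optional(),
});

bedrockEndpointSlice.parse({ promptCacheTtl: '1h' }); // ok
bedrockEndpointSlice.parse({}); // ok: TTL stays unset, so Bedrock's 5-minute default applies
// bedrockEndpointSlice.parse({ promptCacheTtl: '30m' }); // throws: only '5m' and '1h' are accepted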
3 changes: 3 additions & 0 deletions packages/data-provider/src/schemas.ts
@@ -800,6 +800,7 @@ export const tConversationSchema = z.object({
max_tokens: coerceNumber.optional(),
/* Anthropic */
promptCache: z.boolean().optional(),
promptCacheTtl: z.enum(['5m', '1h']).optional(),
Comment on lines 802 to +803 (Copilot AI, Apr 29, 2026):

promptCacheTtl is documented as part of the /* Anthropic */ block, but this TTL is Bedrock-specific (per the PR description and its usage in bedrockInputParser). This comment/section placement is misleading; please either move promptCacheTtl to the AWS Bedrock section of the schema or update the comment to reflect Bedrock usage.

Suggested change, from:
    promptCache: z.boolean().optional(),
    promptCacheTtl: z.enum(['5m', '1h']).optional(),
to:
    promptCache: z.boolean().optional(),
    /* AWS Bedrock */
    promptCacheTtl: z.enum(['5m', '1h']).optional(),
    /* Anthropic */
system: z.string().optional(),
thinking: z.boolean().optional(),
thinkingBudget: coerceNumber.optional(),
@@ -950,6 +951,8 @@ export const tQueryParamsSchema = tConversationSchema
maxOutputTokens: true,
/** @endpoints anthropic */
promptCache: true,
/** @endpoints bedrock */
promptCacheTtl: true,
thinking: true,
thinkingBudget: true,
thinkingLevel: true,
1 change: 1 addition & 0 deletions packages/data-provider/src/types.ts
@@ -51,6 +51,7 @@ export type TEndpointOption = Pick<
| 'additionalModelRequestFields'
// Anthropic-specific
| 'promptCache'
| 'promptCacheTtl'
Comment on lines 52 to +54 (Copilot AI, Apr 29, 2026):

promptCacheTtl is listed under the // Anthropic-specific grouping, but it's Bedrock-only (whereas promptCache applies to both Anthropic and Bedrock). Please adjust the grouping/comment so future readers don't assume this option is supported by the Anthropic endpoint.

Suggested change, from:
    // Anthropic-specific
    | 'promptCache'
    | 'promptCacheTtl'
to:
    // Anthropic/Bedrock
    | 'promptCache'
    // Bedrock-specific
    | 'promptCacheTtl'
    // Anthropic-specific
| 'thinking'
| 'thinkingBudget'
| 'thinkingLevel'
4 changes: 4 additions & 0 deletions packages/data-schemas/src/schema/defaults.ts
@@ -77,6 +77,10 @@ export const conversationPreset = {
promptCache: {
type: Boolean,
},
promptCacheTtl: {
type: String,
enum: ['5m', '1h'],
},
thinking: {
type: Boolean,
},
1 change: 1 addition & 0 deletions packages/data-schemas/src/schema/preset.ts
@@ -28,6 +28,7 @@ export interface IPreset extends Document {
file_ids?: string[];
resendImages?: boolean;
promptCache?: boolean;
promptCacheTtl?: '5m' | '1h';
thinking?: boolean;
thinkingBudget?: number;
effort?: string;
1 change: 1 addition & 0 deletions packages/data-schemas/src/types/convo.ts
@@ -26,6 +26,7 @@ export interface IConversation extends Document {
file_ids?: string[];
resendImages?: boolean;
promptCache?: boolean;
promptCacheTtl?: '5m' | '1h';
thinking?: boolean;
thinkingBudget?: number;
effort?: string;