6 changes: 6 additions & 0 deletions helm/librechat/values.yaml
@@ -69,6 +69,12 @@ librechat:
# registration:
# socialLogins: ["discord", "facebook", "github", "google", "openid"]
# endpoints:
# bedrock:
# models:
# - "anthropic.claude-sonnet-4-5-20250929-v1:0"
# # Optional. Use "1h" only with Bedrock models that support 1-hour prompt cache TTL.
# # Omit this field to keep Bedrock's default 5-minute prompt cache TTL.
# promptCacheTtl: "1h"
# azureOpenAI:
# # Endpoint-level configuration
# titleModel: "gpt-4o"
6 changes: 6 additions & 0 deletions librechat.example.yaml
@@ -534,6 +534,12 @@ endpoints:
# - "anthropic.claude-3-7-sonnet-20250219-v1:0"
# - "anthropic.claude-3-5-sonnet-20241022-v2:0"
#
# # Prompt Cache TTL
# # Optional. Bedrock supports 5-minute cache checkpoints, and 1-hour checkpoints
# # for Claude 4.5 models. When omitted, Bedrock uses its default 5-minute TTL.
# # Reference: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html
# promptCacheTtl: "1h"
#
# # Inference Profiles Configuration
# # Maps model IDs to their inference profile ARNs
# # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID that you've added to the models list above
42 changes: 42 additions & 0 deletions packages/api/src/endpoints/bedrock/initialize.spec.ts
@@ -95,6 +95,48 @@ describe('initializeBedrock', () => {
expect(result.llmConfig).toHaveProperty('region', 'us-east-1');
});

it('should include promptCacheTtl from Bedrock endpoint config', async () => {
const params = createMockParams({
model_parameters: {
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
},
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
promptCacheTtl: '1h',
},
},
},
});
const result = await initializeBedrock(params);

expect(result.llmConfig).toHaveProperty('promptCacheTtl', '1h');
expect(result.llmConfig).toHaveProperty('promptCache', true);
});

it('should omit one-hour promptCacheTtl for models that only support 5 minutes', async () => {
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
promptCacheTtl: '1h',
},
},
},
});
const result = await initializeBedrock(params);

expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
expect(result.llmConfig).toHaveProperty('promptCache', true);
});

it('should not include promptCacheTtl when not configured', async () => {
const params = createMockParams();
const result = await initializeBedrock(params);

expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
});

it('should handle model_parameters', async () => {
const params = createMockParams({
model_parameters: {
4 changes: 4 additions & 0 deletions packages/api/src/endpoints/bedrock/initialize.ts
@@ -13,6 +13,7 @@ import type {
BaseInitializeParams,
InitializeResultBase,
BedrockCredentials,
BedrockPromptCacheTtl,
GuardrailConfiguration,
InferenceProfileConfig,
} from '~/types';
@@ -54,6 +55,7 @@ export async function initializeBedrock({
| ({
guardrailConfig?: GuardrailConfiguration;
inferenceProfiles?: InferenceProfileConfig;
promptCacheTtl?: BedrockPromptCacheTtl;
} & Record<string, unknown>)
| undefined;

@@ -98,6 +100,7 @@
const requestOptions: Record<string, unknown> = {
model: model_parameters?.model as string | undefined,
region: BEDROCK_AWS_DEFAULT_REGION,
promptCacheTtl: bedrockConfig?.promptCacheTtl,
};

const configOptions: Record<string, unknown> = {};
@@ -117,6 +120,7 @@
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
promptCacheTtl?: BedrockPromptCacheTtl;
};

if (bedrockConfig?.guardrailConfig) {
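
Note: a minimal standalone sketch of the wiring above, assuming only the types shown in this PR; buildRequestOptions is a hypothetical helper used here for illustration, not a function in the codebase.

type BedrockPromptCacheTtl = '5m' | '1h';

interface BedrockEndpointConfig {
  promptCacheTtl?: BedrockPromptCacheTtl;
  // guardrailConfig, inferenceProfiles, etc. omitted
}

// Hypothetical helper mirroring how initializeBedrock copies the endpoint-level TTL
// onto the per-request options; leaving it undefined keeps Bedrock's 5-minute default.
function buildRequestOptions(
  model: string | undefined,
  region: string,
  bedrockConfig?: BedrockEndpointConfig,
): Record<string, unknown> {
  return {
    model,
    region,
    promptCacheTtl: bedrockConfig?.promptCacheTtl,
  };
}

const opts = buildRequestOptions(
  'anthropic.claude-sonnet-4-5-20250929-v1:0',
  'us-east-1',
  { promptCacheTtl: '1h' },
);
// opts.promptCacheTtl === '1h'; with no TTL configured the field is simply undefined.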
4 changes: 4 additions & 0 deletions packages/api/src/types/bedrock.ts
@@ -27,6 +27,7 @@ export interface GuardrailConfiguration {
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
*/
export type InferenceProfileConfig = Record<string, string>;
export type BedrockPromptCacheTtl = '5m' | '1h';

/**
* Configuration options for Bedrock LLM
@@ -45,6 +46,8 @@ export interface BedrockConfigOptions {
guardrailConfig?: GuardrailConfiguration;
/** Inference profile ARNs keyed by model ID / friendly name */
inferenceProfiles?: InferenceProfileConfig;
/** Bedrock prompt cache checkpoint TTL. Defaults to Bedrock's 5-minute TTL when unset. */
promptCacheTtl?: BedrockPromptCacheTtl;
}

/**
@@ -58,6 +61,7 @@ export interface BedrockLLMConfigResult {
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
promptCacheTtl?: BedrockPromptCacheTtl;
};
configOptions: Record<string, unknown>;
}
62 changes: 62 additions & 0 deletions packages/data-provider/specs/bedrock.spec.ts
@@ -921,6 +921,68 @@ describe('bedrockInputParser', () => {
expect(result.promptCache).toBe(true);
});

test('should preserve one-hour promptCacheTtl for Claude 4.5 models', () => {
const input = {
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
promptCache: true,
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBe('1h');
});

test('should strip one-hour promptCacheTtl for models that only support 5 minutes', () => {
const result = bedrockInputParser.parse({
model: 'amazon.nova-pro-v1:0',
promptCache: true,
promptCacheTtl: '1h',
}) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBeUndefined();
});

test('should preserve explicit 5-minute promptCacheTtl for Nova models', () => {
const input = {
model: 'amazon.nova-pro-v1:0',
promptCache: true,
promptCacheTtl: '5m',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBe('5m');
});

test('should strip promptCacheTtl when promptCache is disabled', () => {
const input = {
model: 'anthropic.claude-sonnet-4-20250514-v1:0',
promptCache: false,
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(false);
expect(result.promptCacheTtl).toBeUndefined();
});

test('should strip stale promptCacheTtl when switching to non-Claude/Nova model', () => {
const staleConversationData = {
model: 'deepseek.deepseek-r1',
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(staleConversationData) as Record<string, unknown>;
expect(result.promptCacheTtl).toBeUndefined();
});

test('bedrockInputSchema should strip stale promptCacheTtl when promptCache is disabled', () => {
const result = bedrockInputSchema.parse({
model: 'anthropic.claude-sonnet-4-20250514-v1:0',
promptCache: false,
promptCacheTtl: '1h',
}) as Record<string, unknown>;
expect(result.promptCache).toBe(false);
expect(result.promptCacheTtl).toBeUndefined();
});

test('should strip stale thinking config from additionalModelRequestFields for non-Anthropic models', () => {
const staleConversationData = {
model: 'moonshot.kimi-k2-0711-thinking',
51 changes: 41 additions & 10 deletions packages/data-provider/src/bedrock.ts
@@ -40,6 +40,42 @@ function extractPersistedDisplay(amrf: unknown): string | undefined {
return typeof display === 'string' ? display : undefined;
}

function supportsBedrockPromptCache(model: unknown): boolean {
return typeof model === 'string' && (model.includes('claude') || model.includes('nova'));
}

function supportsOneHourBedrockPromptCache(model: unknown): boolean {
if (typeof model !== 'string') {
return false;
}

return (
model.includes('anthropic.claude-opus-4-5') ||
model.includes('anthropic.claude-sonnet-4-5') ||
model.includes('anthropic.claude-haiku-4-5')
);
}

function normalizeBedrockPromptCache(data: Record<string, unknown>) {
if (supportsBedrockPromptCache(data.model)) {
if (data.promptCache === undefined) {
data.promptCache = true;
}
if (data.promptCacheTtl === '1h' && !supportsOneHourBedrockPromptCache(data.model)) {
data.promptCacheTtl = undefined;
}
} else {
if (data.promptCache === true) {
data.promptCache = undefined;
}
data.promptCacheTtl = undefined;
}

if (data.promptCache === false) {
data.promptCacheTtl = undefined;
}
}

export function resolveThinkingDisplay(
model: string,
explicit?: s.ThinkingDisplay | string | null,
@@ -226,6 +262,7 @@ export const bedrockInputSchema = s.tConversationSchema
thinkingDisplay: true,
reasoning_effort: true,
promptCache: true,
promptCacheTtl: true,
/* Catch-all fields */
topK: true,
additionalModelRequestFields: true,
Expand All @@ -250,6 +287,7 @@ export const bedrockInputSchema = s.tConversationSchema
}
delete obj.additionalModelRequestFields;
}
normalizeBedrockPromptCache(obj as Record<string, unknown>);
return s.removeNullishValues(obj);
})
.catch(() => ({}));
@@ -281,6 +319,7 @@ export const bedrockInputParser = s.tConversationSchema
thinkingDisplay: true,
reasoning_effort: true,
promptCache: true,
promptCacheTtl: true,
/* Catch-all fields */
topK: true,
additionalModelRequestFields: true,
@@ -304,6 +343,7 @@
'topP',
'stop',
'promptCache',
'promptCacheTtl',
];

const additionalFields: Record<string, unknown> = {};
@@ -439,16 +479,7 @@
}

/** Default promptCache for claude and nova models, if not defined */
if (
typeof typedData.model === 'string' &&
(typedData.model.includes('claude') || typedData.model.includes('nova'))
) {
if (typedData.promptCache === undefined) {
typedData.promptCache = true;
}
} else if (typedData.promptCache === true) {
typedData.promptCache = undefined;
}
normalizeBedrockPromptCache(typedData);

if (Object.keys(additionalFields).length > 0) {
typedData.additionalModelRequestFields = {
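
Note: to make the normalization rules concrete, a self-contained sketch that mirrors normalizeBedrockPromptCache from the hunk above (the helpers are re-declared here only so the snippet runs on its own) and shows the expected outcomes for a few representative models.

type Settings = Record<string, unknown>;

const supportsCache = (m: unknown) =>
  typeof m === 'string' && (m.includes('claude') || m.includes('nova'));

const supportsOneHour = (m: unknown) =>
  typeof m === 'string' &&
  ['anthropic.claude-opus-4-5', 'anthropic.claude-sonnet-4-5', 'anthropic.claude-haiku-4-5'].some(
    (id) => m.includes(id),
  );

function normalize(data: Settings): Settings {
  if (supportsCache(data.model)) {
    if (data.promptCache === undefined) {
      data.promptCache = true;
    }
    if (data.promptCacheTtl === '1h' && !supportsOneHour(data.model)) {
      data.promptCacheTtl = undefined; // fall back to Bedrock's 5-minute default
    }
  } else {
    if (data.promptCache === true) {
      data.promptCache = undefined;
    }
    data.promptCacheTtl = undefined;
  }
  if (data.promptCache === false) {
    data.promptCacheTtl = undefined;
  }
  return data;
}

// Claude 4.5 keeps the one-hour TTL:
normalize({ model: 'anthropic.claude-sonnet-4-5-20250929-v1:0', promptCacheTtl: '1h' });
// -> promptCache: true, promptCacheTtl: '1h'

// Nova supports caching but not the one-hour TTL, so the TTL is stripped:
normalize({ model: 'amazon.nova-pro-v1:0', promptCacheTtl: '1h' });
// -> promptCache: true, promptCacheTtl: undefined

// Non-Claude/Nova models drop both fields:
normalize({ model: 'deepseek.deepseek-r1', promptCache: true, promptCacheTtl: '1h' });
// -> promptCache: undefined, promptCacheTtl: undefined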
12 changes: 12 additions & 0 deletions packages/data-provider/src/config.spec.ts
@@ -3,6 +3,7 @@ import { EModelEndpoint, isDocumentSupportedProvider } from './schemas';
import { getEndpointFileConfig, mergeFileConfig } from './file-config';
import {
allowedAddressesSchema,
bedrockEndpointSchema,
configSchema,
excludedKeys,
resolveEndpointType,
@@ -29,6 +30,17 @@
});
});

describe('bedrockEndpointSchema', () => {
it('accepts supported Bedrock prompt cache TTL values', () => {
expect(bedrockEndpointSchema.parse({ promptCacheTtl: '5m' }).promptCacheTtl).toBe('5m');
expect(bedrockEndpointSchema.parse({ promptCacheTtl: '1h' }).promptCacheTtl).toBe('1h');
});

it('rejects unsupported Bedrock prompt cache TTL values', () => {
expect(() => bedrockEndpointSchema.parse({ promptCacheTtl: '30m' })).toThrow();
});
});

describe('resolveEndpointType', () => {
describe('non-agents endpoints', () => {
it('returns the config type for a custom endpoint', () => {
1 change: 1 addition & 0 deletions packages/data-provider/src/config.ts
@@ -391,6 +391,7 @@ export const bedrockEndpointSchema = baseEndpointSchema.merge(
availableRegions: z.array(z.string()).optional(),
models: z.array(z.string()).optional(),
inferenceProfiles: z.record(z.string(), z.string()).optional(),
promptCacheTtl: z.enum(['5m', '1h']).optional(),
}),
);

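
Note: a quick Zod sketch of the new validation, mirroring the config.spec.ts tests above; only the relevant slice of bedrockEndpointSchema is re-declared here for illustration.

import { z } from 'zod';

const bedrockEndpointSlice = z.object({
  models: z.array(z.string()).optional(),
  promptCacheTtl: z.enum(['5m', '1h']).optional(),
});

bedrockEndpointSlice.parse({ promptCacheTtl: '1h' }); // ok
bedrockEndpointSlice.parse({}); // ok: TTL stays unset, so Bedrock's 5-minute default applies
// bedrockEndpointSlice.parse({ promptCacheTtl: '30m' }); // throws: only '5m' and '1h' are accepted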
3 changes: 3 additions & 0 deletions packages/data-provider/src/schemas.ts
@@ -800,6 +800,7 @@ export const tConversationSchema = z.object({
max_tokens: coerceNumber.optional(),
/* Anthropic */
promptCache: z.boolean().optional(),
promptCacheTtl: z.enum(['5m', '1h']).optional(),
Comment on lines 802 to +803 (Copilot AI, Apr 29, 2026):

promptCacheTtl is documented as part of the /* Anthropic */ block, but this TTL is Bedrock-specific (per the PR description and its usage in bedrockInputParser). This comment/section placement is misleading; please either move promptCacheTtl to the AWS Bedrock section of the schema or update the comment to reflect Bedrock usage.

Suggested change, from:
    promptCache: z.boolean().optional(),
    promptCacheTtl: z.enum(['5m', '1h']).optional(),
to:
    promptCache: z.boolean().optional(),
    /* AWS Bedrock */
    promptCacheTtl: z.enum(['5m', '1h']).optional(),
    /* Anthropic */
system: z.string().optional(),
thinking: z.boolean().optional(),
thinkingBudget: coerceNumber.optional(),
@@ -950,6 +951,8 @@ export const tQueryParamsSchema = tConversationSchema
maxOutputTokens: true,
/** @endpoints anthropic */
promptCache: true,
/** @endpoints bedrock */
promptCacheTtl: true,
thinking: true,
thinkingBudget: true,
thinkingLevel: true,
1 change: 1 addition & 0 deletions packages/data-provider/src/types.ts
@@ -51,6 +51,7 @@ export type TEndpointOption = Pick<
| 'additionalModelRequestFields'
// Anthropic-specific
| 'promptCache'
| 'promptCacheTtl'
Comment on lines 52 to +54 (Copilot AI, Apr 29, 2026):

promptCacheTtl is listed under the // Anthropic-specific grouping, but it's Bedrock-only (whereas promptCache applies to both Anthropic and Bedrock). Please adjust the grouping/comment so future readers don't assume this option is supported by the Anthropic endpoint.

Suggested change, from:
    // Anthropic-specific
    | 'promptCache'
    | 'promptCacheTtl'
to:
    // Anthropic/Bedrock
    | 'promptCache'
    // Bedrock-specific
    | 'promptCacheTtl'
    // Anthropic-specific
| 'thinking'
| 'thinkingBudget'
| 'thinkingLevel'
4 changes: 4 additions & 0 deletions packages/data-schemas/src/schema/defaults.ts
@@ -77,6 +77,10 @@ export const conversationPreset = {
promptCache: {
type: Boolean,
},
promptCacheTtl: {
type: String,
enum: ['5m', '1h'],
},
thinking: {
type: Boolean,
},
1 change: 1 addition & 0 deletions packages/data-schemas/src/schema/preset.ts
@@ -28,6 +28,7 @@ export interface IPreset extends Document {
file_ids?: string[];
resendImages?: boolean;
promptCache?: boolean;
promptCacheTtl?: '5m' | '1h';
thinking?: boolean;
thinkingBudget?: number;
effort?: string;
1 change: 1 addition & 0 deletions packages/data-schemas/src/types/convo.ts
@@ -26,6 +26,7 @@ export interface IConversation extends Document {
file_ids?: string[];
resendImages?: boolean;
promptCache?: boolean;
promptCacheTtl?: '5m' | '1h';
thinking?: boolean;
thinkingBudget?: number;
effort?: string;