From ac40e231e9b0274fb8c26152059cae1d1460388f Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 09:32:27 +0100 Subject: [PATCH 01/23] feat: Add Scaleway custom endpoint support - Add Scaleway to RECOGNIZED_PROVIDERS for improved MCP content formatting - Add Scaleway detection for proper usage field handling (streamUsage: false, usage: true) - Scaleway uses standard OpenAI reasoning_content format, no special handling needed Scaleway custom endpoints are identified by endpoint name or baseURL containing 'scaleway' or 'api.scaleway.ai'. --- packages/api/src/agents/run.ts | 31 ++++++++++++++++++++++++++++++- packages/api/src/mcp/parsers.ts | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 6b18c73799e9..2c22c1a86ab2 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -21,12 +21,30 @@ const customProviders = new Set([ KnownEndpoints.ollama, ]); +/** + * Check if the endpoint is Scaleway. + * Scaleway custom endpoints are identified by endpoint name or baseURL containing 'scaleway' or 'api.scaleway.ai'. + */ +function isScalewayEndpoint( + provider?: string, + endpoint?: string | null, + baseURL?: string | null, +): boolean { + return ( + provider?.toLowerCase().includes('scaleway') || + endpoint?.toLowerCase().includes('scaleway') || + baseURL?.toLowerCase().includes('scaleway') || + baseURL?.toLowerCase().includes('api.scaleway.ai') + ); +} + export function getReasoningKey( provider: Providers, llmConfig: t.RunLLMConfig, agentEndpoint?: string | null, ): 'reasoning_content' | 'reasoning' { let reasoningKey: 'reasoning_content' | 'reasoning' = 'reasoning_content'; + if (provider === Providers.GOOGLE) { reasoningKey = 'reasoning'; } else if ( @@ -40,6 +58,9 @@ export function getReasoningKey( ) { reasoningKey = 'reasoning'; } + // Scaleway uses reasoning_content format (standard OpenAI format) + // No special handling needed - reasoning_content is the default + return reasoningKey; } @@ -127,9 +148,17 @@ export async function createRun({ } /** Resolves issues with new OpenAI usage field */ + // Scaleway custom endpoints should be treated like other custom providers for usage handling + const isScaleway = isScalewayEndpoint( + agent.provider, + agent.endpoint, + llmConfig.configuration?.baseURL, + ); + if ( customProviders.has(agent.provider) || - (agent.provider === Providers.OPENAI && agent.endpoint !== agent.provider) + (agent.provider === Providers.OPENAI && agent.endpoint !== agent.provider) || + isScaleway ) { llmConfig.streamUsage = false; llmConfig.usage = true; diff --git a/packages/api/src/mcp/parsers.ts b/packages/api/src/mcp/parsers.ts index 76e59b2e9cf1..b1ff790e393e 100644 --- a/packages/api/src/mcp/parsers.ts +++ b/packages/api/src/mcp/parsers.ts @@ -17,6 +17,7 @@ const RECOGNIZED_PROVIDERS = new Set([ 'deepseek', 'ollama', 'bedrock', + 'scaleway', ]); const CONTENT_ARRAY_PROVIDERS = new Set(['google', 'anthropic', 'azureopenai', 'openai']); From 3f8347b27d97c7b6c358db6ce3336052fcd2498c Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 09:32:30 +0100 Subject: [PATCH 02/23] fix: Improve usage metadata extraction from LangChain responses LangChain may store usage data in response_metadata.usage instead of usage_metadata. This change checks both locations and converts LangChain format to the expected format when token data is present. This improves compatibility with custom endpoints that use LangChain internally. 
--- api/server/controllers/agents/callbacks.js | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 0d2a7bc31760..02de02582596 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -82,7 +82,21 @@ class ModelEndHandler { await handleToolCalls(toolCalls, metadata, graph); } - const usage = data?.output?.usage_metadata; + // Try multiple locations for usage data + // LangChain may store usage in response_metadata.usage instead of usage_metadata + let usage = data?.output?.usage_metadata; + if (!usage && data?.output?.response_metadata?.usage) { + const responseUsage = data.output.response_metadata.usage; + // Convert LangChain format to expected format if it contains token data + if (responseUsage && (responseUsage.input_tokens !== undefined || responseUsage.output_tokens !== undefined)) { + usage = { + input_tokens: responseUsage.input_tokens, + output_tokens: responseUsage.output_tokens, + total_tokens: (responseUsage.input_tokens || 0) + (responseUsage.output_tokens || 0), + }; + } + } + if (!usage) { return this.finalize(errorMessage); } From d735df039a2262f93af8d083d4ddc8f3f347f14c Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 09:37:12 +0100 Subject: [PATCH 03/23] feat: Add Scaleway custom endpoint support - Add Scaleway to RECOGNIZED_PROVIDERS for improved MCP content formatting - Add Scaleway detection for proper usage field handling (streamUsage: false, usage: true) - Scaleway uses standard OpenAI reasoning_content format, no special handling needed Scaleway custom endpoints are identified by endpoint name or baseURL containing 'scaleway' or 'api.scaleway.ai'. --- packages/api/src/agents/run.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 2c22c1a86ab2..3f0c949938c4 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -58,8 +58,6 @@ export function getReasoningKey( ) { reasoningKey = 'reasoning'; } - // Scaleway uses reasoning_content format (standard OpenAI format) - // No special handling needed - reasoning_content is the default return reasoningKey; } From c5d6c43ae4b1007b206fe38528f6d317008afd05 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 09:37:12 +0100 Subject: [PATCH 04/23] feat: Improve support for OpenAI-compatible custom endpoints - Generalize custom endpoint detection for usage field handling - Replace provider-specific checks with generic isCustomOpenAIEndpoint function - Automatically handles all custom endpoints (provider=OPENAI but endpoint name differs) - Removes need for explicit provider additions - Improve MCP content formatting for custom endpoints - Add isRecognizedProvider helper function for clarity - Custom endpoints automatically recognized since they use 'openai' provider - Helps address MCP tool response formatting issues (LibreChat #11494) This change benefits all OpenAI-compatible custom endpoints, not just specific providers, making the codebase more maintainable and reducing the need for provider-specific additions. 
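A minimal sketch of the intended detection behavior (endpoint names are
illustrative; assumes Providers.OPENAI resolves to the string 'openai'):

    isCustomOpenAIEndpoint('openai', 'scaleway'); // true  - custom endpoint name
    isCustomOpenAIEndpoint('openai', 'openai');   // false - standard OpenAI
    isCustomOpenAIEndpoint('google', 'google');   // false - not an OpenAI provider
    isCustomOpenAIEndpoint('openai', null);       // false - no endpoint name
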
--- packages/api/src/agents/run.ts | 26 +++++++++++++++----------- packages/api/src/mcp/parsers.ts | 12 ++++++++++-- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 3f0c949938c4..cc4c3b523f60 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -22,19 +22,22 @@ const customProviders = new Set([ ]); /** - * Check if the endpoint is Scaleway. - * Scaleway custom endpoints are identified by endpoint name or baseURL containing 'scaleway' or 'api.scaleway.ai'. + * Check if the endpoint is a custom OpenAI-compatible endpoint. + * Custom endpoints are identified when provider is OPENAI but endpoint name differs, + * or when a custom baseURL is provided (indicating a custom endpoint configuration). */ -function isScalewayEndpoint( +function isCustomOpenAIEndpoint( provider?: string, endpoint?: string | null, baseURL?: string | null, ): boolean { + // Custom endpoints are typically configured with provider=OPENAI but different endpoint name + // or have a custom baseURL that differs from standard OpenAI endpoints return ( - provider?.toLowerCase().includes('scaleway') || - endpoint?.toLowerCase().includes('scaleway') || - baseURL?.toLowerCase().includes('scaleway') || - baseURL?.toLowerCase().includes('api.scaleway.ai') + provider === Providers.OPENAI && + endpoint != null && + endpoint !== provider && + endpoint !== Providers.OPENAI ); } @@ -146,8 +149,10 @@ export async function createRun({ } /** Resolves issues with new OpenAI usage field */ - // Scaleway custom endpoints should be treated like other custom providers for usage handling - const isScaleway = isScalewayEndpoint( + // Custom OpenAI-compatible endpoints should use non-streaming usage extraction + // This includes all custom endpoints (provider=OPENAI but endpoint name differs) + // and known custom providers that require special handling + const isCustomEndpoint = isCustomOpenAIEndpoint( agent.provider, agent.endpoint, llmConfig.configuration?.baseURL, @@ -155,8 +160,7 @@ export async function createRun({ if ( customProviders.has(agent.provider) || - (agent.provider === Providers.OPENAI && agent.endpoint !== agent.provider) || - isScaleway + isCustomEndpoint ) { llmConfig.streamUsage = false; llmConfig.usage = true; diff --git a/packages/api/src/mcp/parsers.ts b/packages/api/src/mcp/parsers.ts index b1ff790e393e..3838addaa01c 100644 --- a/packages/api/src/mcp/parsers.ts +++ b/packages/api/src/mcp/parsers.ts @@ -17,8 +17,16 @@ const RECOGNIZED_PROVIDERS = new Set([ 'deepseek', 'ollama', 'bedrock', - 'scaleway', ]); + +/** + * Check if a provider should receive structured content formatting for MCP tool responses. + * Custom OpenAI-compatible endpoints are automatically recognized since they use 'openai' as provider, + * which is already included in RECOGNIZED_PROVIDERS. 
+ */ +function isRecognizedProvider(provider: t.Provider): boolean { + return RECOGNIZED_PROVIDERS.has(provider); +} const CONTENT_ARRAY_PROVIDERS = new Set(['google', 'anthropic', 'azureopenai', 'openai']); const imageFormatters: Record = { @@ -94,7 +102,7 @@ export function formatToolContent( result: t.MCPToolCallResponse, provider: t.Provider, ): t.FormattedContentResult { - if (!RECOGNIZED_PROVIDERS.has(provider)) { + if (!isRecognizedProvider(provider)) { return [parseAsString(result), undefined]; } From 3166bccb59b14fb08420262a74b580e32c2e19ea Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 09:52:32 +0100 Subject: [PATCH 05/23] refactor: Simplify usage metadata extraction in ModelEndHandler - Removed redundant checks for usage data in LangChain responses, consolidating the logic to directly access usage_metadata. - This change streamlines the code and improves readability while maintaining functionality. --- api/server/controllers/agents/callbacks.js | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 02de02582596..0d2a7bc31760 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -82,21 +82,7 @@ class ModelEndHandler { await handleToolCalls(toolCalls, metadata, graph); } - // Try multiple locations for usage data - // LangChain may store usage in response_metadata.usage instead of usage_metadata - let usage = data?.output?.usage_metadata; - if (!usage && data?.output?.response_metadata?.usage) { - const responseUsage = data.output.response_metadata.usage; - // Convert LangChain format to expected format if it contains token data - if (responseUsage && (responseUsage.input_tokens !== undefined || responseUsage.output_tokens !== undefined)) { - usage = { - input_tokens: responseUsage.input_tokens, - output_tokens: responseUsage.output_tokens, - total_tokens: (responseUsage.input_tokens || 0) + (responseUsage.output_tokens || 0), - }; - } - } - + const usage = data?.output?.usage_metadata; if (!usage) { return this.finalize(errorMessage); } From 612b14c59bfee0aefa6e634abe084a8de029ef41 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 10:24:56 +0100 Subject: [PATCH 06/23] feat: Improve support for OpenAI-compatible custom endpoints - Add isCustomOpenAIEndpoint function to automatically detect custom endpoints for proper usage field handling (provider=OPENAI but endpoint name differs) - Add Scaleway to RECOGNIZED_PROVIDERS for MCP content formatting - Improves handling of MCP tool responses with structured content formatting This change benefits all OpenAI-compatible custom endpoints by automatically detecting them for usage field handling, while MCP formatting requires explicit provider additions since custom endpoints are passed with their endpoint name. --- packages/api/src/agents/run.ts | 8 +------- packages/api/src/mcp/parsers.ts | 4 ++-- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index cc4c3b523f60..051e188d5360 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -23,16 +23,13 @@ const customProviders = new Set([ /** * Check if the endpoint is a custom OpenAI-compatible endpoint. - * Custom endpoints are identified when provider is OPENAI but endpoint name differs, - * or when a custom baseURL is provided (indicating a custom endpoint configuration). 
+ * Custom endpoints are identified when provider is OPENAI but endpoint name differs. */ function isCustomOpenAIEndpoint( provider?: string, endpoint?: string | null, baseURL?: string | null, ): boolean { - // Custom endpoints are typically configured with provider=OPENAI but different endpoint name - // or have a custom baseURL that differs from standard OpenAI endpoints return ( provider === Providers.OPENAI && endpoint != null && @@ -149,9 +146,6 @@ export async function createRun({ } /** Resolves issues with new OpenAI usage field */ - // Custom OpenAI-compatible endpoints should use non-streaming usage extraction - // This includes all custom endpoints (provider=OPENAI but endpoint name differs) - // and known custom providers that require special handling const isCustomEndpoint = isCustomOpenAIEndpoint( agent.provider, agent.endpoint, diff --git a/packages/api/src/mcp/parsers.ts b/packages/api/src/mcp/parsers.ts index 3838addaa01c..87fdb6f83619 100644 --- a/packages/api/src/mcp/parsers.ts +++ b/packages/api/src/mcp/parsers.ts @@ -17,12 +17,12 @@ const RECOGNIZED_PROVIDERS = new Set([ 'deepseek', 'ollama', 'bedrock', + 'scaleway', ]); /** * Check if a provider should receive structured content formatting for MCP tool responses. - * Custom OpenAI-compatible endpoints are automatically recognized since they use 'openai' as provider, - * which is already included in RECOGNIZED_PROVIDERS. + * Custom endpoints are passed with their endpoint name, so they need to be explicitly added to RECOGNIZED_PROVIDERS. */ function isRecognizedProvider(provider: t.Provider): boolean { return RECOGNIZED_PROVIDERS.has(provider); From 4942cf20b775b256cbb2d72edb99fbd150d408c3 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 10:24:56 +0100 Subject: [PATCH 07/23] feat: Improve support for OpenAI-compatible custom endpoints - Add isCustomOpenAIEndpoint function to automatically detect custom endpoints for proper usage field handling (provider=OPENAI but endpoint name differs) - Add Scaleway to RECOGNIZED_PROVIDERS for MCP content formatting - Improves handling of MCP tool responses with structured content formatting This change benefits all OpenAI-compatible custom endpoints by automatically detecting them for usage field handling, while MCP formatting requires explicit provider additions since custom endpoints are passed with their endpoint name. --- packages/api/src/agents/run.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 051e188d5360..1d36ca84b3e8 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -28,7 +28,6 @@ const customProviders = new Set([ function isCustomOpenAIEndpoint( provider?: string, endpoint?: string | null, - baseURL?: string | null, ): boolean { return ( provider === Providers.OPENAI && @@ -149,7 +148,6 @@ export async function createRun({ const isCustomEndpoint = isCustomOpenAIEndpoint( agent.provider, agent.endpoint, - llmConfig.configuration?.baseURL, ); if ( From dc1ee4dbe72b534b0062034f9446f08fc43c9e9e Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 13:02:07 +0100 Subject: [PATCH 08/23] feat: add vision capability flag to modelSpecs Add `vision` boolean field to modelSpecs configuration to explicitly declare model vision support. This enables proper filtering of image artifacts for non-vision models and UI gating for image upload options. 
- Add vision field to TModelSpec type/schema - Extend validateVisionModel() to check modelSpecs first - Pass modelSpecs from API to agents package - Update UI components to use vision capability check --- api/server/controllers/agents/client.js | 1 + .../Chat/Input/Files/AttachFileMenu.tsx | 62 +++++++++++++------ .../Chat/Input/Files/DragDropModal.tsx | 23 +++++-- client/vite.config.ts | 11 ++-- config/create-user.js | 8 ++- packages/api/src/agents/run.ts | 16 ++++- .../api/src/tools/toolkits/imageContext.ts | 1 - packages/api/src/utils/env.spec.ts | 7 +-- packages/api/src/utils/env.ts | 10 +-- packages/data-provider/src/config.ts | 27 +++++++- packages/data-provider/src/models.ts | 2 + src/tests/oidc-integration.test.ts | 46 ++++++++------ 12 files changed, 150 insertions(+), 64 deletions(-) diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 35cf7de784fe..9f7719454439 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -1035,6 +1035,7 @@ class AgentClient extends BaseClient { requestBody: config.configurable.requestBody, user: createSafeUser(this.options.req?.user), tokenCounter: createTokenCounter(this.getEncoding()), + modelSpecs: appConfig.modelSpecs, }); if (!run) { diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx index 218328b0864d..cb3d2fede55f 100644 --- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx @@ -14,6 +14,7 @@ import { EModelEndpoint, defaultAgentCapabilities, isDocumentSupportedProvider, + validateVisionModel, } from 'librechat-data-provider'; import { FileUpload, @@ -36,6 +37,7 @@ import { useGetStartupConfig } from '~/data-provider'; import { ephemeralAgentByConvoId } from '~/store'; import { MenuItemProps } from '~/common'; import { cn } from '~/utils'; +import { useChatContext } from '~/Providers'; type FileUploadType = 'image' | 'document' | 'image_document' | 'image_document_video_audio'; @@ -73,8 +75,20 @@ const AttachFileMenu = ({ const { agentsConfig } = useGetAgentsConfig(); const { data: startupConfig } = useGetStartupConfig(); + const { conversation } = useChatContext(); const sharePointEnabled = startupConfig?.sharePointFilePickerEnabled; + const isVisionModel = useMemo(() => { + const model = conversation?.model; + if (!model) { + return false; + } + return validateVisionModel({ + model, + modelSpecs: startupConfig?.modelSpecs, + }); + }, [conversation?.model, startupConfig?.modelSpecs]); + const [isSharePointDialogOpen, setIsSharePointDialogOpen] = useState(false); /** TODO: Ephemeral Agent Capabilities @@ -127,27 +141,34 @@ const AttachFileMenu = ({ isDocumentSupportedProvider(currentProvider) || isAzureWithResponsesApi ) { - items.push({ - label: localize('com_ui_upload_provider'), - onClick: () => { - setToolResource(undefined); - let fileType: Exclude = 'image_document'; - if (currentProvider === Providers.GOOGLE || currentProvider === Providers.OPENROUTER) { - fileType = 'image_document_video_audio'; - } - onAction(fileType); - }, - icon: , - }); + if (isVisionModel) { + items.push({ + label: localize('com_ui_upload_provider'), + onClick: () => { + setToolResource(undefined); + let fileType: Exclude = 'image_document'; + if ( + currentProvider === Providers.GOOGLE || + currentProvider === Providers.OPENROUTER + ) { + fileType = 'image_document_video_audio'; + } + onAction(fileType); + }, + icon: , 
+ }); + } } else { - items.push({ - label: localize('com_ui_upload_image_input'), - onClick: () => { - setToolResource(undefined); - onAction('image'); - }, - icon: , - }); + if (isVisionModel) { + items.push({ + label: localize('com_ui_upload_image_input'), + onClick: () => { + setToolResource(undefined); + onAction('image'); + }, + icon: , + }); + } } if (capabilities.contextEnabled) { @@ -224,6 +245,7 @@ const AttachFileMenu = ({ codeAllowedByAgent, fileSearchAllowedByAgent, setIsSharePointDialogOpen, + isVisionModel, ]); const menuTrigger = ( diff --git a/client/src/components/Chat/Input/Files/DragDropModal.tsx b/client/src/components/Chat/Input/Files/DragDropModal.tsx index a59a7e3e9d61..baf31a92acc7 100644 --- a/client/src/components/Chat/Input/Files/DragDropModal.tsx +++ b/client/src/components/Chat/Input/Files/DragDropModal.tsx @@ -8,6 +8,7 @@ import { EModelEndpoint, defaultAgentCapabilities, isDocumentSupportedProvider, + validateVisionModel, } from 'librechat-data-provider'; import { ImageUpIcon, @@ -23,7 +24,8 @@ import { useLocalize, } from '~/hooks'; import { ephemeralAgentByConvoId } from '~/store'; -import { useDragDropContext } from '~/Providers'; +import { useDragDropContext, useChatContext } from '~/Providers'; +import { useGetStartupConfig } from '~/data-provider'; interface DragDropModalProps { onOptionSelect: (option: EToolResources | undefined) => void; @@ -48,12 +50,25 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD * */ const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); const { conversationId, agentId, endpoint, endpointType, useResponsesApi } = useDragDropContext(); + const { conversation } = useChatContext(); + const { data: startupConfig } = useGetStartupConfig(); const ephemeralAgent = useRecoilValue(ephemeralAgentByConvoId(conversationId ?? 
'')); const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions( agentId, ephemeralAgent, ); + const isVisionModel = useMemo(() => { + const model = conversation?.model; + if (!model) { + return false; + } + return validateVisionModel({ + model, + modelSpecs: startupConfig?.modelSpecs, + }); + }, [conversation?.model, startupConfig?.modelSpecs]); + const options = useMemo(() => { const _options: FileOption[] = []; let currentProvider = provider || endpoint; @@ -96,15 +111,14 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD label: localize('com_ui_upload_provider'), value: undefined, icon: , - condition: validFileTypes, + condition: validFileTypes && isVisionModel, }); } else { - // Only show image upload option if all files are images and provider doesn't support documents _options.push({ label: localize('com_ui_upload_image_input'), value: undefined, icon: , - condition: files.every((file) => getFileType(file)?.startsWith('image/')), + condition: files.every((file) => getFileType(file)?.startsWith('image/')) && isVisionModel, }); } if (capabilities.fileSearchEnabled && fileSearchAllowedByAgent) { @@ -140,6 +154,7 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD useResponsesApi, codeAllowedByAgent, fileSearchAllowedByAgent, + isVisionModel, ]); if (!isVisible) { diff --git a/client/vite.config.ts b/client/vite.config.ts index b3f6541ab3cb..beea46165e79 100644 --- a/client/vite.config.ts +++ b/client/vite.config.ts @@ -8,15 +8,18 @@ import { nodePolyfills } from 'vite-plugin-node-polyfills'; import { VitePWA } from 'vite-plugin-pwa'; // https://vitejs.dev/config/ -const backendPort = process.env.BACKEND_PORT && Number(process.env.BACKEND_PORT) || 3080; -const backendURL = process.env.HOST ? `http://${process.env.HOST}:${backendPort}` : `http://localhost:${backendPort}`; +const backendPort = (process.env.BACKEND_PORT && Number(process.env.BACKEND_PORT)) || 3080; +const backendURL = process.env.HOST + ? 
`http://${process.env.HOST}:${backendPort}` + : `http://localhost:${backendPort}`; export default defineConfig(({ command }) => ({ base: '', server: { - allowedHosts: process.env.VITE_ALLOWED_HOSTS && process.env.VITE_ALLOWED_HOSTS.split(',') || [], + allowedHosts: + (process.env.VITE_ALLOWED_HOSTS && process.env.VITE_ALLOWED_HOSTS.split(',')) || [], host: process.env.HOST || 'localhost', - port: process.env.PORT && Number(process.env.PORT) || 3090, + port: (process.env.PORT && Number(process.env.PORT)) || 3090, strictPort: false, proxy: { '/api': { diff --git a/config/create-user.js b/config/create-user.js index 3688d736e24c..2b7608c4a982 100644 --- a/config/create-user.js +++ b/config/create-user.js @@ -14,7 +14,9 @@ const connect = require('./connect'); console.purple('--------------------------'); if (process.argv.length < 5) { - console.orange('Usage: npm run create-user -- [--email-verified=false]'); + console.orange( + 'Usage: npm run create-user -- [--email-verified=false]', + ); console.orange('Note: if you do not pass in the arguments, you will be prompted for them.'); console.orange( 'If you really need to pass in the password, you can do so as the 4th argument (not recommended for security).', @@ -88,9 +90,9 @@ If \`n\`, and email service is configured, the user will be sent a verification If \`n\`, and email service is not configured, you must have the \`ALLOW_UNVERIFIED_EMAIL_LOGIN\` .env variable set to true, or the user will need to attempt logging in to have a verification link sent to them.`); - const normalizedEmailVerifiedInput = emailVerifiedInput.trim().toLowerCase() + const normalizedEmailVerifiedInput = emailVerifiedInput.trim().toLowerCase(); - emailVerified = true + emailVerified = true; if (normalizedEmailVerifiedInput === 'n') { emailVerified = false; diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 6b18c73799e9..cc410d95aba8 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -8,9 +8,10 @@ import type { GenericTool, RunConfig, IState, + ModelSpecsConfig, } from '@librechat/agents'; import type { IUser } from '@librechat/data-schemas'; -import type { Agent } from 'librechat-data-provider'; +import type { Agent, TSpecsConfig } from 'librechat-data-provider'; import type * as t from '~/types'; import { resolveHeaders, createSafeUser } from '~/utils/env'; @@ -71,6 +72,7 @@ export async function createRun({ tokenCounter, customHandlers, indexTokenCountMap, + modelSpecs, streaming = true, streamUsage = true, }: { @@ -81,6 +83,7 @@ export async function createRun({ streamUsage?: boolean; requestBody?: t.RequestBody; user?: IUser; + modelSpecs?: TSpecsConfig; } & Pick): Promise< Run > { @@ -154,10 +157,21 @@ export async function createRun({ buildAgentContext(agent); } + // Convert TSpecsConfig to ModelSpecsConfig (minimal type for agents package) + const convertedModelSpecs: ModelSpecsConfig | undefined = modelSpecs + ? { + list: modelSpecs.list.map((spec) => ({ + preset: spec.preset ? { model: spec.preset.model } : undefined, + vision: spec.vision, + })), + } + : undefined; + const graphConfig: RunConfig['graphConfig'] = { signal, agents: agentInputs, edges: agents[0].edges, + modelSpecs: convertedModelSpecs, }; if (agentInputs.length > 1 || ((graphConfig as MultiAgentGraphConfig).edges?.length ?? 
0) > 0) { diff --git a/packages/api/src/tools/toolkits/imageContext.ts b/packages/api/src/tools/toolkits/imageContext.ts index 0485ed815a52..723f1731044f 100644 --- a/packages/api/src/tools/toolkits/imageContext.ts +++ b/packages/api/src/tools/toolkits/imageContext.ts @@ -35,4 +35,3 @@ export function buildImageToolContext({ } return toolContext; } - diff --git a/packages/api/src/utils/env.spec.ts b/packages/api/src/utils/env.spec.ts index eec15c1c2555..62180d65dfc7 100644 --- a/packages/api/src/utils/env.spec.ts +++ b/packages/api/src/utils/env.spec.ts @@ -1,10 +1,5 @@ import { TokenExchangeMethodEnum } from 'librechat-data-provider'; -import { - resolveHeaders, - resolveNestedObject, - processMCPEnv, - encodeHeaderValue, -} from './env'; +import { resolveHeaders, resolveNestedObject, processMCPEnv, encodeHeaderValue } from './env'; import type { MCPOptions } from 'librechat-data-provider'; import type { IUser } from '@librechat/data-schemas'; import { Types } from 'mongoose'; diff --git a/packages/api/src/utils/env.ts b/packages/api/src/utils/env.ts index 5a8ea19ac301..dc93e769ec26 100644 --- a/packages/api/src/utils/env.ts +++ b/packages/api/src/utils/env.ts @@ -46,10 +46,10 @@ type SafeUser = Pick; * if (headerValue.startsWith('b64:')) { * const decoded = Buffer.from(headerValue.slice(4), 'base64').toString('utf8'); * } - * + * * @param value - The string value to encode * @returns ASCII-safe string (encoded if necessary) - * + * * @example * encodeHeaderValue("José") // Returns "José" (é = 233, safe) * encodeHeaderValue("Marić") // Returns "b64:TWFyacSH" (ć = 263, needs encoding) @@ -59,17 +59,17 @@ export function encodeHeaderValue(value: string): string { if (!value || typeof value !== 'string') { return ''; } - + // Check if string contains extended Unicode characters (> 255) // Characters 0-255 (ASCII + Latin-1) are safe and don't need encoding // Characters > 255 (e.g., ć=263, đ=272, ł=322) need Base64 encoding // eslint-disable-next-line no-control-regex const hasExtendedUnicode = /[^\u0000-\u00FF]/.test(value); - + if (!hasExtendedUnicode) { return value; // Safe to pass through } - + // Encode to Base64 for extended Unicode characters const base64 = Buffer.from(value, 'utf8').toString('base64'); return `b64:${base64}`; diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 45c964cbd8a2..c6d25179c969 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1283,27 +1283,52 @@ export enum VisionModes { agents = 'agents', } +/** + * Validates whether a model supports vision capabilities. + * Checks modelSpecs configuration first, then falls back to hardcoded list. 
+ * + * @param model - The model name to check + * @param additionalModels - Additional vision models to include in the check + * @param availableModels - List of available models (if provided, model must be in this list) + * @param modelSpecs - Optional modelSpecs configuration to check first + * @returns true if the model supports vision, false otherwise + */ export function validateVisionModel({ model, additionalModels = [], availableModels, + modelSpecs, }: { model: string; additionalModels?: string[]; availableModels?: string[]; -}) { + modelSpecs?: TSpecsConfig; +}): boolean { if (!model) { return false; } + // Exclude known non-vision models if (model.includes('gpt-4-turbo-preview') || model.includes('o1-mini')) { return false; } + // Check if model is in available models list if (availableModels && !availableModels.includes(model)) { return false; } + // Check modelSpecs first if provided + if (modelSpecs?.list) { + const matchingSpec = modelSpecs.list.find( + (spec) => spec.preset?.model === model || model.includes(spec.preset?.model ?? ''), + ); + if (matchingSpec && matchingSpec.vision !== undefined) { + return matchingSpec.vision === true; + } + } + + // Fall back to hardcoded visionModels list return visionModels.concat(additionalModels).some((visionModel) => model.includes(visionModel)); } diff --git a/packages/data-provider/src/models.ts b/packages/data-provider/src/models.ts index 3c3c19766015..7b0cc3483fdb 100644 --- a/packages/data-provider/src/models.ts +++ b/packages/data-provider/src/models.ts @@ -35,6 +35,7 @@ export type TModelSpec = { webSearch?: boolean; fileSearch?: boolean; executeCode?: boolean; + vision?: boolean; mcpServers?: string[]; }; @@ -54,6 +55,7 @@ export const tModelSpecSchema = z.object({ webSearch: z.boolean().optional(), fileSearch: z.boolean().optional(), executeCode: z.boolean().optional(), + vision: z.boolean().optional(), mcpServers: z.array(z.string()).optional(), }); diff --git a/src/tests/oidc-integration.test.ts b/src/tests/oidc-integration.test.ts index 2bbff4fd37ac..654f0e7df869 100644 --- a/src/tests/oidc-integration.test.ts +++ b/src/tests/oidc-integration.test.ts @@ -30,7 +30,8 @@ describe('OpenID Connect Federated Provider Token Integration', () => { openidId: 'cognito-user-123', federatedTokens: { access_token: 'cognito-access-token-123', - id_token: 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJjb2duaXRvLXVzZXItMTIzIiwiZW1haWwiOiJ0ZXN0QGV4YW1wbGUuY29tIiwibmFtZSI6IlRlc3QgVXNlciIsImV4cCI6MTcwMDAwMDAwMH0.fake-signature', + id_token: + 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJjb2duaXRvLXVzZXItMTIzIiwiZW1haWwiOiJ0ZXN0QGV4YW1wbGUuY29tIiwibmFtZSI6IlRlc3QgVXNlciIsImV4cCI6MTcwMDAwMDAwMH0.fake-signature', expires_at: Math.floor(Date.now() / 1000) + 3600, // Expires in 1 hour }, }; @@ -272,7 +273,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { describe('Integration with resolveHeaders', () => { it('should resolve OpenID Connect placeholders in headers for Cognito', () => { const headers = { - 'Authorization': '{{LIBRECHAT_OPENID_TOKEN}}', + Authorization: '{{LIBRECHAT_OPENID_TOKEN}}', 'X-User-ID': '{{LIBRECHAT_OPENID_USER_ID}}', 'X-User-Email': '{{LIBRECHAT_OPENID_USER_EMAIL}}', }; @@ -289,7 +290,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { it('should work with Bearer token format for Cognito', () => { const headers = { - 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', }; const resolvedHeaders = resolveHeaders({ 
@@ -302,7 +303,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { it('should work with specific access token placeholder', () => { const headers = { - 'Authorization': 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', + Authorization: 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', 'X-Cognito-ID-Token': '{{LIBRECHAT_OPENID_ID_TOKEN}}', }; @@ -312,7 +313,9 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }); expect(resolvedHeaders['Authorization']).toBe('Bearer cognito-access-token-123'); - expect(resolvedHeaders['X-Cognito-ID-Token']).toContain('eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9'); + expect(resolvedHeaders['X-Cognito-ID-Token']).toContain( + 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9', + ); }); }); @@ -322,9 +325,9 @@ describe('OpenID Connect Federated Provider Token Integration', () => { command: 'node', args: ['server.js'], env: { - 'COGNITO_ACCESS_TOKEN': '{{LIBRECHAT_OPENID_TOKEN}}', - 'USER_ID': '{{LIBRECHAT_OPENID_USER_ID}}', - 'USER_EMAIL': '{{LIBRECHAT_OPENID_USER_EMAIL}}', + COGNITO_ACCESS_TOKEN: '{{LIBRECHAT_OPENID_TOKEN}}', + USER_ID: '{{LIBRECHAT_OPENID_USER_ID}}', + USER_EMAIL: '{{LIBRECHAT_OPENID_USER_EMAIL}}', }, }; @@ -343,7 +346,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { type: 'sse' as const, url: 'https://api.example.com/mcp', headers: { - 'Authorization': 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', + Authorization: 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', 'X-Cognito-User-Info': '{{LIBRECHAT_OPENID_USER_EMAIL}}', 'X-Cognito-ID-Token': '{{LIBRECHAT_OPENID_ID_TOKEN}}', }, @@ -356,7 +359,9 @@ describe('OpenID Connect Federated Provider Token Integration', () => { expect(processedOptions.headers?.['Authorization']).toBe('Bearer cognito-access-token-123'); expect(processedOptions.headers?.['X-Cognito-User-Info']).toBe('test@example.com'); - expect(processedOptions.headers?.['X-Cognito-ID-Token']).toContain('eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9'); + expect(processedOptions.headers?.['X-Cognito-ID-Token']).toContain( + 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9', + ); }); it('should handle AWS-specific MCP server configuration', () => { @@ -364,9 +369,9 @@ describe('OpenID Connect Federated Provider Token Integration', () => { command: 'node', args: ['aws-mcp-server.js'], env: { - 'AWS_COGNITO_TOKEN': '{{LIBRECHAT_OPENID_ACCESS_TOKEN}}', - 'AWS_COGNITO_ID_TOKEN': '{{LIBRECHAT_OPENID_ID_TOKEN}}', - 'COGNITO_USER_SUB': '{{LIBRECHAT_OPENID_USER_ID}}', + AWS_COGNITO_TOKEN: '{{LIBRECHAT_OPENID_ACCESS_TOKEN}}', + AWS_COGNITO_ID_TOKEN: '{{LIBRECHAT_OPENID_ID_TOKEN}}', + COGNITO_USER_SUB: '{{LIBRECHAT_OPENID_USER_ID}}', }, }; @@ -376,7 +381,9 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }); expect(processedOptions.env?.['AWS_COGNITO_TOKEN']).toBe('cognito-access-token-123'); - expect(processedOptions.env?.['AWS_COGNITO_ID_TOKEN']).toContain('eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9'); + expect(processedOptions.env?.['AWS_COGNITO_ID_TOKEN']).toContain( + 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9', + ); expect(processedOptions.env?.['COGNITO_USER_SUB']).toBe('cognito-user-123'); }); }); @@ -384,7 +391,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { describe('Security and Edge Cases', () => { it('should not process OpenID Connect placeholders for expired tokens', () => { const headers = { - 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', }; const resolvedHeaders = resolveHeaders({ @@ -405,7 +412,7 @@ 
describe('OpenID Connect Federated Provider Token Integration', () => { }; const headers = { - 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', }; const resolvedHeaders = resolveHeaders({ @@ -418,7 +425,8 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }); it('should handle multiple placeholder instances in same string', () => { - const template = '{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_USER_ID}}'; + const template = + '{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_USER_ID}}'; const tokenInfo: OpenIDTokenInfo = { accessToken: 'cognito-token123', @@ -439,7 +447,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }; const headers = { - 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', }; const resolvedHeaders = resolveHeaders({ @@ -470,4 +478,4 @@ describe('OpenID Connect Federated Provider Token Integration', () => { expect(tokenInfo?.accessToken).toBe('federated-priority-token'); }); }); -}); \ No newline at end of file +}); From 488dd2d496869b4382d969a054d764456b62c34b Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 15:49:57 +0100 Subject: [PATCH 09/23] refactor: replace validateVisionModel usage with useVisionModel hook - Removed direct calls to validateVisionModel in AttachFileMenu and DragDropModal components. - Introduced useVisionModel hook to encapsulate vision model validation logic. - Updated imports to reflect the new hook usage, improving code modularity and readability. --- .../Chat/Input/Files/AttachFileMenu.tsx | 16 ++----------- .../Chat/Input/Files/DragDropModal.tsx | 19 +++------------ client/src/hooks/index.ts | 1 + client/src/hooks/useVisionModel.ts | 24 +++++++++++++++++++ packages/data-provider/src/config.ts | 8 +------ 5 files changed, 31 insertions(+), 37 deletions(-) create mode 100644 client/src/hooks/useVisionModel.ts diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx index cb3d2fede55f..a3ee7458a85b 100644 --- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx @@ -14,7 +14,6 @@ import { EModelEndpoint, defaultAgentCapabilities, isDocumentSupportedProvider, - validateVisionModel, } from 'librechat-data-provider'; import { FileUpload, @@ -37,7 +36,7 @@ import { useGetStartupConfig } from '~/data-provider'; import { ephemeralAgentByConvoId } from '~/store'; import { MenuItemProps } from '~/common'; import { cn } from '~/utils'; -import { useChatContext } from '~/Providers'; +import { useVisionModel } from '~/hooks'; type FileUploadType = 'image' | 'document' | 'image_document' | 'image_document_video_audio'; @@ -75,19 +74,8 @@ const AttachFileMenu = ({ const { agentsConfig } = useGetAgentsConfig(); const { data: startupConfig } = useGetStartupConfig(); - const { conversation } = useChatContext(); const sharePointEnabled = startupConfig?.sharePointFilePickerEnabled; - - const isVisionModel = useMemo(() => { - const model = conversation?.model; - if (!model) { - return false; - } - return validateVisionModel({ - model, - modelSpecs: startupConfig?.modelSpecs, - }); - }, [conversation?.model, startupConfig?.modelSpecs]); + const isVisionModel = useVisionModel(); const [isSharePointDialogOpen, setIsSharePointDialogOpen] = useState(false); diff --git 
a/client/src/components/Chat/Input/Files/DragDropModal.tsx b/client/src/components/Chat/Input/Files/DragDropModal.tsx index baf31a92acc7..c422c371a99b 100644 --- a/client/src/components/Chat/Input/Files/DragDropModal.tsx +++ b/client/src/components/Chat/Input/Files/DragDropModal.tsx @@ -8,7 +8,6 @@ import { EModelEndpoint, defaultAgentCapabilities, isDocumentSupportedProvider, - validateVisionModel, } from 'librechat-data-provider'; import { ImageUpIcon, @@ -24,8 +23,8 @@ import { useLocalize, } from '~/hooks'; import { ephemeralAgentByConvoId } from '~/store'; -import { useDragDropContext, useChatContext } from '~/Providers'; -import { useGetStartupConfig } from '~/data-provider'; +import { useDragDropContext } from '~/Providers'; +import { useVisionModel } from '~/hooks'; interface DragDropModalProps { onOptionSelect: (option: EToolResources | undefined) => void; @@ -50,24 +49,12 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD * */ const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); const { conversationId, agentId, endpoint, endpointType, useResponsesApi } = useDragDropContext(); - const { conversation } = useChatContext(); - const { data: startupConfig } = useGetStartupConfig(); const ephemeralAgent = useRecoilValue(ephemeralAgentByConvoId(conversationId ?? '')); const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions( agentId, ephemeralAgent, ); - - const isVisionModel = useMemo(() => { - const model = conversation?.model; - if (!model) { - return false; - } - return validateVisionModel({ - model, - modelSpecs: startupConfig?.modelSpecs, - }); - }, [conversation?.model, startupConfig?.modelSpecs]); + const isVisionModel = useVisionModel(); const options = useMemo(() => { const _options: FileOption[] = []; diff --git a/client/src/hooks/index.ts b/client/src/hooks/index.ts index 62682b84d8bb..6e1431421817 100644 --- a/client/src/hooks/index.ts +++ b/client/src/hooks/index.ts @@ -36,3 +36,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech'; export { default as useGenerationsByLatest } from './useGenerationsByLatest'; export { default as useLocalizedConfig } from './useLocalizedConfig'; export { default as useResourcePermissions } from './useResourcePermissions'; +export { useVisionModel } from './useVisionModel'; diff --git a/client/src/hooks/useVisionModel.ts b/client/src/hooks/useVisionModel.ts new file mode 100644 index 000000000000..61c8d74b91d0 --- /dev/null +++ b/client/src/hooks/useVisionModel.ts @@ -0,0 +1,24 @@ +import { useMemo } from 'react'; +import { validateVisionModel } from 'librechat-data-provider'; +import { useChatContext } from '~/Providers'; +import { useGetStartupConfig } from '~/data-provider'; + +/** + * Hook to determine if the current conversation model supports vision capabilities. + * Checks modelSpecs configuration first, then falls back to hardcoded list. 
+ */ +export function useVisionModel(): boolean { + const { conversation } = useChatContext(); + const { data: startupConfig } = useGetStartupConfig(); + + return useMemo(() => { + const model = conversation?.model; + if (!model) { + return false; + } + return validateVisionModel({ + model, + modelSpecs: startupConfig?.modelSpecs, + }); + }, [conversation?.model, startupConfig?.modelSpecs]); +} diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index c6d25179c969..bd4a3624dd48 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1286,12 +1286,6 @@ export enum VisionModes { /** * Validates whether a model supports vision capabilities. * Checks modelSpecs configuration first, then falls back to hardcoded list. - * - * @param model - The model name to check - * @param additionalModels - Additional vision models to include in the check - * @param availableModels - List of available models (if provided, model must be in this list) - * @param modelSpecs - Optional modelSpecs configuration to check first - * @returns true if the model supports vision, false otherwise */ export function validateVisionModel({ model, @@ -1323,7 +1317,7 @@ export function validateVisionModel({ const matchingSpec = modelSpecs.list.find( (spec) => spec.preset?.model === model || model.includes(spec.preset?.model ?? ''), ); - if (matchingSpec && matchingSpec.vision !== undefined) { + if (matchingSpec?.vision !== undefined) { return matchingSpec.vision === true; } } From 87f8d1706a24996f51c5356d0dd5ffd86e106488 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 19:00:06 +0100 Subject: [PATCH 10/23] refactor: remove modelSpecs from agents API (agent-specific concern) - Remove modelSpecs parameter from createRun() function - Remove modelSpecs conversion logic (handled by agent-level vision toggle) - Remove modelSpecs from createRun() call in client.js - This keeps PR 11501 focused on modelSpecs vision for UI gating only --- api/server/controllers/agents/client.js | 1 - packages/api/src/agents/run.ts | 14 +------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 9f7719454439..35cf7de784fe 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -1035,7 +1035,6 @@ class AgentClient extends BaseClient { requestBody: config.configurable.requestBody, user: createSafeUser(this.options.req?.user), tokenCounter: createTokenCounter(this.getEncoding()), - modelSpecs: appConfig.modelSpecs, }); if (!run) { diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index cc410d95aba8..781320cdc621 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -8,10 +8,9 @@ import type { GenericTool, RunConfig, IState, - ModelSpecsConfig, } from '@librechat/agents'; import type { IUser } from '@librechat/data-schemas'; -import type { Agent, TSpecsConfig } from 'librechat-data-provider'; +import type { Agent } from 'librechat-data-provider'; import type * as t from '~/types'; import { resolveHeaders, createSafeUser } from '~/utils/env'; @@ -157,21 +156,10 @@ export async function createRun({ buildAgentContext(agent); } - // Convert TSpecsConfig to ModelSpecsConfig (minimal type for agents package) - const convertedModelSpecs: ModelSpecsConfig | undefined = modelSpecs - ? { - list: modelSpecs.list.map((spec) => ({ - preset: spec.preset ? 
{ model: spec.preset.model } : undefined, - vision: spec.vision, - })), - } - : undefined; - const graphConfig: RunConfig['graphConfig'] = { signal, agents: agentInputs, edges: agents[0].edges, - modelSpecs: convertedModelSpecs, }; if (agentInputs.length > 1 || ((graphConfig as MultiAgentGraphConfig).edges?.length ?? 0) > 0) { From 5f8d81bf4ca50d88c1d9098879941a89dbd74e45 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 19:20:26 +0100 Subject: [PATCH 11/23] chore: Revert formatting changes --- client/vite.config.ts | 11 ++--- config/create-user.js | 8 ++-- .../api/src/tools/toolkits/imageContext.ts | 1 + packages/api/src/utils/env.spec.ts | 7 ++- packages/api/src/utils/env.ts | 10 ++-- src/tests/oidc-integration.test.ts | 46 ++++++++----------- 6 files changed, 38 insertions(+), 45 deletions(-) diff --git a/client/vite.config.ts b/client/vite.config.ts index beea46165e79..b3f6541ab3cb 100644 --- a/client/vite.config.ts +++ b/client/vite.config.ts @@ -8,18 +8,15 @@ import { nodePolyfills } from 'vite-plugin-node-polyfills'; import { VitePWA } from 'vite-plugin-pwa'; // https://vitejs.dev/config/ -const backendPort = (process.env.BACKEND_PORT && Number(process.env.BACKEND_PORT)) || 3080; -const backendURL = process.env.HOST - ? `http://${process.env.HOST}:${backendPort}` - : `http://localhost:${backendPort}`; +const backendPort = process.env.BACKEND_PORT && Number(process.env.BACKEND_PORT) || 3080; +const backendURL = process.env.HOST ? `http://${process.env.HOST}:${backendPort}` : `http://localhost:${backendPort}`; export default defineConfig(({ command }) => ({ base: '', server: { - allowedHosts: - (process.env.VITE_ALLOWED_HOSTS && process.env.VITE_ALLOWED_HOSTS.split(',')) || [], + allowedHosts: process.env.VITE_ALLOWED_HOSTS && process.env.VITE_ALLOWED_HOSTS.split(',') || [], host: process.env.HOST || 'localhost', - port: (process.env.PORT && Number(process.env.PORT)) || 3090, + port: process.env.PORT && Number(process.env.PORT) || 3090, strictPort: false, proxy: { '/api': { diff --git a/config/create-user.js b/config/create-user.js index 2b7608c4a982..3688d736e24c 100644 --- a/config/create-user.js +++ b/config/create-user.js @@ -14,9 +14,7 @@ const connect = require('./connect'); console.purple('--------------------------'); if (process.argv.length < 5) { - console.orange( - 'Usage: npm run create-user -- [--email-verified=false]', - ); + console.orange('Usage: npm run create-user -- [--email-verified=false]'); console.orange('Note: if you do not pass in the arguments, you will be prompted for them.'); console.orange( 'If you really need to pass in the password, you can do so as the 4th argument (not recommended for security).', @@ -90,9 +88,9 @@ If \`n\`, and email service is configured, the user will be sent a verification If \`n\`, and email service is not configured, you must have the \`ALLOW_UNVERIFIED_EMAIL_LOGIN\` .env variable set to true, or the user will need to attempt logging in to have a verification link sent to them.`); - const normalizedEmailVerifiedInput = emailVerifiedInput.trim().toLowerCase(); + const normalizedEmailVerifiedInput = emailVerifiedInput.trim().toLowerCase() - emailVerified = true; + emailVerified = true if (normalizedEmailVerifiedInput === 'n') { emailVerified = false; diff --git a/packages/api/src/tools/toolkits/imageContext.ts b/packages/api/src/tools/toolkits/imageContext.ts index 723f1731044f..0485ed815a52 100644 --- a/packages/api/src/tools/toolkits/imageContext.ts +++ b/packages/api/src/tools/toolkits/imageContext.ts @@ -35,3 
+35,4 @@ export function buildImageToolContext({ } return toolContext; } + diff --git a/packages/api/src/utils/env.spec.ts b/packages/api/src/utils/env.spec.ts index 62180d65dfc7..eec15c1c2555 100644 --- a/packages/api/src/utils/env.spec.ts +++ b/packages/api/src/utils/env.spec.ts @@ -1,5 +1,10 @@ import { TokenExchangeMethodEnum } from 'librechat-data-provider'; -import { resolveHeaders, resolveNestedObject, processMCPEnv, encodeHeaderValue } from './env'; +import { + resolveHeaders, + resolveNestedObject, + processMCPEnv, + encodeHeaderValue, +} from './env'; import type { MCPOptions } from 'librechat-data-provider'; import type { IUser } from '@librechat/data-schemas'; import { Types } from 'mongoose'; diff --git a/packages/api/src/utils/env.ts b/packages/api/src/utils/env.ts index dc93e769ec26..5a8ea19ac301 100644 --- a/packages/api/src/utils/env.ts +++ b/packages/api/src/utils/env.ts @@ -46,10 +46,10 @@ type SafeUser = Pick; * if (headerValue.startsWith('b64:')) { * const decoded = Buffer.from(headerValue.slice(4), 'base64').toString('utf8'); * } - * + * * @param value - The string value to encode * @returns ASCII-safe string (encoded if necessary) - * + * * @example * encodeHeaderValue("José") // Returns "José" (é = 233, safe) * encodeHeaderValue("Marić") // Returns "b64:TWFyacSH" (ć = 263, needs encoding) @@ -59,17 +59,17 @@ export function encodeHeaderValue(value: string): string { if (!value || typeof value !== 'string') { return ''; } - + // Check if string contains extended Unicode characters (> 255) // Characters 0-255 (ASCII + Latin-1) are safe and don't need encoding // Characters > 255 (e.g., ć=263, đ=272, ł=322) need Base64 encoding // eslint-disable-next-line no-control-regex const hasExtendedUnicode = /[^\u0000-\u00FF]/.test(value); - + if (!hasExtendedUnicode) { return value; // Safe to pass through } - + // Encode to Base64 for extended Unicode characters const base64 = Buffer.from(value, 'utf8').toString('base64'); return `b64:${base64}`; diff --git a/src/tests/oidc-integration.test.ts b/src/tests/oidc-integration.test.ts index 654f0e7df869..2bbff4fd37ac 100644 --- a/src/tests/oidc-integration.test.ts +++ b/src/tests/oidc-integration.test.ts @@ -30,8 +30,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { openidId: 'cognito-user-123', federatedTokens: { access_token: 'cognito-access-token-123', - id_token: - 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJjb2duaXRvLXVzZXItMTIzIiwiZW1haWwiOiJ0ZXN0QGV4YW1wbGUuY29tIiwibmFtZSI6IlRlc3QgVXNlciIsImV4cCI6MTcwMDAwMDAwMH0.fake-signature', + id_token: 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJjb2duaXRvLXVzZXItMTIzIiwiZW1haWwiOiJ0ZXN0QGV4YW1wbGUuY29tIiwibmFtZSI6IlRlc3QgVXNlciIsImV4cCI6MTcwMDAwMDAwMH0.fake-signature', expires_at: Math.floor(Date.now() / 1000) + 3600, // Expires in 1 hour }, }; @@ -273,7 +272,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { describe('Integration with resolveHeaders', () => { it('should resolve OpenID Connect placeholders in headers for Cognito', () => { const headers = { - Authorization: '{{LIBRECHAT_OPENID_TOKEN}}', + 'Authorization': '{{LIBRECHAT_OPENID_TOKEN}}', 'X-User-ID': '{{LIBRECHAT_OPENID_USER_ID}}', 'X-User-Email': '{{LIBRECHAT_OPENID_USER_EMAIL}}', }; @@ -290,7 +289,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { it('should work with Bearer token format for Cognito', () => { const headers = { - Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', 
}; const resolvedHeaders = resolveHeaders({ @@ -303,7 +302,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { it('should work with specific access token placeholder', () => { const headers = { - Authorization: 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', + 'Authorization': 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', 'X-Cognito-ID-Token': '{{LIBRECHAT_OPENID_ID_TOKEN}}', }; @@ -313,9 +312,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }); expect(resolvedHeaders['Authorization']).toBe('Bearer cognito-access-token-123'); - expect(resolvedHeaders['X-Cognito-ID-Token']).toContain( - 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9', - ); + expect(resolvedHeaders['X-Cognito-ID-Token']).toContain('eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9'); }); }); @@ -325,9 +322,9 @@ describe('OpenID Connect Federated Provider Token Integration', () => { command: 'node', args: ['server.js'], env: { - COGNITO_ACCESS_TOKEN: '{{LIBRECHAT_OPENID_TOKEN}}', - USER_ID: '{{LIBRECHAT_OPENID_USER_ID}}', - USER_EMAIL: '{{LIBRECHAT_OPENID_USER_EMAIL}}', + 'COGNITO_ACCESS_TOKEN': '{{LIBRECHAT_OPENID_TOKEN}}', + 'USER_ID': '{{LIBRECHAT_OPENID_USER_ID}}', + 'USER_EMAIL': '{{LIBRECHAT_OPENID_USER_EMAIL}}', }, }; @@ -346,7 +343,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { type: 'sse' as const, url: 'https://api.example.com/mcp', headers: { - Authorization: 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', + 'Authorization': 'Bearer {{LIBRECHAT_OPENID_ACCESS_TOKEN}}', 'X-Cognito-User-Info': '{{LIBRECHAT_OPENID_USER_EMAIL}}', 'X-Cognito-ID-Token': '{{LIBRECHAT_OPENID_ID_TOKEN}}', }, @@ -359,9 +356,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { expect(processedOptions.headers?.['Authorization']).toBe('Bearer cognito-access-token-123'); expect(processedOptions.headers?.['X-Cognito-User-Info']).toBe('test@example.com'); - expect(processedOptions.headers?.['X-Cognito-ID-Token']).toContain( - 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9', - ); + expect(processedOptions.headers?.['X-Cognito-ID-Token']).toContain('eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9'); }); it('should handle AWS-specific MCP server configuration', () => { @@ -369,9 +364,9 @@ describe('OpenID Connect Federated Provider Token Integration', () => { command: 'node', args: ['aws-mcp-server.js'], env: { - AWS_COGNITO_TOKEN: '{{LIBRECHAT_OPENID_ACCESS_TOKEN}}', - AWS_COGNITO_ID_TOKEN: '{{LIBRECHAT_OPENID_ID_TOKEN}}', - COGNITO_USER_SUB: '{{LIBRECHAT_OPENID_USER_ID}}', + 'AWS_COGNITO_TOKEN': '{{LIBRECHAT_OPENID_ACCESS_TOKEN}}', + 'AWS_COGNITO_ID_TOKEN': '{{LIBRECHAT_OPENID_ID_TOKEN}}', + 'COGNITO_USER_SUB': '{{LIBRECHAT_OPENID_USER_ID}}', }, }; @@ -381,9 +376,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }); expect(processedOptions.env?.['AWS_COGNITO_TOKEN']).toBe('cognito-access-token-123'); - expect(processedOptions.env?.['AWS_COGNITO_ID_TOKEN']).toContain( - 'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9', - ); + expect(processedOptions.env?.['AWS_COGNITO_ID_TOKEN']).toContain('eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9'); expect(processedOptions.env?.['COGNITO_USER_SUB']).toBe('cognito-user-123'); }); }); @@ -391,7 +384,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { describe('Security and Edge Cases', () => { it('should not process OpenID Connect placeholders for expired tokens', () => { const headers = { - Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', }; const resolvedHeaders = 
resolveHeaders({ @@ -412,7 +405,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }; const headers = { - Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', }; const resolvedHeaders = resolveHeaders({ @@ -425,8 +418,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }); it('should handle multiple placeholder instances in same string', () => { - const template = - '{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_USER_ID}}'; + const template = '{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_TOKEN}}-{{LIBRECHAT_OPENID_USER_ID}}'; const tokenInfo: OpenIDTokenInfo = { accessToken: 'cognito-token123', @@ -447,7 +439,7 @@ describe('OpenID Connect Federated Provider Token Integration', () => { }; const headers = { - Authorization: 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', + 'Authorization': 'Bearer {{LIBRECHAT_OPENID_TOKEN}}', }; const resolvedHeaders = resolveHeaders({ @@ -478,4 +470,4 @@ describe('OpenID Connect Federated Provider Token Integration', () => { expect(tokenInfo?.accessToken).toBe('federated-priority-token'); }); }); -}); +}); \ No newline at end of file From ee4ee304de1eca2a623a0b343f8fe81817fa62de Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 20:43:18 +0100 Subject: [PATCH 12/23] feat: Auto-detect OpenAI-compatible endpoints for MCP content formatting Automatically recognize and format MCP tool responses for all OpenAI-compatible custom endpoints without requiring explicit additions. Uses a negative list (NON_OPENAI_PROVIDERS) instead of maintaining a positive list for each new provider. --- .../api/src/mcp/__tests__/parsers.test.ts | 66 +++++++++++++++++ packages/api/src/mcp/parsers.ts | 72 +++++++++++++++++-- 2 files changed, 131 insertions(+), 7 deletions(-) diff --git a/packages/api/src/mcp/__tests__/parsers.test.ts index dd9a09a0fb35..71e27b09ab79 100644 --- a/packages/api/src/mcp/__tests__/parsers.test.ts +++ b/packages/api/src/mcp/__tests__/parsers.test.ts @@ -29,6 +29,72 @@ describe('formatToolContent', () => { expect(content).toBe('(No response)'); expect(artifacts).toBeUndefined(); }); + + it('should use array format for content-array providers such as google', () => { + const result: t.MCPToolCallResponse = { + content: [{ type: 'text', text: 'Test content' }], + }; + const [content] = formatToolContent(result, 'google' as t.Provider); + // Google is recognized and uses array format, so this should be an array + expect(Array.isArray(content)).toBe(true); + }); + }); + + describe('automatic detection of OpenAI-compatible custom endpoints', () => { + it('should automatically recognize new OpenAI-compatible custom endpoints', () => { + const result: t.MCPToolCallResponse = { + content: [ + { type: 'text', text: 'First text' }, + { type: 'text', text: 'Second text' }, + ], + }; + + // Test with a custom endpoint that's not explicitly listed + const [content, artifacts] = formatToolContent(result, 'scaleway' as t.Provider); + // Should be recognized and use array format (OpenAI-compatible) + expect(Array.isArray(content)).toBe(true); + expect(content).toEqual([{ type: 'text', text: 'First text\n\nSecond text' }]); + expect(artifacts).toBeUndefined(); + }); + + it('should use array format for unknown OpenAI-compatible endpoints', () => { + const result: t.MCPToolCallResponse = { + content: [ + { type: 'text', text: 'Before image' }, + { type: 'image', data: 'base64data', mimeType: 'image/png' }, + { type: 'text', text: 'After image' }, + ], + }; + + // Test with another custom endpoint (e.g., together, perplexity, anyscale) + const [content, artifacts] = formatToolContent(result, 'together' as t.Provider); + // Should use array format like OpenAI + expect(Array.isArray(content)).toBe(true); + expect(content).toEqual([ + { type: 'text', text: 'Before image' }, + { type: 'text', text: 'After image' }, + ]); + expect(artifacts).toEqual({ + content: [ + { + type: 'image_url', + image_url: { url: 'data:image/png;base64,base64data' }, + }, + ], + }); + }); + + it('should use string format for known non-OpenAI providers', () => { + const result: t.MCPToolCallResponse = { + content: [{ type: 'text', text: 'Test content' }], + }; + + // Non-OpenAI providers should return string format + const [content, artifacts] = formatToolContent(result, 'bedrock' as t.Provider); + expect(typeof content).toBe('string'); + expect(content).toBe('Test content'); + expect(artifacts).toBeUndefined(); + }); }); describe('recognized providers - content array providers', () => { diff --git a/packages/api/src/mcp/parsers.ts index 87fdb6f83619..17ac406d02a4 100644 --- a/packages/api/src/mcp/parsers.ts +++ b/packages/api/src/mcp/parsers.ts @@ -7,6 +7,10 @@ function generateResourceId(text: string): string { return crypto.createHash('sha256').update(text).digest('hex').substring(0, 10); } + +// Known providers that are NOT OpenAI-compatible +// This is a small, stable list that rarely changes +const NON_OPENAI_PROVIDERS = new Set(['google', 'anthropic', 'bedrock', 'ollama']); + const RECOGNIZED_PROVIDERS = new Set([ 'google', 'anthropic', @@ -17,17 +21,71 @@ const RECOGNIZED_PROVIDERS = new Set([ 'deepseek', 'ollama', 'bedrock', - 'scaleway', + // Note: Custom OpenAI-compatible endpoints (like scaleway, together, perplexity, etc.) + // are automatically recognized if they're not in NON_OPENAI_PROVIDERS ]); + +// Known providers that use content array format (structured content blocks) +// These are the standard OpenAI-compatible providers plus Google and Anthropic +const CONTENT_ARRAY_PROVIDERS = new Set(['google', 'anthropic', 'azureopenai', 'openai']); + /** * Check if a provider should receive structured content formatting for MCP tool responses. - * Custom endpoints are passed with their endpoint name, so they need to be explicitly added to RECOGNIZED_PROVIDERS. + * + * Recognizes: + * 1. Explicitly listed providers in RECOGNIZED_PROVIDERS + * 2. Custom OpenAI-compatible endpoints (any provider not in NON_OPENAI_PROVIDERS) + * + * Custom endpoints are passed with their endpoint name (not "openai"), so we automatically + * detect them rather than requiring explicit additions for each new provider. */ function isRecognizedProvider(provider: t.Provider): boolean { - return RECOGNIZED_PROVIDERS.has(provider); + // Check explicit list first (for known providers) + if (RECOGNIZED_PROVIDERS.has(provider)) { + return true; + } + + // If not explicitly recognized and not a known non-OpenAI provider, + // assume it's an OpenAI-compatible custom endpoint + // This automatically supports all new OpenAI-compatible providers without code changes + if (!NON_OPENAI_PROVIDERS.has(provider)) { + return true; + } + + return false; +} + +/** + * Check if a provider uses content array format (structured content blocks). 
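+ * Illustrative, per the rules below: usesContentArrayFormat('openai') → true; + * usesContentArrayFormat('openrouter') → false; an unknown endpoint such as + * 'scaleway' → true, since it is absent from NON_OPENAI_PROVIDERS.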
+ * + * Uses array format: + * - Standard OpenAI-compatible providers (openai, azureopenai) + * - Google and Anthropic (native array format) + * - New unknown custom endpoints (assumed OpenAI-compatible, so use array format) + * + * Uses string format: + * - Known custom providers with special handling (openrouter, xai, deepseek) + * - Other non-OpenAI providers (ollama, bedrock) + */ +function usesContentArrayFormat(provider: t.Provider): boolean { + // Explicit array format providers + if (CONTENT_ARRAY_PROVIDERS.has(provider)) { + return true; + } + + // Known custom providers that use string format (despite being OpenAI-compatible) + if (['openrouter', 'xai', 'deepseek'].includes(provider)) { + return false; + } + + // Unknown providers: if not a known non-OpenAI provider, assume OpenAI-compatible + // and use array format (like standard OpenAI endpoints) + if (!NON_OPENAI_PROVIDERS.has(provider)) { + return true; + } + + return false; } -const CONTENT_ARRAY_PROVIDERS = new Set(['google', 'anthropic', 'azureopenai', 'openai']); const imageFormatters: Record = { // google: (item) => ({ @@ -131,7 +189,7 @@ export function formatToolContent( if (!isImageContent(item)) { return; } - if (CONTENT_ARRAY_PROVIDERS.has(provider) && currentTextBlock) { + if (usesContentArrayFormat(provider) && currentTextBlock) { formattedContent.push({ type: 'text', text: currentTextBlock }); currentTextBlock = ''; } @@ -204,7 +262,7 @@ UI Resource Markers Available: currentTextBlock += uiInstructions; } - if (CONTENT_ARRAY_PROVIDERS.has(provider) && currentTextBlock) { + if (usesContentArrayFormat(provider) && currentTextBlock) { formattedContent.push({ type: 'text', text: currentTextBlock }); } @@ -220,7 +278,7 @@ UI Resource Markers Available: }; } - if (CONTENT_ARRAY_PROVIDERS.has(provider)) { + if (usesContentArrayFormat(provider)) { return [formattedContent, artifacts]; } From da07dce564f55e59005f6d6961a65af7ed0bc080 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Sat, 24 Jan 2026 15:51:33 +0100 Subject: [PATCH 13/23] feat: add vision toggle for agents - Add vision to AgentCapabilities enum and default capabilities - Add vision?: boolean field to Agent type and validation schema - Add vision toggle UI component for agents with hover card and info description - Include vision in agent create/update payload - Pass vision from agent to AgentInputs in run API Depends on PR #11501 (modelSpecs vision) for validateVisionModel function --- client/src/common/agents-types.ts | 1 + .../SidePanel/Agents/AgentConfig.tsx | 22 ++++- .../SidePanel/Agents/AgentPanel.tsx | 2 + .../SidePanel/Agents/AgentSelect.tsx | 1 + .../SidePanel/Agents/ImageVision.tsx | 89 ++++++++++++------- .../src/hooks/Agents/useAgentCapabilities.ts | 7 ++ client/src/locales/en/translation.json | 2 + packages/api/src/agents/run.ts | 3 +- packages/api/src/agents/validation.ts | 1 + packages/data-provider/src/config.ts | 2 + .../data-provider/src/types/assistants.ts | 1 + 11 files changed, 96 insertions(+), 35 deletions(-) diff --git a/client/src/common/agents-types.ts b/client/src/common/agents-types.ts index 9ac6b440a397..5d43d99a8d7e 100644 --- a/client/src/common/agents-types.ts +++ b/client/src/common/agents-types.ts @@ -20,6 +20,7 @@ export type TAgentCapabilities = { [AgentCapabilities.web_search]: boolean; [AgentCapabilities.file_search]: boolean; [AgentCapabilities.execute_code]: boolean; + [AgentCapabilities.vision]: boolean; [AgentCapabilities.end_after_tools]?: boolean; [AgentCapabilities.hide_sequential_outputs]?: boolean; }; diff 
--git a/client/src/components/SidePanel/Agents/AgentConfig.tsx b/client/src/components/SidePanel/Agents/AgentConfig.tsx index a81ef780a9e3..f78e132f2e0d 100644 --- a/client/src/components/SidePanel/Agents/AgentConfig.tsx +++ b/client/src/components/SidePanel/Agents/AgentConfig.tsx @@ -1,7 +1,7 @@ import React, { useState, useMemo, useCallback } from 'react'; import { useToastContext } from '@librechat/client'; import { Controller, useWatch, useFormContext } from 'react-hook-form'; -import { EModelEndpoint, getEndpointField } from 'librechat-data-provider'; +import { EModelEndpoint, getEndpointField, defaultAgentCapabilities } from 'librechat-data-provider'; import type { AgentForm, IconComponentTypes } from '~/common'; import { removeFocusOutlines, @@ -29,6 +29,7 @@ import Artifacts from './Artifacts'; import AgentTool from './AgentTool'; import CodeForm from './Code/Form'; import MCPTools from './MCPTools'; +import ImageVision from './ImageVision'; const labelClass = 'mb-2 text-token-text-primary block font-medium'; const inputClass = cn( @@ -85,7 +86,8 @@ export default function AgentConfig() { artifactsEnabled, webSearchEnabled, fileSearchEnabled, - } = useAgentCapabilities(agentsConfig?.capabilities); + visionEnabled, + } = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); const context_files = useMemo(() => { if (typeof agent === 'string') { @@ -288,7 +290,8 @@ export default function AgentConfig() { fileSearchEnabled || artifactsEnabled || contextEnabled || - webSearchEnabled) && ( + webSearchEnabled || + visionEnabled) && (
)} {/* MCP Section */} diff --git a/client/src/components/SidePanel/Agents/AgentPanel.tsx b/client/src/components/SidePanel/Agents/AgentPanel.tsx index 86ec27dc5e1d..080d89c275fe 100644 --- a/client/src/components/SidePanel/Agents/AgentPanel.tsx +++ b/client/src/components/SidePanel/Agents/AgentPanel.tsx @@ -69,6 +69,7 @@ export function composeAgentUpdatePayload(data: AgentForm, agent_id?: string | n edges, end_after_tools, hide_sequential_outputs, + vision, recursion_limit, category, support_contact, @@ -94,6 +95,7 @@ export function composeAgentUpdatePayload(data: AgentForm, agent_id?: string | n edges, end_after_tools, hide_sequential_outputs, + vision, recursion_limit, category, support_contact, diff --git a/client/src/components/SidePanel/Agents/AgentSelect.tsx b/client/src/components/SidePanel/Agents/AgentSelect.tsx index 9a3ef387c9b9..345f6356c1ac 100644 --- a/client/src/components/SidePanel/Agents/AgentSelect.tsx +++ b/client/src/components/SidePanel/Agents/AgentSelect.tsx @@ -58,6 +58,7 @@ export default function AgentSelect({ [AgentCapabilities.web_search]: false, [AgentCapabilities.file_search]: false, [AgentCapabilities.execute_code]: false, + [AgentCapabilities.vision]: false, [AgentCapabilities.end_after_tools]: false, [AgentCapabilities.hide_sequential_outputs]: false, }; diff --git a/client/src/components/SidePanel/Agents/ImageVision.tsx b/client/src/components/SidePanel/Agents/ImageVision.tsx index bc4e1178966b..643f144ec89b 100644 --- a/client/src/components/SidePanel/Agents/ImageVision.tsx +++ b/client/src/components/SidePanel/Agents/ImageVision.tsx @@ -1,40 +1,69 @@ -import { Checkbox } from '@librechat/client'; -import { Capabilities } from 'librechat-data-provider'; +import { memo } from 'react'; +import { AgentCapabilities } from 'librechat-data-provider'; import { useFormContext, Controller } from 'react-hook-form'; +import { + Checkbox, + HoverCard, + HoverCardContent, + HoverCardPortal, + HoverCardTrigger, + CircleHelpIcon, +} from '@librechat/client'; import type { AgentForm } from '~/common'; import { useLocalize } from '~/hooks'; +import { ESide } from '~/common'; -export default function ImageVision() { +function ImageVision() { const localize = useLocalize(); const methods = useFormContext(); - const { control, setValue, getValues } = methods; + const { control } = methods; return ( -
[ImageVision.tsx render body lost in extraction: the JSX markup of this hunk was stripped. Recoverable from the surrounding diff: the old markup rendered a Controller-bound Checkbox for the vision capability; the new markup wraps that Checkbox and its localize('com_agents_enable_image_vision') label in a HoverCard, with a CircleHelpIcon trigger whose HoverCardContent shows {localize('com_agents_image_vision_info')}.]
); } + +export default memo(ImageVision); diff --git a/client/src/hooks/Agents/useAgentCapabilities.ts b/client/src/hooks/Agents/useAgentCapabilities.ts index 8d2bd6ef87ea..571bdfe175ed 100644 --- a/client/src/hooks/Agents/useAgentCapabilities.ts +++ b/client/src/hooks/Agents/useAgentCapabilities.ts @@ -10,6 +10,7 @@ interface AgentCapabilitiesResult { fileSearchEnabled: boolean; webSearchEnabled: boolean; codeEnabled: boolean; + visionEnabled: boolean; } export default function useAgentCapabilities( @@ -55,6 +56,11 @@ export default function useAgentCapabilities( [capabilities], ); + const visionEnabled = useMemo( + () => capabilities?.includes(AgentCapabilities.vision) ?? false, + [capabilities], + ); + return { ocrEnabled, codeEnabled, @@ -64,5 +70,6 @@ export default function useAgentCapabilities( artifactsEnabled, webSearchEnabled, fileSearchEnabled, + visionEnabled, }; } diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index addf32e08adb..8d1a7d5bff39 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -40,6 +40,7 @@ "com_agents_description_placeholder": "Optional: Describe your Agent here", "com_agents_empty_state_heading": "No agents found", "com_agents_enable_file_search": "Enable File Search", + "com_agents_enable_image_vision": "Enable Image Vision", "com_agents_error_bad_request_message": "The request could not be processed.", "com_agents_error_bad_request_suggestion": "Please check your input and try again.", "com_agents_error_category_title": "Category Error", @@ -68,6 +69,7 @@ "com_agents_file_context_label": "File Context", "com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.", "com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.", + "com_agents_image_vision_info": "When enabled, images generated by MCP tools (e.g., image generation tools) will be sent back to the LLM. Disable this for non-vision models to prevent context overflow errors.", "com_agents_grid_announcement": "Showing {{count}} agents in {{category}} category", "com_agents_instructions_placeholder": "The system instructions that the agent uses", "com_agents_link_copied": "Link copied", diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 6a58d16fbd67..e9b9eb55a5e5 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -89,7 +89,6 @@ export async function createRun({ tokenCounter, customHandlers, indexTokenCountMap, - modelSpecs, streaming = true, streamUsage = true, }: { @@ -100,7 +99,6 @@ export async function createRun({ streamUsage?: boolean; requestBody?: t.RequestBody; user?: IUser; - modelSpecs?: TSpecsConfig; } & Pick): Promise< Run > { @@ -171,6 +169,7 @@ export async function createRun({ instructions: systemContent, maxContextTokens: agent.maxContextTokens, useLegacyContent: agent.useLegacyContent ?? 
false, + vision: agent.vision, }; agentInputs.push(agentInput); }; diff --git a/packages/api/src/agents/validation.ts b/packages/api/src/agents/validation.ts index 4798ffeb80ae..d64fc11fe4cc 100644 --- a/packages/api/src/agents/validation.ts +++ b/packages/api/src/agents/validation.ts @@ -66,6 +66,7 @@ export const agentBaseSchema = z.object({ hide_sequential_outputs: z.boolean().optional(), artifacts: z.string().optional(), recursion_limit: z.number().optional(), + vision: z.boolean().optional(), conversation_starters: z.array(z.string()).optional(), tool_resources: agentToolResourcesSchema, support_contact: agentSupportContactSchema, diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index bd4a3624dd48..6709ff3f5368 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -186,6 +186,7 @@ export enum AgentCapabilities { tools = 'tools', chain = 'chain', ocr = 'ocr', + vision = 'vision', } export const defaultAssistantsVersion = { @@ -268,6 +269,7 @@ export const defaultAgentCapabilities = [ AgentCapabilities.tools, AgentCapabilities.chain, AgentCapabilities.ocr, + AgentCapabilities.vision, ]; export const agentsEndpointSchema = baseEndpointSchema diff --git a/packages/data-provider/src/types/assistants.ts b/packages/data-provider/src/types/assistants.ts index da773071e777..511543c35f19 100644 --- a/packages/data-provider/src/types/assistants.ts +++ b/packages/data-provider/src/types/assistants.ts @@ -236,6 +236,7 @@ export type Agent = { end_after_tools?: boolean; hide_sequential_outputs?: boolean; artifacts?: ArtifactModes; + vision?: boolean; recursion_limit?: number; isPublic?: boolean; version?: number; From a22a466e70d01aaf4710aaa84f3d3f41061d49f5 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Mon, 26 Jan 2026 16:10:38 +0100 Subject: [PATCH 14/23] feat: Enhance vision capabilities and artifact processing for agents - Implement automatic detection of vision capability based on model specifications. - Update agent configuration to auto-set vision based on model changes. - Introduce artifact processing for MCP tools, ensuring proper handling of image URLs and base64 data. - Refactor related components to utilize new vision validation logic and improve modularity. - Update UI elements to reflect changes in vision capability handling and provide clearer user guidance. 
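For reviewers, a condensed sketch of the resolution order this patch implements (illustrative only: resolveVision is not a name from the diff; the actual logic is determineVisionCapability in packages/api/src/agents/run.ts below):

    // Explicit per-agent override wins; otherwise auto-detect from the model id.
    function resolveVision(agent: Agent, modelSpecs?: TSpecsConfig): boolean {
      if (agent.vision !== undefined) {
        return agent.vision; // user toggled the checkbox explicitly
      }
      const model = agent.model_parameters?.model ?? agent.model;
      return model ? validateVisionModel({ model, modelSpecs }) : false;
    }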
--- api/models/Agent.js | 17 +- api/server/controllers/agents/callbacks.js | 3 + api/server/controllers/agents/client.js | 2 + api/server/services/AssistantService.js | 17 ++ api/server/services/MCP.js | 9 +- api/server/services/ToolService.js | 185 +++++++++++++++++- .../SidePanel/Agents/AgentConfig.tsx | 40 +++- .../SidePanel/Agents/AgentSelect.tsx | 28 ++- .../SidePanel/Agents/ImageVision.tsx | 5 +- client/src/hooks/index.ts | 2 +- client/src/hooks/useVisionModel.ts | 1 + client/src/locales/en/translation.json | 2 +- packages/api/src/agents/run.ts | 74 ++++--- packages/api/src/mcp/parsers.ts | 21 +- packages/api/src/utils/image-helpers.ts | 38 ++++ packages/api/src/utils/index.ts | 1 + packages/data-provider/src/config.ts | 12 +- 17 files changed, 408 insertions(+), 49 deletions(-) create mode 100644 packages/api/src/utils/image-helpers.ts diff --git a/api/models/Agent.js b/api/models/Agent.js index 11789ca63b05..29e436196e6b 100644 --- a/api/models/Agent.js +++ b/api/models/Agent.js @@ -476,9 +476,24 @@ const updateAgent = async (searchParameter, updateData, options = {}) => { versions: versionEntry, }; } + + // Merge directUpdates back into the agent document so the latest values are at the top level + // This ensures that when getAgent() is called, it returns the latest version's values + // Note: $push/$pull/$addToSet must come after direct updates in MongoDB + if (Object.keys(directUpdates).length > 0) { + // Preserve MongoDB operators while merging direct updates + const { $push: _preservedPush, $pull: _preservedPull, $addToSet: _preservedAddToSet, ...restUpdateData } = updateData; + updateData = { + ...directUpdates, + ...restUpdateData, + }; + if (_preservedPush) updateData.$push = _preservedPush; + if (_preservedPull) updateData.$pull = _preservedPull; + if (_preservedAddToSet) updateData.$addToSet = _preservedAddToSet; + } } - return Agent.findOneAndUpdate(searchParameter, updateData, mongoOptions).lean(); + return await Agent.findOneAndUpdate(searchParameter, updateData, mongoOptions).lean(); }; /** diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 0d2a7bc31760..0a4fc484c161 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -397,6 +397,7 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null }) if (output.artifact.content) { /** @type {FormattedContent[]} */ const content = output.artifact.content; + for (let i = 0; i < content.length; i++) { const part = content[i]; if (!part) { @@ -406,6 +407,7 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null }) continue; } const { url } = part.image_url; + artifactPromises.push( (async () => { const filename = `${output.name}_img_${nanoid()}`; @@ -417,6 +419,7 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null }) endpoint: metadata.provider, context: FileContext.image_generation, }); + const fileMetadata = Object.assign(file, { messageId: metadata.run_id, toolCallId: output.tool_call_id, diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 90e9640d5c4b..48126ed6ff12 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -1041,6 +1041,8 @@ class AgentClient extends BaseClient { requestBody: config.configurable.requestBody, user: createSafeUser(this.options.req?.user), tokenCounter: createTokenCounter(this.getEncoding()), + modelSpecs: 
appConfig.modelSpecs, + availableModels: appConfig.availableModels, }); if (!run) { diff --git a/api/server/services/AssistantService.js b/api/server/services/AssistantService.js index a7018f715b8c..7f03280704f3 100644 --- a/api/server/services/AssistantService.js +++ b/api/server/services/AssistantService.js @@ -437,6 +437,23 @@ async function runAssistant({ }); const tool_outputs = await processRequiredActions(openai, actions); + + // Add artifact content as user message to thread if artifacts were processed + if (openai.pendingArtifactContent?.length) { + const artifactMessage = { + role: 'user', + content: openai.pendingArtifactContent, + }; + if (openai.pendingArtifactFileIds?.length) { + artifactMessage.file_ids = openai.pendingArtifactFileIds; + } + await openai.beta.threads.messages.create(thread_id, artifactMessage); + + // Clear after use + delete openai.pendingArtifactContent; + delete openai.pendingArtifactFileIds; + } + const toolRun = await openai.beta.threads.runs.submitToolOutputs(run.id, { thread_id: run.thread_id, tool_outputs, diff --git a/api/server/services/MCP.js b/api/server/services/MCP.js index 81d7107de40c..b211f953b36f 100644 --- a/api/server/services/MCP.js +++ b/api/server/services/MCP.js @@ -453,6 +453,7 @@ function createToolInstance({ derivedSignal = config?.signal ? AbortSignal.any([config.signal]) : undefined; const mcpManager = getMCPManager(userId); const provider = (config?.metadata?.provider || _provider)?.toLowerCase(); + const endpoint = config?.metadata?.endpoint; const { args: _args, stepId, ...toolCall } = config.toolCall ?? {}; const flowId = `${serverName}:oauth_login:${config.metadata.thread_id}:${config.metadata.run_id}`; @@ -503,8 +504,12 @@ function createToolInstance({ oauthEnd, }); - if (isAssistantsEndpoint(provider) && Array.isArray(result)) { - return result[0]; + // For MCP tools, always return the full [content, artifact] array + // This allows both ToolService.js (Assistants) and ToolNode.ts (Agents) to process artifacts correctly + // MCP tools use responseFormat: CONTENT_AND_ARTIFACT, so we must return the tuple + if (Array.isArray(result) && result.length === 2) { + // This is a [content, artifact] tuple from formatToolContent + return result; } if (isGoogle && Array.isArray(result[0]) && result[0][0]?.type === ContentTypes.TEXT) { return [result[0][0].text, result[1]]; diff --git a/api/server/services/ToolService.js b/api/server/services/ToolService.js index 62d25b23eb70..f1a5e143922b 100644 --- a/api/server/services/ToolService.js +++ b/api/server/services/ToolService.js @@ -6,6 +6,7 @@ const { hasCustomUserVars, getUserMCPAuthMap, isActionDomainAllowed, + isBase64ImageUrl, } = require('@librechat/api'); const { Tools, @@ -21,7 +22,11 @@ const { validateActionDomain, defaultAgentCapabilities, validateAndParseOpenAPISpec, + validateVisionModel, } = require('librechat-data-provider'); +const { v4 } = require('uuid'); +const { saveBase64Image } = require('~/server/services/Files/process'); +const { FileContext } = require('librechat-data-provider'); const { createActionTool, decryptMetadata, @@ -36,6 +41,119 @@ const { recordUsage } = require('~/server/services/Threads'); const { loadTools } = require('~/app/clients/tools/util'); const { redactMessage } = require('~/config/parsers'); const { findPluginAuthsByKeys } = require('~/models'); + +/** + * Gets vision capability for the current model using validateVisionModel. + * @param {OpenAIClient} client - OpenAI or StreamRunManager Client. 
+ * @returns {boolean} true if the model supports vision, false otherwise + */ +function getVisionCapability(client) { + const model = client.req.body.model ?? 'gpt-4o-mini'; + const modelSpecs = client.req.config?.modelSpecs; + const availableModels = client.req.config?.availableModels; + return validateVisionModel({ model, modelSpecs, availableModels }); +} + +/** + * Processes MCP tool artifacts for Assistants endpoint. + * + * ALWAYS saves base64 images to files (for LibreChat UI/attachments). + * ONLY includes images in contentParts if vision is enabled (prevents errors for non-vision LLMs). + * + * This allows image generation tools to work with non-vision LLMs: + * - Images are generated and saved (visible in UI) + * - Images are NOT sent back to the LLM (prevents context overflow errors) + * + * @param {Object} params - Processing parameters + * @param {Object} params.artifacts - Artifacts object with content array + * @param {boolean} params.isVisionModel - Whether the model supports vision + * @param {Object} params.req - Express request object + * @param {string} params.thread_id - Thread ID + * @param {string} params.conversationId - Conversation ID + * @returns {Promise<{fileIds: string[], contentParts: Array}>} Processed artifacts + */ +async function processArtifactsForAssistants({ + artifacts, + isVisionModel, + req, + thread_id, + conversationId, +}) { + if (!artifacts?.content) { + return { fileIds: [], contentParts: [] }; + } + + // Ensure content is an array and filter out null/undefined items + if (!Array.isArray(artifacts.content)) { + return { fileIds: [], contentParts: [] }; + } + + const fileIds = []; + const contentParts = []; + + for (const item of artifacts.content) { + // Skip null or undefined items to prevent "Cannot read properties of null" errors + if (!item || typeof item !== 'object') { + continue; + } + + if (item.type === 'image_url') { + // Validate image_url structure before processing + if (!item.image_url) { + logger.warn('[processArtifactsForAssistants] Skipping image_url item with missing image_url property'); + continue; + } + + const isBase64 = isBase64ImageUrl(item); + + // ALWAYS save base64 images to files (for LibreChat UI/attachments) + if (isBase64) { + const imageUrl = typeof item.image_url === 'string' + ? 
item.image_url + : item.image_url?.url; + + if (!imageUrl) { + logger.warn('[processArtifactsForAssistants] Skipping base64 image with missing URL'); + continue; + } + + try { + const file = await saveBase64Image(imageUrl, { + req, + filename: `artifact_img_${v4()}`, + endpoint: req.body.endpoint, + context: FileContext.image_generation, + }); + if (file?.file_id) { + fileIds.push(file.file_id); + + // ONLY add to contentParts if vision enabled (prevents errors for non-vision LLMs) + if (isVisionModel) { + contentParts.push({ + type: ContentTypes.IMAGE_FILE, + [ContentTypes.IMAGE_FILE]: { file_id: file.file_id }, + }); + } + // If vision disabled: file is saved but NOT added to contentParts + // This allows image generation with non-vision LLMs without errors + } + } catch (error) { + logger.error('[processArtifactsForAssistants] Error saving base64 image:', error); + // Continue processing other items even if one fails + } + } else { + // HTTP URLs are just text references - always include them + contentParts.push(item); + } + } else { + // Non-image content: always keep as-is + contentParts.push(item); + } + } + + return { fileIds, contentParts }; +} + /** * Processes the required actions by calling the appropriate tools and returning the outputs. * @param {OpenAIClient} client - OpenAI or StreamRunManager Client. @@ -139,14 +257,20 @@ async function processRequiredActions(client, requiredActions) { let tool = ToolMap[currentAction.tool] ?? ActionToolMap[currentAction.tool]; const handleToolOutput = async (output) => { + // For MCP tools, output is [content, artifact] array + // Store the full array in requiredActions[i].output for artifact processing + // For tool output to OpenAI, we'll extract just the content requiredActions[i].output = output; + // Extract content for tool call display (first element of array if array, otherwise output itself) + const outputContent = Array.isArray(output) && output.length >= 1 ? output[0] : output; + /** @type {FunctionToolCall & PartMetadata} */ const toolCall = { function: { name: currentAction.tool, arguments: JSON.stringify(currentAction.toolInput), - output, + output: outputContent, }, id: currentAction.toolCallId, type: 'function', @@ -157,7 +281,7 @@ async function processRequiredActions(client, requiredActions) { const toolCallIndex = client.mappedOrder.get(toolCall.id); if (imageGenTools.has(currentAction.tool)) { - const imageOutput = output; + const imageOutput = outputContent; toolCall.function.output = `${currentAction.tool} displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. 
The user may download the images by clicking on them, but do not mention anything about downloading to the user.`; // Streams the "Finished" state of the tool call in the UI @@ -202,9 +326,13 @@ async function processRequiredActions(client, requiredActions) { // result: tool.result, }); + // For MCP tools with artifacts, return the content string for OpenAI tool output + // The full array [content, artifact] is stored in requiredActions[i].output for artifact processing + const finalOutput = outputContent; + return { tool_call_id: currentAction.toolCallId, - output, + output: finalOutput, }; }; @@ -354,8 +482,57 @@ async function processRequiredActions(client, requiredActions) { } } + const tool_outputs = await Promise.all(promises); + + // Process artifacts from MCP tools and prepare for next user message + const allArtifacts = []; + for (let i = 0; i < requiredActions.length; i++) { + const action = requiredActions[i]; + // MCP tools return [content, artifact] format + // For OpenRouter (string format): [string, artifacts] + // For OpenAI-compatible (array format): [[contentArray], artifacts] + if ( + action.output && + Array.isArray(action.output) && + action.output.length === 2 && + action.output[1]?.content + ) { + allArtifacts.push({ + artifacts: action.output[1], + toolName: action.tool, + }); + } + } + + // Process artifacts if any exist + if (allArtifacts.length > 0) { + const isVisionModel = getVisionCapability(client); + const artifactFileIds = []; + const artifactContent = []; + + for (const { artifacts, toolName } of allArtifacts) { + const processed = await processArtifactsForAssistants({ + artifacts, + isVisionModel, + req: client.req, + thread_id: requiredActions[0].thread_id, + conversationId: + (client.responseMessage ?? client.finalMessage)?.conversationId, + }); + + artifactFileIds.push(...processed.fileIds); + artifactContent.push(...processed.contentParts); + } + + // Store processed artifacts on client for later use in runAssistant + if (artifactContent.length > 0) { + client.pendingArtifactContent = artifactContent; + client.pendingArtifactFileIds = artifactFileIds; + } + } + return { - tool_outputs: await Promise.all(promises), + tool_outputs, }; } diff --git a/client/src/components/SidePanel/Agents/AgentConfig.tsx b/client/src/components/SidePanel/Agents/AgentConfig.tsx index f78e132f2e0d..a40fd9bfe651 100644 --- a/client/src/components/SidePanel/Agents/AgentConfig.tsx +++ b/client/src/components/SidePanel/Agents/AgentConfig.tsx @@ -1,7 +1,13 @@ -import React, { useState, useMemo, useCallback } from 'react'; +import React, { useState, useMemo, useCallback, useEffect } from 'react'; import { useToastContext } from '@librechat/client'; import { Controller, useWatch, useFormContext } from 'react-hook-form'; -import { EModelEndpoint, getEndpointField, defaultAgentCapabilities } from 'librechat-data-provider'; +import { + EModelEndpoint, + getEndpointField, + defaultAgentCapabilities, + validateVisionModel, + AgentCapabilities, +} from 'librechat-data-provider'; import type { AgentForm, IconComponentTypes } from '~/common'; import { removeFocusOutlines, @@ -18,7 +24,7 @@ import AgentCategorySelector from './AgentCategorySelector'; import Action from '~/components/SidePanel/Builder/Action'; import { useLocalize, useVisibleTools } from '~/hooks'; import { Panel, isEphemeralAgent } from '~/common'; -import { useGetAgentFiles } from '~/data-provider'; +import { useGetAgentFiles, useGetStartupConfig } from '~/data-provider'; import { icons } from '~/hooks/Endpoint/Icons'; 
import Instructions from './Instructions'; import AgentAvatar from './AgentAvatar'; @@ -65,8 +71,12 @@ export default function AgentConfig() { const agent = useWatch({ control, name: 'agent' }); const tools = useWatch({ control, name: 'tools' }); const agent_id = useWatch({ control, name: 'id' }); + const vision = useWatch({ control, name: AgentCapabilities.vision }); + const modelParameters = useWatch({ control, name: 'model_parameters' }); const { data: agentFiles = [] } = useGetAgentFiles(agent_id); + const { data: startupConfig } = useGetStartupConfig(); + const { setValue, getValues } = methods; const mergedFileMap = useMemo(() => { const newFileMap = { ...fileMap }; @@ -89,6 +99,30 @@ export default function AgentConfig() { visionEnabled, } = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); + // Auto-update vision when model changes if vision was not explicitly set + useEffect(() => { + // Only update if vision is undefined (not explicitly set) + if (vision !== undefined) { + return; + } + + const agentModel = (modelParameters as { model?: string })?.model ?? model; + if (!agentModel) { + return; + } + + const autoVision = validateVisionModel({ + model: agentModel, + modelSpecs: startupConfig?.modelSpecs, + availableModels: startupConfig?.availableModels, + }); + + // Only update if the calculated value differs from current form value + if (getValues(AgentCapabilities.vision) !== autoVision) { + setValue(AgentCapabilities.vision, autoVision, { shouldDirty: false }); + } + }, [model, modelParameters, vision, startupConfig, setValue, getValues]); + const context_files = useMemo(() => { if (typeof agent === 'string') { return []; diff --git a/client/src/components/SidePanel/Agents/AgentSelect.tsx b/client/src/components/SidePanel/Agents/AgentSelect.tsx index 345f6356c1ac..a626fdad41b3 100644 --- a/client/src/components/SidePanel/Agents/AgentSelect.tsx +++ b/client/src/components/SidePanel/Agents/AgentSelect.tsx @@ -2,13 +2,17 @@ import { EarthIcon } from 'lucide-react'; import { ControlCombobox } from '@librechat/client'; import { useCallback, useEffect, useRef } from 'react'; import { useFormContext, Controller } from 'react-hook-form'; -import { AgentCapabilities, defaultAgentFormValues } from 'librechat-data-provider'; +import { + AgentCapabilities, + defaultAgentFormValues, + validateVisionModel, +} from 'librechat-data-provider'; import type { UseMutationResult, QueryObserverResult } from '@tanstack/react-query'; import type { Agent, AgentCreateParams } from 'librechat-data-provider'; import type { TAgentCapabilities, AgentForm } from '~/common'; import { cn, createProviderOption, processAgentOption, getDefaultAgentFormValues } from '~/utils'; import { useLocalize, useAgentDefaultPermissionLevel } from '~/hooks'; -import { useListAgentsQuery } from '~/data-provider'; +import { useListAgentsQuery, useGetStartupConfig } from '~/data-provider'; const keys = new Set(Object.keys(defaultAgentFormValues)); @@ -27,6 +31,7 @@ export default function AgentSelect({ const lastSelectedAgent = useRef(null); const { control, reset } = useFormContext(); const permissionLevel = useAgentDefaultPermissionLevel(); + const { data: startupConfig } = useGetStartupConfig(); const { data: agents = null } = useListAgentsQuery( { requiredPermission: permissionLevel }, @@ -54,11 +59,28 @@ export default function AgentSelect({ icon: isGlobal ? 
<EarthIcon />
: null, }; + // Get vision from top-level agent or from latest version if not present + // If not explicitly set, automatically determine from model + const explicitVision = + fullAgent.vision ?? fullAgent.versions?.[fullAgent.versions.length - 1]?.vision; + const agentModel = + (fullAgent.model_parameters as { model?: string })?.model ?? fullAgent.model; + const agentVision = + explicitVision !== undefined + ? explicitVision + : agentModel + ? validateVisionModel({ + model: agentModel, + modelSpecs: startupConfig?.modelSpecs, + availableModels: startupConfig?.availableModels, + }) + : false; + const capabilities: TAgentCapabilities = { [AgentCapabilities.web_search]: false, [AgentCapabilities.file_search]: false, [AgentCapabilities.execute_code]: false, - [AgentCapabilities.vision]: false, + [AgentCapabilities.vision]: agentVision, [AgentCapabilities.end_after_tools]: false, [AgentCapabilities.hide_sequential_outputs]: false, }; diff --git a/client/src/components/SidePanel/Agents/ImageVision.tsx b/client/src/components/SidePanel/Agents/ImageVision.tsx index 643f144ec89b..9a629b068e87 100644 --- a/client/src/components/SidePanel/Agents/ImageVision.tsx +++ b/client/src/components/SidePanel/Agents/ImageVision.tsx @@ -1,4 +1,3 @@ -import { memo } from 'react'; import { AgentCapabilities } from 'librechat-data-provider'; import { useFormContext, Controller } from 'react-hook-form'; import { @@ -13,7 +12,7 @@ import type { AgentForm } from '~/common'; import { useLocalize } from '~/hooks'; import { ESide } from '~/common'; -function ImageVision() { +export default function ImageVision() { const localize = useLocalize(); const methods = useFormContext(); const { control } = methods; @@ -65,5 +64,3 @@ function ImageVision() { ); } - -export default memo(ImageVision); diff --git a/client/src/hooks/index.ts b/client/src/hooks/index.ts index 6e1431421817..c1e6a1836b96 100644 --- a/client/src/hooks/index.ts +++ b/client/src/hooks/index.ts @@ -36,4 +36,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech'; export { default as useGenerationsByLatest } from './useGenerationsByLatest'; export { default as useLocalizedConfig } from './useLocalizedConfig'; export { default as useResourcePermissions } from './useResourcePermissions'; -export { useVisionModel } from './useVisionModel'; +export { useVisionModel } from './useVisionModel'; \ No newline at end of file diff --git a/client/src/hooks/useVisionModel.ts b/client/src/hooks/useVisionModel.ts index 61c8d74b91d0..3f911ad686f0 100644 --- a/client/src/hooks/useVisionModel.ts +++ b/client/src/hooks/useVisionModel.ts @@ -16,6 +16,7 @@ export function useVisionModel(): boolean { if (!model) { return false; } + return validateVisionModel({ model, modelSpecs: startupConfig?.modelSpecs, diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 8d1a7d5bff39..9b5dbd541936 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -69,7 +69,7 @@ "com_agents_file_context_label": "File Context", "com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.", "com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.", - "com_agents_image_vision_info": "When enabled, images generated by MCP tools (e.g., image generation tools) will be sent back to the LLM. 
Disable this for non-vision models to prevent context overflow errors.", + "com_agents_image_vision_info": "Vision capability is automatically determined from the agent's model. Enable this checkbox to manually override and force vision capability on, or disable to force it off. When enabled, images generated by MCP tools will be sent back to the LLM. For non-vision models, disable to prevent context overflow errors.", "com_agents_grid_announcement": "Showing {{count}} agents in {{category}} category", "com_agents_instructions_placeholder": "The system instructions that the agent uses", "com_agents_link_copied": "Link copied", diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index e9b9eb55a5e5..428fcde64327 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -1,5 +1,10 @@ import { Run, Providers } from '@librechat/agents'; -import { providerEndpointMap, KnownEndpoints } from 'librechat-data-provider'; +import { + providerEndpointMap, + KnownEndpoints, + type TSpecsConfig, + validateVisionModel, +} from 'librechat-data-provider'; import type { MultiAgentGraphConfig, OpenAIClientOptions, @@ -21,22 +26,6 @@ const customProviders = new Set([ KnownEndpoints.ollama, ]); -/** - * Check if the endpoint is a custom OpenAI-compatible endpoint. - * Custom endpoints are identified when provider is OPENAI but endpoint name differs. - */ -function isCustomOpenAIEndpoint( - provider?: string, - endpoint?: string | null, -): boolean { - return ( - provider === Providers.OPENAI && - endpoint != null && - endpoint !== provider && - endpoint !== Providers.OPENAI - ); -} - export function getReasoningKey( provider: Providers, llmConfig: t.RunLLMConfig, @@ -61,6 +50,38 @@ export function getReasoningKey( return reasoningKey; } +/** + * Determines vision capability for an agent. + * Uses explicit override if set, otherwise auto-detects from model. + * + * @param agent - The agent to check + * @param modelSpecs - Optional modelSpecs configuration + * @param availableModels - Optional list of available models + * @returns true if the agent supports vision, false otherwise + */ +function determineVisionCapability( + agent: RunAgent, + modelSpecs?: TSpecsConfig, + availableModels?: string[] +): boolean { + // Explicit override takes precedence + if (agent.vision !== undefined) { + return agent.vision; + } + + // Auto-detect from model + const agentModel = (agent.model_parameters as { model?: string })?.model ?? 
agent.model; + if (!agentModel) { + return false; + } + + return validateVisionModel({ + model: agentModel, + modelSpecs, + availableModels, + }); +} + type RunAgent = Omit & { tools?: GenericTool[]; maxContextTokens?: number; @@ -89,6 +110,8 @@ export async function createRun({ tokenCounter, customHandlers, indexTokenCountMap, + modelSpecs, + availableModels, streaming = true, streamUsage = true, }: { @@ -99,6 +122,8 @@ export async function createRun({ streamUsage?: boolean; requestBody?: t.RequestBody; user?: IUser; + modelSpecs?: TSpecsConfig; + availableModels?: string[]; } & Pick): Promise< Run > { @@ -145,20 +170,20 @@ export async function createRun({ } /** Resolves issues with new OpenAI usage field */ - const isCustomEndpoint = isCustomOpenAIEndpoint( - agent.provider, - agent.endpoint, - ); - if ( customProviders.has(agent.provider) || - isCustomEndpoint + (agent.provider === Providers.OPENAI && + agent.endpoint != null && + agent.endpoint !== agent.provider && + agent.endpoint !== Providers.OPENAI) ) { llmConfig.streamUsage = false; llmConfig.usage = true; } const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint); + const visionCapability = determineVisionCapability(agent, modelSpecs, availableModels); + const agentInput: AgentInputs = { provider, reasoningKey, @@ -169,8 +194,9 @@ export async function createRun({ instructions: systemContent, maxContextTokens: agent.maxContextTokens, useLegacyContent: agent.useLegacyContent ?? false, - vision: agent.vision, + vision: visionCapability, }; + agentInputs.push(agentInput); }; diff --git a/packages/api/src/mcp/parsers.ts b/packages/api/src/mcp/parsers.ts index 17ac406d02a4..213f289a2f7e 100644 --- a/packages/api/src/mcp/parsers.ts +++ b/packages/api/src/mcp/parsers.ts @@ -156,6 +156,19 @@ function parseAsString(result: t.MCPToolCallResponse): string { * @param provider - The provider name (google, anthropic, openai) * @returns Tuple of content and image_urls */ +/** + * Formats MCP tool call response content for different provider types. + * + * Handles provider-specific formatting: + * - OpenAI-compatible providers: Uses content array format with artifacts + * - Non-OpenAI providers: Uses string format + * + * Automatically detects custom OpenAI-compatible endpoints (not in NON_OPENAI_PROVIDERS). + * + * @param result - MCP tool call response with content array + * @param provider - Provider identifier (e.g., 'openai', 'scaleway', 'anthropic') + * @returns Tuple of [formattedContent, artifacts] where artifacts contain image URLs + */ export function formatToolContent( result: t.MCPToolCallResponse, provider: t.Provider, @@ -278,9 +291,7 @@ UI Resource Markers Available: }; } - if (usesContentArrayFormat(provider)) { - return [formattedContent, artifacts]; - } - - return [currentTextBlock, artifacts]; + return usesContentArrayFormat(provider) + ? [formattedContent, artifacts] + : [currentTextBlock, artifacts]; } diff --git a/packages/api/src/utils/image-helpers.ts b/packages/api/src/utils/image-helpers.ts new file mode 100644 index 000000000000..48602c248e3f --- /dev/null +++ b/packages/api/src/utils/image-helpers.ts @@ -0,0 +1,38 @@ +import type { Agents } from 'librechat-data-provider'; + +/** + * Checks if an image_url content item contains base64 data (not an HTTP URL). + * + * IMPORTANT: This function is duplicated in two locations: + * 1. librechat/packages/api/src/utils/image-helpers.ts (for Assistants endpoint) + * 2. 
agents/src/messages/core.ts (for Agents endpoint) + * + * Both implementations MUST remain identical. The agents package cannot import + * from @librechat/api as it's a separate npm package. + * + * Base64 data URLs start with "data:" and can cause context overflow when + * sent to non-vision models. HTTP URLs are just text references and don't + * need filtering. + * + * @param item - Message content item to check + * @returns true if the item is an image_url with base64 data, false otherwise + */ +export function isBase64ImageUrl(item: Agents.MessageContentComplex): boolean { + if (item.type !== 'image_url') { + return false; + } + + const itemWithImageUrl = item as { image_url?: string | { url?: string } }; + const imageUrl = itemWithImageUrl.image_url; + + if (typeof imageUrl === 'string') { + return imageUrl.startsWith('data:'); + } + + if (imageUrl && typeof imageUrl === 'object' && 'url' in imageUrl) { + const url = imageUrl.url; + return typeof url === 'string' && url.startsWith('data:'); + } + + return false; +} diff --git a/packages/api/src/utils/index.ts b/packages/api/src/utils/index.ts index 947e566ce090..567e866d1669 100644 --- a/packages/api/src/utils/index.ts +++ b/packages/api/src/utils/index.ts @@ -23,3 +23,4 @@ export * from './http'; export * from './tokens'; export * from './url'; export * from './message'; +export * from './image-helpers'; \ No newline at end of file diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 6709ff3f5368..fd80628e14d8 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1287,7 +1287,16 @@ export enum VisionModes { /** * Validates whether a model supports vision capabilities. - * Checks modelSpecs configuration first, then falls back to hardcoded list. + * + * Checks in order: + * 1. modelSpecs configuration (if provided) - explicit vision flag per model + * 2. Hardcoded visionModels list - fallback for known vision-capable models + * + * @param model - Model identifier to check + * @param modelSpecs - Optional modelSpecs configuration from librechat.yaml + * @param availableModels - Optional list of available models to validate against + * @param additionalModels - Optional additional models to include in vision check + * @returns true if the model supports vision, false otherwise */ export function validateVisionModel({ model, @@ -1319,6 +1328,7 @@ export function validateVisionModel({ const matchingSpec = modelSpecs.list.find( (spec) => spec.preset?.model === model || model.includes(spec.preset?.model ?? ''), ); + if (matchingSpec?.vision !== undefined) { return matchingSpec.vision === true; } From 986f6e6ce500eed5f3391e8ebcc3d60f4ff9edb1 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Mon, 26 Jan 2026 16:19:46 +0100 Subject: [PATCH 15/23] refactor: Clean up whitespace in createToolEndCallback function - Removed unnecessary blank lines in the createToolEndCallback function to improve code readability and maintainability. 
--- api/server/controllers/agents/callbacks.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/api/server/controllers/agents/callbacks.js b/api/server/controllers/agents/callbacks.js index 0a4fc484c161..0d2a7bc31760 100644 --- a/api/server/controllers/agents/callbacks.js +++ b/api/server/controllers/agents/callbacks.js @@ -397,7 +397,6 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null }) if (output.artifact.content) { /** @type {FormattedContent[]} */ const content = output.artifact.content; - for (let i = 0; i < content.length; i++) { const part = content[i]; if (!part) { @@ -407,7 +406,6 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null }) continue; } const { url } = part.image_url; - artifactPromises.push( (async () => { const filename = `${output.name}_img_${nanoid()}`; @@ -419,7 +417,6 @@ function createToolEndCallback({ req, res, artifactPromises, streamId = null }) endpoint: metadata.provider, context: FileContext.image_generation, }); - const fileMetadata = Object.assign(file, { messageId: metadata.run_id, toolCallId: output.tool_call_id, From 72149a2c406b89b5581cba07d3ee0d91ded27895 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Mon, 26 Jan 2026 16:54:12 +0100 Subject: [PATCH 16/23] refactor: Clean up MCP tool result handling and add vision debug logging - Remove redundant result processing in MCP.js - formatToolContent already returns correct tuple format - Add debug logging in run.ts to diagnose vision capability detection issues - Improve code clarity by removing workaround code --- api/server/services/MCP.js | 17 ++++------------- packages/api/src/agents/run.ts | 9 +++++++-- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/api/server/services/MCP.js b/api/server/services/MCP.js index b211f953b36f..7644e3ec2d20 100644 --- a/api/server/services/MCP.js +++ b/api/server/services/MCP.js @@ -483,7 +483,10 @@ function createToolInstance({ const customUserVars = config?.configurable?.userMCPAuthMap?.[`${Constants.mcp_prefix}${serverName}`]; - const result = await mcpManager.callTool({ + // mcpManager.callTool returns FormattedContentResult: [content, artifacts] + // This tuple format is already handled by formatToolContent in @librechat/api + // and is compatible with responseFormat: CONTENT_AND_ARTIFACT + return await mcpManager.callTool({ serverName, toolName, provider, @@ -503,18 +506,6 @@ function createToolInstance({ oauthStart, oauthEnd, }); - - // For MCP tools, always return the full [content, artifact] array - // This allows both ToolService.js (Assistants) and ToolNode.ts (Agents) to process artifacts correctly - // MCP tools use responseFormat: CONTENT_AND_ARTIFACT, so we must return the tuple - if (Array.isArray(result) && result.length === 2) { - // This is a [content, artifact] tuple from formatToolContent - return result; - } - if (isGoogle && Array.isArray(result[0]) && result[0][0]?.type === ContentTypes.TEXT) { - return [result[0][0].text, result[1]]; - } - return result; } catch (error) { logger.error( `[MCP][${serverName}][${toolName}][User: ${userId}] Error calling MCP tool:`, diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 428fcde64327..f799e5df54c9 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -66,20 +66,25 @@ function determineVisionCapability( ): boolean { // Explicit override takes precedence if (agent.vision !== undefined) { + console.log(`[VISION] Agent ${agent.id}: Using explicit vision override: 
${agent.vision}`); return agent.vision; } // Auto-detect from model const agentModel = (agent.model_parameters as { model?: string })?.model ?? agent.model; if (!agentModel) { + console.log(`[VISION] Agent ${agent.id}: No model found, returning false`); return false; } - return validateVisionModel({ + const result = validateVisionModel({ model: agentModel, modelSpecs, - availableModels, + // Don't pass availableModels - it incorrectly filters out valid models }); + + console.log(`[VISION] Agent ${agent.id}: Model "${agentModel}" -> vision=${result}`); + return result; } type RunAgent = Omit & { From a3a845fab054cd42ce7a88d270f9c7fead0c58d1 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Mon, 26 Jan 2026 21:33:06 +0100 Subject: [PATCH 17/23] refactor: Simplify MCP tool result handling and remove vision debug logs - Change MCP.js to directly return the result from mcpManager.callTool, enhancing clarity. - Remove console logs in run.ts related to vision capability detection to streamline the code. --- api/server/services/MCP.js | 3 ++- packages/api/src/agents/run.ts | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/api/server/services/MCP.js b/api/server/services/MCP.js index 7644e3ec2d20..74b5e9444999 100644 --- a/api/server/services/MCP.js +++ b/api/server/services/MCP.js @@ -486,7 +486,7 @@ function createToolInstance({ // mcpManager.callTool returns FormattedContentResult: [content, artifacts] // This tuple format is already handled by formatToolContent in @librechat/api // and is compatible with responseFormat: CONTENT_AND_ARTIFACT - return await mcpManager.callTool({ + const result = await mcpManager.callTool({ serverName, toolName, provider, @@ -506,6 +506,7 @@ function createToolInstance({ oauthStart, oauthEnd, }); + return result; } catch (error) { logger.error( `[MCP][${serverName}][${toolName}][User: ${userId}] Error calling MCP tool:`, diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index f799e5df54c9..200248e7805e 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -66,14 +66,12 @@ function determineVisionCapability( ): boolean { // Explicit override takes precedence if (agent.vision !== undefined) { - console.log(`[VISION] Agent ${agent.id}: Using explicit vision override: ${agent.vision}`); return agent.vision; } // Auto-detect from model const agentModel = (agent.model_parameters as { model?: string })?.model ?? agent.model; if (!agentModel) { - console.log(`[VISION] Agent ${agent.id}: No model found, returning false`); return false; } @@ -83,7 +81,6 @@ function determineVisionCapability( // Don't pass availableModels - it incorrectly filters out valid models }); - console.log(`[VISION] Agent ${agent.id}: Model "${agentModel}" -> vision=${result}`); return result; } From 54282d15fb6de83d4b4fd5f029c0d808b0399c17 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Tue, 27 Jan 2026 06:28:51 +0100 Subject: [PATCH 18/23] refactor: Enhance vision capability handling and artifact processing - Update AgentClient to conditionally handle image URLs and attachments based on vision capability. - Modify AssistantService to check for image file types before processing artifact messages. - Refactor ToolService to improve vision capability validation and ensure proper handling of artifacts for non-vision models. - Clarify documentation regarding vision capabilities and processing behavior for better understanding. 
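In effect, the gating added below reduces to this sketch (illustrative only: stripImageParts is not a name from the diff; MessageContentComplex is the content-part type also used by isBase64ImageUrl):

    // Non-vision agents still save generated images as files (visible in the UI),
    // but image parts are never sent back to the model.
    function stripImageParts(vision: boolean, parts: MessageContentComplex[]) {
      return vision
        ? parts
        : parts.filter((part) => part.type !== 'image_url' && part.type !== 'image_file');
    }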
--- api/server/controllers/agents/client.js | 12 ++++++++ api/server/services/AssistantService.js | 5 ++- api/server/services/ToolService.js | 41 ++++++++++++------------- packages/api/src/agents/run.ts | 22 ++++++------- packages/data-provider/src/config.ts | 14 +++------ 5 files changed, 51 insertions(+), 43 deletions(-) diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 48126ed6ff12..f33ad1b8aacb 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -344,6 +344,10 @@ class AgentClient extends BaseClient { * @returns {Promise>>} */ async addImageURLs(message, attachments) { + if (!(this.options.agent?.vision ?? false)) { + return attachments; + } + const { files, image_urls } = await encodeAndFormat( this.options.req, attachments, @@ -421,6 +425,14 @@ class AgentClient extends BaseClient { ); } + if (!(this.options.agent?.vision ?? false)) { + orderedMessages.forEach((msg) => { + if (msg.image_urls) { + delete msg.image_urls; + } + }); + } + const formattedMessages = orderedMessages.map((message, i) => { const formattedMessage = formatMessage({ message, diff --git a/api/server/services/AssistantService.js b/api/server/services/AssistantService.js index 7f03280704f3..6eff943e8498 100644 --- a/api/server/services/AssistantService.js +++ b/api/server/services/AssistantService.js @@ -444,7 +444,10 @@ async function runAssistant({ role: 'user', content: openai.pendingArtifactContent, }; - if (openai.pendingArtifactFileIds?.length) { + if ( + openai.pendingArtifactFileIds?.length && + openai.pendingArtifactContent.some((item) => item?.type === ContentTypes.IMAGE_FILE) + ) { artifactMessage.file_ids = openai.pendingArtifactFileIds; } await openai.beta.threads.messages.create(thread_id, artifactMessage); diff --git a/api/server/services/ToolService.js b/api/server/services/ToolService.js index f1a5e143922b..cc6ec6ebcbcf 100644 --- a/api/server/services/ToolService.js +++ b/api/server/services/ToolService.js @@ -43,29 +43,28 @@ const { redactMessage } = require('~/config/parsers'); const { findPluginAuthsByKeys } = require('~/models'); /** - * Gets vision capability for the current model using validateVisionModel. + * Determines if the current model supports vision capabilities. * @param {OpenAIClient} client - OpenAI or StreamRunManager Client. * @returns {boolean} true if the model supports vision, false otherwise */ function getVisionCapability(client) { const model = client.req.body.model ?? 'gpt-4o-mini'; const modelSpecs = client.req.config?.modelSpecs; - const availableModels = client.req.config?.availableModels; - return validateVisionModel({ model, modelSpecs, availableModels }); + return validateVisionModel({ model, modelSpecs }); } /** * Processes MCP tool artifacts for Assistants endpoint. * - * ALWAYS saves base64 images to files (for LibreChat UI/attachments). - * ONLY includes images in contentParts if vision is enabled (prevents errors for non-vision LLMs). + * Behavior: + * - Base64 images: Always saved to files (visible in UI), included in contentParts only if vision enabled + * - HTTP image URLs: Included in contentParts only if vision enabled + * - Non-image content: Always included * - * This allows image generation tools to work with non-vision LLMs: - * - Images are generated and saved (visible in UI) - * - Images are NOT sent back to the LLM (prevents context overflow errors) + * This allows image generation tools to work with non-vision models without API errors. 
* * @param {Object} params - Processing parameters - * @param {Object} params.artifacts - Artifacts object with content array + * @param {Object} params.artifacts - Artifacts from MCP tool * @param {boolean} params.isVisionModel - Whether the model supports vision * @param {Object} params.req - Express request object * @param {string} params.thread_id - Thread ID @@ -88,6 +87,15 @@ async function processArtifactsForAssistants({ return { fileIds: [], contentParts: [] }; } + if (typeof isVisionModel !== 'boolean') { + logger.warn('[processArtifactsForAssistants] Invalid isVisionModel value, defaulting to false', { + isVisionModel, + model: req.body.model, + endpoint: req.body.endpoint, + }); + isVisionModel = false; + } + const fileIds = []; const contentParts = []; @@ -106,7 +114,6 @@ async function processArtifactsForAssistants({ const isBase64 = isBase64ImageUrl(item); - // ALWAYS save base64 images to files (for LibreChat UI/attachments) if (isBase64) { const imageUrl = typeof item.image_url === 'string' ? item.image_url @@ -126,27 +133,20 @@ async function processArtifactsForAssistants({ }); if (file?.file_id) { fileIds.push(file.file_id); - - // ONLY add to contentParts if vision enabled (prevents errors for non-vision LLMs) if (isVisionModel) { contentParts.push({ type: ContentTypes.IMAGE_FILE, [ContentTypes.IMAGE_FILE]: { file_id: file.file_id }, }); } - // If vision disabled: file is saved but NOT added to contentParts - // This allows image generation with non-vision LLMs without errors } } catch (error) { logger.error('[processArtifactsForAssistants] Error saving base64 image:', error); - // Continue processing other items even if one fails } - } else { - // HTTP URLs are just text references - always include them + } else if (isVisionModel) { contentParts.push(item); } } else { - // Non-image content: always keep as-is contentParts.push(item); } } @@ -504,7 +504,6 @@ async function processRequiredActions(client, requiredActions) { } } - // Process artifacts if any exist if (allArtifacts.length > 0) { const isVisionModel = getVisionCapability(client); const artifactFileIds = []; @@ -524,8 +523,7 @@ async function processRequiredActions(client, requiredActions) { artifactContent.push(...processed.contentParts); } - // Store processed artifacts on client for later use in runAssistant - if (artifactContent.length > 0) { + if (artifactContent.length > 0 || artifactFileIds.length > 0) { client.pendingArtifactContent = artifactContent; client.pendingArtifactFileIds = artifactFileIds; } @@ -838,4 +836,5 @@ module.exports = { getToolkitKey, loadAgentTools, processRequiredActions, + getVisionCapability, }; diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index 200248e7805e..329291f0695e 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -52,11 +52,16 @@ export function getReasoningKey( /** * Determines vision capability for an agent. - * Uses explicit override if set, otherwise auto-detects from model. - * - * @param agent - The agent to check - * @param modelSpecs - Optional modelSpecs configuration - * @param availableModels - Optional list of available models + * + * Priority: + * 1. Explicit override (`agent.vision`) takes precedence + * 2. Auto-detection from model using `validateVisionModel()` + * + * Model is resolved from `agent.model_parameters?.model` or `agent.model`. 
+ * + * @param agent - The agent to check for vision capability + * @param modelSpecs - Optional modelSpecs configuration from librechat.yaml + * @param availableModels - Not used (kept for backwards compatibility) * @returns true if the agent supports vision, false otherwise */ function determineVisionCapability( @@ -64,24 +69,19 @@ function determineVisionCapability( modelSpecs?: TSpecsConfig, availableModels?: string[] ): boolean { - // Explicit override takes precedence if (agent.vision !== undefined) { return agent.vision; } - // Auto-detect from model const agentModel = (agent.model_parameters as { model?: string })?.model ?? agent.model; if (!agentModel) { return false; } - const result = validateVisionModel({ + return validateVisionModel({ model: agentModel, modelSpecs, - // Don't pass availableModels - it incorrectly filters out valid models }); - - return result; } type RunAgent = Omit & { diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index fd80628e14d8..6df737e12f48 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -1289,12 +1289,13 @@ export enum VisionModes { * Validates whether a model supports vision capabilities. * * Checks in order: - * 1. modelSpecs configuration (if provided) - explicit vision flag per model - * 2. Hardcoded visionModels list - fallback for known vision-capable models + * 1. Exclude known non-vision models + * 2. modelSpecs configuration (highest priority if provided) + * 3. Hardcoded visionModels list * * @param model - Model identifier to check * @param modelSpecs - Optional modelSpecs configuration from librechat.yaml - * @param availableModels - Optional list of available models to validate against + * @param availableModels - Not used (kept for backwards compatibility) * @param additionalModels - Optional additional models to include in vision check * @returns true if the model supports vision, false otherwise */ @@ -1313,17 +1314,10 @@ export function validateVisionModel({ return false; } - // Exclude known non-vision models if (model.includes('gpt-4-turbo-preview') || model.includes('o1-mini')) { return false; } - // Check if model is in available models list - if (availableModels && !availableModels.includes(model)) { - return false; - } - - // Check modelSpecs first if provided if (modelSpecs?.list) { const matchingSpec = modelSpecs.list.find( (spec) => spec.preset?.model === model || model.includes(spec.preset?.model ?? ''), From ba611c363554157a419eefdd70f3450f6190724d Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Tue, 27 Jan 2026 07:50:45 +0100 Subject: [PATCH 19/23] refactor: Improve handling of vision capability and artifact message processing - Simplify image URL handling in AgentClient by removing unnecessary checks when vision is disabled. - Enhance AssistantService to use a boolean flag for determining if file IDs should be attached to artifact messages. - Add validation for max_tokens in createRun to ensure it is always set to a valid value, preventing potential errors from invalid configurations. 
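The max_tokens guard added in run.ts below behaves roughly like this (minimal sketch, assuming llmConfig can be treated as a plain string-keyed record; the 4096 fallback matches this revision and is replaced by plain omission in a later patch):

    // Sketch of the sanitization: invalid or missing values fall back to
    // 4096, and the snake_case max_tokens key is normalized away.
    function sanitizeMaxTokens(llmConfig: Record<string, unknown>): void {
      const raw = llmConfig.maxTokens ?? llmConfig.max_tokens;
      const isValid = typeof raw === 'number' && !Number.isNaN(raw) && raw >= 1;
      llmConfig.maxTokens = isValid ? raw : 4096;
      delete llmConfig.max_tokens;
    }

    // e.g. a negative value computed from missing model context metadata:
    const cfg: Record<string, unknown> = { max_tokens: -3277 };
    sanitizeMaxTokens(cfg);
    // cfg -> { maxTokens: 4096 }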
---
 api/server/controllers/agents/client.js | 9 ++-------
 api/server/services/AssistantService.js | 9 +++++----
 packages/api/src/agents/run.ts | 20 ++++++++++++++++++--
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js
index f33ad1b8aacb..d4b8a7fde031 100644
--- a/api/server/controllers/agents/client.js
+++ b/api/server/controllers/agents/client.js
@@ -425,13 +425,8 @@ class AgentClient extends BaseClient {
       );
     }

-    if (!(this.options.agent?.vision ?? false)) {
-      orderedMessages.forEach((msg) => {
-        if (msg.image_urls) {
-          delete msg.image_urls;
-        }
-      });
-    }
+    // Image content in messages is filtered by the LLM layer (_convertMessagesToOpenAIParams)
+    // when agent.vision is false; no need to strip image_urls here.

     const formattedMessages = orderedMessages.map((message, i) => {
       const formattedMessage = formatMessage({
diff --git a/api/server/services/AssistantService.js b/api/server/services/AssistantService.js
index 6eff943e8498..0ef91d3a997c 100644
--- a/api/server/services/AssistantService.js
+++ b/api/server/services/AssistantService.js
@@ -440,14 +440,15 @@ async function runAssistant({
   // Add artifact content as user message to thread if artifacts were processed
   if (openai.pendingArtifactContent?.length) {
+    const willAttachFileIds =
+      openai.pendingArtifactFileIds?.length &&
+      openai.pendingArtifactContent.some((item) => item?.type === ContentTypes.IMAGE_FILE);
+
     const artifactMessage = {
       role: 'user',
       content: openai.pendingArtifactContent,
     };
-    if (
-      openai.pendingArtifactFileIds?.length &&
-      openai.pendingArtifactContent.some((item) => item?.type === ContentTypes.IMAGE_FILE)
-    ) {
+    if (willAttachFileIds) {
       artifactMessage.file_ids = openai.pendingArtifactFileIds;
     }
     await openai.beta.threads.messages.create(thread_id, artifactMessage);
diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts
index 329291f0695e..ad15b5181d84 100644
--- a/packages/api/src/agents/run.ts
+++ b/packages/api/src/agents/run.ts
@@ -72,12 +72,12 @@ function determineVisionCapability(
   if (agent.vision !== undefined) {
     return agent.vision;
   }
-
+
   const agentModel = (agent.model_parameters as { model?: string })?.model ?? agent.model;
   if (!agentModel) {
     return false;
   }
-
+
   return validateVisionModel({
     model: agentModel,
     modelSpecs,
@@ -183,6 +183,22 @@ export async function createRun({
     llmConfig.usage = true;
   }

+  /**
+   * Ensure max_tokens/maxTokens is at least 1 for provider APIs.
+   * Avoids invalid values from missing/wrong model metadata or stored config
+   * (e.g. custom/Scaleway with undefined context leading to negative max_tokens).
+   */
+  const llmConfigRecord = llmConfig as Record<string, unknown>;
+  const rawMaxTokens = llmConfig.maxTokens ?? llmConfigRecord.max_tokens;
+  const sanitizedMaxTokens =
+    typeof rawMaxTokens === 'number' &&
+    !Number.isNaN(rawMaxTokens) &&
+    rawMaxTokens >= 1
+      ? rawMaxTokens
+      : 4096;
+  llmConfig.maxTokens = sanitizedMaxTokens;
+  delete llmConfigRecord.max_tokens;
+
   const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint);

   const visionCapability = determineVisionCapability(agent, modelSpecs, availableModels);

From f34fc80e014751b3b6e37831821e107d1de8dce7 Mon Sep 17 00:00:00 2001
From: Pascal Garber
Date: Tue, 27 Jan 2026 09:04:55 +0100
Subject: [PATCH 20/23] refactor: Enhance vision handling and specification
 processing in agent models

- Update loadEphemeralAgent and loadAddedAgent functions to prioritize model specifications for vision and spec attributes.
- Modify determineVisionCapability to incorporate spec-based vision detection, improving clarity and functionality. - Refactor createRun to ensure valid max_tokens handling, enhancing robustness against invalid configurations. --- api/models/Agent.js | 10 +++++++++ api/models/loadAddedAgent.js | 16 +++++++++++++- packages/api/src/agents/run.ts | 40 +++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/api/models/Agent.js b/api/models/Agent.js index 29e436196e6b..7e6d3080ca99 100644 --- a/api/models/Agent.js +++ b/api/models/Agent.js @@ -178,6 +178,16 @@ const loadEphemeralAgent = async ({ req, spec, endpoint, model_parameters: _m }) if (ephemeralAgent?.artifacts != null && ephemeralAgent.artifacts) { result.artifacts = ephemeralAgent.artifacts; } + + // Manual spec wins: when the user chose a model spec with vision set, use it so + // vision is determined by the spec rather than the hardcoded list. + if (modelSpec?.vision !== undefined) { + result.vision = modelSpec.vision; + } + if (spec != null && spec !== '') { + result.spec = spec; + } + return result; }; diff --git a/api/models/loadAddedAgent.js b/api/models/loadAddedAgent.js index aa83375eae4d..54054794e44f 100644 --- a/api/models/loadAddedAgent.js +++ b/api/models/loadAddedAgent.js @@ -101,7 +101,7 @@ const loadAddedAgent = async ({ req, conversation, primaryAgent }) => { const ephemeralId = encodeEphemeralAgentId({ endpoint, model, sender, index: 1 }); - return { + const added = { id: ephemeralId, instructions: promptPrefix || '', provider: endpoint, @@ -109,6 +109,13 @@ const loadAddedAgent = async ({ req, conversation, primaryAgent }) => { model, tools: [...primaryAgent.tools], }; + if (modelSpec?.vision !== undefined) { + added.vision = modelSpec.vision; + } + if (spec != null && spec !== '') { + added.spec = spec; + } + return added; } // Extract ephemeral agent options from conversation if present @@ -208,6 +215,13 @@ const loadAddedAgent = async ({ req, conversation, primaryAgent }) => { result.artifacts = ephemeralAgent.artifacts; } + if (modelSpec?.vision !== undefined) { + result.vision = modelSpec.vision; + } + if (spec != null && spec !== '') { + result.spec = spec; + } + return result; }; diff --git a/packages/api/src/agents/run.ts b/packages/api/src/agents/run.ts index ad15b5181d84..08f9585d3286 100644 --- a/packages/api/src/agents/run.ts +++ b/packages/api/src/agents/run.ts @@ -52,13 +52,14 @@ export function getReasoningKey( /** * Determines vision capability for an agent. - * - * Priority: + * + * Priority (manual specification wins over hardcoded list): * 1. Explicit override (`agent.vision`) takes precedence - * 2. Auto-detection from model using `validateVisionModel()` - * + * 2. Spec-based: when agent has a `spec` and modelSpecs has that spec with vision set, use it + * 3. Auto-detection from model using `validateVisionModel()` (modelSpecs then hardcoded list) + * * Model is resolved from `agent.model_parameters?.model` or `agent.model`. 
- *
+ *
  * @param agent - The agent to check for vision capability
  * @param modelSpecs - Optional modelSpecs configuration from librechat.yaml
  * @param availableModels - Not used (kept for backwards compatibility)
  * @returns true if the agent supports vision, false otherwise
  */
@@ -73,6 +74,14 @@ function determineVisionCapability(
     return agent.vision;
   }

+  const agentSpec = (agent as { spec?: string }).spec;
+  if (agentSpec != null && agentSpec !== '' && modelSpecs?.list?.length) {
+    const specByName = modelSpecs.list.find((s) => s.name === agentSpec);
+    if (specByName?.vision !== undefined) {
+      return specByName.vision === true;
+    }
+  }
+
   const agentModel = (agent.model_parameters as { model?: string })?.model ?? agent.model;
   if (!agentModel) {
     return false;
@@ -184,19 +193,20 @@ export async function createRun({
   }

   /**
-   * Ensure max_tokens/maxTokens is at least 1 for provider APIs.
-   * Avoids invalid values from missing/wrong model metadata or stored config
-   * (e.g. custom/Scaleway with undefined context leading to negative max_tokens).
+   * Only pass max_tokens/maxTokens when it has a valid value (number >= 1).
+   * Invalid or missing values are omitted so the provider uses its default.
    */
-  const llmConfigRecord = llmConfig as Record<string, unknown>;
-  const rawMaxTokens = llmConfig.maxTokens ?? llmConfigRecord.max_tokens;
-  const sanitizedMaxTokens =
+  const llmConfigRecord = llmConfig as unknown as Record<string, unknown>;
+  const rawMaxTokens = llmConfigRecord.maxTokens ?? llmConfigRecord.max_tokens;
+  const isValidMaxTokens =
     typeof rawMaxTokens === 'number' &&
     !Number.isNaN(rawMaxTokens) &&
-    rawMaxTokens >= 1
-      ? rawMaxTokens
-      : 4096;
-  llmConfig.maxTokens = sanitizedMaxTokens;
+    rawMaxTokens >= 1;
+  if (isValidMaxTokens) {
+    llmConfigRecord.maxTokens = rawMaxTokens;
+  } else {
+    delete llmConfigRecord.maxTokens;
+  }
   delete llmConfigRecord.max_tokens;

   const reasoningKey = getReasoningKey(provider, llmConfig, agent.endpoint);

From bb657c2dbba965a27582c38e682210471232aa49 Mon Sep 17 00:00:00 2001
From: Pascal Garber
Date: Thu, 19 Feb 2026 18:27:00 +0100
Subject: [PATCH 21/23] feat: Enhance model validation logic in validateVisionModel

---
 packages/data-provider/src/config.ts | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts
index 178ed4bd29dd..8739ef42a777 100644
--- a/packages/data-provider/src/config.ts
+++ b/packages/data-provider/src/config.ts
@@ -1331,7 +1331,25 @@ export function validateVisionModel({
   if (modelSpecs?.list) {
     const matchingSpec = modelSpecs.list.find(
-      (spec) => spec.preset?.model === model || model.includes(spec.preset?.model ?? 
''), + (spec) => { + // Exact match with preset.model + if (spec.preset?.model && spec.preset.model === model) { + return true; + } + // Partial match: model contains preset.model (only if preset.model is not empty) + if (spec.preset?.model && spec.preset.model.length > 0 && model.includes(spec.preset.model)) { + return true; + } + // Exact match with spec.name + if (spec.name && spec.name === model) { + return true; + } + // Partial match: model contains spec.name (only if spec.name is not empty) + if (spec.name && spec.name.length > 0 && model.includes(spec.name)) { + return true; + } + return false; + }, ); if (matchingSpec?.vision !== undefined) { From 13763be11195b4336c9dfe7b9470862c7c4cd729 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Thu, 19 Feb 2026 21:03:50 +0100 Subject: [PATCH 22/23] feat: Integrate vision capability checks in agent tools and UI components - Enhanced the agent client to validate vision capabilities based on agent settings and model specifications. - Updated AttachFileMenu and DragDropModal components to utilize the new vision capability checks, ensuring proper handling of image uploads. - Introduced visionEnabledByAgent in useAgentToolPermissions hook to streamline permission checks across components. --- api/server/controllers/agents/client.js | 27 +++++++++++++++++-- .../Chat/Input/Files/AttachFileMenu.tsx | 13 +++++---- .../Chat/Input/Files/DragDropModal.tsx | 14 +++++----- .../hooks/Agents/useAgentToolPermissions.ts | 14 ++++++++++ 4 files changed, 52 insertions(+), 16 deletions(-) diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 2997dd5fac5d..47655b9e0223 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -42,6 +42,7 @@ const { isAgentsEndpoint, isEphemeralAgentId, removeNullishValues, + validateVisionModel, } = require('librechat-data-provider'); const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); const { encodeAndFormat } = require('~/server/services/Files/images/encode'); @@ -164,7 +165,29 @@ class AgentClient extends BaseClient { * @returns {Promise>>} */ async addImageURLs(message, attachments) { - if (!(this.options.agent?.vision ?? false)) { + const agent = this.options.agent; + if (!agent) { + return attachments; + } + + // Determine vision capability: explicit agent.vision takes precedence, + // otherwise check if the model supports vision + let isVisionCapable = false; + if (agent.vision !== undefined) { + isVisionCapable = agent.vision === true; + } else { + const agentModel = + agent.model_parameters?.model ?? agent.model; + if (agentModel) { + const appConfig = this.options.req?.config; + isVisionCapable = validateVisionModel({ + model: agentModel, + modelSpecs: appConfig?.modelSpecs, + }); + } + } + + if (!isVisionCapable) { return attachments; } @@ -172,7 +195,7 @@ class AgentClient extends BaseClient { this.options.req, attachments, { - provider: this.options.agent.provider, + provider: agent.provider, endpoint: this.options.endpoint, }, VisionModes.agents, diff --git a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx index a3ee7458a85b..37d757b276fe 100644 --- a/client/src/components/Chat/Input/Files/AttachFileMenu.tsx +++ b/client/src/components/Chat/Input/Files/AttachFileMenu.tsx @@ -85,10 +85,9 @@ const AttachFileMenu = ({ * */ const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? 
defaultAgentCapabilities); - const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions( - agentId, - ephemeralAgent, - ); + const { fileSearchAllowedByAgent, codeAllowedByAgent, visionEnabledByAgent, provider } = + useAgentToolPermissions(agentId, ephemeralAgent); + const isVisionAvailable = isVisionModel || visionEnabledByAgent; const handleUploadClick = (fileType?: FileUploadType) => { if (!inputRef.current) { @@ -129,7 +128,7 @@ const AttachFileMenu = ({ isDocumentSupportedProvider(currentProvider) || isAzureWithResponsesApi ) { - if (isVisionModel) { + if (isVisionAvailable) { items.push({ label: localize('com_ui_upload_provider'), onClick: () => { @@ -147,7 +146,7 @@ const AttachFileMenu = ({ }); } } else { - if (isVisionModel) { + if (isVisionAvailable) { items.push({ label: localize('com_ui_upload_image_input'), onClick: () => { @@ -233,7 +232,7 @@ const AttachFileMenu = ({ codeAllowedByAgent, fileSearchAllowedByAgent, setIsSharePointDialogOpen, - isVisionModel, + isVisionAvailable, ]); const menuTrigger = ( diff --git a/client/src/components/Chat/Input/Files/DragDropModal.tsx b/client/src/components/Chat/Input/Files/DragDropModal.tsx index c422c371a99b..ec5d621d1d95 100644 --- a/client/src/components/Chat/Input/Files/DragDropModal.tsx +++ b/client/src/components/Chat/Input/Files/DragDropModal.tsx @@ -50,11 +50,10 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities); const { conversationId, agentId, endpoint, endpointType, useResponsesApi } = useDragDropContext(); const ephemeralAgent = useRecoilValue(ephemeralAgentByConvoId(conversationId ?? '')); - const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions( - agentId, - ephemeralAgent, - ); + const { fileSearchAllowedByAgent, codeAllowedByAgent, visionEnabledByAgent, provider } = + useAgentToolPermissions(agentId, ephemeralAgent); const isVisionModel = useVisionModel(); + const isVisionAvailable = isVisionModel || visionEnabledByAgent; const options = useMemo(() => { const _options: FileOption[] = []; @@ -98,14 +97,15 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD label: localize('com_ui_upload_provider'), value: undefined, icon: , - condition: validFileTypes && isVisionModel, + condition: validFileTypes && isVisionAvailable, }); } else { _options.push({ label: localize('com_ui_upload_image_input'), value: undefined, icon: , - condition: files.every((file) => getFileType(file)?.startsWith('image/')) && isVisionModel, + condition: + files.every((file) => getFileType(file)?.startsWith('image/')) && isVisionAvailable, }); } if (capabilities.fileSearchEnabled && fileSearchAllowedByAgent) { @@ -141,7 +141,7 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD useResponsesApi, codeAllowedByAgent, fileSearchAllowedByAgent, - isVisionModel, + isVisionAvailable, ]); if (!isVisible) { diff --git a/client/src/hooks/Agents/useAgentToolPermissions.ts b/client/src/hooks/Agents/useAgentToolPermissions.ts index cff9e9635bbf..96627b5098c5 100644 --- a/client/src/hooks/Agents/useAgentToolPermissions.ts +++ b/client/src/hooks/Agents/useAgentToolPermissions.ts @@ -8,6 +8,8 @@ import { isEphemeralAgent } from '~/common'; interface AgentToolPermissionsResult { fileSearchAllowedByAgent: boolean; codeAllowedByAgent: boolean; + /** True when the current agent has image vision enabled (shows 
upload-to-provider in chat input) */ + visionEnabledByAgent: boolean; tools: string[] | undefined; provider?: string; } @@ -64,9 +66,21 @@ export default function useAgentToolPermissions( return tools?.includes(Tools.execute_code) ?? false; }, [agentId, selectedAgent, tools, ephemeralAgent]); + const visionEnabledByAgent = useMemo(() => { + if (agentId == null || agentId === '') return false; + const agent = agentData ?? selectedAgent; + const vision = agent?.vision; + if (vision !== undefined) return vision; + const versions = (agent as { versions?: Array<{ vision?: boolean }> })?.versions; + return versions?.length + ? (versions[versions.length - 1]?.vision ?? false) + : false; + }, [agentId, agentData, selectedAgent]); + return { fileSearchAllowedByAgent, codeAllowedByAgent, + visionEnabledByAgent, provider, tools, }; From eaff1ba2fd574a4dd875d16d4186726758059137 Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Fri, 6 Mar 2026 11:35:03 +0100 Subject: [PATCH 23/23] refactor: Revert provider recognition and content formatting logic --- .../api/src/mcp/__tests__/parsers.test.ts | 66 ------------- packages/api/src/mcp/parsers.ts | 94 ++----------------- 2 files changed, 8 insertions(+), 152 deletions(-) diff --git a/packages/api/src/mcp/__tests__/parsers.test.ts b/packages/api/src/mcp/__tests__/parsers.test.ts index 71e27b09ab79..dd9a09a0fb35 100644 --- a/packages/api/src/mcp/__tests__/parsers.test.ts +++ b/packages/api/src/mcp/__tests__/parsers.test.ts @@ -29,72 +29,6 @@ describe('formatToolContent', () => { expect(content).toBe('(No response)'); expect(artifacts).toBeUndefined(); }); - - it('should return string for known non-OpenAI providers', () => { - const result: t.MCPToolCallResponse = { - content: [{ type: 'text', text: 'Test content' }], - }; - const [content, artifacts] = formatToolContent(result, 'google' as t.Provider); - // Google is recognized but uses array format, so this should be an array - expect(Array.isArray(content)).toBe(true); - }); - }); - - describe('automatic detection of OpenAI-compatible custom endpoints', () => { - it('should automatically recognize new OpenAI-compatible custom endpoints', () => { - const result: t.MCPToolCallResponse = { - content: [ - { type: 'text', text: 'First text' }, - { type: 'text', text: 'Second text' }, - ], - }; - - // Test with a custom endpoint that's not explicitly listed - const [content, artifacts] = formatToolContent(result, 'scaleway' as t.Provider); - // Should be recognized and use array format (OpenAI-compatible) - expect(Array.isArray(content)).toBe(true); - expect(content).toEqual([{ type: 'text', text: 'First text\n\nSecond text' }]); - expect(artifacts).toBeUndefined(); - }); - - it('should use array format for unknown OpenAI-compatible endpoints', () => { - const result: t.MCPToolCallResponse = { - content: [ - { type: 'text', text: 'Before image' }, - { type: 'image', data: 'base64data', mimeType: 'image/png' }, - { type: 'text', text: 'After image' }, - ], - }; - - // Test with another custom endpoint (e.g., together, perplexity, anyscale) - const [content, artifacts] = formatToolContent(result, 'together' as t.Provider); - // Should use array format like OpenAI - expect(Array.isArray(content)).toBe(true); - expect(content).toEqual([ - { type: 'text', text: 'Before image' }, - { type: 'text', text: 'After image' }, - ]); - expect(artifacts).toEqual({ - content: [ - { - type: 'image_url', - image_url: { url: 'data:image/png;base64,base64data' }, - }, - ], - }); - }); - - it('should NOT recognize known non-OpenAI 
providers', () => { - const result: t.MCPToolCallResponse = { - content: [{ type: 'text', text: 'Test content' }], - }; - - // Non-OpenAI providers should return string format - const [content, artifacts] = formatToolContent(result, 'bedrock' as t.Provider); - expect(typeof content).toBe('string'); - expect(content).toBe('Test content'); - expect(artifacts).toBeUndefined(); - }); }); describe('recognized providers - content array providers', () => { diff --git a/packages/api/src/mcp/parsers.ts b/packages/api/src/mcp/parsers.ts index 213f289a2f7e..76e59b2e9cf1 100644 --- a/packages/api/src/mcp/parsers.ts +++ b/packages/api/src/mcp/parsers.ts @@ -7,10 +7,6 @@ function generateResourceId(text: string): string { return crypto.createHash('sha256').update(text).digest('hex').substring(0, 10); } -// Known providers that are NOT OpenAI-compatible -// This is a small, stable list that rarely changes -const NON_OPENAI_PROVIDERS = new Set(['google', 'anthropic', 'bedrock', 'ollama']); - const RECOGNIZED_PROVIDERS = new Set([ 'google', 'anthropic', @@ -21,72 +17,9 @@ const RECOGNIZED_PROVIDERS = new Set([ 'deepseek', 'ollama', 'bedrock', - // Note: Custom OpenAI-compatible endpoints (like scaleway, together, perplexity, etc.) - // are automatically recognized if they're not in NON_OPENAI_PROVIDERS ]); - -// Known providers that use content array format (structured content blocks) -// These are the standard OpenAI-compatible providers plus Google and Anthropic const CONTENT_ARRAY_PROVIDERS = new Set(['google', 'anthropic', 'azureopenai', 'openai']); -/** - * Check if a provider should receive structured content formatting for MCP tool responses. - * - * Recognizes: - * 1. Explicitly listed providers in RECOGNIZED_PROVIDERS - * 2. Custom OpenAI-compatible endpoints (any provider not in NON_OPENAI_PROVIDERS) - * - * Custom endpoints are passed with their endpoint name (not "openai"), so we automatically - * detect them rather than requiring explicit additions for each new provider. - */ -function isRecognizedProvider(provider: t.Provider): boolean { - // Check explicit list first (for known providers) - if (RECOGNIZED_PROVIDERS.has(provider)) { - return true; - } - - // If not explicitly recognized and not a known non-OpenAI provider, - // assume it's an OpenAI-compatible custom endpoint - // This automatically supports all new OpenAI-compatible providers without code changes - if (!NON_OPENAI_PROVIDERS.has(provider)) { - return true; - } - - return false; -} - -/** - * Check if a provider uses content array format (structured content blocks). 
- * - * Uses array format: - * - Standard OpenAI-compatible providers (openai, azureopenai) - * - Google and Anthropic (native array format) - * - New unknown custom endpoints (assumed OpenAI-compatible, so use array format) - * - * Uses string format: - * - Known custom providers with special handling (openrouter, xai, deepseek) - * - Other non-OpenAI providers (ollama, bedrock) - */ -function usesContentArrayFormat(provider: t.Provider): boolean { - // Explicit array format providers - if (CONTENT_ARRAY_PROVIDERS.has(provider)) { - return true; - } - - // Known custom providers that use string format (despite being OpenAI-compatible) - if (['openrouter', 'xai', 'deepseek'].includes(provider)) { - return false; - } - - // Unknown providers: if not a known non-OpenAI provider, assume OpenAI-compatible - // and use array format (like standard OpenAI endpoints) - if (!NON_OPENAI_PROVIDERS.has(provider)) { - return true; - } - - return false; -} - const imageFormatters: Record = { // google: (item) => ({ // type: 'image', @@ -156,24 +89,11 @@ function parseAsString(result: t.MCPToolCallResponse): string { * @param provider - The provider name (google, anthropic, openai) * @returns Tuple of content and image_urls */ -/** - * Formats MCP tool call response content for different provider types. - * - * Handles provider-specific formatting: - * - OpenAI-compatible providers: Uses content array format with artifacts - * - Non-OpenAI providers: Uses string format - * - * Automatically detects custom OpenAI-compatible endpoints (not in NON_OPENAI_PROVIDERS). - * - * @param result - MCP tool call response with content array - * @param provider - Provider identifier (e.g., 'openai', 'scaleway', 'anthropic') - * @returns Tuple of [formattedContent, artifacts] where artifacts contain image URLs - */ export function formatToolContent( result: t.MCPToolCallResponse, provider: t.Provider, ): t.FormattedContentResult { - if (!isRecognizedProvider(provider)) { + if (!RECOGNIZED_PROVIDERS.has(provider)) { return [parseAsString(result), undefined]; } @@ -202,7 +122,7 @@ export function formatToolContent( if (!isImageContent(item)) { return; } - if (usesContentArrayFormat(provider) && currentTextBlock) { + if (CONTENT_ARRAY_PROVIDERS.has(provider) && currentTextBlock) { formattedContent.push({ type: 'text', text: currentTextBlock }); currentTextBlock = ''; } @@ -275,7 +195,7 @@ UI Resource Markers Available: currentTextBlock += uiInstructions; } - if (usesContentArrayFormat(provider) && currentTextBlock) { + if (CONTENT_ARRAY_PROVIDERS.has(provider) && currentTextBlock) { formattedContent.push({ type: 'text', text: currentTextBlock }); } @@ -291,7 +211,9 @@ UI Resource Markers Available: }; } - return usesContentArrayFormat(provider) - ? [formattedContent, artifacts] - : [currentTextBlock, artifacts]; + if (CONTENT_ARRAY_PROVIDERS.has(provider)) { + return [formattedContent, artifacts]; + } + + return [currentTextBlock, artifacts]; }
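After this revert, provider handling in formatToolContent reduces to a two-set dispatch. A condensed sketch of the resulting behavior (provider sets copied from the final parsers.ts above; return shapes simplified to labels):

    // Unrecognized providers (now including custom endpoints such as
    // 'scaleway' again) get the plain-string parse; content-array providers
    // get structured blocks; the remaining recognized providers get the
    // joined text block.
    const RECOGNIZED = new Set([
      'google', 'anthropic', 'azureopenai', 'openai',
      'openrouter', 'xai', 'deepseek', 'ollama', 'bedrock',
    ]);
    const CONTENT_ARRAY = new Set(['google', 'anthropic', 'azureopenai', 'openai']);

    type Mode = 'string' | 'content-array' | 'joined-text';

    function dispatchMode(provider: string): Mode {
      if (!RECOGNIZED.has(provider)) {
        return 'string';
      }
      return CONTENT_ARRAY.has(provider) ? 'content-array' : 'joined-text';
    }

    dispatchMode('scaleway'); // 'string' (custom endpoints fall back to string parsing)
    dispatchMode('openai');   // 'content-array'
    dispatchMode('deepseek'); // 'joined-text'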