Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
ac40e23
feat: Add Scaleway custom endpoint support
JumpLink Jan 24, 2026
3f8347b
fix: Improve usage metadata extraction from LangChain responses
JumpLink Jan 24, 2026
d735df0
feat: Add Scaleway custom endpoint support
JumpLink Jan 24, 2026
c5d6c43
feat: Improve support for OpenAI-compatible custom endpoints
JumpLink Jan 24, 2026
3166bcc
refactor: Simplify usage metadata extraction in ModelEndHandler
JumpLink Jan 24, 2026
612b14c
feat: Improve support for OpenAI-compatible custom endpoints
JumpLink Jan 24, 2026
4942cf2
feat: Improve support for OpenAI-compatible custom endpoints
JumpLink Jan 24, 2026
dc1ee4d
feat: add vision capability flag to modelSpecs
JumpLink Jan 24, 2026
488dd2d
refactor: replace validateVisionModel usage with useVisionModel hook
JumpLink Jan 24, 2026
87f8d17
refactor: remove modelSpecs from agents API (agent-specific concern)
JumpLink Jan 24, 2026
5f8d81b
chore: Revert formatting changes
JumpLink Jan 24, 2026
77cd68f
Merge branch 'feat/vision'
JumpLink Jan 24, 2026
ee4ee30
feat: Auto-detect OpenAI-compatible endpoints for MCP content formatting
JumpLink Jan 24, 2026
daf56d2
Merge branch 'feat/scaleway'
JumpLink Jan 24, 2026
4a67bbc
Merge remote-tracking branch 'upstream/main'
JumpLink Jan 26, 2026
da07dce
feat: add vision toggle for agents
JumpLink Jan 24, 2026
a22a466
feat: Enhance vision capabilities and artifact processing for agents
JumpLink Jan 26, 2026
986f6e6
refactor: Clean up whitespace in createToolEndCallback function
JumpLink Jan 26, 2026
72149a2
refactor: Clean up MCP tool result handling and add vision debug logging
JumpLink Jan 26, 2026
a3a845f
refactor: Simplify MCP tool result handling and remove vision debug logs
JumpLink Jan 26, 2026
54282d1
refactor: Enhance vision capability handling and artifact processing
JumpLink Jan 27, 2026
ba611c3
refactor: Improve handling of vision capability and artifact message …
JumpLink Jan 27, 2026
f34fc80
refactor: Enhance vision handling and specification processing in age…
JumpLink Jan 27, 2026
cd6f861
Merge upstream: resolve conflicts (vision + deferred/programmatic tools)
JumpLink Feb 11, 2026
5cb2c91
Merge remote-tracking branch 'upstream' into feat/vision
JumpLink Feb 14, 2026
4ff3bbc
Merge remote-tracking branch 'upstream' into feat/vision
JumpLink Feb 16, 2026
dd54998
Merge remote-tracking branch 'upstream' into feat/vision
JumpLink Feb 17, 2026
bb657c2
feat: Enhance model validation logic in Vision Model
JumpLink Feb 19, 2026
13763be
feat: Integrate vision capability checks in agent tools and UI compon…
JumpLink Feb 19, 2026
4adebaa
Merge upstream
JumpLink Mar 6, 2026
eaff1ba
refactor: Revert provider recognition and content formatting logic
JumpLink Mar 6, 2026
34a8a09
Merge remote-tracking branch 'upstream/main' into feat/vision
JumpLink Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions api/server/controllers/agents/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const {
isAgentsEndpoint,
isEphemeralAgentId,
removeNullishValues,
validateVisionModel,
} = require('librechat-data-provider');
const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
Expand Down Expand Up @@ -165,11 +166,37 @@ class AgentClient extends BaseClient {
* @returns {Promise<Array<Partial<MongoFile>>>}
*/
async addImageURLs(message, attachments) {
const agent = this.options.agent;
if (!agent) {
return attachments;
}

// Determine vision capability: explicit agent.vision takes precedence,
// otherwise check if the model supports vision
let isVisionCapable = false;
if (agent.vision !== undefined) {
isVisionCapable = agent.vision === true;
} else {
const agentModel =
agent.model_parameters?.model ?? agent.model;
if (agentModel) {
const appConfig = this.options.req?.config;
isVisionCapable = validateVisionModel({
model: agentModel,
modelSpecs: appConfig?.modelSpecs,
});
}
}

if (!isVisionCapable) {
return attachments;
}

const { files, image_urls } = await encodeAndFormat(
this.options.req,
attachments,
{
provider: this.options.agent.provider,
provider: agent.provider,
endpoint: this.options.endpoint,
},
VisionModes.agents,
Expand Down Expand Up @@ -241,6 +268,9 @@ class AgentClient extends BaseClient {
);
}

// Image content in messages is filtered by the LLM layer (_convertMessagesToOpenAIParams)
// when agent.vision is false; no need to strip image_urls here.

/** @type {Record<number, number>} */
const canonicalTokenCountMap = {};
/** @type {Record<string, number>} */
Expand Down Expand Up @@ -840,8 +870,10 @@ class AgentClient extends BaseClient {
customHandlers: this.options.eventHandlers,
requestBody: config.configurable.requestBody,
user: createSafeUser(this.options.req?.user),
summarizationConfig: appConfig?.summarization,
tokenCounter,
modelSpecs: appConfig.modelSpecs,
availableModels: appConfig.availableModels,
summarizationConfig: appConfig?.summarization,
});

if (!run) {
Expand Down
21 changes: 21 additions & 0 deletions api/server/services/AssistantService.js
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,27 @@ async function runAssistant({
});

const tool_outputs = await processRequiredActions(openai, actions);

// Add artifact content as user message to thread if artifacts were processed
if (openai.pendingArtifactContent?.length) {
const willAttachFileIds =
openai.pendingArtifactFileIds?.length &&
openai.pendingArtifactContent.some((item) => item?.type === ContentTypes.IMAGE_FILE);

const artifactMessage = {
role: 'user',
content: openai.pendingArtifactContent,
};
if (willAttachFileIds) {
artifactMessage.file_ids = openai.pendingArtifactFileIds;
}
await openai.beta.threads.messages.create(thread_id, artifactMessage);

// Clear after use
delete openai.pendingArtifactContent;
delete openai.pendingArtifactFileIds;
}

const toolRun = await openai.beta.threads.runs.submitToolOutputs(run.id, {
thread_id: run.thread_id,
tool_outputs,
Expand Down
4 changes: 4 additions & 0 deletions api/server/services/MCP.js
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ function createToolInstance({
derivedSignal = config?.signal ? AbortSignal.any([config.signal]) : undefined;
const mcpManager = getMCPManager(userId);
const provider = (config?.metadata?.provider || _provider)?.toLowerCase();
const endpoint = config?.metadata?.endpoint;

const { args: _args, stepId, ...toolCall } = config.toolCall ?? {};
const flowId = `${serverName}:oauth_login:${config.metadata.thread_id}:${config.metadata.run_id}`;
Expand Down Expand Up @@ -574,6 +575,9 @@ function createToolInstance({
const customUserVars =
config?.configurable?.userMCPAuthMap?.[`${Constants.mcp_prefix}${serverName}`];

// mcpManager.callTool returns FormattedContentResult: [content, artifacts]
// This tuple format is already handled by formatToolContent in @librechat/api
// and is compatible with responseFormat: CONTENT_AND_ARTIFACT
const result = await mcpManager.callTool({
serverName,
toolName,
Expand Down
65 changes: 61 additions & 4 deletions api/server/services/ToolService.js
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,20 @@ async function processRequiredActions(client, requiredActions) {
let tool = ToolMap[currentAction.tool] ?? ActionToolMap[currentAction.tool];

const handleToolOutput = async (output) => {
// For MCP tools, output is [content, artifact] array
// Store the full array in requiredActions[i].output for artifact processing
// For tool output to OpenAI, we'll extract just the content
requiredActions[i].output = output;

// Extract content for tool call display (first element of array if array, otherwise output itself)
const outputContent = Array.isArray(output) && output.length >= 1 ? output[0] : output;

/** @type {FunctionToolCall & PartMetadata} */
const toolCall = {
function: {
name: currentAction.tool,
arguments: JSON.stringify(currentAction.toolInput),
output,
output: outputContent,
},
id: currentAction.toolCallId,
type: 'function',
Expand All @@ -207,7 +213,7 @@ async function processRequiredActions(client, requiredActions) {
const toolCallIndex = client.mappedOrder.get(toolCall.id);

if (imageGenTools.has(currentAction.tool)) {
const imageOutput = output;
const imageOutput = outputContent;
toolCall.function.output = `${currentAction.tool} displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.`;

// Streams the "Finished" state of the tool call in the UI
Expand Down Expand Up @@ -252,9 +258,13 @@ async function processRequiredActions(client, requiredActions) {
// result: tool.result,
});

// For MCP tools with artifacts, return the content string for OpenAI tool output
// The full array [content, artifact] is stored in requiredActions[i].output for artifact processing
const finalOutput = outputContent;

return {
tool_call_id: currentAction.toolCallId,
output,
output: finalOutput,
};
};

Expand Down Expand Up @@ -410,8 +420,55 @@ async function processRequiredActions(client, requiredActions) {
}
}

const tool_outputs = await Promise.all(promises);

// Process artifacts from MCP tools and prepare for next user message
const allArtifacts = [];
for (let i = 0; i < requiredActions.length; i++) {
const action = requiredActions[i];
// MCP tools return [content, artifact] format
// For OpenRouter (string format): [string, artifacts]
// For OpenAI-compatible (array format): [[contentArray], artifacts]
if (
action.output &&
Array.isArray(action.output) &&
action.output.length === 2 &&
action.output[1]?.content
) {
allArtifacts.push({
artifacts: action.output[1],
toolName: action.tool,
});
}
}

if (allArtifacts.length > 0) {
const isVisionModel = getVisionCapability(client);
const artifactFileIds = [];
const artifactContent = [];

for (const { artifacts, toolName } of allArtifacts) {
const processed = await processArtifactsForAssistants({
artifacts,
isVisionModel,
req: client.req,
thread_id: requiredActions[0].thread_id,
conversationId:
(client.responseMessage ?? client.finalMessage)?.conversationId,
});

artifactFileIds.push(...processed.fileIds);
artifactContent.push(...processed.contentParts);
}

if (artifactContent.length > 0 || artifactFileIds.length > 0) {
client.pendingArtifactContent = artifactContent;
client.pendingArtifactFileIds = artifactFileIds;
}
}

return {
tool_outputs: await Promise.all(promises),
tool_outputs,
};
}

Expand Down
1 change: 1 addition & 0 deletions client/src/common/agents-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export type TAgentCapabilities = {
[AgentCapabilities.web_search]: boolean;
[AgentCapabilities.file_search]: boolean;
[AgentCapabilities.execute_code]: boolean;
[AgentCapabilities.vision]: boolean;
[AgentCapabilities.end_after_tools]?: boolean;
[AgentCapabilities.hide_sequential_outputs]?: boolean;
};
Expand Down
67 changes: 38 additions & 29 deletions client/src/components/Chat/Input/Files/AttachFileMenu.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import { useGetStartupConfig } from '~/data-provider';
import { ephemeralAgentByConvoId } from '~/store';
import { MenuItemProps } from '~/common';
import { cn } from '~/utils';
import { useVisionModel } from '~/hooks';

type FileUploadType =
| 'image'
Expand Down Expand Up @@ -80,6 +81,7 @@ const AttachFileMenu = ({
const { agentsConfig } = useGetAgentsConfig();
const { data: startupConfig } = useGetStartupConfig();
const sharePointEnabled = startupConfig?.sharePointFilePickerEnabled;
const isVisionModel = useVisionModel();

const [isSharePointDialogOpen, setIsSharePointDialogOpen] = useState(false);

Expand All @@ -89,10 +91,9 @@ const AttachFileMenu = ({
* */
const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities);

const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions(
agentId,
ephemeralAgent,
);
const { fileSearchAllowedByAgent, codeAllowedByAgent, visionEnabledByAgent, provider } =
useAgentToolPermissions(agentId, ephemeralAgent);
const isVisionAvailable = isVisionModel || visionEnabledByAgent;

const handleUploadClick = (fileType?: FileUploadType) => {
if (!inputRef.current) {
Expand Down Expand Up @@ -135,32 +136,39 @@ const AttachFileMenu = ({
isDocumentSupportedProvider(currentProvider) ||
isAzureWithResponsesApi
) {
items.push({
label: localize('com_ui_upload_provider'),
onClick: () => {
setToolResource(undefined);
let fileType: Exclude<FileUploadType, 'image' | 'document'> = 'image_document';
if (currentProvider === Providers.GOOGLE || currentProvider === Providers.OPENROUTER) {
fileType = 'image_document_video_audio';
} else if (
currentProvider === Providers.BEDROCK ||
endpointType === EModelEndpoint.bedrock
) {
fileType = 'image_document_extended';
}
onAction(fileType);
},
icon: <FileImageIcon className="icon-md" />,
});
if (isVisionAvailable) {
items.push({
label: localize('com_ui_upload_provider'),
onClick: () => {
setToolResource(undefined);
let fileType: Exclude<FileUploadType, 'image' | 'document'> = 'image_document';
if (
currentProvider === Providers.GOOGLE ||
currentProvider === Providers.OPENROUTER
) {
fileType = 'image_document_video_audio';
} else if (
currentProvider === Providers.BEDROCK ||
endpointType === EModelEndpoint.bedrock
) {
fileType = 'image_document_extended';
}
onAction(fileType);
},
icon: <FileImageIcon className="icon-md" />,
});
}
} else {
items.push({
label: localize('com_ui_upload_image_input'),
onClick: () => {
setToolResource(undefined);
onAction('image');
},
icon: <ImageUpIcon className="icon-md" />,
});
if (isVisionAvailable) {
items.push({
label: localize('com_ui_upload_image_input'),
onClick: () => {
setToolResource(undefined);
onAction('image');
},
icon: <ImageUpIcon className="icon-md" />,
});
}
}

if (capabilities.contextEnabled) {
Expand Down Expand Up @@ -237,6 +245,7 @@ const AttachFileMenu = ({
codeAllowedByAgent,
fileSearchAllowedByAgent,
setIsSharePointDialogOpen,
isVisionAvailable,
]);

const menuTrigger = (
Expand Down
16 changes: 9 additions & 7 deletions client/src/components/Chat/Input/Files/DragDropModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
} from '~/hooks';
import { ephemeralAgentByConvoId } from '~/store';
import { useDragDropContext } from '~/Providers';
import { useVisionModel } from '~/hooks';

interface DragDropModalProps {
onOptionSelect: (option: EToolResources | undefined) => void;
Expand All @@ -50,10 +51,10 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
const capabilities = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities);
const { conversationId, agentId, endpoint, endpointType, useResponsesApi } = useDragDropContext();
const ephemeralAgent = useRecoilValue(ephemeralAgentByConvoId(conversationId ?? ''));
const { fileSearchAllowedByAgent, codeAllowedByAgent, provider } = useAgentToolPermissions(
agentId,
ephemeralAgent,
);
const { fileSearchAllowedByAgent, codeAllowedByAgent, visionEnabledByAgent, provider } =
useAgentToolPermissions(agentId, ephemeralAgent);
const isVisionModel = useVisionModel();
const isVisionAvailable = isVisionModel || visionEnabledByAgent;

const options = useMemo(() => {
const _options: FileOption[] = [];
Expand Down Expand Up @@ -103,15 +104,15 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
label: localize('com_ui_upload_provider'),
value: undefined,
icon: <FileImageIcon className="icon-md" />,
condition: validFileTypes,
condition: validFileTypes && isVisionAvailable,
});
} else {
// Only show image upload option if all files are images and provider doesn't support documents
_options.push({
label: localize('com_ui_upload_image_input'),
value: undefined,
icon: <ImageUpIcon className="icon-md" />,
condition: files.every((file) => getFileType(file)?.startsWith('image/')),
condition:
files.every((file) => getFileType(file)?.startsWith('image/')) && isVisionAvailable,
});
}
if (capabilities.fileSearchEnabled && fileSearchAllowedByAgent) {
Expand Down Expand Up @@ -147,6 +148,7 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
useResponsesApi,
codeAllowedByAgent,
fileSearchAllowedByAgent,
isVisionAvailable,
]);

if (!isVisible) {
Expand Down
Loading