Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions client/src/common/agents-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export type TAgentCapabilities = {
[AgentCapabilities.web_search]: boolean;
[AgentCapabilities.file_search]: boolean;
[AgentCapabilities.execute_code]: boolean;
[AgentCapabilities.vision]: boolean;
[AgentCapabilities.end_after_tools]?: boolean;
[AgentCapabilities.hide_sequential_outputs]?: boolean;
};
Expand Down
50 changes: 30 additions & 20 deletions client/src/components/Chat/Input/Files/AttachFileMenu.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import { useGetStartupConfig } from '~/data-provider';
import { ephemeralAgentByConvoId } from '~/store';
import { MenuItemProps } from '~/common';
import { cn } from '~/utils';
import { useVisionModel } from '~/hooks';

type FileUploadType = 'image' | 'document' | 'image_document' | 'image_document_video_audio';

Expand Down Expand Up @@ -74,6 +75,7 @@ const AttachFileMenu = ({
const { agentsConfig } = useGetAgentsConfig();
const { data: startupConfig } = useGetStartupConfig();
const sharePointEnabled = startupConfig?.sharePointFilePickerEnabled;
const isVisionModel = useVisionModel();

const [isSharePointDialogOpen, setIsSharePointDialogOpen] = useState(false);

Expand Down Expand Up @@ -127,27 +129,34 @@ const AttachFileMenu = ({
isDocumentSupportedProvider(currentProvider) ||
isAzureWithResponsesApi
) {
items.push({
label: localize('com_ui_upload_provider'),
onClick: () => {
setToolResource(undefined);
let fileType: Exclude<FileUploadType, 'image' | 'document'> = 'image_document';
if (currentProvider === Providers.GOOGLE || currentProvider === Providers.OPENROUTER) {
fileType = 'image_document_video_audio';
}
onAction(fileType);
},
icon: <FileImageIcon className="icon-md" />,
});
if (isVisionModel) {
items.push({
label: localize('com_ui_upload_provider'),
onClick: () => {
setToolResource(undefined);
let fileType: Exclude<FileUploadType, 'image' | 'document'> = 'image_document';
if (
currentProvider === Providers.GOOGLE ||
currentProvider === Providers.OPENROUTER
) {
fileType = 'image_document_video_audio';
}
onAction(fileType);
},
icon: <FileImageIcon className="icon-md" />,
});
}
} else {
items.push({
label: localize('com_ui_upload_image_input'),
onClick: () => {
setToolResource(undefined);
onAction('image');
},
icon: <ImageUpIcon className="icon-md" />,
});
if (isVisionModel) {
items.push({
label: localize('com_ui_upload_image_input'),
onClick: () => {
setToolResource(undefined);
onAction('image');
},
icon: <ImageUpIcon className="icon-md" />,
});
}
}

if (capabilities.contextEnabled) {
Expand Down Expand Up @@ -224,6 +233,7 @@ const AttachFileMenu = ({
codeAllowedByAgent,
fileSearchAllowedByAgent,
setIsSharePointDialogOpen,
isVisionModel,
]);

const menuTrigger = (
Expand Down
8 changes: 5 additions & 3 deletions client/src/components/Chat/Input/Files/DragDropModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
} from '~/hooks';
import { ephemeralAgentByConvoId } from '~/store';
import { useDragDropContext } from '~/Providers';
import { useVisionModel } from '~/hooks';

interface DragDropModalProps {
onOptionSelect: (option: EToolResources | undefined) => void;
Expand Down Expand Up @@ -53,6 +54,7 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
agentId,
ephemeralAgent,
);
const isVisionModel = useVisionModel();

const options = useMemo(() => {
const _options: FileOption[] = [];
Expand Down Expand Up @@ -96,15 +98,14 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
label: localize('com_ui_upload_provider'),
value: undefined,
icon: <FileImageIcon className="icon-md" />,
condition: validFileTypes,
condition: validFileTypes && isVisionModel,
});
} else {
// Only show image upload option if all files are images and provider doesn't support documents
_options.push({
label: localize('com_ui_upload_image_input'),
value: undefined,
icon: <ImageUpIcon className="icon-md" />,
condition: files.every((file) => getFileType(file)?.startsWith('image/')),
condition: files.every((file) => getFileType(file)?.startsWith('image/')) && isVisionModel,
});
}
if (capabilities.fileSearchEnabled && fileSearchAllowedByAgent) {
Expand Down Expand Up @@ -140,6 +141,7 @@ const DragDropModal = ({ onOptionSelect, setShowModal, files, isVisible }: DragD
useResponsesApi,
codeAllowedByAgent,
fileSearchAllowedByAgent,
isVisionModel,
]);

if (!isVisible) {
Expand Down
22 changes: 19 additions & 3 deletions client/src/components/SidePanel/Agents/AgentConfig.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import React, { useState, useMemo, useCallback } from 'react';
import { useToastContext } from '@librechat/client';
import { Controller, useWatch, useFormContext } from 'react-hook-form';
import { EModelEndpoint, getEndpointField } from 'librechat-data-provider';
import { EModelEndpoint, getEndpointField, defaultAgentCapabilities } from 'librechat-data-provider';
import type { AgentForm, IconComponentTypes } from '~/common';
import {
removeFocusOutlines,
Expand Down Expand Up @@ -29,6 +29,7 @@ import Artifacts from './Artifacts';
import AgentTool from './AgentTool';
import CodeForm from './Code/Form';
import MCPTools from './MCPTools';
import ImageVision from './ImageVision';

const labelClass = 'mb-2 text-token-text-primary block font-medium';
const inputClass = cn(
Expand Down Expand Up @@ -85,7 +86,8 @@ export default function AgentConfig() {
artifactsEnabled,
webSearchEnabled,
fileSearchEnabled,
} = useAgentCapabilities(agentsConfig?.capabilities);
visionEnabled,
} = useAgentCapabilities(agentsConfig?.capabilities ?? defaultAgentCapabilities);

const context_files = useMemo(() => {
if (typeof agent === 'string') {
Expand Down Expand Up @@ -288,7 +290,8 @@ export default function AgentConfig() {
fileSearchEnabled ||
artifactsEnabled ||
contextEnabled ||
webSearchEnabled) && (
webSearchEnabled ||
visionEnabled) && (
<div className="mb-4 flex w-full flex-col items-start gap-3">
<label className="text-token-text-primary block font-medium">
{localize('com_assistants_capabilities')}
Expand All @@ -303,6 +306,19 @@ export default function AgentConfig() {
{artifactsEnabled && <Artifacts />}
{/* File Search */}
{fileSearchEnabled && <FileSearch agent_id={agent_id} files={knowledge_files} />}
{/* Vision */}
{visionEnabled && (
<div className="w-full">
<div className="mb-1.5 flex items-center gap-2">
<span>
<label className="text-token-text-primary block font-medium">
{localize('com_assistants_image_vision')}
</label>
</span>
</div>
<ImageVision />
</div>
)}
</div>
)}
{/* MCP Section */}
Expand Down
2 changes: 2 additions & 0 deletions client/src/components/SidePanel/Agents/AgentPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export function composeAgentUpdatePayload(data: AgentForm, agent_id?: string | n
edges,
end_after_tools,
hide_sequential_outputs,
vision,
recursion_limit,
category,
support_contact,
Expand All @@ -94,6 +95,7 @@ export function composeAgentUpdatePayload(data: AgentForm, agent_id?: string | n
edges,
end_after_tools,
hide_sequential_outputs,
vision,
recursion_limit,
category,
support_contact,
Expand Down
1 change: 1 addition & 0 deletions client/src/components/SidePanel/Agents/AgentSelect.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export default function AgentSelect({
[AgentCapabilities.web_search]: false,
[AgentCapabilities.file_search]: false,
[AgentCapabilities.execute_code]: false,
[AgentCapabilities.vision]: false,
[AgentCapabilities.end_after_tools]: false,
[AgentCapabilities.hide_sequential_outputs]: false,
};
Expand Down
89 changes: 59 additions & 30 deletions client/src/components/SidePanel/Agents/ImageVision.tsx
Original file line number Diff line number Diff line change
@@ -1,40 +1,69 @@
import { Checkbox } from '@librechat/client';
import { Capabilities } from 'librechat-data-provider';
import { memo } from 'react';
import { AgentCapabilities } from 'librechat-data-provider';
import { useFormContext, Controller } from 'react-hook-form';
import {
Checkbox,
HoverCard,
HoverCardContent,
HoverCardPortal,
HoverCardTrigger,
CircleHelpIcon,
} from '@librechat/client';
import type { AgentForm } from '~/common';
import { useLocalize } from '~/hooks';
import { ESide } from '~/common';

export default function ImageVision() {
function ImageVision() {
const localize = useLocalize();
const methods = useFormContext<AgentForm>();
const { control, setValue, getValues } = methods;
const { control } = methods;

return (
<div className="flex items-center">
<Controller
name={Capabilities.image_vision}
control={control}
render={({ field }) => (
<Checkbox
{...field}
checked={field.value}
onCheckedChange={field.onChange}
className="relative float-left mr-2 inline-flex h-4 w-4 cursor-pointer"
value={field.value?.toString()}
/>
)}
/>
<label
className="form-check-label text-token-text-primary w-full cursor-pointer"
htmlFor={Capabilities.image_vision}
onClick={() =>
setValue(Capabilities.image_vision, !getValues(Capabilities.image_vision), {
shouldDirty: true,
})
}
>
<div className="flex items-center">{localize('com_assistants_image_vision')}</div>
</label>
</div>
<HoverCard openDelay={50}>
<div className="my-2 flex items-center">
<Controller
name={AgentCapabilities.vision}
control={control}
render={({ field }) => (
<Checkbox
{...field}
id="image-vision-checkbox"
checked={field.value}
onCheckedChange={field.onChange}
className="relative float-left mr-2 inline-flex h-4 w-4 cursor-pointer"
value={field.value?.toString()}
aria-labelledby="image-vision-label"
/>
)}
/>
<label
id="image-vision-label"
htmlFor="image-vision-checkbox"
className="form-check-label text-token-text-primary cursor-pointer"
>
{localize('com_agents_enable_image_vision')}
</label>
<HoverCardTrigger asChild className="ml-2">
<button
type="button"
className="inline-flex items-center"
aria-label={localize('com_agents_image_vision_info')}
>
<CircleHelpIcon className="h-4 w-4 text-text-tertiary" />
</button>
</HoverCardTrigger>
<HoverCardPortal>
<HoverCardContent side={ESide.Top} className="w-80">
<div className="space-y-2">
<p className="text-sm text-text-secondary">
{localize('com_agents_image_vision_info')}
</p>
</div>
</HoverCardContent>
</HoverCardPortal>
</div>
</HoverCard>
);
}

export default memo(ImageVision);
7 changes: 7 additions & 0 deletions client/src/hooks/Agents/useAgentCapabilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ interface AgentCapabilitiesResult {
fileSearchEnabled: boolean;
webSearchEnabled: boolean;
codeEnabled: boolean;
visionEnabled: boolean;
}

export default function useAgentCapabilities(
Expand Down Expand Up @@ -55,6 +56,11 @@ export default function useAgentCapabilities(
[capabilities],
);

const visionEnabled = useMemo(
() => capabilities?.includes(AgentCapabilities.vision) ?? false,
[capabilities],
);

return {
ocrEnabled,
codeEnabled,
Expand All @@ -64,5 +70,6 @@ export default function useAgentCapabilities(
artifactsEnabled,
webSearchEnabled,
fileSearchEnabled,
visionEnabled,
};
}
1 change: 1 addition & 0 deletions client/src/hooks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ export { default as useTextToSpeech } from './Input/useTextToSpeech';
export { default as useGenerationsByLatest } from './useGenerationsByLatest';
export { default as useLocalizedConfig } from './useLocalizedConfig';
export { default as useResourcePermissions } from './useResourcePermissions';
export { useVisionModel } from './useVisionModel';
24 changes: 24 additions & 0 deletions client/src/hooks/useVisionModel.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { useMemo } from 'react';
import { validateVisionModel } from 'librechat-data-provider';
import { useChatContext } from '~/Providers';
import { useGetStartupConfig } from '~/data-provider';

/**
 * React hook reporting whether the model of the active conversation is
 * considered vision-capable.
 *
 * The model name is read from the chat context's current conversation and is
 * passed, together with `modelSpecs` from the startup config, to
 * `validateVisionModel`. When the conversation has no model set, the hook
 * short-circuits to `false` without calling the validator.
 *
 * NOTE(review): how `validateVisionModel` weighs `modelSpecs` against any
 * built-in model list is decided inside `librechat-data-provider` — confirm
 * there before relying on a particular precedence.
 *
 * @returns `true` when `validateVisionModel` accepts the current model,
 *          `false` otherwise (including when no model is selected).
 */
export function useVisionModel(): boolean {
  const chat = useChatContext();
  const startup = useGetStartupConfig();

  // Pull out the two values the memo depends on so the dependency list
  // mirrors exactly what the callback reads.
  const activeModel = chat.conversation?.model;
  const specs = startup.data?.modelSpecs;

  return useMemo(
    () => (activeModel ? validateVisionModel({ model: activeModel, modelSpecs: specs }) : false),
    [activeModel, specs],
  );
}
2 changes: 2 additions & 0 deletions client/src/locales/en/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"com_agents_description_placeholder": "Optional: Describe your Agent here",
"com_agents_empty_state_heading": "No agents found",
"com_agents_enable_file_search": "Enable File Search",
"com_agents_enable_image_vision": "Enable Image Vision",
"com_agents_error_bad_request_message": "The request could not be processed.",
"com_agents_error_bad_request_suggestion": "Please check your input and try again.",
"com_agents_error_category_title": "Category Error",
Expand Down Expand Up @@ -68,6 +69,7 @@
"com_agents_file_context_label": "File Context",
"com_agents_file_search_disabled": "Agent must be created before uploading files for File Search.",
"com_agents_file_search_info": "When enabled, the agent will be informed of the exact filenames listed below, allowing it to retrieve relevant context from these files.",
"com_agents_image_vision_info": "When enabled, images generated by MCP tools (e.g., image generation tools) will be sent back to the LLM. Disable this for non-vision models to prevent context overflow errors.",
"com_agents_grid_announcement": "Showing {{count}} agents in {{category}} category",
"com_agents_instructions_placeholder": "The system instructions that the agent uses",
"com_agents_link_copied": "Link copied",
Expand Down
1 change: 1 addition & 0 deletions packages/api/src/agents/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ export async function createRun({
instructions: systemContent,
maxContextTokens: agent.maxContextTokens,
useLegacyContent: agent.useLegacyContent ?? false,
vision: agent.vision,
};
agentInputs.push(agentInput);
};
Expand Down
1 change: 1 addition & 0 deletions packages/api/src/agents/validation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export const agentBaseSchema = z.object({
hide_sequential_outputs: z.boolean().optional(),
artifacts: z.string().optional(),
recursion_limit: z.number().optional(),
vision: z.boolean().optional(),
conversation_starters: z.array(z.string()).optional(),
tool_resources: agentToolResourcesSchema,
support_contact: agentSupportContactSchema,
Expand Down
Loading