diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 905cadfd235d..ab79b3bb53b1 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -492,6 +492,22 @@ class BaseClient { } delete userMessage.image_urls; } + /** + * Persist the user's manual skill picks onto the user message so the + * frontend `ManualSkillPills` component can render them in history + * after reload. UI-only metadata — the runtime skill resolution + * pipeline reads the top-level `req.body.manualSkills` separately. + * Filter is defense-in-depth on top of Mongoose schema validation: + * keeps the DB row free of empty/non-string entries even if a + * crafted payload slips past schema checks upstream. + */ + const rawManualSkills = this.options.req?.body?.manualSkills; + if (Array.isArray(rawManualSkills) && rawManualSkills.length > 0) { + const skills = rawManualSkills.filter((s) => typeof s === 'string' && s.length > 0); + if (skills.length > 0) { + userMessage.manualSkills = skills; + } + } userMessagePromise = this.saveMessageToDatabase(userMessage, saveOptions, user).catch( (err) => { logger.error('[BaseClient] Failed to save user message:', err); diff --git a/api/server/controllers/agents/__tests__/openai.spec.js b/api/server/controllers/agents/__tests__/openai.spec.js index 3bf57ebd86cb..89aad741434d 100644 --- a/api/server/controllers/agents/__tests__/openai.spec.js +++ b/api/server/controllers/agents/__tests__/openai.spec.js @@ -56,6 +56,7 @@ jest.mock('@librechat/api', () => ({ createErrorResponse: jest.fn(), getTransactionsConfig: mockGetTransactionsConfig, recordCollectedUsage: mockRecordCollectedUsage, + extractManualSkills: jest.fn().mockReturnValue(undefined), buildNonStreamingResponse: jest.fn().mockReturnValue({ id: 'resp-123' }), createOpenAIStreamTracker: jest.fn().mockReturnValue({ addText: jest.fn(), diff --git a/api/server/controllers/agents/__tests__/responses.unit.spec.js b/api/server/controllers/agents/__tests__/responses.unit.spec.js index 83473db86ec5..04c3d1e4e7a5 100644 --- a/api/server/controllers/agents/__tests__/responses.unit.spec.js +++ b/api/server/controllers/agents/__tests__/responses.unit.spec.js @@ -52,6 +52,7 @@ jest.mock('@librechat/api', () => ({ getBalanceConfig: mockGetBalanceConfig, getTransactionsConfig: mockGetTransactionsConfig, recordCollectedUsage: mockRecordCollectedUsage, + extractManualSkills: jest.fn().mockReturnValue(undefined), createToolExecuteHandler: jest.fn().mockReturnValue({ handle: jest.fn() }), // Responses API writeDone: jest.fn(), diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index 5099606801bc..40673e163c05 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -28,6 +28,9 @@ const { filterMalformedContentParts, countFormattedMessageTokens, hydrateMissingIndexTokenCounts, + injectManualSkillPrimes, + isSkillPrimeMessage, + buildSkillPrimeContentParts, } = require('@librechat/api'); const { Callback, @@ -603,18 +606,27 @@ class AgentClient extends BaseClient { const memoryConfig = appConfig.memory; const messageWindowSize = memoryConfig?.messageWindowSize ?? 5; - let messagesToProcess = [...messages]; - if (messages.length > messageWindowSize) { - for (let i = messages.length - messageWindowSize; i >= 0; i--) { - const potentialWindow = messages.slice(i, i + messageWindowSize); + /** + * Strip skill-primed meta messages before memory extraction. The primes + * sit next to the latest user message and carry large SKILL.md bodies, + * so letting them into the window would crowd out real chat turns and + * pollute extracted memories with synthetic instruction content the + * user never typed. + */ + const chatMessages = messages.filter((m) => !isSkillPrimeMessage(m)); + + let messagesToProcess = [...chatMessages]; + if (chatMessages.length > messageWindowSize) { + for (let i = chatMessages.length - messageWindowSize; i >= 0; i--) { + const potentialWindow = chatMessages.slice(i, i + messageWindowSize); if (potentialWindow[0]?.role === 'user') { messagesToProcess = [...potentialWindow]; break; } } - if (messagesToProcess.length === messages.length) { - messagesToProcess = [...messages.slice(-messageWindowSize)]; + if (messagesToProcess.length === chatMessages.length) { + messagesToProcess = [...chatMessages.slice(-messageWindowSize)]; } } @@ -759,6 +771,32 @@ class AgentClient extends BaseClient { `[AgentClient] Boundary token adjustment: ${boundaryTokenAdjustment.original} → ${boundaryTokenAdjustment.adjusted} (${boundaryTokenAdjustment.remainingChars}/${boundaryTokenAdjustment.totalChars} chars)`, ); } + + /** + * Phase 3 manual skill priming — injected by user via `$` popover. + * + * Splice + index-shift logic lives in `injectManualSkillPrimes` + * (packages/api/src/agents/skills.ts) so the delicate position math + * can be unit-tested in TS without standing up AgentClient. Runs for + * both single-agent and multi-agent runs; how primes interact with + * handoff / added-convo agents' per-agent state is an agents-SDK + * concern, not this layer's to gate. + */ + const manualSkillPrimes = this.options.agent?.manualSkillPrimes; + if (manualSkillPrimes && manualSkillPrimes.length > 0) { + const primeResult = injectManualSkillPrimes({ + initialMessages, + indexTokenCountMap, + manualSkillPrimes, + }); + indexTokenCountMap = primeResult.indexTokenCountMap; + if (primeResult.inserted > 0) { + logger.debug( + `[AgentClient] Primed ${primeResult.inserted} manual skill(s) at message index ${primeResult.insertIdx}: ${manualSkillPrimes.map((p) => p.name).join(', ')}`, + ); + } + } + if (indexTokenCountMap && isEnabled(process.env.AGENT_DEBUG_LOGGING)) { const entries = Object.entries(indexTokenCountMap); const perMsg = entries.map(([idx, count]) => { @@ -875,6 +913,31 @@ class AgentClient extends BaseClient { const hideSequentialOutputs = config.configurable.hide_sequential_outputs; await runAgents(initialMessages); + /** + * Surface a completed `skill` tool_call content part per manually- + * invoked skill so the existing `SkillCall` frontend renderer shows + * a "Skill X loaded" card on the assistant response. Applied after + * the graph finishes to avoid clashing with the aggregator's own + * per-step content indexing. Prepended (not appended) so cards sit + * above the model's output — priming ran before the turn, the + * reply follows. + * + * Live streaming display of cards is handled on the user side via + * `ManualSkillPills` reading the message's `manualSkills` field; + * no separate SSE emit is needed here, and trying to stream a + * mid-run tool_call at index 0 collided with the LLM's first text + * content, while emitting at a sparse offset pushed the card below + * the reply on finalize. Post-run unshift keeps the final + * responseMessage.content in the right order. + */ + const primedSkills = this.options.agent?.manualSkillPrimes; + if (primedSkills && primedSkills.length > 0) { + const primeParts = buildSkillPrimeContentParts(primedSkills, { + runId: this.responseMessageId ?? 'manual-skill', + }); + this.contentParts.unshift(...primeParts); + } + /** @deprecated Agent Chain */ if (hideSequentialOutputs) { this.contentParts = this.contentParts.filter((part, index) => { diff --git a/api/server/controllers/agents/openai.js b/api/server/controllers/agents/openai.js index a7d982d28744..a5553af5a937 100644 --- a/api/server/controllers/agents/openai.js +++ b/api/server/controllers/agents/openai.js @@ -19,6 +19,7 @@ const { validateRequest, initializeAgent, getBalanceConfig, + extractManualSkills, createErrorResponse, recordCollectedUsage, getTransactionsConfig, @@ -27,6 +28,7 @@ const { buildNonStreamingResponse, createOpenAIStreamTracker, createOpenAIContentAggregator, + injectManualSkillPrimes, isChatCompletionValidationFailure, } = require('@librechat/api'); const { @@ -237,6 +239,8 @@ const OpenAIChatCompletionController = async (req, res) => { accessibleSkillIds, }); + const manualSkills = extractManualSkills(req.body); + const primaryConfig = await initializeAgent( { req, @@ -256,6 +260,7 @@ const OpenAIChatCompletionController = async (req, res) => { codeEnvAvailable: enabledCapabilities.has(AgentCapabilities.execute_code), skillStates, defaultActiveOnShare, + manualSkills, }, { getConvoFiles: db.getConvoFiles, @@ -268,6 +273,7 @@ const OpenAIChatCompletionController = async (req, res) => { getToolFilesByIds: db.getToolFilesByIds, getCodeGeneratedFiles: db.getCodeGeneratedFiles, listSkillsByAccess: db.listSkillsByAccess, + getSkillByName: db.getSkillByName, }, ); @@ -331,11 +337,26 @@ const OpenAIChatCompletionController = async (req, res) => { const openaiMessages = convertMessages(request.messages); const toolSet = buildToolSet(primaryConfig); - const { - messages: formattedMessages, - indexTokenCountMap, - summary: initialSummary, - } = formatAgentMessages(openaiMessages, {}, toolSet); + const formatted = formatAgentMessages(openaiMessages, {}, toolSet); + const formattedMessages = formatted.messages; + const initialSummary = formatted.summary; + let indexTokenCountMap = formatted.indexTokenCountMap; + + /** + * Inject manual skill primes so the model sees SKILL.md bodies for this + * turn — parity with AgentClient's chat path. OpenAI-compatible streaming + * uses its own tracker/aggregator shape, so the LibreChat-style card SSE + * events don't apply here; only the message-context part carries over. + */ + const manualSkillPrimes = primaryConfig.manualSkillPrimes; + if (manualSkillPrimes && manualSkillPrimes.length > 0) { + const primeResult = injectManualSkillPrimes({ + initialMessages: formattedMessages, + indexTokenCountMap, + manualSkillPrimes, + }); + indexTokenCountMap = primeResult.indexTokenCountMap; + } /** * Create a simple handler that processes data diff --git a/api/server/controllers/agents/responses.js b/api/server/controllers/agents/responses.js index 887e0fd79901..5703a5817625 100644 --- a/api/server/controllers/agents/responses.js +++ b/api/server/controllers/agents/responses.js @@ -18,6 +18,8 @@ const { getBalanceConfig, recordCollectedUsage, getTransactionsConfig, + extractManualSkills, + injectManualSkillPrimes, createToolExecuteHandler, // Responses API writeDone, @@ -377,6 +379,8 @@ const createResponse = async (req, res) => { accessibleSkillIds, }); + const manualSkills = extractManualSkills(req.body); + const primaryConfig = await initializeAgent( { req, @@ -396,6 +400,7 @@ const createResponse = async (req, res) => { codeEnvAvailable: enabledCapabilities.has(AgentCapabilities.execute_code), skillStates, defaultActiveOnShare, + manualSkills, }, { getConvoFiles: db.getConvoFiles, @@ -408,6 +413,7 @@ const createResponse = async (req, res) => { getToolFilesByIds: db.getToolFilesByIds, getCodeGeneratedFiles: db.getCodeGeneratedFiles, listSkillsByAccess: db.listSkillsByAccess, + getSkillByName: db.getSkillByName, }, ); @@ -431,11 +437,26 @@ const createResponse = async (req, res) => { const allMessages = [...previousMessages, ...inputMessages]; const toolSet = buildToolSet(primaryConfig); - const { - messages: formattedMessages, - indexTokenCountMap, - summary: initialSummary, - } = formatAgentMessages(allMessages, {}, toolSet); + const formatted = formatAgentMessages(allMessages, {}, toolSet); + const formattedMessages = formatted.messages; + const initialSummary = formatted.summary; + let indexTokenCountMap = formatted.indexTokenCountMap; + + /** + * Inject manual skill primes so the model sees SKILL.md bodies for this + * turn — parity with AgentClient's chat path. The Responses API uses its + * own response-builder shape, so LibreChat-style card SSE events don't + * apply; only the message-context part carries over. + */ + const manualSkillPrimes = primaryConfig.manualSkillPrimes; + if (manualSkillPrimes && manualSkillPrimes.length > 0) { + const primeResult = injectManualSkillPrimes({ + initialMessages: formattedMessages, + indexTokenCountMap, + manualSkillPrimes, + }); + indexTokenCountMap = primeResult.indexTokenCountMap; + } // Create tracker for streaming or aggregator for non-streaming const tracker = actuallyStreaming ? createResponseTracker() : null; diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js index c28c16b313e7..49913f98e3dd 100644 --- a/api/server/services/Endpoints/agents/initialize.js +++ b/api/server/services/Endpoints/agents/initialize.js @@ -8,6 +8,7 @@ const { validateAgentModel, createEdgeCollector, filterOrphanedEdges, + extractManualSkills, GenerationJobManager, getCustomEndpointConfig, createSequentialChainEdges, @@ -237,6 +238,15 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { const conversationId = req.body.conversationId; /** @type {string | undefined} */ const parentMessageId = req.body.parentMessageId; + /** + * Skill names the user invoked via the `$` popover for this turn. Only flows + * to the primary agent — handoff agents are follow-up turns that don't see + * the user's per-submission `$` selections. `extractManualSkills` also + * drops non-string / empty elements so a crafted payload can't reach the + * `getSkillByName` DB query with nonsense values. + * @type {string[] | undefined} + */ + const manualSkills = extractManualSkills(req.body); const primaryConfig = await initializeAgent( { @@ -257,6 +267,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { codeEnvAvailable: enabledCapabilities.has(AgentCapabilities.execute_code), skillStates, defaultActiveOnShare, + manualSkills, }, { getFiles: db.getFiles, @@ -270,6 +281,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { getCodeGeneratedFiles: db.getCodeGeneratedFiles, filterFilesByAgentAccess, listSkillsByAccess: db.listSkillsByAccess, + getSkillByName: db.getSkillByName, }, ); @@ -359,6 +371,7 @@ const initializeClient = async ({ req, res, signal, endpointOption }) => { getCodeGeneratedFiles: db.getCodeGeneratedFiles, filterFilesByAgentAccess, listSkillsByAccess: db.listSkillsByAccess, + getSkillByName: db.getSkillByName, }, ); diff --git a/client/src/common/types.ts b/client/src/common/types.ts index 6ca408685f77..1db8ba3b3e75 100644 --- a/client/src/common/types.ts +++ b/client/src/common/types.ts @@ -349,6 +349,14 @@ export type TOptions = { isResubmission?: boolean; /** Currently only utilized when `isResubmission === true`, uses that message's currently attached files */ overrideFiles?: t.TMessage['files']; + /** + * Carry forward a user message's manually-invoked skills when the caller + * is resubmitting / regenerating that same message — the compose-time + * atom has already been drained on the original submit, so without this + * the second turn would run without any manual priming even though the + * pills are still visible on the user bubble. + */ + overrideManualSkills?: string[]; /** Added conversation for multi-convo feature - sent to server as part of submission payload */ addedConvo?: t.TConversation; }; diff --git a/client/src/components/Chat/Input/ChatForm.tsx b/client/src/components/Chat/Input/ChatForm.tsx index 3f62b716b458..a4f4f062f551 100644 --- a/client/src/components/Chat/Input/ChatForm.tsx +++ b/client/src/components/Chat/Input/ChatForm.tsx @@ -26,6 +26,7 @@ import AttachFileChat from './Files/AttachFileChat'; import FileFormChat from './Files/FileFormChat'; import { cn, removeFocusRings } from '~/utils'; import TextareaHeader from './TextareaHeader'; +import PendingManualSkillsChips from './PendingManualSkillsChips'; import SkillsCommand from './SkillsCommand'; import PromptsCommand from './PromptsCommand'; import AudioRecorder from './AudioRecorder'; @@ -272,6 +273,7 @@ const ChatForm = memo(function ChatForm({ )} > + {/* WIP */} { + setSkills((prev) => prev.filter((s) => s !== name)); + }, + [setSkills], + ); + + if (skills.length === 0) { + return null; + } + + return ( +
+ {skills.map((name) => ( + + + ))} +
+ ); +} + +export default memo(PendingManualSkillsChips); diff --git a/client/src/components/Chat/Input/SkillsCommand.tsx b/client/src/components/Chat/Input/SkillsCommand.tsx index ab768ab8bdb9..88b94edb51a0 100644 --- a/client/src/components/Chat/Input/SkillsCommand.tsx +++ b/client/src/components/Chat/Input/SkillsCommand.tsx @@ -214,30 +214,18 @@ function SkillsCommandContent({ return { ...(prev || {}), skills: true }; }); - /* Structured channel for manual skill invocations. The follow-up PR - will read this in the submit pipeline and prime the corresponding - SKILL.md as a meta user message before the LLM turn, mirroring - Claude Code's `/skill` deterministic injection. The textual - `$skill-name ` insertion below remains as user-visible confirmation - and is treated as cosmetic by that future pipeline. */ + /* Structured channel for manual skill invocations. The submit + pipeline reads this and primes SKILL.md as a meta user message + before the LLM turn — no textarea-level marker is needed, and + injecting `$skill-name ` as text would mislead users into thinking + free-form text invocation is supported. Visual confirmation after + submit comes from `ManualSkillPills` on the user message bubble + until the live skill-card stream takes over. */ setPendingManualSkills((prev) => prev.includes(mention.value) ? prev : [...prev, mention.value], ); - const textarea = textAreaRef.current; - if (textarea) { - const insertion = `$${mention.value} `; - const nativeInputValueSetter = Object.getOwnPropertyDescriptor( - HTMLTextAreaElement.prototype, - 'value', - )?.set; - if (nativeInputValueSetter) { - nativeInputValueSetter.call(textarea, insertion); - textarea.dispatchEvent(new Event('input', { bubbles: true })); - } - textarea.focus(); - textarea.setSelectionRange(insertion.length, insertion.length); - } + textAreaRef.current?.focus(); }, [ setSearchValue, diff --git a/client/src/components/Chat/Input/__tests__/PendingManualSkillsChips.spec.tsx b/client/src/components/Chat/Input/__tests__/PendingManualSkillsChips.spec.tsx new file mode 100644 index 000000000000..a8eb7984f4ad --- /dev/null +++ b/client/src/components/Chat/Input/__tests__/PendingManualSkillsChips.spec.tsx @@ -0,0 +1,59 @@ +import React from 'react'; +import { RecoilRoot, MutableSnapshot } from 'recoil'; +import { render, screen } from '@testing-library/react'; +import userEvent from '@testing-library/user-event'; + +jest.mock('~/hooks', () => ({ + useLocalize: () => (key: string, params?: Record) => + `${key}:${params?.[0] ?? ''}`, +})); + +import PendingManualSkillsChips from '../PendingManualSkillsChips'; +import store from '~/store'; + +const CONVO_ID = 'convo-1'; + +const renderWithSkills = (initialSkills: string[]) => { + const initializeState = (snapshot: MutableSnapshot) => { + snapshot.set(store.pendingManualSkillsByConvoId(CONVO_ID), initialSkills); + }; + return render( + + + , + ); +}; + +describe('PendingManualSkillsChips', () => { + it('renders nothing when no skills are queued', () => { + const { container } = renderWithSkills([]); + expect(container.firstChild).toBeNull(); + }); + + it('renders one chip per queued skill', () => { + renderWithSkills(['brand-guidelines', 'pptx']); + const items = screen.getAllByRole('listitem'); + expect(items).toHaveLength(2); + expect(items[0]).toHaveTextContent('brand-guidelines'); + expect(items[1]).toHaveTextContent('pptx'); + }); + + it('removes the chip when its × button is clicked', async () => { + const user = userEvent.setup(); + renderWithSkills(['brand-guidelines', 'pptx']); + const removeBrand = screen.getByRole('button', { + name: 'com_ui_remove_skill_var:brand-guidelines', + }); + await user.click(removeBrand); + const items = screen.getAllByRole('listitem'); + expect(items).toHaveLength(1); + expect(items[0]).toHaveTextContent('pptx'); + }); + + it('clears the list when every chip is dismissed', async () => { + const user = userEvent.setup(); + const { container } = renderWithSkills(['a']); + await user.click(screen.getByRole('button', { name: 'com_ui_remove_skill_var:a' })); + expect(container.firstChild).toBeNull(); + }); +}); diff --git a/client/src/components/Chat/Input/__tests__/SkillsCommand.spec.tsx b/client/src/components/Chat/Input/__tests__/SkillsCommand.spec.tsx index 3366779cb85b..18ac124f7733 100644 --- a/client/src/components/Chat/Input/__tests__/SkillsCommand.spec.tsx +++ b/client/src/components/Chat/Input/__tests__/SkillsCommand.spec.tsx @@ -190,7 +190,7 @@ describe('SkillsCommand', () => { expect(container).toBeEmptyDOMElement(); }); - it('selecting a skill pushes to pendingManualSkillsByConvoId, flips ephemeralAgent.skills, inserts $name into textarea, and closes the popover', async () => { + it('selecting a skill pushes to pendingManualSkillsByConvoId, flips ephemeralAgent.skills, strips the $ trigger from the textarea, and closes the popover', async () => { const user = userEvent.setup(); const textAreaRef = makeTextarea('$'); render(); @@ -200,8 +200,8 @@ describe('SkillsCommand', () => { await user.click(skillButton); }); - /* Structured channel: the skill name is pushed into the per-convo atom, - which is the contract the follow-up PR depends on. */ + /* Structured channel: the skill name is pushed into the per-convo atom + and drained by `useChatFunctions.ask` on submission. */ expect(mockSetPendingManualSkills).toHaveBeenCalledTimes(1); const updater = mockSetPendingManualSkills.mock.calls[0][0] as (prev: string[]) => string[]; expect(updater([])).toEqual(['brand-guidelines']); @@ -216,8 +216,11 @@ describe('SkillsCommand', () => { expect(agentUpdater(null)).toEqual({ skills: true }); expect(agentUpdater({ skills: true })).toEqual({ skills: true }); - /* Cosmetic textarea insertion remains in place for user feedback. */ - expect(textAreaRef.current?.value).toBe('$brand-guidelines '); + /* Textarea is cleared of the `$` trigger but no `$skill-name ` cue is + inserted — visual confirmation is the `ManualSkillPills` row that + renders on the submitted user message, and injecting text would + mislead users into thinking free-form `$name` invocation works. */ + expect(textAreaRef.current?.value).toBe(''); /* Popover dismisses on selection. */ expect(mockSetShowSkillsPopover).toHaveBeenCalledWith(false); diff --git a/client/src/components/Chat/Messages/Content/Container.tsx b/client/src/components/Chat/Messages/Content/Container.tsx index 7e208f3bd1ba..6a34016c48da 100644 --- a/client/src/components/Chat/Messages/Content/Container.tsx +++ b/client/src/components/Chat/Messages/Content/Container.tsx @@ -1,5 +1,6 @@ import { TMessage } from 'librechat-data-provider'; import Files from './Files'; +import ManualSkillPills from './ManualSkillPills'; const Container = ({ children, message }: { children: React.ReactNode; message?: TMessage }) => (
{message?.isCreatedByUser === true && } + {message?.isCreatedByUser === true && } {children}
); diff --git a/client/src/components/Chat/Messages/Content/ContentParts.tsx b/client/src/components/Chat/Messages/Content/ContentParts.tsx index 65ebc669083f..0380e77f80b3 100644 --- a/client/src/components/Chat/Messages/Content/ContentParts.tsx +++ b/client/src/components/Chat/Messages/Content/ContentParts.tsx @@ -10,6 +10,7 @@ import { ParallelContentRenderer, type PartWithIndex } from './ParallelContent'; import { mapAttachments, groupSequentialToolCalls } from '~/utils'; import { MessageContext, SearchContext } from '~/Providers'; import { EditTextPart, EmptyText } from './Parts'; +import PendingSkillCall from './Parts/PendingSkillCall'; import MemoryArtifacts from './MemoryArtifacts'; import ToolCallGroup from './ToolCallGroup'; import Container from './Container'; @@ -73,6 +74,15 @@ const PartWithContext = memo(function PartWithContext({ type ContentPartsProps = { content: Array | undefined; messageId: string; + /** + * Skill names the user invoked manually via the `$` popover on this turn. + * `createdHandler` seeds this on the assistant placeholder from + * `submission.manualSkills`, and `finalHandler`'s server-backed + * `responseMessage` replacement drops it — so the field is naturally + * present only for the lifetime of the stream. Scalar string array (not + * the full message object) so `React.memo` stays shallow-happy. + */ + manualSkills?: string[]; conversationId?: string | null; attachments?: TAttachment[]; searchResults?: { [key: string]: SearchResultData }; @@ -99,6 +109,7 @@ const ContentParts = memo(function ContentParts({ edit, isLast, content, + manualSkills, messageId, enterEdit, siblingIdx, @@ -113,6 +124,63 @@ const ContentParts = memo(function ContentParts({ const attachmentMap = useMemo(() => mapAttachments(attachments ?? []), [attachments]); const effectiveIsSubmitting = isLatestMessage ? isSubmitting : false; + /** + * Interim skill cards — rendered in a separate slot ABOVE the Parts + * iteration based purely on the `manualSkills` message field. `content` + * is only read to determine the "Running → Ran" visual transition + * (`hasRealContent`), never to gate visibility, so backend deltas / + * optimistic emissions can't race the pending cards off the screen. + * + * Lifecycle: + * - `useChatFunctions` seeds `manualSkills` on the assistant placeholder + * at construction → cards appear immediately on submit, with the + * shimmering "Running X" state (no content yet). + * - Through the stream, `useStepHandler` spreads the response on every + * update so `manualSkills` rides along; once the first real content + * part lands, `hasRealContent` flips true and the cards switch to + * the static "Ran X" state — matching what users see for + * model-invoked skills as they finish priming. + * - At finalize, `finalHandler` replaces the message with the server + * response (no `manualSkills` field) → interim cards disappear and + * the real `skill` tool_call part in `content` takes over. + * + * Skipped on the user side (they get `ManualSkillPills` on the user + * bubble) and when no skills were invoked on this turn. + */ + const pendingSkills = useMemo( + () => (!isCreatedByUser && manualSkills != null ? manualSkills : []), + [isCreatedByUser, manualSkills], + ); + const hasPendingSkills = pendingSkills.length > 0; + + /** + * True once the assistant has started streaming something meaningful — + * any non-text part, OR a text part with non-empty content. Drives the + * "Running X → Ran X" transition on pending cards. An empty-text + * placeholder (some endpoints seed one in `initialResponse.content` on + * assistant-side) does NOT count as real content, to avoid flipping + * the transition before the model has actually produced anything. + */ + const hasRealContent = useMemo( + () => + (content ?? []).some((part) => { + if (part == null) { + return false; + } + if (part.type !== ContentTypes.TEXT) { + return true; + } + const text = typeof part.text === 'string' ? part.text : (part.text?.value ?? ''); + return text.length > 0; + }), + [content], + ); + + const renderPendingSkills = () => + pendingSkills.map((name) => ( + + )); + const renderPart = useCallback( (part: TMessageContentParts, idx: number, isLastPart: boolean) => { const toolCallId = (part?.[ContentTypes.TOOL_CALL] as Agents.ToolCall | undefined)?.id ?? ''; @@ -145,16 +213,17 @@ const ContentParts = memo(function ContentParts({ ], ); - // Early return: no content - if (!content) { + // Early return: no content to render AND no pending skill cards + if (!content && !hasPendingSkills) { return null; } - // Edit mode: render editable text parts + // Edit mode: render editable text parts. Interim skill cards are a + // mid-stream concern, not relevant in edit mode. if (edit === true && enterEdit && setSiblingIdx) { return ( <> - {content.map((part, idx) => { + {(content ?? []).map((part, idx) => { if (!part) { return null; } @@ -190,28 +259,32 @@ const ContentParts = memo(function ContentParts({ ); } - const showEmptyCursor = content.length === 0 && effectiveIsSubmitting; - const lastContentIdx = content.length - 1; + const safeContent = content ?? []; + const showEmptyCursor = safeContent.length === 0 && effectiveIsSubmitting; + const lastContentIdx = safeContent.length - 1; // Parallel content: use dedicated renderer with columns (TMessageContentParts includes ContentMetadata) - const hasParallelContent = content.some((part) => part?.groupId != null); + const hasParallelContent = safeContent.some((part) => part?.groupId != null); if (hasParallelContent) { return ( - + <> + {renderPendingSkills()} + + ); } // Sequential content: render parts in order (90% of cases) const sequentialParts: PartWithIndex[] = []; - content.forEach((part, idx) => { + safeContent.forEach((part, idx) => { if (part) { sequentialParts.push({ part, idx }); } @@ -221,6 +294,7 @@ const ContentParts = memo(function ContentParts({ return ( + {renderPendingSkills()} {showEmptyCursor && ( diff --git a/client/src/components/Chat/Messages/Content/EditMessage.tsx b/client/src/components/Chat/Messages/Content/EditMessage.tsx index 0b4a15f7cbf8..53fa4f880091 100644 --- a/client/src/components/Chat/Messages/Content/EditMessage.tsx +++ b/client/src/components/Chat/Messages/Content/EditMessage.tsx @@ -61,6 +61,10 @@ const EditMessage = ({ }, { overrideFiles: message.files, + /** Pills on the edited user message stay visible after save-and-submit; + * carry the picks forward so the new turn primes the same skills + * instead of running unprimed. */ + overrideManualSkills: message.manualSkills, addedConvo: getAddedConvo() || undefined, }, ); @@ -80,6 +84,10 @@ const EditMessage = ({ editedMessageId: messageId, isRegenerate: true, isEdited: true, + /** Edit-assistant-response flow replays the parent user turn; keep + * the same manual skills so the regenerated response is primed + * identically. */ + overrideManualSkills: parentMessage.manualSkills, addedConvo: getAddedConvo() || undefined, }, ); diff --git a/client/src/components/Chat/Messages/Content/ManualSkillPills.tsx b/client/src/components/Chat/Messages/Content/ManualSkillPills.tsx new file mode 100644 index 000000000000..de5fcd6322e1 --- /dev/null +++ b/client/src/components/Chat/Messages/Content/ManualSkillPills.tsx @@ -0,0 +1,42 @@ +import { memo } from 'react'; +import { ScrollText } from 'lucide-react'; +import { useLocalize } from '~/hooks'; + +/** + * Compact pill row rendered on a submitted user message, one chip per skill + * the user invoked via the `$` popover. Presentational component — takes + * only the scalar `skills` array, no full message object (keeps + * `React.memo` comparisons on parent wrappers shallow and cheap). + * + * Backend persists the source field (`message.manualSkills`), so callers + * reading from the message pass `skills={message.manualSkills}` and pills + * survive page reloads / history renders. + */ +function ManualSkillPills({ skills }: { skills?: string[] }) { + const localize = useLocalize(); + + if (!skills || skills.length === 0) { + return null; + } + + return ( +
+ {skills.map((name) => ( + + + ))} +
+ ); +} + +export default memo(ManualSkillPills); diff --git a/client/src/components/Chat/Messages/Content/ParallelContent.tsx b/client/src/components/Chat/Messages/Content/ParallelContent.tsx index b66720cad752..2c56bc04e3e4 100644 --- a/client/src/components/Chat/Messages/Content/ParallelContent.tsx +++ b/client/src/components/Chat/Messages/Content/ParallelContent.tsx @@ -191,7 +191,7 @@ export const ParallelColumns = memo(function ParallelColumns({ }); type ParallelContentRendererProps = { - content: Array; + content?: Array; messageId: string; conversationId?: string | null; attachments?: TAttachment[]; @@ -218,7 +218,7 @@ export const ParallelContentRenderer = memo(function ParallelContentRenderer({ [content], ); - const lastContentIdx = content.length - 1; + const lastContentIdx = (content?.length ?? 0) - 1; // Split sequential parts into before/after parallel sections const { before, after } = useMemo(() => { diff --git a/client/src/components/Chat/Messages/Content/Parts/PendingSkillCall.tsx b/client/src/components/Chat/Messages/Content/Parts/PendingSkillCall.tsx new file mode 100644 index 000000000000..15b0d1f2a771 --- /dev/null +++ b/client/src/components/Chat/Messages/Content/Parts/PendingSkillCall.tsx @@ -0,0 +1,53 @@ +import { ScrollText } from 'lucide-react'; +import { useLocalize } from '~/hooks'; +import { cn } from '~/utils'; + +/** + * Minimal variant of `SkillCall` used for the interim window between + * submit and the backend's real `skill` tool_call content part landing + * on the message. + * + * Strips the expand/collapse affordance that `SkillCall` inherits from + * `ProgressText` — there's nothing to expand during priming (empty + * output), and leaving the chevron + cursor-pointer on a non-functional + * button is misleading. Same ScrollText icon, same shimmer, same + * localized label strings (`com_ui_skill_running` / `com_ui_skill_finished`) + * so the visual language matches the real card it'll be replaced by. + * + * `loaded=false` → "Running X" with pulsing icon + shimmer. + * `loaded=true` → "Ran X" with static icon; driven by + * `ContentParts.hasRealContent`, which flips true as + * soon as any streamed content part arrives, matching + * the transition users see for model-invoked skills. + */ +export default function PendingSkillCall({ + skillName, + loaded, +}: { + skillName: string; + loaded: boolean; +}) { + const localize = useLocalize(); + const text = loaded + ? localize('com_ui_skill_finished', { 0: skillName }) + : localize('com_ui_skill_running', { 0: skillName }); + + return ( +
+
+
+
+
+
+
+
+ ); +} diff --git a/client/src/components/Chat/Messages/Content/__tests__/ContentParts.test.tsx b/client/src/components/Chat/Messages/Content/__tests__/ContentParts.test.tsx new file mode 100644 index 000000000000..55312cb8d531 --- /dev/null +++ b/client/src/components/Chat/Messages/Content/__tests__/ContentParts.test.tsx @@ -0,0 +1,140 @@ +import React from 'react'; +import { render, screen } from '@testing-library/react'; +import { ContentTypes } from 'librechat-data-provider'; +import type { TMessageContentParts } from 'librechat-data-provider'; + +jest.mock('~/utils', () => ({ + mapAttachments: () => ({}), + groupSequentialToolCalls: (parts: Array<{ part: unknown; idx: number }>) => + parts.map((p) => ({ type: 'single' as const, part: p })), +})); + +jest.mock('~/Providers', () => ({ + MessageContext: { + Provider: ({ children }: { children: React.ReactNode }) => <>{children}, + }, + SearchContext: { + Provider: ({ children }: { children: React.ReactNode }) => <>{children}, + }, +})); + +jest.mock('../Parts', () => ({ + EditTextPart: () =>
, + EmptyText: () =>
, +})); + +jest.mock('../MemoryArtifacts', () => ({ + __esModule: true, + default: () =>
, +})); + +jest.mock('../Parts/PendingSkillCall', () => ({ + __esModule: true, + default: ({ skillName, loaded }: { skillName: string; loaded: boolean }) => ( +
+ ), +})); + +jest.mock('../ToolCallGroup', () => ({ + __esModule: true, + default: () =>
, +})); + +jest.mock('../Container', () => ({ + __esModule: true, + default: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), +})); + +jest.mock('../Part', () => ({ + __esModule: true, + default: ({ part }: { part: TMessageContentParts }) => ( +
+ ), +})); + +jest.mock('../ParallelContent', () => ({ + ParallelContentRenderer: () =>
, +})); + +import ContentParts from '../ContentParts'; + +const baseProps = { + messageId: 'msg-1', + isLast: false, + isSubmitting: false, + isLatestMessage: false, + isCreatedByUser: false, + content: [], +}; + +describe('ContentParts — interim skill cards', () => { + it('renders a PendingSkillCall per manual skill on assistant messages', () => { + render(); + const cards = screen.getAllByTestId('pending-skill-call'); + expect(cards).toHaveLength(2); + expect(cards[0]).toHaveAttribute('data-skill', 'brand-guidelines'); + expect(cards[1]).toHaveAttribute('data-skill', 'pptx'); + }); + + it('starts pending skill cards in the not-loaded state (no real content yet)', () => { + render(); + expect(screen.getByTestId('pending-skill-call')).toHaveAttribute('data-loaded', 'false'); + }); + + it('flips pending cards to loaded once any real content part arrives', () => { + const content: TMessageContentParts[] = [ + { type: ContentTypes.TEXT, text: 'streamed' } as unknown as TMessageContentParts, + ]; + render(); + expect(screen.getByTestId('pending-skill-call')).toHaveAttribute('data-loaded', 'true'); + }); + + it('does NOT render skill cards on user messages', () => { + render(); + expect(screen.queryByTestId('pending-skill-call')).toBeNull(); + }); + + it('renders nothing when manualSkills is empty and content is undefined', () => { + const { container } = render( + , + ); + expect(container.firstChild).toBeNull(); + }); + + it('renders pending skill cards even when content is undefined', () => { + render(); + expect(screen.getAllByTestId('pending-skill-call')).toHaveLength(1); + }); + + it('renders pending skill cards above parallel content', () => { + const parallelContent: TMessageContentParts[] = [ + { + type: ContentTypes.TEXT, + text: 'parallel', + groupId: 'group-1', + } as unknown as TMessageContentParts, + ]; + render(); + const skillCard = screen.getByTestId('pending-skill-call'); + const parallelRenderer = screen.getByTestId('parallel-renderer'); + expect(skillCard).toBeTruthy(); + expect(parallelRenderer).toBeTruthy(); + expect(skillCard.compareDocumentPosition(parallelRenderer)).toBe( + Node.DOCUMENT_POSITION_FOLLOWING, + ); + }); + + it('renders pending skill cards above sequential content', () => { + const sequentialContent: TMessageContentParts[] = [ + { type: ContentTypes.TEXT, text: 'streamed' } as unknown as TMessageContentParts, + ]; + render(); + const skillCard = screen.getByTestId('pending-skill-call'); + const textPart = screen.getByTestId(`real-part-${ContentTypes.TEXT}`); + expect(skillCard).toBeTruthy(); + expect(textPart).toBeTruthy(); + expect(skillCard.compareDocumentPosition(textPart)).toBe(Node.DOCUMENT_POSITION_FOLLOWING); + }); +}); diff --git a/client/src/components/Chat/Messages/Content/__tests__/ManualSkillPills.test.tsx b/client/src/components/Chat/Messages/Content/__tests__/ManualSkillPills.test.tsx new file mode 100644 index 000000000000..8066c507f3f9 --- /dev/null +++ b/client/src/components/Chat/Messages/Content/__tests__/ManualSkillPills.test.tsx @@ -0,0 +1,34 @@ +import React from 'react'; +import { render, screen } from '@testing-library/react'; + +jest.mock('~/hooks', () => ({ + useLocalize: () => (key: string, params?: Record) => + `${key}:${params?.[0] ?? ''}`, +})); + +import ManualSkillPills from '../ManualSkillPills'; + +describe('ManualSkillPills', () => { + it('renders nothing when skills is undefined', () => { + const { container } = render(); + expect(container.firstChild).toBeNull(); + }); + + it('renders nothing when skills is empty', () => { + const { container } = render(); + expect(container.firstChild).toBeNull(); + }); + + it('renders one pill per entry', () => { + render(); + const items = screen.getAllByRole('listitem'); + expect(items).toHaveLength(2); + expect(items[0]).toHaveTextContent('brand-guidelines'); + expect(items[1]).toHaveTextContent('pptx'); + }); + + it('localizes the list aria-label', () => { + render(); + expect(screen.getByRole('list')).toHaveAttribute('aria-label', 'com_ui_skills_manual_invoked:'); + }); +}); diff --git a/client/src/components/Chat/Messages/MessageParts.tsx b/client/src/components/Chat/Messages/MessageParts.tsx index 2d0e3d512bf7..ff6b01b2ddbd 100644 --- a/client/src/components/Chat/Messages/MessageParts.tsx +++ b/client/src/components/Chat/Messages/MessageParts.tsx @@ -141,6 +141,7 @@ export default function Message(props: TMessageProps) { attachments={attachments} isSubmitting={isSubmitting} searchResults={searchResults} + manualSkills={message.manualSkills} messageId={message.messageId} setSiblingIdx={setSiblingIdx} isCreatedByUser={message.isCreatedByUser} diff --git a/client/src/components/Messages/ContentRender.tsx b/client/src/components/Messages/ContentRender.tsx index 4ba8db36f8eb..bb7925cf1a1f 100644 --- a/client/src/components/Messages/ContentRender.tsx +++ b/client/src/components/Messages/ContentRender.tsx @@ -78,7 +78,8 @@ function areContentRenderPropsEqual(prev: ContentRenderProps, next: ContentRende prevMsg.endpoint === nextMsg.endpoint && prevMsg.iconURL === nextMsg.iconURL && prevMsg.feedback?.rating === nextMsg.feedback?.rating && - (prevMsg.attachments?.length ?? 0) === (nextMsg.attachments?.length ?? 0) + (prevMsg.attachments?.length ?? 0) === (nextMsg.attachments?.length ?? 0) && + (prevMsg.manualSkills?.length ?? 0) === (nextMsg.manualSkills?.length ?? 0) ); } @@ -216,6 +217,7 @@ const ContentRender = memo(function ContentRender({ messageId={msg.messageId} attachments={attachments} searchResults={searchResults} + manualSkills={msg.manualSkills} setSiblingIdx={setSiblingIdx} isLatestMessage={isLatestMessage} isSubmitting={isSubmitting} diff --git a/client/src/hooks/Chat/useChatFunctions.ts b/client/src/hooks/Chat/useChatFunctions.ts index 18aaf0daee33..34c6990b072b 100644 --- a/client/src/hooks/Chat/useChatFunctions.ts +++ b/client/src/hooks/Chat/useChatFunctions.ts @@ -2,7 +2,7 @@ import { v4 } from 'uuid'; import { cloneDeep } from 'lodash'; import { useNavigate } from 'react-router-dom'; import { useQueryClient } from '@tanstack/react-query'; -import { useSetRecoilState, useResetRecoilState, useRecoilValue } from 'recoil'; +import { useSetRecoilState, useResetRecoilState, useRecoilValue, useRecoilCallback } from 'recoil'; import { Constants, QueryKeys, @@ -76,6 +76,30 @@ export default function useChatFunctions({ const setShowStopButton = useSetRecoilState(store.showStopButtonByIndex(index)); const resetLatestMultiMessage = useResetRecoilState(store.latestMessageFamily(index + 1)); + /** + * Atomically read + reset the per-conversation queue of manually-invoked + * skills from the `$` popover. Reading and resetting in a single Recoil + * snapshot guarantees that if the user selects more skills between here and + * the next submission, their picks are never silently lost into a reset atom. + * + * The `hasValue` guard is defensive: this atom has a synchronous default of + * `[]` so `.contents` is always the resolved value in practice, but reading + * `.contents` on a loading/errored loadable yields a Promise/Error, which + * would make the `string[]` cast unsound. + */ + const drainPendingManualSkills = useRecoilCallback( + ({ snapshot, reset }) => + (convoId: string): string[] => { + const loadable = snapshot.getLoadable(store.pendingManualSkillsByConvoId(convoId)); + const skills = loadable.state === 'hasValue' ? (loadable.contents as string[]) : []; + if (skills.length > 0) { + reset(store.pendingManualSkillsByConvoId(convoId)); + } + return skills; + }, + [], + ); + const ask: TAskFunction = ( { text, @@ -93,6 +117,7 @@ export default function useChatFunctions({ isEdited = false, overrideMessages, overrideFiles, + overrideManualSkills, addedConvo, } = {}, ) => { @@ -124,6 +149,23 @@ export default function useChatFunctions({ } const ephemeralAgent = getEphemeralAgent(conversationId ?? Constants.NEW_CONVO); + /** + * Manual skill selection resolution: + * - Explicit `overrideManualSkills` wins (regenerate / save-and-submit + * pass the original user message's persisted `manualSkills` so the + * resubmitted turn primes the same skills — the pills are still + * visible to the user, it would be strange to quietly drop them). + * - Regenerate / continue / edit without an override → empty, and the + * compose-time atom is deliberately NOT drained (those flows replay + * a prior turn, not compose a new one). + * - Fresh submit → drain the per-convo atom into the message. + */ + const manualSkills = + overrideManualSkills != null + ? overrideManualSkills + : isRegenerate || isContinued || isEdited + ? [] + : drainPendingManualSkills(conversationId ?? Constants.NEW_CONVO); const isEditOrContinue = isEdited || isContinued; let currentMessages: TMessage[] | null = overrideMessages ?? getMessages() ?? []; @@ -215,6 +257,13 @@ export default function useChatFunctions({ messageId: isContinued && messageId != null && messageId ? messageId : intermediateId, thread_id, error: false, + /** + * UI-only metadata. Survives reload because the backend persists the + * field on the message schema, and `ManualSkillPills` reads straight + * off the message so there's no Recoil state to clean up. Runtime + * skill resolution reads the top-level `manualSkills` payload field. + */ + manualSkills: manualSkills.length > 0 ? manualSkills : undefined, }; const submissionFiles = overrideFiles ?? targetParentMessage?.files; @@ -261,6 +310,16 @@ export default function useChatFunctions({ model: convo?.model, error: false, iconURL, + /** + * Seed the assistant placeholder with the turn's manually-invoked + * skill names so `ContentParts` can render interim `SkillCall` cards + * from the very first render — no round-trip through the `created` + * SSE event required. Rides along with every subsequent spread + * (`useStepHandler` response construction, `updateContent` result + * spreads) and drops out naturally at `finalHandler` when the + * server-backed `responseMessage` replacement takes over. + */ + manualSkills: manualSkills.length > 0 ? manualSkills : undefined, }; if (isAssistantsEndpoint(endpoint)) { @@ -332,6 +391,7 @@ export default function useChatFunctions({ ephemeralAgent, editedContent, addedConvo, + manualSkills: manualSkills.length > 0 ? manualSkills : undefined, }; if (isRegenerate) { @@ -354,7 +414,16 @@ export default function useChatFunctions({ if (parentMessage && parentMessage.isCreatedByUser) { ask( { ...parentMessage }, - { isRegenerate: true, addedConvo: options?.addedConvo ?? undefined }, + { + isRegenerate: true, + addedConvo: options?.addedConvo ?? undefined, + /** Carry the original user message's manual skill picks forward + * so the regenerated response is primed with the same skills. + * The compose-time atom was drained on the first submit; without + * this the model sees an unprimed turn even though the pills + * still show on the user bubble. */ + overrideManualSkills: parentMessage.manualSkills, + }, ); } else { console.error( diff --git a/client/src/hooks/SSE/useEventHandlers.ts b/client/src/hooks/SSE/useEventHandlers.ts index 366775c4c132..2759fb052725 100644 --- a/client/src/hooks/SSE/useEventHandlers.ts +++ b/client/src/hooks/SSE/useEventHandlers.ts @@ -347,6 +347,16 @@ export default function useEventHandlers({ queryClient.invalidateQueries([QueryKeys.mcpConnectionStatus]); queryClient.invalidateQueries([QueryKeys.mcpTools]); const { messages, userMessage, isRegenerate = false, isTemporary = false } = submission; + /** + * The spread carries `manualSkills` through from + * `submission.initialResponse` — `useChatFunctions` seeds the field + * there at construction so the assistant placeholder already has it + * by the time this handler fires. Subsequent `useStepHandler` + * spreads and `updateContent` spreads preserve it, and + * `finalHandler`'s server-backed `responseMessage` replacement + * drops it, which is the right behavior: by finalize the real + * `skill` tool_call is in `content` and takes over rendering. + */ const initialResponse = { ...submission.initialResponse, parentMessageId: userMessage.messageId, diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 19b0d9c76e6f..63924100cd65 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -1523,6 +1523,8 @@ "com_ui_skills_command_placeholder": "Select a Skill by name", "com_ui_skills_empty": "No skills yet", "com_ui_skills_load_error": "Failed to load skills", + "com_ui_skills_manual_invoked": "Manually invoked skills", + "com_ui_skills_queued": "Skills queued for next submission", "com_ui_sr_public_skill": "Public skill", "com_ui_special": "special", "com_ui_special_var_current_date": "Current Date", diff --git a/client/src/store/families.ts b/client/src/store/families.ts index f35bebc0d6d3..7e754e05ce4a 100644 --- a/client/src/store/families.ts +++ b/client/src/store/families.ts @@ -307,13 +307,12 @@ const showSkillsPopoverFamily = atomFamily({ /** * Per-conversation queue of skill names the user invoked manually via the - * `$` popover for the next submission. Acts as the structured channel - * paired with the cosmetic `$skill-name ` text inserted into the textarea. - * - * Phase 1: only the writer (SkillsCommand) is wired; the submit pipeline - * does not yet read or clear this atom. The follow-up PR will read this - * at `ask()` time, attach to the payload, and reset to `[]`. Until then - * the backend continues to receive only the textual `$name` reference. + * `$` popover for the next submission. Structured channel that the submit + * pipeline (`useChatFunctions.ask`) drains and pins onto the user message's + * `manualSkills` field (also echoed at the top of the payload for the + * runtime resolver), then resets to `[]`. Compose-time chips above the + * textarea read this atom directly so users see (and can dismiss) their + * current selection before hitting send. */ const pendingManualSkillsByConvoId = atomFamily({ key: 'pendingManualSkillsByConvoId', diff --git a/packages/api/src/agents/__tests__/initialize.test.ts b/packages/api/src/agents/__tests__/initialize.test.ts index be7377954ac9..f73e8953f288 100644 --- a/packages/api/src/agents/__tests__/initialize.test.ts +++ b/packages/api/src/agents/__tests__/initialize.test.ts @@ -422,3 +422,158 @@ describe('initializeAgent — maxContextTokens', () => { expect(result.maxContextTokens).toBe(1024); }); }); + +describe('initializeAgent — manual skill priming (Phase 3)', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + /** + * Minimal listSkillsByAccess that satisfies `injectSkillCatalog` so the + * manualSkills resolver branch runs. Returns an empty page — we don't care + * about the catalog here, only that `accessibleSkillIds` is non-empty so + * the manual-invocation block gets reached. + */ + const emptyListSkillsByAccess: InitializeAgentDbMethods['listSkillsByAccess'] = async () => ({ + skills: [], + has_more: false, + after: null, + }); + + it('attaches resolved manual skill primes to the initialized agent', async () => { + const { agent, req, res, loadTools, db } = createMocks(); + const { Types } = await import('mongoose'); + const skillId = new Types.ObjectId(); + /** + * Ownership-based active-state default only kicks in when + * `skill.author.toString() === userId`. The default mock user id is a + * literal string, not an ObjectId, so align the skill author with it so + * `resolveSkillActive` treats the skill as owned and active. + */ + const ownerAuthor = { + toString: () => req.user?.id, + } as unknown as import('mongoose').Types.ObjectId; + + const getSkillByName: InitializeAgentDbMethods['getSkillByName'] = jest.fn().mockResolvedValue({ + _id: skillId, + name: 'brand-guidelines', + body: '# Brand guidelines\nUse blue.', + author: ownerAuthor, + }); + + const result = await initializeAgent( + { + req, + res, + agent, + loadTools, + endpointOption: { endpoint: EModelEndpoint.agents }, + allowedProviders: new Set([Providers.OPENAI]), + isInitialAgent: true, + accessibleSkillIds: [skillId], + manualSkills: ['brand-guidelines'], + }, + { ...db, listSkillsByAccess: emptyListSkillsByAccess, getSkillByName }, + ); + + expect(result.manualSkillPrimes).toEqual([ + { name: 'brand-guidelines', body: '# Brand guidelines\nUse blue.' }, + ]); + expect(getSkillByName).toHaveBeenCalledWith('brand-guidelines', [skillId]); + }); + + it('leaves manualSkillPrimes undefined when no manualSkills are provided', async () => { + const { agent, req, res, loadTools, db } = createMocks(); + const { Types } = await import('mongoose'); + const skillId = new Types.ObjectId(); + + const result = await initializeAgent( + { + req, + res, + agent, + loadTools, + endpointOption: { endpoint: EModelEndpoint.agents }, + allowedProviders: new Set([Providers.OPENAI]), + isInitialAgent: true, + accessibleSkillIds: [skillId], + }, + { ...db, listSkillsByAccess: emptyListSkillsByAccess }, + ); + + expect(result.manualSkillPrimes).toBeUndefined(); + }); + + it('returns empty array when every manual skill is unresolvable (no primes, no throw)', async () => { + const { agent, req, res, loadTools, db } = createMocks(); + const { Types } = await import('mongoose'); + const skillId = new Types.ObjectId(); + + const getSkillByName: InitializeAgentDbMethods['getSkillByName'] = jest + .fn() + .mockResolvedValue(null); + + const result = await initializeAgent( + { + req, + res, + agent, + loadTools, + endpointOption: { endpoint: EModelEndpoint.agents }, + allowedProviders: new Set([Providers.OPENAI]), + isInitialAgent: true, + accessibleSkillIds: [skillId], + manualSkills: ['does-not-exist'], + }, + { ...db, listSkillsByAccess: emptyListSkillsByAccess, getSkillByName }, + ); + + expect(result.manualSkillPrimes).toEqual([]); + }); + + it('skips resolution entirely when accessibleSkillIds is empty (user has no skill access)', async () => { + const { agent, req, res, loadTools, db } = createMocks(); + const getSkillByName: InitializeAgentDbMethods['getSkillByName'] = jest.fn(); + + const result = await initializeAgent( + { + req, + res, + agent, + loadTools, + endpointOption: { endpoint: EModelEndpoint.agents }, + allowedProviders: new Set([Providers.OPENAI]), + isInitialAgent: true, + accessibleSkillIds: [], + manualSkills: ['anything'], + }, + { ...db, getSkillByName }, + ); + + expect(result.manualSkillPrimes).toBeUndefined(); + expect(getSkillByName).not.toHaveBeenCalled(); + }); + + it('silently no-ops when getSkillByName is not provided in db methods', async () => { + const { agent, req, res, loadTools, db } = createMocks(); + const { Types } = await import('mongoose'); + const skillId = new Types.ObjectId(); + + const result = await initializeAgent( + { + req, + res, + agent, + loadTools, + endpointOption: { endpoint: EModelEndpoint.agents }, + allowedProviders: new Set([Providers.OPENAI]), + isInitialAgent: true, + accessibleSkillIds: [skillId], + manualSkills: ['foo'], + }, + { ...db, listSkillsByAccess: emptyListSkillsByAccess }, + ); + + expect(result.manualSkillPrimes).toBeUndefined(); + }); +}); diff --git a/packages/api/src/agents/__tests__/skills.test.ts b/packages/api/src/agents/__tests__/skills.test.ts index 739502de31e8..6dfa10060465 100644 --- a/packages/api/src/agents/__tests__/skills.test.ts +++ b/packages/api/src/agents/__tests__/skills.test.ts @@ -27,7 +27,19 @@ jest.mock('@librechat/agents', () => ({ })); import { Types } from 'mongoose'; -import { scopeSkillIds, resolveSkillActive, injectSkillCatalog } from '../skills'; +import { HumanMessage, AIMessage } from '@langchain/core/messages'; +import { + scopeSkillIds, + resolveSkillActive, + injectSkillCatalog, + buildSkillPrimeMessage, + resolveManualSkills, + injectManualSkillPrimes, + extractManualSkills, + isSkillPrimeMessage, + buildSkillPrimeContentParts, + MAX_MANUAL_SKILLS, +} from '../skills'; import { extractInvokedSkillsFromPayload } from '../run'; type PageSkill = { @@ -559,3 +571,475 @@ describe('injectSkillCatalog', () => { expect(result.activeSkillIds).toEqual([]); }); }); + +describe('buildSkillPrimeMessage', () => { + it('produces a meta user message carrying SKILL.md body and skillName marker', () => { + const msg = buildSkillPrimeMessage({ + name: 'brand-guidelines', + body: '# Brand guidelines\nUse blue.', + }); + expect(msg).toEqual({ + role: 'user', + content: '# Brand guidelines\nUse blue.', + isMeta: true, + source: 'skill', + skillName: 'brand-guidelines', + }); + }); + + it('preserves empty-body content verbatim (resolver should filter before, but helper is agnostic)', () => { + const msg = buildSkillPrimeMessage({ name: 'bare', body: '' }); + expect(msg.content).toBe(''); + expect(msg.isMeta).toBe(true); + }); +}); + +describe('resolveManualSkills', () => { + const userId = new Types.ObjectId().toString(); + const userOid = new Types.ObjectId(userId); + const otherAuthor = new Types.ObjectId(); + + type SkillDoc = { + _id: Types.ObjectId; + name: string; + body: string; + author: Types.ObjectId; + }; + + const buildGetSkillByName = + (skills: Record) => + async (name: string, _accessibleIds: Types.ObjectId[]) => + skills[name] ?? null; + + const mkSkill = (name: string, author: Types.ObjectId, body = `body of ${name}`): SkillDoc => ({ + _id: new Types.ObjectId(), + name, + body, + author, + }); + + it('returns empty when no names supplied', async () => { + const result = await resolveManualSkills({ + names: [], + getSkillByName: buildGetSkillByName({}), + accessibleSkillIds: [new Types.ObjectId()], + userId, + }); + expect(result).toEqual([]); + }); + + it('returns empty when no accessible IDs (ACL empty)', async () => { + const result = await resolveManualSkills({ + names: ['foo'], + getSkillByName: buildGetSkillByName({ foo: mkSkill('foo', userOid) }), + accessibleSkillIds: [], + userId, + }); + expect(result).toEqual([]); + }); + + it('resolves owned skills to { name, body } pairs by default', async () => { + const owned = mkSkill('my-skill', userOid, 'MY SKILL BODY'); + const result = await resolveManualSkills({ + names: ['my-skill'], + getSkillByName: buildGetSkillByName({ 'my-skill': owned }), + accessibleSkillIds: [owned._id], + userId, + }); + expect(result).toEqual([{ name: 'my-skill', body: 'MY SKILL BODY' }]); + }); + + it('silently skips names with no backing skill (typo / ACL miss) without failing the batch', async () => { + const real = mkSkill('real', userOid); + const result = await resolveManualSkills({ + names: ['real', 'typo'], + getSkillByName: buildGetSkillByName({ real }), + accessibleSkillIds: [real._id], + userId, + }); + expect(result).toEqual([{ name: 'real', body: 'body of real' }]); + }); + + it('dedupes repeated names, preserving first occurrence order', async () => { + const a = mkSkill('a', userOid); + const b = mkSkill('b', userOid); + const result = await resolveManualSkills({ + names: ['a', 'b', 'a', 'b', 'a'], + getSkillByName: buildGetSkillByName({ a, b }), + accessibleSkillIds: [a._id, b._id], + userId, + }); + expect(result.map((r) => r.name)).toEqual(['a', 'b']); + }); + + it('filters shared skills when defaultActiveOnShare is false (unless override active=true)', async () => { + const shared = mkSkill('shared', otherAuthor); + const result = await resolveManualSkills({ + names: ['shared'], + getSkillByName: buildGetSkillByName({ shared }), + accessibleSkillIds: [shared._id], + userId, + defaultActiveOnShare: false, + }); + expect(result).toEqual([]); + }); + + it('allows shared skills when defaultActiveOnShare is true', async () => { + const shared = mkSkill('shared', otherAuthor, 'shared-body'); + const result = await resolveManualSkills({ + names: ['shared'], + getSkillByName: buildGetSkillByName({ shared }), + accessibleSkillIds: [shared._id], + userId, + defaultActiveOnShare: true, + }); + expect(result).toEqual([{ name: 'shared', body: 'shared-body' }]); + }); + + it('drops explicitly-deactivated skills (skillStates override wins over ownership default)', async () => { + const owned = mkSkill('owned-off', userOid); + const result = await resolveManualSkills({ + names: ['owned-off'], + getSkillByName: buildGetSkillByName({ 'owned-off': owned }), + accessibleSkillIds: [owned._id], + userId, + skillStates: { [owned._id.toString()]: false }, + }); + expect(result).toEqual([]); + }); + + it('allows explicitly-activated shared skill even when defaultActiveOnShare is false', async () => { + const shared = mkSkill('shared-on', otherAuthor, 'on-body'); + const result = await resolveManualSkills({ + names: ['shared-on'], + getSkillByName: buildGetSkillByName({ 'shared-on': shared }), + accessibleSkillIds: [shared._id], + userId, + defaultActiveOnShare: false, + skillStates: { [shared._id.toString()]: true }, + }); + expect(result).toEqual([{ name: 'shared-on', body: 'on-body' }]); + }); + + it('skips skills with empty bodies (priming nothing adds no value)', async () => { + const empty = mkSkill('empty', userOid, ''); + const result = await resolveManualSkills({ + names: ['empty'], + getSkillByName: buildGetSkillByName({ empty }), + accessibleSkillIds: [empty._id], + userId, + }); + expect(result).toEqual([]); + }); + + it('swallows getSkillByName errors per-name so one bad lookup does not drop the rest', async () => { + const good = mkSkill('good', userOid, 'good-body'); + const getSkillByName = async (name: string) => { + if (name === 'boom') { + throw new Error('db exploded'); + } + return name === 'good' ? good : null; + }; + const result = await resolveManualSkills({ + names: ['boom', 'good'], + getSkillByName, + accessibleSkillIds: [good._id], + userId, + }); + expect(result).toEqual([{ name: 'good', body: 'good-body' }]); + }); + + it('truncates manual skill lists above MAX_MANUAL_SKILLS to bound concurrent DB lookups', async () => { + const skills: Record = {}; + const names: string[] = []; + for (let i = 0; i < MAX_MANUAL_SKILLS + 5; i++) { + const name = `s${i}`; + skills[name] = mkSkill(name, userOid, `body-${i}`); + names.push(name); + } + const getSkillByName = jest.fn(buildGetSkillByName(skills)); + const result = await resolveManualSkills({ + names, + getSkillByName, + accessibleSkillIds: names.map((n) => skills[n]._id), + userId, + }); + expect(result).toHaveLength(MAX_MANUAL_SKILLS); + expect(getSkillByName).toHaveBeenCalledTimes(MAX_MANUAL_SKILLS); + // Preserves input order — drops the tail, keeps the head. + expect(result.map((r) => r.name)).toEqual(names.slice(0, MAX_MANUAL_SKILLS)); + }); + + it('applies the cap AFTER dedup so repeated names do not consume slots', async () => { + const skills: Record = {}; + const uniqueCount = MAX_MANUAL_SKILLS; + for (let i = 0; i < uniqueCount; i++) { + const name = `u${i}`; + skills[name] = mkSkill(name, userOid, `body-${i}`); + } + // Duplicate every name 3x — total length 3 × MAX_MANUAL_SKILLS, unique = MAX_MANUAL_SKILLS. + const names: string[] = []; + for (let pass = 0; pass < 3; pass++) { + for (let i = 0; i < uniqueCount; i++) { + names.push(`u${i}`); + } + } + const result = await resolveManualSkills({ + names, + getSkillByName: buildGetSkillByName(skills), + accessibleSkillIds: Object.values(skills).map((s) => s._id), + userId, + }); + expect(result).toHaveLength(uniqueCount); + }); +}); + +describe('injectManualSkillPrimes', () => { + const prime = (name: string, body: string) => ({ name, body }); + + it('no-ops when manualSkillPrimes is empty', () => { + const messages = [new HumanMessage('hello')]; + const map = { 0: 10 }; + const result = injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: map, + manualSkillPrimes: [], + }); + expect(result.inserted).toBe(0); + expect(result.insertIdx).toBe(-1); + expect(messages).toHaveLength(1); + expect(result.indexTokenCountMap).toBe(map); + }); + + it('no-ops when initialMessages is empty', () => { + const messages: HumanMessage[] = []; + const result = injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: undefined, + manualSkillPrimes: [prime('foo', 'body')], + }); + expect(result.inserted).toBe(0); + expect(result.insertIdx).toBe(-1); + expect(messages).toHaveLength(0); + }); + + it('inserts a single prime right before the last message when there is only one message', () => { + const userMsg = new HumanMessage('What is X?'); + const messages = [userMsg]; + const result = injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: { 0: 7 }, + manualSkillPrimes: [prime('x', 'X means...')], + }); + expect(result.inserted).toBe(1); + expect(result.insertIdx).toBe(0); + expect(messages).toHaveLength(2); + expect(messages[0].content).toBe('X means...'); + expect(messages[1]).toBe(userMsg); + // The prior-index-0 count follows the message it was attached to: now at idx 1. + expect(result.indexTokenCountMap).toEqual({ 1: 7 }); + }); + + it('inserts multiple primes in input order right before the last message', () => { + const messages: (HumanMessage | AIMessage)[] = [ + new HumanMessage('turn 1 user'), + new AIMessage('turn 1 reply'), + new HumanMessage('turn 2 user'), + ]; + const result = injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: { 0: 1, 1: 2, 2: 3 }, + manualSkillPrimes: [prime('a', 'A body'), prime('b', 'B body')], + }); + expect(result.inserted).toBe(2); + expect(result.insertIdx).toBe(2); + expect(messages).toHaveLength(5); + expect(messages[0].content).toBe('turn 1 user'); + expect(messages[1].content).toBe('turn 1 reply'); + expect(messages[2].content).toBe('A body'); + expect(messages[3].content).toBe('B body'); + expect(messages[4].content).toBe('turn 2 user'); + // idx 0 unchanged, idx 1 unchanged, idx 2 shifts to 4 (+ numPrimes). + expect(result.indexTokenCountMap).toEqual({ 0: 1, 1: 2, 4: 3 }); + }); + + it('primes carry isMeta / source / skillName advisory markers in additional_kwargs', () => { + const messages = [new HumanMessage('hi')]; + injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: undefined, + manualSkillPrimes: [prime('brand', 'brand-body')], + }); + const primed = messages[0] as HumanMessage; + expect(primed.additional_kwargs).toEqual({ + isMeta: true, + source: 'skill', + skillName: 'brand', + }); + }); + + it('uses `>=` when shifting the map so the message originally at insertIdx follows its count forward', () => { + // Regression guard: a `>` bug here would leave the last user message's + // token count attached to one of the new primes instead. + const messages = [new HumanMessage('older'), new HumanMessage('latest')]; + const result = injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: { 0: 5, 1: 11 }, + manualSkillPrimes: [prime('x', 'x body')], + }); + // insertIdx = 1, numPrimes = 1. Entry at idx 1 must move to idx 2. + expect(result.indexTokenCountMap).toEqual({ 0: 5, 2: 11 }); + }); + + it('handles undefined indexTokenCountMap (formatAgentMessages omitted the map)', () => { + const messages = [new HumanMessage('hi')]; + const result = injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: undefined, + manualSkillPrimes: [prime('x', 'body')], + }); + expect(result.inserted).toBe(1); + expect(result.indexTokenCountMap).toBeUndefined(); + }); +}); + +describe('extractManualSkills', () => { + it('returns undefined for null / non-object bodies', () => { + expect(extractManualSkills(null)).toBeUndefined(); + expect(extractManualSkills(undefined)).toBeUndefined(); + expect(extractManualSkills('string')).toBeUndefined(); + expect(extractManualSkills(42)).toBeUndefined(); + }); + + it('returns undefined when manualSkills is missing or not an array', () => { + expect(extractManualSkills({})).toBeUndefined(); + expect(extractManualSkills({ manualSkills: 'foo' })).toBeUndefined(); + expect(extractManualSkills({ manualSkills: { 0: 'foo' } })).toBeUndefined(); + }); + + it('passes through valid string[] input untouched', () => { + expect(extractManualSkills({ manualSkills: ['a', 'b'] })).toEqual(['a', 'b']); + }); + + it('filters out non-string elements that a TS-oblivious payload might include', () => { + expect( + extractManualSkills({ + manualSkills: ['ok', 123, null, { $gt: '' }, undefined, 'also-ok'], + }), + ).toEqual(['ok', 'also-ok']); + }); + + it('filters out empty strings', () => { + expect(extractManualSkills({ manualSkills: ['', 'real', ''] })).toEqual(['real']); + }); + + it('returns undefined when the array is present but contains no valid strings', () => { + expect(extractManualSkills({ manualSkills: [123, null, ''] })).toBeUndefined(); + expect(extractManualSkills({ manualSkills: [] })).toBeUndefined(); + }); +}); + +describe('isSkillPrimeMessage', () => { + it('returns true for a prime produced by injectManualSkillPrimes', () => { + const messages = [new HumanMessage('user turn')]; + injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: undefined, + manualSkillPrimes: [{ name: 'x', body: 'x body' }], + }); + expect(isSkillPrimeMessage(messages[0])).toBe(true); + }); + + it('returns false for plain HumanMessage / AIMessage without the marker', () => { + expect(isSkillPrimeMessage(new HumanMessage('hi'))).toBe(false); + expect(isSkillPrimeMessage(new AIMessage('reply'))).toBe(false); + }); + + it('returns false for additional_kwargs with a different source', () => { + const m = new HumanMessage({ + content: 'x', + additional_kwargs: { source: 'not-a-skill' }, + }); + expect(isSkillPrimeMessage(m)).toBe(false); + }); + + it('returns false for non-object / null / undefined inputs', () => { + expect(isSkillPrimeMessage(null)).toBe(false); + expect(isSkillPrimeMessage(undefined)).toBe(false); + expect(isSkillPrimeMessage('just a string')).toBe(false); + expect(isSkillPrimeMessage(42)).toBe(false); + }); + + it('filter() on a mixed array keeps the non-prime messages', () => { + const user = new HumanMessage('real turn'); + const messages: (HumanMessage | AIMessage)[] = [new AIMessage('reply'), user]; + injectManualSkillPrimes({ + initialMessages: messages, + indexTokenCountMap: undefined, + manualSkillPrimes: [{ name: 'x', body: 'x body' }], + }); + // After priming: [AIMessage, primeHumanMessage, user] + const stripped = messages.filter((m) => !isSkillPrimeMessage(m)); + expect(stripped).toHaveLength(2); + expect(stripped[1]).toBe(user); + }); +}); + +describe('buildSkillPrimeContentParts', () => { + it('produces one completed tool_call content part per prime', () => { + const parts = buildSkillPrimeContentParts( + [ + { name: 'brand', body: '# Brand' }, + { name: 'legal', body: '# Legal' }, + ], + { runId: 'msg_abc' }, + ); + expect(parts).toHaveLength(2); + expect(parts[0].type).toBe('tool_call'); + expect(parts[0].tool_call.name).toBe('skill'); + expect(parts[0].tool_call.progress).toBe(1); + expect(parts[0].tool_call.type).toBe('tool_call'); + }); + + it('encodes skillName as a JSON string in args (matches the SkillCall renderer contract)', () => { + const [part] = buildSkillPrimeContentParts([{ name: 'pdf-reader', body: '...' }], { + runId: 'msg_1', + }); + expect(JSON.parse(part.tool_call.args)).toEqual({ skillName: 'pdf-reader' }); + }); + + it('produces a human-readable output string the frontend can expand', () => { + const [part] = buildSkillPrimeContentParts([{ name: 'research', body: '...' }], { + runId: 'msg_1', + }); + expect(part.tool_call.output).toContain('research'); + expect(part.tool_call.output).toContain('loaded'); + }); + + it('assigns unique tool_call IDs seeded from runId + index', () => { + const parts = buildSkillPrimeContentParts( + [ + { name: 'a', body: 'a' }, + { name: 'b', body: 'b' }, + ], + { runId: 'msg_xyz' }, + ); + const ids = parts.map((p) => p.tool_call.id); + expect(new Set(ids).size).toBe(2); + expect(ids[0]).toContain('msg_xyz'); + expect(ids[1]).toContain('msg_xyz'); + }); + + it('returns an empty array for empty input', () => { + expect(buildSkillPrimeContentParts([], { runId: 'msg_1' })).toEqual([]); + }); + + it('respects startOffset when seeding IDs (callers prepending to a populated list)', () => { + const first = buildSkillPrimeContentParts([{ name: 'x', body: 'x' }], { runId: 'msg_1' }); + const second = buildSkillPrimeContentParts([{ name: 'x', body: 'x' }], { + runId: 'msg_1', + startOffset: 5, + }); + expect(first[0].tool_call.id).not.toBe(second[0].tool_call.id); + }); +}); diff --git a/packages/api/src/agents/handlers.ts b/packages/api/src/agents/handlers.ts index 8ec138ffe453..778540d38426 100644 --- a/packages/api/src/agents/handlers.ts +++ b/packages/api/src/agents/handlers.ts @@ -14,6 +14,7 @@ import type { StructuredToolInterface } from '@langchain/core/tools'; import type { ServerRequest } from '~/types'; import { primeSkillFiles } from './skillFiles'; import type { SkillFileRecord } from './skillFiles'; +import { buildSkillPrimeMessage } from './skills'; import { runOutsideTracing } from '~/utils'; export interface ToolEndCallbackData { @@ -437,9 +438,7 @@ async function handleSkillToolCall( body = body.replace(/\$ARGUMENTS/g, args.args); } - const injectedMessages: InjectedMessage[] = [ - { role: 'user', content: body, isMeta: true, source: 'skill', skillName: skill.name }, - ]; + const injectedMessages: InjectedMessage[] = [buildSkillPrimeMessage({ name: skill.name, body })]; const contentText = `Skill "${args.skillName}" loaded. Follow the instructions below.`; let artifact: diff --git a/packages/api/src/agents/initialize.ts b/packages/api/src/agents/initialize.ts index 4e887caecb63..f94eb6c01201 100644 --- a/packages/api/src/agents/initialize.ts +++ b/packages/api/src/agents/initialize.ts @@ -30,7 +30,8 @@ import { import { filterFilesByEndpointConfig } from '~/files'; import { generateArtifactsPrompt } from '~/prompts'; import { getProviderConfig } from '~/endpoints'; -import { injectSkillCatalog } from './skills'; +import { injectSkillCatalog, resolveManualSkills } from './skills'; +import type { ResolvedManualSkill } from './skills'; import { primeResources } from './resources'; import type { TFilterFilesByAgentAccess } from './resources'; @@ -71,6 +72,13 @@ export type InitializedAgent = Agent & { accessibleSkillIds?: import('mongoose').Types.ObjectId[]; /** Number of skills in the catalog (used to determine if SkillTool should be registered) */ skillCount?: number; + /** + * Skills the user manually invoked for this turn via the `$` popover, resolved + * to their SKILL.md bodies. The AgentClient injects these as meta user + * messages right before the latest user message in the LLM's formatted + * message array — deterministic priming without a tool roundtrip. + */ + manualSkillPrimes?: ResolvedManualSkill[]; }; export const DEFAULT_MAX_CONTEXT_TOKENS = 32000; @@ -127,6 +135,13 @@ export interface InitializeAgentParams { skillStates?: Record; /** Admin-configured default for shared skills (`true` = shared skills auto-activate). */ defaultActiveOnShare?: boolean; + /** + * Skill names the user invoked manually for this turn via the `$` popover. + * Resolved here (ACL + active-state filtered) and attached to the returned + * InitializedAgent as `manualSkillPrimes` for the AgentClient to inject as + * meta user messages before the LLM call. + */ + manualSkills?: string[]; } /** @@ -173,6 +188,20 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods { has_more?: boolean; after?: string | null; }>; + /** + * Load a single skill by name, constrained to an ACL-accessible ID set. + * Returns the full document (including `body`) so manual invocation can + * prime SKILL.md without a second DB round-trip. + */ + getSkillByName?: ( + name: string, + accessibleIds: import('mongoose').Types.ObjectId[], + ) => Promise<{ + _id: import('mongoose').Types.ObjectId; + name: string; + body: string; + author: import('mongoose').Types.ObjectId; + } | null>; } /** @@ -454,6 +483,7 @@ export async function initializeAgent( * LLM (or a direct-invocation path) names one. */ let executableSkillIds = params.accessibleSkillIds; + let manualSkillPrimes: ResolvedManualSkill[] | undefined; const { accessibleSkillIds } = params; if (accessibleSkillIds && accessibleSkillIds.length > 0) { const skillResult = await injectSkillCatalog({ @@ -471,6 +501,25 @@ export async function initializeAgent( toolDefinitions = skillResult.toolDefinitions; skillCount = skillResult.skillCount; executableSkillIds = skillResult.activeSkillIds; + + /** + * Resolve manually-invoked skills against the same ACL set used for the + * catalog. We use `accessibleSkillIds` (not `activeSkillIds`) because the + * catalog is capped at a token budget — a skill that fell outside that cap + * can still be authorized for direct manual invocation. The active-state + * filter is re-applied inside `resolveManualSkills` so deactivated skills + * never leak through this path either. + */ + if (params.manualSkills?.length && db.getSkillByName) { + manualSkillPrimes = await resolveManualSkills({ + names: params.manualSkills, + getSkillByName: db.getSkillByName, + accessibleSkillIds, + userId: req.user?.id, + skillStates: params.skillStates, + defaultActiveOnShare: params.defaultActiveOnShare, + }); + } } const agentMaxContextNum = Number(agentMaxContextTokens) || DEFAULT_MAX_CONTEXT_TOKENS; @@ -506,6 +555,7 @@ export async function initializeAgent( baseContextTokens, skillCount, accessibleSkillIds: executableSkillIds, + manualSkillPrimes, attachments: finalAttachments, toolContextMap: toolContextMap ?? {}, useLegacyContent: !!options.useLegacyContent, diff --git a/packages/api/src/agents/skills.ts b/packages/api/src/agents/skills.ts index 5227875c027f..6d9eafbff32d 100644 --- a/packages/api/src/agents/skills.ts +++ b/packages/api/src/agents/skills.ts @@ -1,13 +1,15 @@ +import { logger } from '@librechat/data-schemas'; +import { HumanMessage } from '@langchain/core/messages'; import { formatSkillCatalog, SkillToolDefinition, ReadFileToolDefinition, BashExecutionToolDefinition, } from '@librechat/agents'; -import type { LCToolRegistry, LCTool } from '@librechat/agents'; -import { logger } from '@librechat/data-schemas'; -import type { Types } from 'mongoose'; +import type { LCToolRegistry, LCTool, InjectedMessage } from '@librechat/agents'; +import type { BaseMessage } from '@langchain/core/messages'; import type { Agent } from 'librechat-data-provider'; +import type { Types } from 'mongoose'; import type { InitializeAgentDbMethods } from './initialize'; const SKILL_CATALOG_LIMIT = 100; @@ -15,6 +17,45 @@ const SKILL_CATALOG_LIMIT = 100; const MAX_CATALOG_PAGES = 10; /** Page size used when paginating to fill the active-skill quota. */ const CATALOG_PAGE_SIZE = 100; +/** + * Hard ceiling on skill names resolved per request via `$` popover or + * `always-apply`. The popover realistically surfaces only a few per turn; + * the cap is a defense-in-depth against a crafted payload fanning out into + * many concurrent `getSkillByName` DB lookups. + */ +export const MAX_MANUAL_SKILLS = 10; + +/** + * Hard ceiling on individual skill name length. Real skill names are + * short slugs (e.g. `pptx`, `brand-guidelines`); anything beyond this is + * a crafted payload. Filtered out before the DB round-trip so pathological + * strings can't reach `getSkillByName` / Mongo's query planner. + */ +export const MAX_SKILL_NAME_LENGTH = 200; + +/** + * Marker tagged onto every skill-primed message (as `additional_kwargs.source` + * on a LangChain `HumanMessage`, or as `source` on the `InjectedMessage` that + * `handleSkillToolCall` emits). Downstream filtering/telemetry keys off this, + * so both construction paths must agree — keep the literal exported from one + * place rather than repeated inline. + */ +export const SKILL_MESSAGE_SOURCE = 'skill'; + +/** + * Predicate that identifies a LangChain message as one we spliced in via + * `injectManualSkillPrimes` (or the equivalent model-invoked path). Callers + * like `runMemory` use this to strip synthetic skill content from windows + * that should only contain real user chat — without this filter, SKILL.md + * bodies pollute memory extraction and crowd out genuine turns. + */ +export function isSkillPrimeMessage(msg: unknown): boolean { + if (!msg || typeof msg !== 'object') { + return false; + } + const kwargs = (msg as { additional_kwargs?: { source?: unknown } }).additional_kwargs; + return !!kwargs && kwargs.source === SKILL_MESSAGE_SOURCE; +} /** * Scopes user-accessible skill IDs to only those configured on the agent. @@ -240,3 +281,322 @@ export async function injectSkillCatalog( activeSkillIds: activeSkills.map((s) => s._id), }; } + +/** + * Builds the meta user message that carries a skill's SKILL.md body into a + * turn's context. Shape mirrors what `handleSkillToolCall` emits when the + * model invokes the skill tool directly, so downstream message handling + * treats all three paths identically. + * + * Used by: + * - Phase 3 manual invocation (`$skill-name` popover) — called at turn start. + * - Phase 5 `always-apply` frontmatter — called at turn start. + * - `handleSkillToolCall` for model-invoked skills — called from the tool + * execution handler. + */ +export function buildSkillPrimeMessage(skill: { name: string; body: string }): InjectedMessage { + return { + role: 'user', + content: skill.body, + isMeta: true, + source: SKILL_MESSAGE_SOURCE, + skillName: skill.name, + }; +} + +export interface ResolveManualSkillsParams { + /** Skill names the user invoked (via `$` popover or `always-apply`). */ + names: string[]; + /** DB lookup: name → skill doc, constrained to ACL-accessible IDs. */ + getSkillByName: ( + name: string, + accessibleIds: Types.ObjectId[], + ) => Promise<{ + _id: Types.ObjectId; + name: string; + body: string; + author: Types.ObjectId | string; + } | null>; + /** ACL-accessible skill IDs for this user (already scoped by `scopeSkillIds`). */ + accessibleSkillIds: Types.ObjectId[]; + /** Current user ID — required for ownership-based active-state defaults. */ + userId?: string; + /** Per-user skill active/inactive overrides. */ + skillStates?: Record; + /** Admin-configured default for shared skills. */ + defaultActiveOnShare?: boolean; +} + +export interface ResolvedManualSkill { + name: string; + body: string; +} + +/** + * Resolves user-provided skill names to `{ name, body }` pairs ready for + * priming. Filters out: + * - names not backed by an accessible skill (ACL miss or typo), + * - skills the user has toggled inactive (respects ownership-aware defaults). + * + * The active-state filter is intentional even for explicit `$` selections: + * 1. Phase 2 closes the loop on the UI side by hiding inactive skills from + * the popover, so a deactivated skill shouldn't be reachable through the + * normal flow in the first place. + * 2. For API-direct callers bypassing the popover, honoring the user's + * own activation toggle is the safer default — an opt-out toggle that + * stops working the moment someone crafts a raw payload isn't much of + * a toggle. + * + * Silently skips unresolvable names with a warn log — a missing `$skill` must + * never block the user's actual message from going through. + * + * Preserves input order and drops duplicate names (first wins) so a user who + * types `$foo $foo` doesn't end up with the body primed twice. + */ +export async function resolveManualSkills( + params: ResolveManualSkillsParams, +): Promise { + const { names, getSkillByName, accessibleSkillIds, userId, skillStates, defaultActiveOnShare } = + params; + + if (!names.length || accessibleSkillIds.length === 0) { + return []; + } + + const seen = new Set(); + const uniqueNames = names.filter((n) => { + if (!n || seen.has(n)) { + return false; + } + seen.add(n); + return true; + }); + + /** + * Truncate after dedup so a user repeating the same name doesn't consume + * cap slots. Kept inside the function (not at the controller) so every + * caller — including future internal ones — inherits the protection. + */ + let boundedNames = uniqueNames; + if (uniqueNames.length > MAX_MANUAL_SKILLS) { + const droppedAll = uniqueNames.slice(MAX_MANUAL_SKILLS); + const DROPPED_LOG_SAMPLE = 5; + const droppedSample = droppedAll.slice(0, DROPPED_LOG_SAMPLE).join(', '); + const droppedSuffix = + droppedAll.length > DROPPED_LOG_SAMPLE + ? `, ... (${droppedAll.length - DROPPED_LOG_SAMPLE} more)` + : ''; + logger.warn( + `[resolveManualSkills] Truncating manual skill list from ${uniqueNames.length} to ${MAX_MANUAL_SKILLS}: dropped [${droppedSample}${droppedSuffix}]`, + ); + boundedNames = uniqueNames.slice(0, MAX_MANUAL_SKILLS); + } + + const resolved = await Promise.all( + boundedNames.map(async (name) => { + try { + const skill = await getSkillByName(name, accessibleSkillIds); + if (!skill) { + logger.warn(`[resolveManualSkills] Skill "${name}" not found or not accessible`); + return null; + } + if (!skill.body) { + logger.warn(`[resolveManualSkills] Skill "${name}" has empty body — skipping`); + return null; + } + const active = resolveSkillActive({ + skill: { _id: skill._id, author: skill.author }, + skillStates, + userId, + defaultActiveOnShare, + }); + if (!active) { + logger.warn(`[resolveManualSkills] Skill "${name}" is inactive for this user — skipping`); + return null; + } + return { name: skill.name, body: skill.body }; + } catch (err) { + logger.warn( + `[resolveManualSkills] Failed to resolve skill "${name}":`, + err instanceof Error ? err.message : err, + ); + return null; + } + }), + ); + + return resolved.filter((r): r is ResolvedManualSkill => r !== null); +} + +export interface InjectManualSkillPrimesParams { + /** Formatted LangChain messages produced by `formatAgentMessages`. Mutated in place. */ + initialMessages: BaseMessage[]; + /** Per-index token count map returned by `formatAgentMessages`. */ + indexTokenCountMap: Record | undefined; + /** Resolved skill primes to splice in. */ + manualSkillPrimes: ResolvedManualSkill[]; +} + +export interface InjectManualSkillPrimesResult { + /** Same array reference as input (mutated). */ + initialMessages: BaseMessage[]; + /** New token map with indices shifted; same reference as input when no-op. */ + indexTokenCountMap: Record | undefined; + /** Number of prime messages actually inserted (for logging by the caller). */ + inserted: number; + /** Position where primes were inserted, or -1 when no-op. */ + insertIdx: number; +} + +/** + * Splices skill prime messages into a formatted message array just before + * the latest user message, and shifts the `indexTokenCountMap` so downstream + * `hydrateMissingIndexTokenCounts` fills counts for the new positions + * without corrupting accounting for pre-existing ones. + * + * Insertion semantics: + * - `insertIdx = initialMessages.length - 1`. Empty input → no-op. + * - Single-message input → `insertIdx = 0`, primes appear before the lone message. + * - Map shift: entries with `idx >= insertIdx` move forward by `numPrimes`. + * Using `>=` (not `>`) is load-bearing: the message that was at `insertIdx` + * is pushed to `insertIdx + numPrimes` by the splice, so its count must + * follow it. + * + * Callers are responsible for scoping (e.g. single-agent runs only — see + * `AgentClient.chatCompletion`). This helper is agent-agnostic. + */ +export function injectManualSkillPrimes( + params: InjectManualSkillPrimesParams, +): InjectManualSkillPrimesResult { + const { initialMessages, manualSkillPrimes } = params; + let { indexTokenCountMap } = params; + + if (manualSkillPrimes.length === 0 || initialMessages.length === 0) { + return { initialMessages, indexTokenCountMap, inserted: 0, insertIdx: -1 }; + } + + const insertIdx = initialMessages.length - 1; + const numPrimes = manualSkillPrimes.length; + + if (indexTokenCountMap) { + const shifted: Record = {}; + for (const [idxStr, count] of Object.entries(indexTokenCountMap)) { + const idx = Number(idxStr); + shifted[idx >= insertIdx ? idx + numPrimes : idx] = count; + } + indexTokenCountMap = shifted; + } + + const primeMessages = manualSkillPrimes.map( + (p) => + new HumanMessage({ + content: p.body, + additional_kwargs: { isMeta: true, source: SKILL_MESSAGE_SOURCE, skillName: p.name }, + }), + ); + initialMessages.splice(insertIdx, 0, ...primeMessages); + + return { initialMessages, indexTokenCountMap, inserted: numPrimes, insertIdx }; +} + +export interface SkillPrimeContentPart { + type: 'tool_call'; + tool_call: { + id: string; + name: string; + args: string; + output: string; + progress: number; + type: 'tool_call'; + }; +} + +export interface BuildSkillPrimeContentPartsParams { + /** Run / response message ID. Used as a stable seed for synthetic tool_call IDs. */ + runId: string; + /** + * Optional index offset. When the primes are prepended to an existing + * contentParts array, callers can leave this 0 (IDs still stay unique + * within the message). Exposed for explicitness only. + */ + startOffset?: number; +} + +/** + * Build completed tool_call content parts for each manually-invoked skill. + * + * Matches the shape the aggregator produces for a model-invoked skill call + * on `ON_RUN_STEP_COMPLETED` — `{ type: 'tool_call', tool_call: { id, name, + * args, output, progress: 1, type: 'tool_call' } }` — so the existing + * `SkillCall` renderer on the frontend shows identical "Skill X loaded" + * cards without any new client work. + * + * Callers (e.g. `AgentClient.chatCompletion`) prepend these to + * `this.contentParts` after `runAgents` returns so the cards persist on + * the response message and appear ahead of the model's content — matching + * the semantic that priming ran before the LLM turn. + * + * ────────────────────────────────────────────────────────────────────── + * Intentional: sticky re-priming across turns. + * ────────────────────────────────────────────────────────────────────── + * Shape parity with model-invoked tool_calls means `extractInvokedSkills + * FromPayload` (packages/api/src/agents/run.ts) treats these as ordinary + * skill invocations when scanning history, so `primeInvokedSkills` re- + * prepares the files + body on every subsequent turn for the rest of + * the conversation. This is deliberate: the skill-card UX on the assistant + * message is the user's persistent visual cue that the skill is active, + * and re-priming keeps the LLM's view consistent with what the user sees + * (and matches how model-invoked skills behave — there is no separate + * "one-shot vs. conversation-scoped" mode). To opt out of a sticky skill, + * users need to start a new conversation or edit the originating user + * message to remove the pills and resubmit — regenerate alone preserves + * the picks (`useChatFunctions.regenerate` forwards them via + * `overrideManualSkills`). A future "one-shot manual prime" mode would + * need a distinct marker on the synthetic tool_call so the history + * scanner could skip it. + */ +export function buildSkillPrimeContentParts( + primes: ResolvedManualSkill[], + { runId, startOffset = 0 }: BuildSkillPrimeContentPartsParams, +): SkillPrimeContentPart[] { + return primes.map((prime, i) => { + const args = JSON.stringify({ skillName: prime.name }); + return { + type: 'tool_call', + tool_call: { + id: `call_manual_skill_${runId}_${startOffset + i}`, + name: SkillToolDefinition.name, + args, + output: `Skill "${prime.name}" loaded. Follow the instructions below.`, + progress: 1, + type: 'tool_call', + }, + }; + }); +} + +/** + * Safely pulls `manualSkills` from an untyped request body. + * + * Accepts only string[] in practice: filters out non-string elements, + * empty strings, and pathologically long names so a crafted payload + * (numbers, objects, nulls, giga-strings) can't reach `getSkillByName` + * and waste DB round-trips, matching the TypeScript contract + * (`manualSkills?: string[]` on TPayload). Returns `undefined` for + * missing / non-array inputs OR for arrays that contain no valid strings — + * callers treat undefined as "no manual invocation this turn." + */ +export function extractManualSkills(body: unknown): string[] | undefined { + if (!body || typeof body !== 'object') { + return undefined; + } + const raw = (body as { manualSkills?: unknown }).manualSkills; + if (!Array.isArray(raw)) { + return undefined; + } + const filtered = raw.filter( + (entry): entry is string => + typeof entry === 'string' && entry.length > 0 && entry.length <= MAX_SKILL_NAME_LENGTH, + ); + return filtered.length > 0 ? filtered : undefined; +} diff --git a/packages/data-provider/src/createPayload.ts b/packages/data-provider/src/createPayload.ts index 3056a7021b99..783ad7f9dd86 100644 --- a/packages/data-provider/src/createPayload.ts +++ b/packages/data-provider/src/createPayload.ts @@ -14,6 +14,7 @@ export default function createPayload(submission: t.TSubmission) { editedContent, ephemeralAgent, endpointOption, + manualSkills, } = submission; const { conversationId } = s.tConvoUpdateSchema.parse(conversation); const { endpoint: _e, endpointType } = endpointOption as { @@ -40,6 +41,7 @@ export default function createPayload(submission: t.TSubmission) { conversationId, isContinued: !!(isEdited && isContinued), ephemeralAgent: s.isAssistantsEndpoint(endpoint) ? undefined : ephemeralAgent, + manualSkills: s.isAssistantsEndpoint(endpoint) ? undefined : manualSkills, }; return { server, payload }; diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index 6a36e25f9dbd..529278b9c2ba 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -680,6 +680,15 @@ export const tMessageSchema = z.object({ ), }) .optional(), + /** + * Skill names the user invoked manually via the `$` popover on this turn. + * Purely UI metadata — `ManualSkillPills` renders these above the message + * bubble so users can see which skills they asked for in history and on + * reload. Runtime resolution uses the top-level payload field with the + * same name. Empty / absent for model-invoked skills (shown as tool_call + * content parts on the assistant message instead). + */ + manualSkills: z.array(z.string()).optional(), }); export type MemoryArtifact = { diff --git a/packages/data-provider/src/types.ts b/packages/data-provider/src/types.ts index b5f032cd799e..a142f720a216 100644 --- a/packages/data-provider/src/types.ts +++ b/packages/data-provider/src/types.ts @@ -116,6 +116,13 @@ export type TPayload = Partial & editedContent?: TEditedContent | null; /** Added conversation for multi-convo feature */ addedConvo?: TConversation; + /** + * Skills the user selected via the `$` popover for this turn. Names, not IDs + * — the backend resolves them against the user's ACL-accessible skill set, + * loads each SKILL.md body, and prepends one meta user message per skill + * before the LLM turn runs. + */ + manualSkills?: string[]; }; export type TEditedContent = @@ -145,6 +152,8 @@ export type TSubmission = { editedContent?: TEditedContent | null; /** Added conversation for multi-convo feature */ addedConvo?: TConversation; + /** Skills the user invoked via the `$` popover for this submission. */ + manualSkills?: string[]; }; export type EventSubmission = Omit & { initialResponse: TMessage }; diff --git a/packages/data-schemas/src/schema/message.ts b/packages/data-schemas/src/schema/message.ts index 9879efae557c..aeba714b89d3 100644 --- a/packages/data-schemas/src/schema/message.ts +++ b/packages/data-schemas/src/schema/message.ts @@ -122,6 +122,14 @@ const messageSchema: Schema = new Schema( default: undefined, }, attachments: { type: [{ type: mongoose.Schema.Types.Mixed }], default: undefined }, + /** + * Skill names the user invoked manually via the `$` popover on this turn. + * UI metadata only — `ManualSkillPills` on the frontend renders these on + * the user message bubble so the selection persists through reload and + * shows in history. Runtime skill resolution lives separately on the + * request body, not on the message itself. + */ + manualSkills: { type: [String], default: undefined }, /* attachments: { type: [ diff --git a/packages/data-schemas/src/types/message.ts b/packages/data-schemas/src/types/message.ts index 201e5650efbc..7d607d61d96f 100644 --- a/packages/data-schemas/src/types/message.ts +++ b/packages/data-schemas/src/types/message.ts @@ -44,6 +44,8 @@ export interface IMessage extends Document { encoding?: string; }; attachments?: unknown[]; + /** Skills the user invoked manually via the `$` popover on this turn. UI-only metadata for `ManualSkillPills`. */ + manualSkills?: string[]; expiredAt?: Date | null; createdAt?: Date; updatedAt?: Date;