Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions packages/api/src/agents/__tests__/initialize.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1231,3 +1231,130 @@ describe('initializeAgent — execute_code capability expansion', () => {
).rejects.toThrow(/google_tool_conflict/);
});
});

describe('initializeAgent — code-generated file thread filter (regression)', () => {
  /* Sibling-branched conversation regression. Pre-fix the priming chain
   * filtered code-generated files by `messageId IN threadMessageIds`,
   * which excluded files whose creator messageId lived on a sibling
   * branch (preserved on the File record by `processCodeOutput` for
   * provenance). The fix anchors `getCodeGeneratedFiles` on
   * `threadFileIds` instead — file_ids referenced by the thread's
   * `messages.files[]` arrays. This block locks the new contract at
   * the integration boundary: assert the right call shape, not the
   * underlying Mongo query (covered separately by
   * `data-schemas/methods/file.spec`). */

  beforeEach(() => {
    jest.clearAllMocks();
    mockExtractLibreChatParams.mockReset();
    mockGetThreadData.mockReset();
  });

  /** Builds the mocks for an OpenAI agent that has the `execute_code` tool enabled. */
  function setupExecuteCodeAgent() {
    const { agent, req, res, loadTools, db } = createMocks({
      provider: Providers.OPENAI,
    });
    agent.tools = ['execute_code'];

    /* `resendFiles: true` is the gate that opens the thread-file
     * priming block in initialize.ts. Without it the whole
     * codeGeneratedFiles fetch is skipped. */
    mockExtractLibreChatParams.mockReturnValue({
      resendFiles: true,
      maxContextTokens: undefined,
      modelOptions: { model: 'test-model' },
    });

    return { agent, req, res, loadTools, db };
  }

  /**
   * Runs `initializeAgent` for conversation `conv-1` with parent `msgN`,
   * with `getThreadData` mocked to report the given `fileIds` for the
   * thread `[msgN, msgRoot]`.
   *
   * @param fileIds - file_ids the mocked thread walk should report.
   * @returns the db spies so each test can assert on the call shape.
   */
  async function initializeWithThreadFileIds(fileIds: string[]) {
    const { agent, req, res, loadTools, db } = setupExecuteCodeAgent();

    /* Simulate the branched scenario: parent message N is a sibling
     * regeneration. `getThreadData` walks back from N and collects
     * messageIds [N, root] plus fileIds referenced by N.files[]. */
    mockGetThreadData.mockReturnValue({
      messageIds: ['msgN', 'msgRoot'],
      fileIds,
    });

    const getCodeGeneratedFiles = jest.fn().mockResolvedValue([]);
    const getUserCodeFiles = jest.fn().mockResolvedValue([]);
    /* Must resolve to a non-empty array: initialize.ts only calls
     * `getThreadData` when `getMessages` returned at least one message. */
    const getMessages = jest
      .fn()
      .mockResolvedValue([{ messageId: 'msgN', parentMessageId: 'msgRoot', files: [] }]);

    const dbWithThreadCalls: InitializeAgentDbMethods = {
      ...db,
      getMessages,
      getCodeGeneratedFiles,
      getUserCodeFiles,
    };

    await initializeAgent(
      {
        req,
        res,
        agent,
        loadTools,
        endpointOption: { endpoint: EModelEndpoint.agents },
        conversationId: 'conv-1',
        parentMessageId: 'msgN',
        allowedProviders: new Set([Providers.OPENAI]),
        isInitialAgent: true,
        codeEnvAvailable: true,
      },
      dbWithThreadCalls,
    );

    return { getCodeGeneratedFiles, getUserCodeFiles };
  }

  it('passes threadFileIds (not threadMessageIds) to getCodeGeneratedFiles', async () => {
    const threadFileIds = ['file-pptx-skill', 'file-output-csv'];
    const { getCodeGeneratedFiles, getUserCodeFiles } =
      await initializeWithThreadFileIds(threadFileIds);

    expect(getCodeGeneratedFiles).toHaveBeenCalledTimes(1);
    expect(getCodeGeneratedFiles).toHaveBeenCalledWith('conv-1', [
      'file-pptx-skill',
      'file-output-csv',
    ]);
    /* Both functions now share the same primary anchor — symmetric
     * design that closes the sibling-branch hole. */
    expect(getUserCodeFiles).toHaveBeenCalledWith(['file-pptx-skill', 'file-output-csv']);
  });

  it('forwards an empty threadFileIds to getCodeGeneratedFiles and skips getUserCodeFiles', async () => {
    /* Empty `messages.files[]` across the thread — nothing to look up.
     * `initializeAgent` still invokes `getCodeGeneratedFiles`, handing it
     * the empty array; returning early for empty input is the db
     * method's own responsibility. */
    const { getCodeGeneratedFiles, getUserCodeFiles } = await initializeWithThreadFileIds([]);

    expect(getCodeGeneratedFiles).toHaveBeenCalledTimes(1);
    expect(getCodeGeneratedFiles).toHaveBeenCalledWith('conv-1', []);
    /* `getUserCodeFiles` is gated on a non-empty array at the call site,
     * so it shouldn't be invoked at all. `getCodeGeneratedFiles`'s own
     * empty-guard is exercised by data-schemas tests. */
    expect(getUserCodeFiles).not.toHaveBeenCalled();
  });
});
28 changes: 18 additions & 10 deletions packages/api/src/agents/initialize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,10 @@ export interface InitializeAgentDbMethods extends EndpointDbMethods {
getToolFilesByIds: (fileIds: string[], toolSet: Set<EToolResources>) => Promise<unknown[]>;
/** Get conversation file IDs */
getConvoFiles: (conversationId: string) => Promise<string[] | null>;
/** Get code-generated files by conversation ID and optional message IDs */
getCodeGeneratedFiles?: (conversationId: string, messageIds?: string[]) => Promise<unknown[]>;
/** Get code-generated files by conversation ID and the file_ids
* referenced from messages in the current thread (collected via
* `messages.files[].file_id` during thread walk). */
getCodeGeneratedFiles?: (conversationId: string, threadFileIds?: string[]) => Promise<unknown[]>;
/** Get user-uploaded execute_code files by file IDs (from message.files in thread) */
getUserCodeFiles?: (fileIds: string[]) => Promise<unknown[]>;
/** Get messages for a conversation (supports select for field projection) */
Expand Down Expand Up @@ -423,7 +425,6 @@ export async function initializeAgent(
let userCodeFiles: IMongoFile[] = [];

if (toolResourceSet.has(EToolResources.execute_code)) {
let threadMessageIds: string[] | undefined;
let threadFileIds: string[] | undefined;

if (parentMessageId && parentMessageId !== Constants.NO_PARENT && db.getMessages) {
Expand All @@ -433,22 +434,29 @@ export async function initializeAgent(
'messageId parentMessageId files',
);
if (messages && messages.length > 0) {
/** Single O(n) pass: build Map, traverse thread, collect both IDs */
const threadData = getThreadData(messages, parentMessageId);
threadMessageIds = threadData.messageIds;
threadFileIds = threadData.fileIds;
/** Walk the parent chain and collect file_ids referenced by
* any message in the thread (`messages.files[].file_id`).
* Used as the primary anchor for both
* `getCodeGeneratedFiles` and `getUserCodeFiles` —
* message ids no longer needed at this layer. */
threadFileIds = getThreadData(messages, parentMessageId).fileIds;
}
}

/** Code-generated files (context: execute_code) filtered by messageId */
/** Code-generated and user-uploaded execute_code files share the
* same primary anchor: file_ids referenced by messages in the
* current thread. The two queries differ only by `context`
* (`execute_code` for generated outputs, others for uploads).
* Anchoring both on `threadFileIds` reaches files regardless of
* which sibling first generated them — see `getCodeGeneratedFiles`
* for the branched-conversation rationale. */
if (db.getCodeGeneratedFiles) {
codeGeneratedFiles = (await db.getCodeGeneratedFiles(
conversationId,
threadMessageIds,
threadFileIds,
)) as IMongoFile[];
}

/** User-uploaded execute_code files (context: agents/message_attachment) from thread messages */
if (db.getUserCodeFiles && threadFileIds && threadFileIds.length > 0) {
userCodeFiles = (await db.getUserCodeFiles(threadFileIds)) as IMongoFile[];
}
Expand Down
148 changes: 148 additions & 0 deletions packages/data-schemas/src/methods/file.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,154 @@ describe('File Methods', () => {
});
});

describe('getCodeGeneratedFiles', () => {
  /* The function filters by `file_id IN threadFileIds` — the file_ids
   * referenced by messages in the current conversation thread —
   * rather than by `messageId IN threadMessageIds`. The change
   * matters when a code-output file is shared across sibling branches
   * (regenerations); the File record's own `messageId` points at
   * whichever sibling FIRST created it (preserved deliberately by
   * processCodeOutput for provenance), but `threadFileIds` reaches
   * any sibling that references the file via `messages.files[]`. */

  /**
   * Persists one File record owned by a fresh user and returns its file_id.
   * Defaults describe a code-generated CSV (`context: execute_code`);
   * each test overrides only the fields its scenario depends on.
   *
   * @param options.conversationId - conversation the record belongs to.
   * @param options.messageId - creator message stored on the record.
   * @param options.filename - stored name; also used to build `filepath`.
   * @param options.type - MIME type stored on the record.
   * @param options.context - file context the query filters on.
   * @returns the generated file_id of the created record.
   */
  async function seedFile(options: {
    conversationId: string;
    messageId: string;
    filename?: string;
    type?: string;
    context?: FileContext;
  }): Promise<string> {
    const userId = new mongoose.Types.ObjectId();
    const fileId = uuidv4();
    const filename = options.filename ?? 'output.csv';

    await fileMethods.createFile({
      file_id: fileId,
      user: userId,
      conversationId: options.conversationId,
      messageId: options.messageId,
      filename,
      filepath: `/uploads/${filename}`,
      type: options.type ?? 'text/csv',
      bytes: 100,
      context: options.context ?? FileContext.execute_code,
      metadata: {
        codeEnvRef: {
          kind: 'user',
          id: userId.toString(),
          storage_session_id: 'sess',
          file_id: fileId,
        },
      },
    });

    return fileId;
  }

  it('finds a code-output file referenced by the current thread', async () => {
    const conversationId = uuidv4();
    const fileId = await seedFile({ conversationId, messageId: 'msg-original-creator' });

    const files = await fileMethods.getCodeGeneratedFiles(conversationId, [fileId]);
    expect(files).toHaveLength(1);
    expect(files[0].file_id).toBe(fileId);
  });

  it('reaches a file whose creator messageId is on a sibling branch (regression)', async () => {
    /* Branched-conversation case: sibling A creates the file (its
     * messageId is preserved on the File record), sibling N
     * recreates the same filename — claimCodeFile finds the existing
     * record and the messageId stays at A. The current thread (parent
     * = N) doesn't include A. Filtering by threadFileIds (which
     * includes the file_id N's message references) reaches it. */
    const conversationId = uuidv4();
    /* The file's messageId points at sibling A — NOT in the
     * current thread [siblingN, root]. The old `messageId IN`
     * filter would have excluded the file here. */
    const fileId = await seedFile({ conversationId, messageId: 'siblingA' });

    const files = await fileMethods.getCodeGeneratedFiles(conversationId, [fileId]);
    expect(files).toHaveLength(1);
    expect(files[0].file_id).toBe(fileId);
  });

  it('returns empty when threadFileIds is missing or empty', async () => {
    const conversationId = uuidv4();
    expect(await fileMethods.getCodeGeneratedFiles(conversationId)).toEqual([]);
    expect(await fileMethods.getCodeGeneratedFiles(conversationId, [])).toEqual([]);
  });

  it('does not cross conversation boundaries even with matching file_id', async () => {
    /* The record lives in 'other-conv'; querying 'this-conv' with the
     * same file_id must not return it. */
    const fileId = await seedFile({ conversationId: 'other-conv', messageId: 'msg-creator' });

    const files = await fileMethods.getCodeGeneratedFiles('this-conv', [fileId]);
    expect(files).toEqual([]);
  });

  it('excludes non-execute_code files even when file_id matches', async () => {
    /* `threadFileIds` is collected from `messages.files[]`, which
     * includes files of every context. The `context: execute_code`
     * filter prevents a user-uploaded chat file from being mistakenly
     * fetched via this function (it'd go through getUserCodeFiles
     * instead). */
    const conversationId = uuidv4();
    const fileId = await seedFile({
      conversationId,
      messageId: 'msg-1',
      filename: 'note.txt',
      type: 'text/plain',
      context: FileContext.message_attachment,
    });

    const files = await fileMethods.getCodeGeneratedFiles(conversationId, [fileId]);
    expect(files).toEqual([]);
  });
});

describe('updateFile', () => {
it('should update file data and remove TTL', async () => {
const fileId = uuidv4();
Expand Down
Loading
Loading