Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
"klona": "^2.0.6",
"librechat-data-provider": "*",
"lodash": "^4.17.23",
"mammoth": "^1.11.0",
"mathjs": "^15.1.0",
"meilisearch": "^0.38.0",
"memorystore": "^1.6.7",
Expand All @@ -102,6 +103,7 @@
"passport-jwt": "^4.0.1",
"passport-ldapauth": "^3.0.1",
"passport-local": "^1.0.0",
"pdfjs-dist": "^5.4.624",
"rate-limit-redis": "^4.2.0",
"sharp": "^0.33.5",
"tiktoken": "^1.0.15",
Expand All @@ -110,6 +112,7 @@
"undici": "^7.18.2",
"winston": "^3.11.0",
"winston-daily-rotate-file": "^5.0.0",
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz",
"zod": "^3.22.4"
Comment on lines 103 to 116
Copy link

Copilot AI Feb 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pdfjs-dist is declared here with the caret range ^5.4.530, but the workspace lockfile resolves pdfjs-dist to 5.4.624 (pulled in by @librechat/api). Keeping this version spec in sync with the resolved version (and with @librechat/api's peer range) avoids duplicate installs and deduplication surprises, and makes engine requirements more predictable.

Copilot uses AI. Check for mistakes.
},
"devDependencies": {
Expand Down
68 changes: 52 additions & 16 deletions api/server/services/Files/process.js
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,12 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
* @return {Promise<void>}
*/
const createTextFile = async ({ text, bytes, filepath, type = 'text/plain' }) => {
const textBytes = Buffer.byteLength(text, 'utf8');
if (textBytes > 15 * megabyte) {
throw new Error(
`Extracted text from "${file.originalname}" exceeds the 15MB storage limit (${Math.round(textBytes / megabyte)}MB). Try a shorter document.`,
);
}
const fileInfo = removeNullishValues({
text,
bytes,
Expand Down Expand Up @@ -553,29 +559,59 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {

const fileConfig = mergeFileConfig(appConfig.fileConfig);

const shouldUseOCR =
const documentParserMimeTypes = [
'application/pdf',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.ms-excel',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
];

const shouldUseConfiguredOCR =
appConfig?.ocr != null &&
fileConfig.checkType(file.mimetype, fileConfig.ocr?.supportedMimeTypes || []);

if (shouldUseOCR && !(await checkCapability(req, AgentCapabilities.ocr))) {
throw new Error('OCR capability is not enabled for Agents');
} else if (shouldUseOCR) {
const shouldUseDocumentParser =
!shouldUseConfiguredOCR && documentParserMimeTypes.includes(file.mimetype);

const shouldUseOCR = shouldUseConfiguredOCR || shouldUseDocumentParser;

const resolveDocumentText = async () => {
if (shouldUseConfiguredOCR) {
try {
const ocrStrategy = appConfig?.ocr?.strategy ?? FileSources.document_parser;
const { handleFileUpload } = getStrategyFunctions(ocrStrategy);
return await handleFileUpload({ req, file, loadAuthValues });
} catch (err) {
logger.error(
`[processAgentFileUpload] Configured OCR failed for "${file.originalname}", falling back to document_parser:`,
err,
);
}
}
try {
const { handleFileUpload: uploadOCR } = getStrategyFunctions(
appConfig?.ocr?.strategy ?? FileSources.mistral_ocr,
);
const {
text,
bytes,
filepath: ocrFileURL,
} = await uploadOCR({ req, file, loadAuthValues });
return await createTextFile({ text, bytes, filepath: ocrFileURL });
} catch (ocrError) {
const { handleFileUpload } = getStrategyFunctions(FileSources.document_parser);
return await handleFileUpload({ req, file, loadAuthValues });
} catch (err) {
logger.error(
`[processAgentFileUpload] OCR processing failed for file "${file.originalname}", falling back to text extraction:`,
ocrError,
`[processAgentFileUpload] Document parser failed for "${file.originalname}":`,
err,
);
}
};

if (shouldUseConfiguredOCR && !(await checkCapability(req, AgentCapabilities.ocr))) {
throw new Error('OCR capability is not enabled for Agents');
}

if (shouldUseOCR) {
const ocrResult = await resolveDocumentText();
if (ocrResult) {
const { text, bytes, filepath: ocrFileURL } = ocrResult;
return await createTextFile({ text, bytes, filepath: ocrFileURL });
}
throw new Error(
`Unable to extract text from "${file.originalname}". The document may be image-based and requires an OCR service to process.`,
);
}

const shouldUseSTT = fileConfig.checkType(
Expand Down
Loading