diff --git a/api/config/__tests__/parsers.spec.js b/api/config/__tests__/parsers.spec.js new file mode 100644 index 000000000000..f54675ce3aeb --- /dev/null +++ b/api/config/__tests__/parsers.spec.js @@ -0,0 +1,358 @@ +jest.unmock('winston'); + +const { formatConsoleMeta, redactMessage, redactFormat, debugTraverse } = + jest.requireActual('../parsers'); +const SPLAT_SYMBOL = Symbol.for('splat'); + +describe('formatConsoleMeta', () => { + it('returns empty string when there is no user metadata', () => { + expect( + formatConsoleMeta({ + level: 'error', + message: 'oops', + timestamp: '2026-04-18 02:25:22', + }), + ).toBe(''); + }); + + it('serializes user-supplied metadata keys', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: '[agents:summarize] Summarization LLM call failed', + timestamp: '2026-04-18 02:25:22', + provider: 'azureOpenAI', + model: 'gpt-5.4-mini', + messagesToRefineCount: 42, + }); + + expect(meta).toContain('"provider":"azureOpenAI"'); + expect(meta).toContain('"model":"gpt-5.4-mini"'); + expect(meta).toContain('"messagesToRefineCount":42'); + }); + + it('ignores reserved winston keys but preserves legitimate fields like _id', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'boom', + timestamp: 'ts', + splat: [1, 2], + _id: '507f191e810c19729de860ea', + userField: 'keep', + }); + + expect(meta).toContain('"_id":"507f191e810c19729de860ea"'); + expect(meta).toContain('"userField":"keep"'); + expect(meta).not.toContain('"splat"'); + }); + + it('drops numeric-index-like keys (splat artifacts from primitive args)', () => { + const meta = formatConsoleMeta({ + level: 'warn', + message: 'Unhandled step:', + timestamp: 'ts', + 0: 'f', + 1: 'o', + 2: 'o', + realField: 'real', + }); + + expect(meta).toBe('{"realField":"real"}'); + }); + + it('drops empty, null, undefined, function, and symbol values', () => { + const meta = formatConsoleMeta({ + level: 'warn', + message: 'noise', + timestamp: 'ts', + empty: '', + nullish: null, + undef: undefined, + fn: () => 1, + sym: Symbol('x'), + kept: 'yes', + }); + + expect(meta).toBe('{"kept":"yes"}'); + }); + + it('truncates very long string values to avoid console spam', () => { + const longString = 'x'.repeat(5000); + const meta = formatConsoleMeta({ + level: 'error', + message: 'long', + timestamp: 'ts', + errorStack: longString, + }); + + expect(meta.length).toBeLessThan(longString.length); + expect(meta).toContain('...'); + }); + + it('preserves non-circular fields when one value is circular', () => { + const circular = {}; + circular.self = circular; + const meta = formatConsoleMeta({ + level: 'error', + message: 'circular', + timestamp: 'ts', + provider: 'openai', + model: 'gpt-5.4-mini', + circular, + }); + + expect(meta).toContain('"provider":"openai"'); + expect(meta).toContain('"model":"gpt-5.4-mini"'); + expect(meta).toContain('[Circular]'); + }); + + it('falls back to per-field serialization when a value toJSON throws', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'crash', + timestamp: 'ts', + provider: 'azure', + model: 'gpt-5.4-mini', + broken: { + toJSON() { + throw new Error('nope'); + }, + }, + }); + + expect(meta).toContain('"provider":"azure"'); + expect(meta).toContain('"model":"gpt-5.4-mini"'); + expect(meta).toContain('[Unserializable]'); + }); + + it('redacts sensitive strings nested inside metadata objects', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'nested leak', + timestamp: 'ts', + config: { + headers: { + authorization: 'Bearer eyJhbGciOi.nestedTokenValue', + }, + query: 'https://example.com/?key=AIzaNested', + }, + openaiKey: 'sk-outerKey123', + }); + + expect(meta).not.toContain('eyJhbGciOi.nestedTokenValue'); + expect(meta).not.toContain('AIzaNested'); + expect(meta).not.toContain('sk-outerKey123'); + expect(meta).toContain('Bearer [REDACTED]'); + expect(meta).toContain('key=[REDACTED]'); + expect(meta).toContain('sk-[REDACTED]'); + }); + + it('redacts the Azure-style mixed-case Api-Key header', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'azure call', + timestamp: 'ts', + headers: 'Api-Key: 0123456789abcdef', + }); + + expect(meta).not.toContain('0123456789abcdef'); + expect(meta).toContain('Api-Key: [REDACTED]'); + }); + + it('redacts sensitive patterns inside string metadata values', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'leak test', + timestamp: 'ts', + openaiKey: 'sk-abc123def456', + auth: 'Bearer eyJhbGciOi...tokenvalue', + google: 'https://example.com/?key=AIzaSyXX', + }); + + expect(meta).not.toContain('sk-abc123def456'); + expect(meta).not.toContain('eyJhbGciOi...tokenvalue'); + expect(meta).not.toContain('AIzaSyXX'); + expect(meta).toContain('sk-[REDACTED]'); + expect(meta).toContain('Bearer [REDACTED]'); + expect(meta).toContain('key=[REDACTED]'); + }); + + it('redacts multiple occurrences of the same pattern in one value', () => { + const meta = formatConsoleMeta({ + level: 'error', + message: 'two keys', + timestamp: 'ts', + combined: 'first sk-aaa and then sk-bbb', + }); + + expect(meta).not.toContain('sk-aaa'); + expect(meta).not.toContain('sk-bbb'); + expect(meta.match(/sk-\[REDACTED\]/g)?.length).toBe(2); + }); +}); + +describe('redactMessage', () => { + it('redacts sk- keys that are not at line start (inside JSON-like text)', () => { + const input = '{"apiKey":"sk-abc123"}'; + expect(redactMessage(input)).toBe('{"apiKey":"sk-[REDACTED]"}'); + }); + + it('redacts all sk- occurrences in a single pass', () => { + const input = 'sk-one sk-two sk-three'; + expect(redactMessage(input)).toBe('sk-[REDACTED] sk-[REDACTED] sk-[REDACTED]'); + }); + + it('trims redacted output when trimLength is provided', () => { + const input = 'Bearer supersecretvalue'; + expect(redactMessage(input, 10)).toBe('Bearer [RE...'); + }); + + it('returns empty string for falsy input', () => { + expect(redactMessage('')).toBe(''); + expect(redactMessage(undefined)).toBe(''); + }); + + it('does not redact ordinary words that contain "sk-" inside them', () => { + expect(redactMessage('task-runner failed')).toBe('task-runner failed'); + expect(redactMessage('mask-value computed')).toBe('mask-value computed'); + expect(redactMessage('desk-lamp is on')).toBe('desk-lamp is on'); + }); + + it('does not redact words that contain "key=" inside them', () => { + expect(redactMessage('monkey=10 bananas')).toBe('monkey=10 bananas'); + }); + + it('still redacts standalone sk- keys at word boundaries', () => { + expect(redactMessage('token: sk-abc123def')).toBe('token: sk-[REDACTED]'); + expect(redactMessage('"sk-abc123def"')).toBe('"sk-[REDACTED]"'); + }); +}); + +describe('redactFormat', () => { + const runFormat = (info) => redactFormat().transform(info) || info; + + it('redacts info.message for error level before any colorize step runs', () => { + const info = runFormat({ level: 'error', message: 'Bearer secretvalue' }); + expect(info.message).toBe('Bearer [REDACTED]'); + }); + + it('redacts info.message for warn level too (avoids ANSI boundary issues later)', () => { + const info = runFormat({ level: 'warn', message: 'apiKey=sk-abc123def' }); + expect(info.message).toContain('sk-[REDACTED]'); + }); + + it('leaves info.message untouched for info and debug levels', () => { + const infoInfo = runFormat({ level: 'info', message: 'Bearer looksSensitive' }); + expect(infoInfo.message).toBe('Bearer looksSensitive'); + + const infoDebug = runFormat({ level: 'debug', message: 'Bearer looksSensitive' }); + expect(infoDebug.message).toBe('Bearer looksSensitive'); + }); +}); + +describe('debugTraverse', () => { + const runFormatter = (info) => { + const transformed = debugTraverse.transform(info); + const MESSAGE = Symbol.for('message'); + if (transformed && typeof transformed === 'object') { + return transformed[MESSAGE] ?? String(transformed); + } + return String(transformed); + }; + + const buildInfo = (level, meta) => { + const info = { + level, + message: 'test', + timestamp: 'ts', + ...meta, + }; + info[SPLAT_SYMBOL] = [meta]; + return info; + }; + + it('redacts sensitive strings in metadata for error level', () => { + const out = runFormatter(buildInfo('error', { auth: 'Bearer eyJabc123', openai: 'sk-abc123' })); + expect(out).not.toContain('eyJabc123'); + expect(out).not.toContain('sk-abc123'); + expect(out).toContain('Bearer [REDACTED]'); + expect(out).toContain('sk-[REDACTED]'); + }); + + it('redacts sensitive strings in metadata for warn level', () => { + const out = runFormatter(buildInfo('warn', { header: 'Bearer supersecrettoken' })); + expect(out).not.toContain('supersecrettoken'); + expect(out).toContain('Bearer [REDACTED]'); + }); + + it('preserves debug-level metadata unmodified (existing behavior)', () => { + const out = runFormatter(buildInfo('debug', { someField: 'not-sensitive' })); + expect(out).toContain('not-sensitive'); + }); + + it('prefers structured metadata over a consumed printf arg in SPLAT[0]', () => { + const info = { + level: 'warn', + message: 'failed for tenant-7', + timestamp: 'ts', + provider: 'openai', + [SPLAT_SYMBOL]: ['tenant-7', { provider: 'openai' }], + }; + const out = runFormatter(info); + expect(out).toContain('openai'); + const tenantMatches = out.match(/tenant-7/g) ?? []; + expect(tenantMatches.length).toBeLessThanOrEqual(1); + }); + + it('does not duplicate a consumed %s arg when there is no structured metadata', () => { + const info = { + level: 'warn', + message: 'failed for tenant-7', + timestamp: 'ts', + [SPLAT_SYMBOL]: ['tenant-7'], + }; + const out = runFormatter(info); + const tenantMatches = out.match(/tenant-7/g) ?? []; + expect(tenantMatches.length).toBe(1); + }); + + it('omits numeric splat-artifact keys from the traversed output', () => { + const info = { + level: 'error', + message: 'boom', + timestamp: 'ts', + 0: 'x', + 1: 'y', + realField: 'keep', + [SPLAT_SYMBOL]: [{ realField: 'keep' }], + }; + const out = runFormatter(info); + expect(out).toContain('realField'); + expect(out).toContain('keep'); + expect(out).not.toMatch(/^\s*0:/m); + expect(out).not.toMatch(/^\s*1:/m); + }); + + it('surfaces unconsumed primitive SPLAT[0] (no %s in message) for debug level', () => { + const info = { + level: 'debug', + message: 'prefix:', + timestamp: 'ts', + [SPLAT_SYMBOL]: ['detailValueXYZ'], + }; + const out = runFormatter(info); + expect(out).toContain('detailValueXYZ'); + }); + + it('still surfaces array metadata in SPLAT[0] when no object is extracted', () => { + const info = { + level: 'debug', + message: 'list', + timestamp: 'ts', + [SPLAT_SYMBOL]: [['alpha', 'beta', 'gamma']], + }; + const out = runFormatter(info); + expect(out).toContain('alpha'); + expect(out).toContain('beta'); + expect(out).toContain('gamma'); + }); +}); diff --git a/api/config/parsers.js b/api/config/parsers.js index 46394a9ff8d5..111f3f6a7db6 100644 --- a/api/config/parsers.js +++ b/api/config/parsers.js @@ -8,25 +8,16 @@ const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENG const DEBUG_MESSAGE_LENGTH = parseInt(process.env.DEBUG_MESSAGE_LENGTH) || 150; const sensitiveKeys = [ - /^(sk-)[^\s]+/, // OpenAI API key pattern - /(Bearer )[^\s]+/, // Header: Bearer token pattern - /(api-key:? )[^\s]+/, // Header: API key pattern - /(key=)[^\s]+/, // URL query param: sensitive key pattern (Google) + // OpenAI API key: `sk-` at a word boundary, followed by the documented + // charset for keys. `\b` keeps `task-runner`, `mask-value`, etc. from + // being mis-redacted. + /\b(sk-)[a-zA-Z0-9_-]+/g, + /\b(Bearer )[^\s"']+/g, // Header: Bearer token pattern + /\b(api-key:? )[^\s"']+/gi, // Header: API key pattern (case-insensitive; covers `Api-Key:`, `API-KEY:`) + /\b(key=)[^\s"'&]+/g, // URL query param: sensitive key pattern (Google) ]; -/** - * Determines if a given value string is sensitive and returns matching regex patterns. - * - * @param {string} valueStr - The value string to check. - * @returns {Array} An array of regex patterns that match the value string. - */ -function getMatchingSensitivePatterns(valueStr) { - if (valueStr) { - // Filter and return all regex patterns that match the value string - return sensitiveKeys.filter((regex) => regex.test(valueStr)); - } - return []; -} +const NUMERIC_KEY_RE = /^\d+$/; /** * Redacts sensitive information from a console message and trims it to a specified length if provided. @@ -39,28 +30,35 @@ function redactMessage(str, trimLength) { return ''; } - const patterns = getMatchingSensitivePatterns(str); - patterns.forEach((pattern) => { - str = str.replace(pattern, '$1[REDACTED]'); - }); + let redacted = str; + for (const pattern of sensitiveKeys) { + redacted = redacted.replace(pattern, '$1[REDACTED]'); + } - if (trimLength !== undefined && str.length > trimLength) { - return `${str.substring(0, trimLength)}...`; + if (trimLength !== undefined && redacted.length > trimLength) { + return `${redacted.substring(0, trimLength)}...`; } - return str; + return redacted; } /** - * Redacts sensitive information from log messages if the log level is 'error'. + * Redacts sensitive information from log messages when the log level is + * `error` or `warn`. Runs on the raw `info.message` before any colorize / + * splat transforms so the sensitive-token regexes don't have to contend + * with ANSI escape sequences (whose trailing `m` would otherwise defeat + * `\b` anchors). + * * Note: Intentionally mutates the object. * @param {Object} info - The log information object. * @returns {Object} - The modified log information object. */ const redactFormat = winston.format((info) => { - if (info.level === 'error') { - info.message = redactMessage(info.message); - if (info[MESSAGE_SYMBOL]) { + if (info.level === 'error' || info.level === 'warn') { + if (typeof info.message === 'string') { + info.message = redactMessage(info.message); + } + if (typeof info[MESSAGE_SYMBOL] === 'string') { info[MESSAGE_SYMBOL] = redactMessage(info[MESSAGE_SYMBOL]); } } @@ -96,12 +94,120 @@ const condenseArray = (item) => { return item; }; +const RESERVED_LOG_KEYS = new Set(['level', 'message', 'timestamp', 'splat']); + +/** + * Extracts user-supplied metadata from a winston info object. Filters out: + * - Reserved winston keys (`level`, `message`, `timestamp`, `splat`). + * - Numeric-string keys (`"0"`, `"1"`, ...) that `format.splat()` can + * synthesize when a primitive is passed as an extra log argument. + * - Values that are undefined, null, empty strings, functions, or symbols. + * + * Underscore-prefixed keys are intentionally preserved so legitimate + * fields like MongoDB `_id` survive. + * + * @param {Record} source - The object to extract metadata from. + * @returns {Record | undefined} - The extracted metadata, or undefined if empty. + */ +function extractMetaObject(source) { + if (source == null || typeof source !== 'object') { + return undefined; + } + const meta = {}; + for (const key of Object.keys(source)) { + if (RESERVED_LOG_KEYS.has(key)) { + continue; + } + if (NUMERIC_KEY_RE.test(key)) { + continue; + } + const value = source[key]; + if (value === undefined || value === null || value === '') { + continue; + } + if (typeof value === 'function' || typeof value === 'symbol') { + continue; + } + meta[key] = value; + } + return Object.keys(meta).length > 0 ? meta : undefined; +} + +/** + * Formats the metadata portion of a winston info object as a compact + * single-line JSON trailer, suitable for appending to the console message. + * Returns an empty string when there is no meaningful metadata. + * + * @param {Record} info - The winston info object. + * @returns {string} - The serialized metadata, or an empty string. + */ +function formatConsoleMeta(info) { + const meta = extractMetaObject(info); + if (!meta) { + return ''; + } + const seen = new WeakSet(); + const replacer = (_key, value) => { + if (typeof value === 'string') { + const safe = redactMessage(value); + return safe.length > CONSOLE_JSON_STRING_LENGTH + ? `${safe.substring(0, CONSOLE_JSON_STRING_LENGTH)}...` + : safe; + } + if (value !== null && typeof value === 'object') { + if (seen.has(value)) { + return '[Circular]'; + } + seen.add(value); + } + return value; + }; + + try { + return JSON.stringify(meta, replacer); + } catch { + /* + * Fall back to per-field serialization: a single unserializable field + * shouldn't drop every other scalar in the trailer. Scalars are emitted + * as-is; values that still fail serialization are replaced with a + * placeholder so `provider`, `model`, etc. continue to surface. + */ + const parts = []; + for (const key of Object.keys(meta)) { + const perFieldSeen = new WeakSet(); + const perFieldReplacer = (k, value) => { + if (typeof value === 'string') { + return replacer(k, value); + } + if (value !== null && typeof value === 'object') { + if (perFieldSeen.has(value)) { + return '[Circular]'; + } + perFieldSeen.add(value); + } + return value; + }; + try { + parts.push(`${JSON.stringify(key)}:${JSON.stringify(meta[key], perFieldReplacer)}`); + } catch { + parts.push(`${JSON.stringify(key)}:"[Unserializable]"`); + } + } + return parts.length > 0 ? `{${parts.join(',')}}` : ''; + } +} + /** - * Formats log messages for debugging purposes. - * - Truncates long strings within log messages. - * - Condenses arrays by truncating long strings and objects as strings within array items. - * - Redacts sensitive information from log messages if the log level is 'error'. - * - Converts log information object to a formatted string. + * Formats log messages for file and debug-console transports. Three paths: + * - `warn` / `error`: append a compact single-line JSON metadata trailer + * (via `formatConsoleMeta`) and pass the full line through `redactMessage` + * so sensitive patterns are scrubbed. + * - `debug`: perform the detailed multi-line object traversal of + * `SPLAT_SYMBOL[0]`, with long-string truncation and array condensation. + * Redaction on this path is not applied here (debug-file consumers + * historically accept raw detail). + * - Other levels: return the truncated `" : "` + * line with no metadata. * * @param {Object} options - The options for formatting log messages. * @param {string} options.level - The log level. @@ -120,6 +226,23 @@ const debugTraverse = winston.format.printf(({ level, message, timestamp, ...met } let msg = `${timestamp} ${level}: ${truncateLongStrings(message?.trim(), DEBUG_MESSAGE_LENGTH)}`; + const levelStr = typeof level === 'string' ? level : String(level); + const isErrorOrWarn = levelStr.includes('error') || levelStr.includes('warn'); + + /* + * Warn/error follow a simpler code path: append a single-line JSON + * metadata trailer (same shape as the console formatter) and pass the + * result through `redactMessage`. The complex object-traversal below is + * kept for debug level only, where detailed multi-line output is the + * intended behavior and its splat/interpolation interactions were + * already tolerated. + */ + if (isErrorOrWarn) { + const trailer = formatConsoleMeta(metadata); + const line = trailer ? `${msg} ${trailer}` : msg; + return redactMessage(line); + } + try { if (level !== 'debug') { return msg; @@ -229,4 +352,5 @@ module.exports = { redactMessage, debugTraverse, jsonTruncateFormat, + formatConsoleMeta, }; diff --git a/api/config/winston.js b/api/config/winston.js index 93b84f7c463e..c077cf5bf847 100644 --- a/api/config/winston.js +++ b/api/config/winston.js @@ -2,7 +2,13 @@ const path = require('path'); const fs = require('fs'); const winston = require('winston'); require('winston-daily-rotate-file'); -const { redactFormat, redactMessage, debugTraverse, jsonTruncateFormat } = require('./parsers'); +const { + redactFormat, + redactMessage, + debugTraverse, + jsonTruncateFormat, + formatConsoleMeta, +} = require('./parsers'); /** * Determine the log directory. @@ -110,12 +116,16 @@ const consoleFormat = winston.format.combine( winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), // redactErrors(), winston.format.printf((info) => { - const message = `${info.timestamp} ${info.level}: ${info.message}`; - if (info.level.includes('error')) { - return redactMessage(message); + const base = `${info.timestamp} ${info.level}: ${info.message}`; + const isErrorOrWarn = info.level.includes('error') || info.level.includes('warn'); + + if (isErrorOrWarn) { + const metaTrailer = formatConsoleMeta(info); + const line = metaTrailer ? `${base} ${metaTrailer}` : base; + return redactMessage(line); } - return message; + return base; }), ); diff --git a/api/test/__mocks__/logger.js b/api/test/__mocks__/logger.js index 56fb28cbab51..62f9bee93a6d 100644 --- a/api/test/__mocks__/logger.js +++ b/api/test/__mocks__/logger.js @@ -56,5 +56,6 @@ jest.mock('~/config/parsers', () => { redactMessage: jest.fn(), redactFormat: jest.fn(), debugTraverse: jest.fn(), + formatConsoleMeta: jest.fn(() => ''), }; });