Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 195 additions & 0 deletions api/config/__tests__/parsers.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
jest.unmock('winston');

const { formatConsoleMeta, redactMessage, debugTraverse } = jest.requireActual('../parsers');
const SPLAT_SYMBOL = Symbol.for('splat');

describe('formatConsoleMeta', () => {
it('returns empty string when there is no user metadata', () => {
expect(
formatConsoleMeta({
level: 'error',
message: 'oops',
timestamp: '2026-04-18 02:25:22',
}),
).toBe('');
});

it('serializes user-supplied metadata keys', () => {
const meta = formatConsoleMeta({
level: 'error',
message: '[agents:summarize] Summarization LLM call failed',
timestamp: '2026-04-18 02:25:22',
provider: 'azureOpenAI',
model: 'gpt-5.4-mini',
messagesToRefineCount: 42,
});

expect(meta).toContain('"provider":"azureOpenAI"');
expect(meta).toContain('"model":"gpt-5.4-mini"');
expect(meta).toContain('"messagesToRefineCount":42');
});

it('ignores reserved winston keys and underscore-prefixed internals', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'boom',
timestamp: 'ts',
splat: [1, 2],
_internal: 'skip',
userField: 'keep',
});

expect(meta).toBe('{"userField":"keep"}');
});

it('drops empty, null, undefined, function, and symbol values', () => {
const meta = formatConsoleMeta({
level: 'warn',
message: 'noise',
timestamp: 'ts',
empty: '',
nullish: null,
undef: undefined,
fn: () => 1,
sym: Symbol('x'),
kept: 'yes',
});

expect(meta).toBe('{"kept":"yes"}');
});

it('truncates very long string values to avoid console spam', () => {
const longString = 'x'.repeat(5000);
const meta = formatConsoleMeta({
level: 'error',
message: 'long',
timestamp: 'ts',
errorStack: longString,
});

expect(meta.length).toBeLessThan(longString.length);
expect(meta).toContain('...');
});

it('gracefully handles circular objects', () => {
const circular = {};
circular.self = circular;
const meta = formatConsoleMeta({
level: 'error',
message: 'circular',
timestamp: 'ts',
circular,
});

expect(meta).toBe('');
});

it('redacts sensitive patterns inside string metadata values', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'leak test',
timestamp: 'ts',
openaiKey: 'sk-abc123def456',
auth: 'Bearer eyJhbGciOi...tokenvalue',
google: 'https://example.com/?key=AIzaSyXX',
});

expect(meta).not.toContain('sk-abc123def456');
expect(meta).not.toContain('eyJhbGciOi...tokenvalue');
expect(meta).not.toContain('AIzaSyXX');
expect(meta).toContain('sk-[REDACTED]');
expect(meta).toContain('Bearer [REDACTED]');
expect(meta).toContain('key=[REDACTED]');
});

it('redacts multiple occurrences of the same pattern in one value', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'two keys',
timestamp: 'ts',
combined: 'first sk-aaa and then sk-bbb',
});

expect(meta).not.toContain('sk-aaa');
expect(meta).not.toContain('sk-bbb');
expect(meta.match(/sk-\[REDACTED\]/g)?.length).toBe(2);
});
});

describe('redactMessage', () => {
it('redacts sk- keys that are not at line start (inside JSON-like text)', () => {
const input = '{"apiKey":"sk-abc123"}';
expect(redactMessage(input)).toBe('{"apiKey":"sk-[REDACTED]"}');
});

it('redacts all sk- occurrences in a single pass', () => {
const input = 'sk-one sk-two sk-three';
expect(redactMessage(input)).toBe('sk-[REDACTED] sk-[REDACTED] sk-[REDACTED]');
});

it('trims redacted output when trimLength is provided', () => {
const input = 'Bearer supersecretvalue';
expect(redactMessage(input, 10)).toBe('Bearer [RE...');
});

it('returns empty string for falsy input', () => {
expect(redactMessage('')).toBe('');
expect(redactMessage(undefined)).toBe('');
});

it('does not redact ordinary words that contain "sk-" inside them', () => {
expect(redactMessage('task-runner failed')).toBe('task-runner failed');
expect(redactMessage('mask-value computed')).toBe('mask-value computed');
expect(redactMessage('desk-lamp is on')).toBe('desk-lamp is on');
});

it('does not redact words that contain "key=" inside them', () => {
expect(redactMessage('monkey=10 bananas')).toBe('monkey=10 bananas');
});

it('still redacts standalone sk- keys at word boundaries', () => {
expect(redactMessage('token: sk-abc123def')).toBe('token: sk-[REDACTED]');
expect(redactMessage('"sk-abc123def"')).toBe('"sk-[REDACTED]"');
});
});

describe('debugTraverse', () => {
const runFormatter = (info) => {
const transformed = debugTraverse.transform(info);
const MESSAGE = Symbol.for('message');
if (transformed && typeof transformed === 'object') {
return transformed[MESSAGE] ?? String(transformed);
}
return String(transformed);
};

const buildInfo = (level, meta) => {
const info = {
level,
message: 'test',
timestamp: 'ts',
...meta,
};
info[SPLAT_SYMBOL] = [meta];
return info;
};

it('redacts sensitive strings in metadata for error level', () => {
const out = runFormatter(buildInfo('error', { auth: 'Bearer eyJabc123', openai: 'sk-abc123' }));
expect(out).not.toContain('eyJabc123');
expect(out).not.toContain('sk-abc123');
expect(out).toContain('Bearer [REDACTED]');
expect(out).toContain('sk-[REDACTED]');
});

it('redacts sensitive strings in metadata for warn level', () => {
const out = runFormatter(buildInfo('warn', { header: 'Bearer supersecrettoken' }));
expect(out).not.toContain('supersecrettoken');
expect(out).toContain('Bearer [REDACTED]');
});

it('preserves debug-level metadata unmodified (existing behavior)', () => {
const out = runFormatter(buildInfo('debug', { someField: 'not-sensitive' }));
expect(out).toContain('not-sensitive');
});
});
119 changes: 85 additions & 34 deletions api/config/parsers.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,15 @@ const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENG
const DEBUG_MESSAGE_LENGTH = parseInt(process.env.DEBUG_MESSAGE_LENGTH) || 150;

const sensitiveKeys = [
/^(sk-)[^\s]+/, // OpenAI API key pattern
/(Bearer )[^\s]+/, // Header: Bearer token pattern
/(api-key:? )[^\s]+/, // Header: API key pattern
/(key=)[^\s]+/, // URL query param: sensitive key pattern (Google)
// OpenAI API key: `sk-` at a word boundary, followed by the documented
// charset for keys. `\b` keeps `task-runner`, `mask-value`, etc. from
// being mis-redacted.
/\b(sk-)[a-zA-Z0-9_-]+/g,
/\b(Bearer )[^\s"']+/g, // Header: Bearer token pattern
/\b(api-key:? )[^\s"']+/g, // Header: API key pattern
/\b(key=)[^\s"'&]+/g, // URL query param: sensitive key pattern (Google)
Comment on lines +15 to +17
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Redact ANSI-colored secrets without relying on word boundaries

The new \b anchors in the sensitive-token regexes can miss secrets once log text is colorized, because consoleFormat applies colorize({ all: true }) before calling redactMessage, and ANSI color prefixes end with m (a word character). In that flow, values that begin a colored segment (for example Bearer ... at the start of a warn message) may no longer match and can be printed unredacted.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 1e43d63. Extended redactFormat to cover warn in addition to error, operating on the raw pre-colorize info.message so the regex runs on plain text instead of contending with ANSI escapes. The in-printf redactMessage(line) stays as a backstop. Added regression tests for the new warn path and the info/debug no-op path.

];

/**
* Determines if a given value string is sensitive and returns matching regex patterns.
*
* @param {string} valueStr - The value string to check.
* @returns {Array<RegExp>} An array of regex patterns that match the value string.
*/
function getMatchingSensitivePatterns(valueStr) {
if (valueStr) {
// Filter and return all regex patterns that match the value string
return sensitiveKeys.filter((regex) => regex.test(valueStr));
}
return [];
}

/**
* Redacts sensitive information from a console message and trims it to a specified length if provided.
* @param {string} str - The console message to be redacted.
Expand All @@ -39,16 +28,16 @@ function redactMessage(str, trimLength) {
return '';
}

const patterns = getMatchingSensitivePatterns(str);
patterns.forEach((pattern) => {
str = str.replace(pattern, '$1[REDACTED]');
});
let redacted = str;
for (const pattern of sensitiveKeys) {
redacted = redacted.replace(pattern, '$1[REDACTED]');
}

if (trimLength !== undefined && str.length > trimLength) {
return `${str.substring(0, trimLength)}...`;
if (trimLength !== undefined && redacted.length > trimLength) {
return `${redacted.substring(0, trimLength)}...`;
}

return str;
return redacted;
}

/**
Expand Down Expand Up @@ -96,6 +85,64 @@ const condenseArray = (item) => {
return item;
};

const RESERVED_LOG_KEYS = new Set(['level', 'message', 'timestamp', 'splat']);

/**
* Extracts user-supplied metadata from a winston info object, filtering out
* reserved keys, internal underscore-prefixed keys, and non-serializable values.
*
* @param {Record<string, unknown>} source - The object to extract metadata from.
* @returns {Record<string, unknown> | undefined} - The extracted metadata, or undefined if empty.
*/
function extractMetaObject(source) {
if (source == null || typeof source !== 'object') {
return undefined;
}
const meta = {};
for (const key of Object.keys(source)) {
if (RESERVED_LOG_KEYS.has(key) || key.startsWith('_')) {
continue;
Comment on lines +117 to +119
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Filter numeric splat keys before serializing warn/error metadata

extractMetaObject currently accepts every enumerable key except a small reserved set, so when winston/logform has processed a warn/error call with a primitive extra arg (e.g. logger.warn('Unhandled step creation type:', step.type)), format.splat() can leave character-index keys ("0", "1", …) on info. Because this commit now emits warn/error metadata, those synthetic keys get logged as bogus metadata, producing noisy and misleading output in debug/file logs instead of useful context. Please exclude numeric/index-like keys (or otherwise guard against primitive splat artifacts) when collecting metadata.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 6bf9548. Added a numeric-key filter to extractMetaObject so synthetic splat artifacts (like the '0','1','2' keys that appear when a primitive is passed as the second log arg) don't get emitted as bogus metadata. Regression test covers the case.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep underscore metadata when filtering log fields

extractMetaObject now skips every key that starts with _, and debugTraverse routes debug/warn/error metadata through this filter. That drops legitimate diagnostic fields like Mongo _id (for example logger.debug('doc', {_id, ...})), which is a regression from the previous formatter behavior and makes log correlation harder during debugging incidents.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in d63742a. Dropped the underscore-prefix filter so legitimate fields like MongoDB _id pass through. Only reserved winston keys and numeric splat artifacts remain filtered.

}
const value = source[key];
if (value === undefined || value === null || value === '') {
continue;
}
if (typeof value === 'function' || typeof value === 'symbol') {
continue;
}
meta[key] = value;
}
return Object.keys(meta).length > 0 ? meta : undefined;
}

/**
* Formats the metadata portion of a winston info object as a compact
* single-line JSON trailer, suitable for appending to the console message.
* Returns an empty string when there is no meaningful metadata.
*
* @param {Record<string, unknown>} info - The winston info object.
* @returns {string} - The serialized metadata, or an empty string.
*/
function formatConsoleMeta(info) {
const meta = extractMetaObject(info);
if (!meta) {
return '';
}
try {
return JSON.stringify(meta, (_key, value) => {
if (typeof value !== 'string') {
return value;
}
const safe = redactMessage(value);
return safe.length > CONSOLE_JSON_STRING_LENGTH
? `${safe.substring(0, CONSOLE_JSON_STRING_LENGTH)}...`
: safe;
});
} catch {
return '';
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve non-circular metadata when JSON serialization fails

If any warn/error metadata value is circular (for example, an attached request/response object), JSON.stringify throws and the catch returns an empty string, which drops all metadata including useful scalar fields like provider, model, or status. This undermines the goal of making failures diagnosable, because one problematic field suppresses the entire trailer instead of just being omitted/safely serialized.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 58c2dc8. Added a WeakSet-based circular replacer (replaces repeated visits with [Circular]) and a per-field fallback if the whole-object serialization still fails β€” scalar fields like provider/model/status always land, and only the offending value becomes [Unserializable]. Regression test covers a shape with both scalars and a circular object.

}
}

/**
* Formats log messages for debugging purposes.
* - Truncates long strings within log messages.
Expand All @@ -120,28 +167,31 @@ const debugTraverse = winston.format.printf(({ level, message, timestamp, ...met
}

let msg = `${timestamp} ${level}: ${truncateLongStrings(message?.trim(), DEBUG_MESSAGE_LENGTH)}`;
const levelStr = typeof level === 'string' ? level : String(level);
const isErrorOrWarn = levelStr.includes('error') || levelStr.includes('warn');
const finalize = (text) => (isErrorOrWarn ? redactMessage(text) : text);
try {
if (level !== 'debug') {
return msg;
if (level !== 'debug' && !isErrorOrWarn) {
Copy link

Copilot AI Apr 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In debugTraverse, the gate uses level !== 'debug' even though you normalize levelStr right above. If level is non-string (or gets decorated in the future), this check can incorrectly treat debug logs as non-debug and skip metadata printing. Use the normalized levelStr for the comparison (or normalize once and only use that variable) to keep the gating consistent with the includes() checks.

Suggested change
if (level !== 'debug' && !isErrorOrWarn) {
if (levelStr !== 'debug' && !isErrorOrWarn) {

Copilot uses AI. Check for mistakes.
return finalize(msg);
}

if (!metadata) {
return msg;
return finalize(msg);
}

const debugValue = metadata[SPLAT_SYMBOL]?.[0];
const debugValue = metadata[SPLAT_SYMBOL]?.[0] ?? extractMetaObject(metadata);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Ignore consumed splat args for warn/error traversal

When warn/error levels now enter debugTraverse, this line prefers metadata[Symbol.for('splat')][0] even if it is just a consumed printf argument string (for example logger.warn('failed for %s', tenant, { provider }) after format.splat()). In that case the formatter appends the tenant a second time (failed for tenant tenant) and never surfaces the structured metadata object, which regresses debug-file/DEBUG_CONSOLE log quality in the winston.format.combine(fileFormat, debugTraverse) path.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in bccbf11. Swapped the preference order in debugTraverse: it now tries the structured metadata object (extractMetaObject) first and only falls back to SPLAT[0] if nothing else is available. That way consumed printf args don't get appended a second time, and the real metadata object still surfaces. Added a regression test for the logger.warn('failed for %s', tenant, { provider }) shape.


if (!debugValue) {
return msg;
return finalize(msg);
}

if (debugValue && Array.isArray(debugValue)) {
msg += `\n${JSON.stringify(debugValue.map(condenseArray))}`;
return msg;
return finalize(msg);
}

if (typeof debugValue !== 'object') {
return (msg += ` ${debugValue}`);
return finalize((msg += ` ${debugValue}`));
}

msg += '\n{';
Expand Down Expand Up @@ -182,9 +232,9 @@ const debugTraverse = winston.format.printf(({ level, message, timestamp, ...met
});

msg += '\n}';
return msg;
return finalize(msg);
} catch (e) {
return (msg += `\n[LOGGER PARSING ERROR] ${e.message}`);
return finalize((msg += `\n[LOGGER PARSING ERROR] ${e.message}`));
}
});

Expand Down Expand Up @@ -229,4 +279,5 @@ module.exports = {
redactMessage,
debugTraverse,
jsonTruncateFormat,
formatConsoleMeta,
};
Loading
Loading