Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions api/config/__tests__/parsers.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
const { formatConsoleMeta, redactMessage } = jest.requireActual('../parsers');

describe('formatConsoleMeta', () => {
it('returns empty string when there is no user metadata', () => {
expect(
formatConsoleMeta({
level: 'error',
message: 'oops',
timestamp: '2026-04-18 02:25:22',
}),
).toBe('');
});

it('serializes user-supplied metadata keys', () => {
const meta = formatConsoleMeta({
level: 'error',
message: '[agents:summarize] Summarization LLM call failed',
timestamp: '2026-04-18 02:25:22',
provider: 'azureOpenAI',
model: 'gpt-5.4-mini',
messagesToRefineCount: 42,
});

expect(meta).toContain('"provider":"azureOpenAI"');
expect(meta).toContain('"model":"gpt-5.4-mini"');
expect(meta).toContain('"messagesToRefineCount":42');
});

it('ignores reserved winston keys and underscore-prefixed internals', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'boom',
timestamp: 'ts',
splat: [1, 2],
_internal: 'skip',
userField: 'keep',
});

expect(meta).toBe('{"userField":"keep"}');
});

it('drops empty, null, undefined, function, and symbol values', () => {
const meta = formatConsoleMeta({
level: 'warn',
message: 'noise',
timestamp: 'ts',
empty: '',
nullish: null,
undef: undefined,
fn: () => 1,
sym: Symbol('x'),
kept: 'yes',
});

expect(meta).toBe('{"kept":"yes"}');
});

it('truncates very long string values to avoid console spam', () => {
const longString = 'x'.repeat(5000);
const meta = formatConsoleMeta({
level: 'error',
message: 'long',
timestamp: 'ts',
errorStack: longString,
});

expect(meta.length).toBeLessThan(longString.length);
expect(meta).toContain('...');
});

it('gracefully handles circular objects', () => {
const circular = {};
circular.self = circular;
const meta = formatConsoleMeta({
level: 'error',
message: 'circular',
timestamp: 'ts',
circular,
});

expect(meta).toBe('');
});

it('redacts sensitive patterns inside string metadata values', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'leak test',
timestamp: 'ts',
openaiKey: 'sk-abc123def456',
auth: 'Bearer eyJhbGciOi...tokenvalue',
google: 'https://example.com/?key=AIzaSyXX',
});

expect(meta).not.toContain('sk-abc123def456');
expect(meta).not.toContain('eyJhbGciOi...tokenvalue');
expect(meta).not.toContain('AIzaSyXX');
expect(meta).toContain('sk-[REDACTED]');
expect(meta).toContain('Bearer [REDACTED]');
expect(meta).toContain('key=[REDACTED]');
});

it('redacts multiple occurrences of the same pattern in one value', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'two keys',
timestamp: 'ts',
combined: 'first sk-aaa and then sk-bbb',
});

expect(meta).not.toContain('sk-aaa');
expect(meta).not.toContain('sk-bbb');
expect(meta.match(/sk-\[REDACTED\]/g)?.length).toBe(2);
});
});

describe('redactMessage', () => {
it('redacts sk- keys that are not at line start (inside JSON-like text)', () => {
const input = '{"apiKey":"sk-abc123"}';
expect(redactMessage(input)).toBe('{"apiKey":"sk-[REDACTED]"}');
});

it('redacts all sk- occurrences in a single pass', () => {
const input = 'sk-one sk-two sk-three';
expect(redactMessage(input)).toBe('sk-[REDACTED] sk-[REDACTED] sk-[REDACTED]');
});

it('trims redacted output when trimLength is provided', () => {
const input = 'Bearer supersecretvalue';
expect(redactMessage(input, 10)).toBe('Bearer [RE...');
});

it('returns empty string for falsy input', () => {
expect(redactMessage('')).toBe('');
expect(redactMessage(undefined)).toBe('');
});
});
101 changes: 74 additions & 27 deletions api/config/parsers.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,12 @@ const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENG
const DEBUG_MESSAGE_LENGTH = parseInt(process.env.DEBUG_MESSAGE_LENGTH) || 150;

const sensitiveKeys = [
/^(sk-)[^\s]+/, // OpenAI API key pattern
/(Bearer )[^\s]+/, // Header: Bearer token pattern
/(api-key:? )[^\s]+/, // Header: API key pattern
/(key=)[^\s]+/, // URL query param: sensitive key pattern (Google)
/(sk-)[^\s"']+/g, // OpenAI API key pattern (also catches keys embedded in JSON/quoted strings)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Anchor sk- secret regex to avoid redacting normal words

The new pattern /(sk-)[^\s"']+/g now matches any sk- substring inside ordinary text, so non-secret messages like task-runner failed or mask-token are rewritten to task-[REDACTED].... Because redactMessage is used by both file/error redaction and warn/error console formatting, this introduces broad false-positive redaction that can hide real log content and make troubleshooting harder. Restrict the match to actual token boundaries (or known key formats) instead of any in-word sk- occurrence.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in fdb6b36. Anchored every sensitive pattern with \b, constrained the OpenAI-key charset to the documented alphabet, and added regression tests for both false positives (task-runner, monkey=10) and still-firing true positives.

/(Bearer )[^\s"']+/g, // Header: Bearer token pattern
/(api-key:? )[^\s"']+/g, // Header: API key pattern
/(key=)[^\s"']+/g, // URL query param: sensitive key pattern (Google)
];

/**
* Determines if a given value string is sensitive and returns matching regex patterns.
*
* @param {string} valueStr - The value string to check.
* @returns {Array<RegExp>} An array of regex patterns that match the value string.
*/
function getMatchingSensitivePatterns(valueStr) {
if (valueStr) {
// Filter and return all regex patterns that match the value string
return sensitiveKeys.filter((regex) => regex.test(valueStr));
}
return [];
}

/**
* Redacts sensitive information from a console message and trims it to a specified length if provided.
* @param {string} str - The console message to be redacted.
Expand All @@ -39,16 +25,16 @@ function redactMessage(str, trimLength) {
return '';
}

const patterns = getMatchingSensitivePatterns(str);
patterns.forEach((pattern) => {
str = str.replace(pattern, '$1[REDACTED]');
});
let redacted = str;
for (const pattern of sensitiveKeys) {
redacted = redacted.replace(pattern, '$1[REDACTED]');
}

if (trimLength !== undefined && str.length > trimLength) {
return `${str.substring(0, trimLength)}...`;
if (trimLength !== undefined && redacted.length > trimLength) {
return `${redacted.substring(0, trimLength)}...`;
}

return str;
return redacted;
}

/**
Expand Down Expand Up @@ -96,6 +82,64 @@ const condenseArray = (item) => {
return item;
};

const RESERVED_LOG_KEYS = new Set(['level', 'message', 'timestamp', 'splat']);

/**
* Extracts user-supplied metadata from a winston info object, filtering out
* reserved keys, internal underscore-prefixed keys, and non-serializable values.
*
* @param {Record<string, unknown>} source - The object to extract metadata from.
* @returns {Record<string, unknown> | undefined} - The extracted metadata, or undefined if empty.
*/
function extractMetaObject(source) {
if (source == null || typeof source !== 'object') {
return undefined;
}
const meta = {};
for (const key of Object.keys(source)) {
if (RESERVED_LOG_KEYS.has(key) || key.startsWith('_')) {
continue;
Comment on lines +117 to +119
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Filter numeric splat keys before serializing warn/error metadata

extractMetaObject currently accepts every enumerable key except a small reserved set, so when winston/logform has processed a warn/error call with a primitive extra arg (e.g. logger.warn('Unhandled step creation type:', step.type)), format.splat() can leave character-index keys ("0", "1", …) on info. Because this commit now emits warn/error metadata, those synthetic keys get logged as bogus metadata, producing noisy and misleading output in debug/file logs instead of useful context. Please exclude numeric/index-like keys (or otherwise guard against primitive splat artifacts) when collecting metadata.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 6bf9548. Added a numeric-key filter to extractMetaObject so synthetic splat artifacts (like the '0','1','2' keys that appear when a primitive is passed as the second log arg) don't get emitted as bogus metadata. Regression test covers the case.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep underscore metadata when filtering log fields

extractMetaObject now skips every key that starts with _, and debugTraverse routes debug/warn/error metadata through this filter. That drops legitimate diagnostic fields like Mongo _id (for example logger.debug('doc', {_id, ...})), which is a regression from the previous formatter behavior and makes log correlation harder during debugging incidents.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in d63742a. Dropped the underscore-prefix filter so legitimate fields like MongoDB _id pass through. Only reserved winston keys and numeric splat artifacts remain filtered.

}
const value = source[key];
if (value === undefined || value === null || value === '') {
continue;
}
if (typeof value === 'function' || typeof value === 'symbol') {
continue;
}
meta[key] = value;
}
return Object.keys(meta).length > 0 ? meta : undefined;
}

/**
* Formats the metadata portion of a winston info object as a compact
* single-line JSON trailer, suitable for appending to the console message.
* Returns an empty string when there is no meaningful metadata.
*
* @param {Record<string, unknown>} info - The winston info object.
* @returns {string} - The serialized metadata, or an empty string.
*/
function formatConsoleMeta(info) {
const meta = extractMetaObject(info);
if (!meta) {
return '';
}
try {
return JSON.stringify(meta, (_key, value) => {
if (typeof value !== 'string') {
return value;
}
const safe = redactMessage(value);
return safe.length > CONSOLE_JSON_STRING_LENGTH
? `${safe.substring(0, CONSOLE_JSON_STRING_LENGTH)}...`
: safe;
});
} catch {
return '';
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve non-circular metadata when JSON serialization fails

If any warn/error metadata value is circular (for example, an attached request/response object), JSON.stringify throws and the catch returns an empty string, which drops all metadata including useful scalar fields like provider, model, or status. This undermines the goal of making failures diagnosable, because one problematic field suppresses the entire trailer instead of just being omitted/safely serialized.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 58c2dc8. Added a WeakSet-based circular replacer (replaces repeated visits with [Circular]) and a per-field fallback if the whole-object serialization still fails β€” scalar fields like provider/model/status always land, and only the offending value becomes [Unserializable]. Regression test covers a shape with both scalars and a circular object.

}
}

/**
* Formats log messages for debugging purposes.
* - Truncates long strings within log messages.
Expand All @@ -121,15 +165,17 @@ const debugTraverse = winston.format.printf(({ level, message, timestamp, ...met

let msg = `${timestamp} ${level}: ${truncateLongStrings(message?.trim(), DEBUG_MESSAGE_LENGTH)}`;
try {
if (level !== 'debug') {
const levelStr = typeof level === 'string' ? level : String(level);
const isErrorOrWarn = levelStr.includes('error') || levelStr.includes('warn');
if (level !== 'debug' && !isErrorOrWarn) {
Copy link

Copilot AI Apr 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In debugTraverse, the gate uses level !== 'debug' even though you normalize levelStr right above. If level is non-string (or gets decorated in the future), this check can incorrectly treat debug logs as non-debug and skip metadata printing. Use the normalized levelStr for the comparison (or normalize once and only use that variable) to keep the gating consistent with the includes() checks.

Suggested change
if (level !== 'debug' && !isErrorOrWarn) {
if (levelStr !== 'debug' && !isErrorOrWarn) {

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Redact warn/error metadata before debug traversal

This new gate now sends warn/error records through debugTraverse, but that path appends leaf string metadata with truncateLongStrings(value) and never calls redactMessage, so sensitive values in metadata are written verbatim. With DEBUG_LOGGING defaulting to true, the debug file transport uses this formatter, which means payloads like { auth: 'Bearer ...' } or { openaiKey: 'sk-...' } are now persisted unredacted in debug logs; please redact metadata values in this branch before formatting them.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in c09d293. Apply redactMessage to the final debugTraverse output when level is warn/error. Debug-level behavior is preserved (matches prior art, which did not redact). Added regression tests covering all three cases.

return msg;
}

if (!metadata) {
return msg;
}

const debugValue = metadata[SPLAT_SYMBOL]?.[0];
const debugValue = metadata[SPLAT_SYMBOL]?.[0] ?? extractMetaObject(metadata);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Ignore consumed splat args for warn/error traversal

When warn/error levels now enter debugTraverse, this line prefers metadata[Symbol.for('splat')][0] even if it is just a consumed printf argument string (for example logger.warn('failed for %s', tenant, { provider }) after format.splat()). In that case the formatter appends the tenant a second time (failed for tenant tenant) and never surfaces the structured metadata object, which regresses debug-file/DEBUG_CONSOLE log quality in the winston.format.combine(fileFormat, debugTraverse) path.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in bccbf11. Swapped the preference order in debugTraverse: it now tries the structured metadata object (extractMetaObject) first and only falls back to SPLAT[0] if nothing else is available. That way consumed printf args don't get appended a second time, and the real metadata object still surfaces. Added a regression test for the logger.warn('failed for %s', tenant, { provider }) shape.


if (!debugValue) {
return msg;
Expand Down Expand Up @@ -229,4 +275,5 @@ module.exports = {
redactMessage,
debugTraverse,
jsonTruncateFormat,
formatConsoleMeta,
};
27 changes: 22 additions & 5 deletions api/config/winston.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@ const path = require('path');
const fs = require('fs');
const winston = require('winston');
require('winston-daily-rotate-file');
const { redactFormat, redactMessage, debugTraverse, jsonTruncateFormat } = require('./parsers');
const {
redactFormat,
redactMessage,
debugTraverse,
jsonTruncateFormat,
formatConsoleMeta,
} = require('./parsers');

/**
* Determine the log directory.
Expand Down Expand Up @@ -110,12 +116,23 @@ const consoleFormat = winston.format.combine(
winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
// redactErrors(),
winston.format.printf((info) => {
const message = `${info.timestamp} ${info.level}: ${info.message}`;
if (info.level.includes('error')) {
return redactMessage(message);
const base = `${info.timestamp} ${info.level}: ${info.message}`;
const isError = info.level.includes('error');
const isWarn = info.level.includes('warn');

let line = base;
if (isError || isWarn) {
const metaTrailer = formatConsoleMeta(info);
if (metaTrailer) {
line = `${base} ${metaTrailer}`;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Redact metadata values before appending warn/error output

Appending formatConsoleMeta(info) directly to warn/error lines can leak secrets from structured metadata (for example {"apiKey":"sk-..."}), because the subsequent redactMessage pass does not reliably catch plain sk- tokens inside JSON payloads (its OpenAI-key regex is anchored to the start of the line in api/config/parsers.js). This commit newly exposes metadata on console warn/error paths, so sensitive fields that were previously dropped can now be emitted unredacted.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in e288f7f. Two fixes: (1) dropped the ^ anchor on the sk- regex (it was dead code since every console line starts with a timestamp) and added /g so multi-key JSON payloads get fully scrubbed; (2) moved redaction into formatConsoleMeta so every string value is run through redactMessage before JSON serialization, for defense in depth. Added regression tests for both cases.

}
}

return message;
if (isError || isWarn) {
return redactMessage(line);
}

return line;
}),
);

Expand Down
1 change: 1 addition & 0 deletions api/test/__mocks__/logger.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,6 @@ jest.mock('~/config/parsers', () => {
redactMessage: jest.fn(),
redactFormat: jest.fn(),
debugTraverse: jest.fn(),
formatConsoleMeta: jest.fn(() => ''),
};
});
Loading