Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 180 additions & 0 deletions api/config/__tests__/parsers.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
jest.unmock('winston');

const { formatConsoleMeta, redactMessage, debugTraverse } = jest.requireActual('../parsers');
const SPLAT_SYMBOL = Symbol.for('splat');

describe('formatConsoleMeta', () => {
it('returns empty string when there is no user metadata', () => {
expect(
formatConsoleMeta({
level: 'error',
message: 'oops',
timestamp: '2026-04-18 02:25:22',
}),
).toBe('');
});

it('serializes user-supplied metadata keys', () => {
const meta = formatConsoleMeta({
level: 'error',
message: '[agents:summarize] Summarization LLM call failed',
timestamp: '2026-04-18 02:25:22',
provider: 'azureOpenAI',
model: 'gpt-5.4-mini',
messagesToRefineCount: 42,
});

expect(meta).toContain('"provider":"azureOpenAI"');
expect(meta).toContain('"model":"gpt-5.4-mini"');
expect(meta).toContain('"messagesToRefineCount":42');
});

it('ignores reserved winston keys and underscore-prefixed internals', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'boom',
timestamp: 'ts',
splat: [1, 2],
_internal: 'skip',
userField: 'keep',
});

expect(meta).toBe('{"userField":"keep"}');
});

it('drops empty, null, undefined, function, and symbol values', () => {
const meta = formatConsoleMeta({
level: 'warn',
message: 'noise',
timestamp: 'ts',
empty: '',
nullish: null,
undef: undefined,
fn: () => 1,
sym: Symbol('x'),
kept: 'yes',
});

expect(meta).toBe('{"kept":"yes"}');
});

it('truncates very long string values to avoid console spam', () => {
const longString = 'x'.repeat(5000);
const meta = formatConsoleMeta({
level: 'error',
message: 'long',
timestamp: 'ts',
errorStack: longString,
});

expect(meta.length).toBeLessThan(longString.length);
expect(meta).toContain('...');
});

it('gracefully handles circular objects', () => {
const circular = {};
circular.self = circular;
const meta = formatConsoleMeta({
level: 'error',
message: 'circular',
timestamp: 'ts',
circular,
});

expect(meta).toBe('');
});

it('redacts sensitive patterns inside string metadata values', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'leak test',
timestamp: 'ts',
openaiKey: 'sk-abc123def456',
auth: 'Bearer eyJhbGciOi...tokenvalue',
google: 'https://example.com/?key=AIzaSyXX',
});

expect(meta).not.toContain('sk-abc123def456');
expect(meta).not.toContain('eyJhbGciOi...tokenvalue');
expect(meta).not.toContain('AIzaSyXX');
expect(meta).toContain('sk-[REDACTED]');
expect(meta).toContain('Bearer [REDACTED]');
expect(meta).toContain('key=[REDACTED]');
});

it('redacts multiple occurrences of the same pattern in one value', () => {
const meta = formatConsoleMeta({
level: 'error',
message: 'two keys',
timestamp: 'ts',
combined: 'first sk-aaa and then sk-bbb',
});

expect(meta).not.toContain('sk-aaa');
expect(meta).not.toContain('sk-bbb');
expect(meta.match(/sk-\[REDACTED\]/g)?.length).toBe(2);
});
});

describe('redactMessage', () => {
it('redacts sk- keys that are not at line start (inside JSON-like text)', () => {
const input = '{"apiKey":"sk-abc123"}';
expect(redactMessage(input)).toBe('{"apiKey":"sk-[REDACTED]"}');
});

it('redacts all sk- occurrences in a single pass', () => {
const input = 'sk-one sk-two sk-three';
expect(redactMessage(input)).toBe('sk-[REDACTED] sk-[REDACTED] sk-[REDACTED]');
});

it('trims redacted output when trimLength is provided', () => {
const input = 'Bearer supersecretvalue';
expect(redactMessage(input, 10)).toBe('Bearer [RE...');
});

it('returns empty string for falsy input', () => {
expect(redactMessage('')).toBe('');
expect(redactMessage(undefined)).toBe('');
});
});

describe('debugTraverse', () => {
const runFormatter = (info) => {
const transformed = debugTraverse.transform(info);
const MESSAGE = Symbol.for('message');
if (transformed && typeof transformed === 'object') {
return transformed[MESSAGE] ?? String(transformed);
}
return String(transformed);
};

const buildInfo = (level, meta) => {
const info = {
level,
message: 'test',
timestamp: 'ts',
...meta,
};
info[SPLAT_SYMBOL] = [meta];
return info;
};

it('redacts sensitive strings in metadata for error level', () => {
const out = runFormatter(buildInfo('error', { auth: 'Bearer eyJabc123', openai: 'sk-abc123' }));
expect(out).not.toContain('eyJabc123');
expect(out).not.toContain('sk-abc123');
expect(out).toContain('Bearer [REDACTED]');
expect(out).toContain('sk-[REDACTED]');
});

it('redacts sensitive strings in metadata for warn level', () => {
const out = runFormatter(buildInfo('warn', { header: 'Bearer supersecrettoken' }));
expect(out).not.toContain('supersecrettoken');
expect(out).toContain('Bearer [REDACTED]');
});

it('preserves debug-level metadata unmodified (existing behavior)', () => {
const out = runFormatter(buildInfo('debug', { someField: 'not-sensitive' }));
expect(out).toContain('not-sensitive');
});
});
116 changes: 82 additions & 34 deletions api/config/parsers.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,12 @@ const CONSOLE_JSON_STRING_LENGTH = parseInt(process.env.CONSOLE_JSON_STRING_LENG
const DEBUG_MESSAGE_LENGTH = parseInt(process.env.DEBUG_MESSAGE_LENGTH) || 150;

const sensitiveKeys = [
/^(sk-)[^\s]+/, // OpenAI API key pattern
/(Bearer )[^\s]+/, // Header: Bearer token pattern
/(api-key:? )[^\s]+/, // Header: API key pattern
/(key=)[^\s]+/, // URL query param: sensitive key pattern (Google)
/(sk-)[^\s"']+/g, // OpenAI API key pattern (also catches keys embedded in JSON/quoted strings)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Anchor sk- secret regex to avoid redacting normal words

The new pattern /(sk-)[^\s"']+/g now matches any sk- substring inside ordinary text, so non-secret messages like task-runner failed or mask-token are rewritten to task-[REDACTED].... Because redactMessage is used by both file/error redaction and warn/error console formatting, this introduces broad false-positive redaction that can hide real log content and make troubleshooting harder. Restrict the match to actual token boundaries (or known key formats) instead of any in-word sk- occurrence.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in fdb6b36. Anchored every sensitive pattern with \b, constrained the OpenAI-key charset to the documented alphabet, and added regression tests for both false positives (task-runner, monkey=10) and still-firing true positives.

/(Bearer )[^\s"']+/g, // Header: Bearer token pattern
/(api-key:? )[^\s"']+/g, // Header: API key pattern
/(key=)[^\s"']+/g, // URL query param: sensitive key pattern (Google)
];

/**
* Determines if a given value string is sensitive and returns matching regex patterns.
*
* @param {string} valueStr - The value string to check.
* @returns {Array<RegExp>} An array of regex patterns that match the value string.
*/
function getMatchingSensitivePatterns(valueStr) {
if (valueStr) {
// Filter and return all regex patterns that match the value string
return sensitiveKeys.filter((regex) => regex.test(valueStr));
}
return [];
}

/**
* Redacts sensitive information from a console message and trims it to a specified length if provided.
* @param {string} str - The console message to be redacted.
Expand All @@ -39,16 +25,16 @@ function redactMessage(str, trimLength) {
return '';
}

const patterns = getMatchingSensitivePatterns(str);
patterns.forEach((pattern) => {
str = str.replace(pattern, '$1[REDACTED]');
});
let redacted = str;
for (const pattern of sensitiveKeys) {
redacted = redacted.replace(pattern, '$1[REDACTED]');
}

if (trimLength !== undefined && str.length > trimLength) {
return `${str.substring(0, trimLength)}...`;
if (trimLength !== undefined && redacted.length > trimLength) {
return `${redacted.substring(0, trimLength)}...`;
}

return str;
return redacted;
}

/**
Expand Down Expand Up @@ -96,6 +82,64 @@ const condenseArray = (item) => {
return item;
};

const RESERVED_LOG_KEYS = new Set(['level', 'message', 'timestamp', 'splat']);

/**
* Extracts user-supplied metadata from a winston info object, filtering out
* reserved keys, internal underscore-prefixed keys, and non-serializable values.
*
* @param {Record<string, unknown>} source - The object to extract metadata from.
* @returns {Record<string, unknown> | undefined} - The extracted metadata, or undefined if empty.
*/
function extractMetaObject(source) {
if (source == null || typeof source !== 'object') {
return undefined;
}
const meta = {};
for (const key of Object.keys(source)) {
if (RESERVED_LOG_KEYS.has(key) || key.startsWith('_')) {
continue;
Comment on lines +117 to +119
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Filter numeric splat keys before serializing warn/error metadata

extractMetaObject currently accepts every enumerable key except a small reserved set, so when winston/logform has processed a warn/error call with a primitive extra arg (e.g. logger.warn('Unhandled step creation type:', step.type)), format.splat() can leave character-index keys ("0", "1", …) on info. Because this commit now emits warn/error metadata, those synthetic keys get logged as bogus metadata, producing noisy and misleading output in debug/file logs instead of useful context. Please exclude numeric/index-like keys (or otherwise guard against primitive splat artifacts) when collecting metadata.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 6bf9548. Added a numeric-key filter to extractMetaObject so synthetic splat artifacts (like the '0','1','2' keys that appear when a primitive is passed as the second log arg) don't get emitted as bogus metadata. Regression test covers the case.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep underscore metadata when filtering log fields

extractMetaObject now skips every key that starts with _, and debugTraverse routes debug/warn/error metadata through this filter. That drops legitimate diagnostic fields like Mongo _id (for example logger.debug('doc', {_id, ...})), which is a regression from the previous formatter behavior and makes log correlation harder during debugging incidents.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in d63742a. Dropped the underscore-prefix filter so legitimate fields like MongoDB _id pass through. Only reserved winston keys and numeric splat artifacts remain filtered.

}
const value = source[key];
if (value === undefined || value === null || value === '') {
continue;
}
if (typeof value === 'function' || typeof value === 'symbol') {
continue;
}
meta[key] = value;
}
return Object.keys(meta).length > 0 ? meta : undefined;
}

/**
* Formats the metadata portion of a winston info object as a compact
* single-line JSON trailer, suitable for appending to the console message.
* Returns an empty string when there is no meaningful metadata.
*
* @param {Record<string, unknown>} info - The winston info object.
* @returns {string} - The serialized metadata, or an empty string.
*/
function formatConsoleMeta(info) {
const meta = extractMetaObject(info);
if (!meta) {
return '';
}
try {
return JSON.stringify(meta, (_key, value) => {
if (typeof value !== 'string') {
return value;
}
const safe = redactMessage(value);
return safe.length > CONSOLE_JSON_STRING_LENGTH
? `${safe.substring(0, CONSOLE_JSON_STRING_LENGTH)}...`
: safe;
});
} catch {
return '';
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve non-circular metadata when JSON serialization fails

If any warn/error metadata value is circular (for example, an attached request/response object), JSON.stringify throws and the catch returns an empty string, which drops all metadata including useful scalar fields like provider, model, or status. This undermines the goal of making failures diagnosable, because one problematic field suppresses the entire trailer instead of just being omitted/safely serialized.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 58c2dc8. Added a WeakSet-based circular replacer (replaces repeated visits with [Circular]) and a per-field fallback if the whole-object serialization still fails β€” scalar fields like provider/model/status always land, and only the offending value becomes [Unserializable]. Regression test covers a shape with both scalars and a circular object.

}
}

/**
* Formats log messages for debugging purposes.
* - Truncates long strings within log messages.
Expand All @@ -120,28 +164,31 @@ const debugTraverse = winston.format.printf(({ level, message, timestamp, ...met
}

let msg = `${timestamp} ${level}: ${truncateLongStrings(message?.trim(), DEBUG_MESSAGE_LENGTH)}`;
const levelStr = typeof level === 'string' ? level : String(level);
const isErrorOrWarn = levelStr.includes('error') || levelStr.includes('warn');
const finalize = (text) => (isErrorOrWarn ? redactMessage(text) : text);
try {
if (level !== 'debug') {
return msg;
if (level !== 'debug' && !isErrorOrWarn) {
Copy link

Copilot AI Apr 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In debugTraverse, the gate uses level !== 'debug' even though you normalize levelStr right above. If level is non-string (or gets decorated in the future), this check can incorrectly treat debug logs as non-debug and skip metadata printing. Use the normalized levelStr for the comparison (or normalize once and only use that variable) to keep the gating consistent with the includes() checks.

Suggested change
if (level !== 'debug' && !isErrorOrWarn) {
if (levelStr !== 'debug' && !isErrorOrWarn) {

Copilot uses AI. Check for mistakes.
return finalize(msg);
}

if (!metadata) {
return msg;
return finalize(msg);
}

const debugValue = metadata[SPLAT_SYMBOL]?.[0];
const debugValue = metadata[SPLAT_SYMBOL]?.[0] ?? extractMetaObject(metadata);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Ignore consumed splat args for warn/error traversal

When warn/error levels now enter debugTraverse, this line prefers metadata[Symbol.for('splat')][0] even if it is just a consumed printf argument string (for example logger.warn('failed for %s', tenant, { provider }) after format.splat()). In that case the formatter appends the tenant a second time (failed for tenant tenant) and never surfaces the structured metadata object, which regresses debug-file/DEBUG_CONSOLE log quality in the winston.format.combine(fileFormat, debugTraverse) path.

Useful? React with πŸ‘Β / πŸ‘Ž.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in bccbf11. Swapped the preference order in debugTraverse: it now tries the structured metadata object (extractMetaObject) first and only falls back to SPLAT[0] if nothing else is available. That way consumed printf args don't get appended a second time, and the real metadata object still surfaces. Added a regression test for the logger.warn('failed for %s', tenant, { provider }) shape.


if (!debugValue) {
return msg;
return finalize(msg);
}

if (debugValue && Array.isArray(debugValue)) {
msg += `\n${JSON.stringify(debugValue.map(condenseArray))}`;
return msg;
return finalize(msg);
}

if (typeof debugValue !== 'object') {
return (msg += ` ${debugValue}`);
return finalize((msg += ` ${debugValue}`));
}

msg += '\n{';
Expand Down Expand Up @@ -182,9 +229,9 @@ const debugTraverse = winston.format.printf(({ level, message, timestamp, ...met
});

msg += '\n}';
return msg;
return finalize(msg);
} catch (e) {
return (msg += `\n[LOGGER PARSING ERROR] ${e.message}`);
return finalize((msg += `\n[LOGGER PARSING ERROR] ${e.message}`));
}
});

Expand Down Expand Up @@ -229,4 +276,5 @@ module.exports = {
redactMessage,
debugTraverse,
jsonTruncateFormat,
formatConsoleMeta,
};
Loading
Loading