diff --git a/logfire/_internal/scrubbing.py b/logfire/_internal/scrubbing.py index 5b3212582..1281637d1 100644 --- a/logfire/_internal/scrubbing.py +++ b/logfire/_internal/scrubbing.py @@ -205,7 +205,16 @@ class Scrubber(BaseScrubber): def __init__(self, patterns: Sequence[str] | None, callback: ScrubCallback | None = None): # See ScrubbingOptions for more info on these parameters. patterns = [*DEFAULT_PATTERNS, *(patterns or [])] - self._pattern = re.compile('|'.join(patterns), re.IGNORECASE | re.DOTALL) + default_patterns_count = len(DEFAULT_PATTERNS) + pattern_parts: list[str] = [] + pattern_reason_by_group: dict[str, str | None] = {} + for index, pattern in enumerate(patterns): + group_name = f'pattern_{index}' + pattern_parts.append(f'(?P<{group_name}>{pattern})') + pattern_reason_by_group[group_name] = None if index < default_patterns_count else pattern + + self._pattern = re.compile('|'.join(pattern_parts), re.IGNORECASE | re.DOTALL) + self._pattern_reason_by_group = pattern_reason_by_group self._callback = callback def scrub_log(self, log: LogRecord) -> LogRecord: @@ -246,6 +255,7 @@ class SpanScrubber: def __init__(self, parent: Scrubber): self._pattern = parent._pattern # pyright: ignore[reportPrivateUsage] + self._pattern_reason_by_group = parent._pattern_reason_by_group # pyright: ignore[reportPrivateUsage] self._callback = parent._callback # pyright: ignore[reportPrivateUsage] self.scrubbed: list[ScrubbedNote] = [] self.did_scrub = False @@ -343,8 +353,12 @@ def _redact(self, match: ScrubMatch) -> Any: return result self.did_scrub = True matched_substring = match.pattern_match.group(0) - self.scrubbed.append(ScrubbedNote(path=match.path, matched_substring=matched_substring)) - return f'[Scrubbed due to {matched_substring!r}]' + reason = ( + self._pattern_reason_by_group.get(match.pattern_match.lastgroup or '', matched_substring) + or matched_substring + ) + self.scrubbed.append(ScrubbedNote(path=match.path, matched_substring=reason)) + return f'[Scrubbed due to {reason!r}]' class MessageValueCleaner: diff --git a/tests/test_secret_scrubbing.py b/tests/test_secret_scrubbing.py index 95f9c4020..e3bad92e9 100644 --- a/tests/test_secret_scrubbing.py +++ b/tests/test_secret_scrubbing.py @@ -337,6 +337,27 @@ def callback(match: logfire.ScrubMatch): ) +def test_extra_pattern_redaction_reason_does_not_echo_secret(exporter: TestExporter, config_kwargs: dict[str, Any]): + logfire.configure( + scrubbing=logfire.ScrubbingOptions( + extra_patterns=[r'://[^:@/]+:[^@/]+@'], + ), + **config_kwargs, + ) + + secret = 'admin:s3cr3t_pass' + logfire.info('connect', config_url=f'postgresql://{secret}@db.internal:5432/mydb') + + span = exporter.exported_spans_as_dict()[0] + config_url = span['attributes']['config_url'] + scrubbed = span['attributes']['logfire.scrubbed'] + + assert secret not in config_url + assert secret not in scrubbed + assert config_url == "[Scrubbed due to '://[^:@/]+:[^@/]+@']" + assert scrubbed == IsJson([{'path': ['attributes', 'config_url'], 'matched_substring': '://[^:@/]+:[^@/]+@'}]) + + def test_dont_scrub_resource(exporter: TestExporter, config_kwargs: dict[str, Any]): os.environ[OTEL_RESOURCE_ATTRIBUTES] = 'my_password=hunter2,yours=your_password,other=safe=good' logfire.configure(**config_kwargs)