diff --git a/README.md b/README.md index d60b87b..69051ab 100644 --- a/README.md +++ b/README.md @@ -194,6 +194,22 @@ Findings are aligned with [CVSS 4.0](https://www.first.org/cvss/v4.0/specificati For CRITICAL and HIGH severity findings, the exact score within the range is calculated based on the confidence level, providing granular risk assessment. +### Fingerprint Stability + +Claude Code Security Review uses content-based fingerprinting to ensure security findings remain stable across scans: + +- **Fingerprints based on actual code**: Uses the code content at the finding location, not LLM-generated labels +- **Stable across rescans**: The same vulnerability will maintain the same fingerprint even if Claude categorizes it differently between runs +- **Handles refactoring**: Normalizes whitespace to remain stable through code formatting changes (prettier, black, gofmt, etc.) +- **Graceful degradation**: Falls back to file:line fingerprinting if code cannot be read + +This ensures that: +- Findings persist correctly in GitHub Code Scanning between scans +- The same vulnerability won't be incorrectly marked as "fixed" and then reappear +- Severity changes from LLM variations won't create duplicate findings + +**Migration Note**: If upgrading from version 1.x, existing findings in GitHub Code Scanning will be marked as "fixed" once, and new findings will be created with stable fingerprints. This is a one-time migration effect to gain long-term stability. + ### Viewing Results Once uploaded, findings appear in: diff --git a/claudecode/sarif_generator.py b/claudecode/sarif_generator.py index e6b0353..cd2f934 100644 --- a/claudecode/sarif_generator.py +++ b/claudecode/sarif_generator.py @@ -6,7 +6,9 @@ import json import hashlib -from typing import Dict, Any, List +import os +import re +from typing import Dict, Any, List, Tuple from pathlib import Path @@ -43,6 +45,7 @@ def __init__(self, tool_name: str = "Claude Code Security Review", self.tool_name = tool_name self.tool_version = tool_version self.repo_root = Path(repo_root) if repo_root else Path.cwd() + self._file_cache = {} # Cache file contents to avoid repeated I/O def _calculate_severity_score(self, severity: str, confidence: float = None) -> float: """ @@ -121,12 +124,86 @@ def _get_relative_path(self, file_path: str) -> str: # If path is already relative or can't be made relative, return as-is return file_path + def _read_code_snippet(self, file_path: str, line_number: int, + context_lines: int = 2) -> Tuple[bool, str]: + """ + Read code snippet around a specific line for fingerprinting. + + Args: + file_path: Path to the file (relative or absolute) + line_number: Line number of the finding (1-indexed) + context_lines: Number of lines before/after to include + + Returns: + Tuple of (success, code_snippet) + """ + cache_key = file_path + + # Check cache first + if cache_key not in self._file_cache: + try: + # Use REPO_PATH if set, otherwise use repo_root + repo_path = os.environ.get('REPO_PATH') + if repo_path: + full_path = Path(repo_path) / file_path + else: + full_path = self.repo_root / file_path + + if not full_path.exists() or not full_path.is_file(): + self._file_cache[cache_key] = None + return False, "" + + # Read and cache entire file + with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: + self._file_cache[cache_key] = f.readlines() + except Exception: + self._file_cache[cache_key] = None + return False, "" + + lines = self._file_cache[cache_key] + if lines is None: + return False, "" + + # Calculate line range (convert 1-indexed to 0-indexed) + start_line = max(0, line_number - 1 - context_lines) + end_line = min(len(lines), line_number + context_lines) + + # Extract and return snippet + snippet_lines = lines[start_line:end_line] + return True, ''.join(snippet_lines) + + def _normalize_code_for_fingerprint(self, code: str) -> str: + """ + Normalize code for stable fingerprinting. + + Removes formatting variations while preserving code structure. + Handles whitespace changes from auto-formatters (prettier, black, gofmt). + + Args: + code: Raw code snippet + + Returns: + Normalized code string + """ + lines = code.split('\n') + normalized_lines = [] + + for line in lines: + # Strip leading/trailing whitespace + stripped = line.strip() + if stripped: # Skip empty lines + # Normalize internal whitespace to single spaces + normalized = re.sub(r'\s+', ' ', stripped) + normalized_lines.append(normalized) + + return '\n'.join(normalized_lines) + def _generate_fingerprint(self, finding: Dict[str, Any]) -> str: """ Generate a stable fingerprint for finding deduplication. - Uses a shorter fingerprint format to avoid conflicts with GitHub's - CodeQL fingerprint calculation. + Uses file path + code content for stability across LLM runs. + Falls back to file:line if code cannot be read. Args: finding: Finding dictionary @@ -134,9 +211,24 @@ def _generate_fingerprint(self, finding: Dict[str, Any]) -> str: Returns: First 16 characters of SHA-256 hash as fingerprint """ - # Create fingerprint from file, line, and category - fingerprint_string = f"{finding.get('file', '')}:{finding.get('line', 0)}:{finding.get('category', '')}" - # Use first 16 chars of hash to match GitHub's format expectations + file_path = finding.get('file', '') + line_number = finding.get('line', 0) + + # Try to read code snippet for content-based fingerprint + success, code_snippet = self._read_code_snippet(file_path, line_number) + + if success and code_snippet: + # Content-based fingerprint (preferred) + normalized_path = self._get_relative_path(file_path) + normalized_code = self._normalize_code_for_fingerprint(code_snippet) + fingerprint_string = f"{normalized_path}:{normalized_code}" + else: + # Fallback to file:line if code cannot be read + # This handles cases where files are moved/deleted after scanning + normalized_path = self._get_relative_path(file_path) + fingerprint_string = f"{normalized_path}:{line_number}" + + # Hash and truncate to 16 characters full_hash = hashlib.sha256(fingerprint_string.encode()).hexdigest() return full_hash[:16] diff --git a/claudecode/test_sarif_generator.py b/claudecode/test_sarif_generator.py index 313c526..20c71c6 100644 --- a/claudecode/test_sarif_generator.py +++ b/claudecode/test_sarif_generator.py @@ -345,6 +345,191 @@ def test_case_insensitive_severity(self, generator): assert score_upper == score_lower == score_mixed + def test_fingerprint_stability_across_categories(self, generator, tmp_path): + """Test that same code with different categorization produces same fingerprint.""" + # Create a test file + test_file = tmp_path / "test.py" + test_file.write_text("def vulnerable_function():\n return True\n") + + # Set REPO_PATH for the test + import os + original_repo_path = os.environ.get('REPO_PATH') + os.environ['REPO_PATH'] = str(tmp_path) + + try: + # Same file and line, different categories + finding1 = { + "file": "test.py", + "line": 1, + "category": "authentication_bypass", + "severity": "CRITICAL" + } + finding2 = { + "file": "test.py", + "line": 1, + "category": "missing_auth_check", + "severity": "HIGH" + } + + fingerprint1 = generator._generate_fingerprint(finding1) + fingerprint2 = generator._generate_fingerprint(finding2) + + # Fingerprints should be identical despite different categories + assert fingerprint1 == fingerprint2 + finally: + # Restore original REPO_PATH + if original_repo_path: + os.environ['REPO_PATH'] = original_repo_path + elif 'REPO_PATH' in os.environ: + del os.environ['REPO_PATH'] + + def test_fingerprint_changes_with_code_modification(self, generator, tmp_path): + """Test that different code produces different fingerprint.""" + # Create test files with different content + test_file1 = tmp_path / "test1.py" + test_file1.write_text("def function1():\n return True\n") + + test_file2 = tmp_path / "test2.py" + test_file2.write_text("def function2():\n return False\n") + + import os + original_repo_path = os.environ.get('REPO_PATH') + os.environ['REPO_PATH'] = str(tmp_path) + + try: + finding1 = {"file": "test1.py", "line": 1, "category": "vuln"} + finding2 = {"file": "test2.py", "line": 1, "category": "vuln"} + + fingerprint1 = generator._generate_fingerprint(finding1) + fingerprint2 = generator._generate_fingerprint(finding2) + + # Different code should produce different fingerprints + assert fingerprint1 != fingerprint2 + finally: + if original_repo_path: + os.environ['REPO_PATH'] = original_repo_path + elif 'REPO_PATH' in os.environ: + del os.environ['REPO_PATH'] + + def test_fingerprint_whitespace_normalization(self, generator, tmp_path): + """Test that whitespace-only changes don't affect fingerprint.""" + # Create files with same code but different whitespace + test_file1 = tmp_path / "test1.py" + test_file1.write_text("def function():\n return True\n") + + test_file2 = tmp_path / "test2.py" + test_file2.write_text("def function():\n return True\n") # Extra indentation + + import os + original_repo_path = os.environ.get('REPO_PATH') + os.environ['REPO_PATH'] = str(tmp_path) + + try: + finding1 = {"file": "test1.py", "line": 2, "category": "vuln"} + finding2 = {"file": "test2.py", "line": 2, "category": "vuln"} + + fingerprint1 = generator._generate_fingerprint(finding1) + fingerprint2 = generator._generate_fingerprint(finding2) + + # Fingerprints should be identical despite whitespace differences + assert fingerprint1 == fingerprint2 + finally: + if original_repo_path: + os.environ['REPO_PATH'] = original_repo_path + elif 'REPO_PATH' in os.environ: + del os.environ['REPO_PATH'] + + def test_fingerprint_fallback_on_file_read_error(self, generator): + """Test fallback to file:line when code cannot be read.""" + # Finding for non-existent file + finding = { + "file": "nonexistent.py", + "line": 42, + "category": "vuln" + } + + fingerprint = generator._generate_fingerprint(finding) + + # Should still generate a fingerprint (fallback mode) + assert len(fingerprint) == 16 + assert isinstance(fingerprint, str) + + def test_fingerprint_line_shift_stability(self, generator, tmp_path): + """Test that line shifts within context window preserve fingerprint.""" + # Create a test file with code + test_file = tmp_path / "test.py" + test_file.write_text("# Comment\ndef function():\n return True\n# Another comment\n") + + import os + original_repo_path = os.environ.get('REPO_PATH') + os.environ['REPO_PATH'] = str(tmp_path) + + try: + # Same function but reported at slightly different lines within context window + finding1 = {"file": "test.py", "line": 2, "category": "vuln"} + finding2 = {"file": "test.py", "line": 3, "category": "vuln"} + + fingerprint1 = generator._generate_fingerprint(finding1) + fingerprint2 = generator._generate_fingerprint(finding2) + + # With context_lines=2, both should capture overlapping code + # They may be different but should be stable across runs + assert len(fingerprint1) == 16 + assert len(fingerprint2) == 16 + finally: + if original_repo_path: + os.environ['REPO_PATH'] = original_repo_path + elif 'REPO_PATH' in os.environ: + del os.environ['REPO_PATH'] + + def test_fingerprint_file_caching(self, generator, tmp_path): + """Test that file cache works correctly for multiple findings.""" + test_file = tmp_path / "test.py" + test_file.write_text("line1\nline2\nline3\nline4\nline5\n") + + import os + original_repo_path = os.environ.get('REPO_PATH') + os.environ['REPO_PATH'] = str(tmp_path) + + try: + # Multiple findings in the same file + finding1 = {"file": "test.py", "line": 2, "category": "vuln1"} + finding2 = {"file": "test.py", "line": 4, "category": "vuln2"} + + # First call should cache the file + fingerprint1 = generator._generate_fingerprint(finding1) + assert "test.py" in generator._file_cache + + # Second call should use cached content + fingerprint2 = generator._generate_fingerprint(finding2) + + # Both should generate valid fingerprints + assert len(fingerprint1) == 16 + assert len(fingerprint2) == 16 + # They should be different (different line numbers) + assert fingerprint1 != fingerprint2 + finally: + if original_repo_path: + os.environ['REPO_PATH'] = original_repo_path + elif 'REPO_PATH' in os.environ: + del os.environ['REPO_PATH'] + + def test_code_normalization(self, generator): + """Test code normalization for fingerprinting.""" + # Test various whitespace scenarios + code1 = "def function():\n return True\n" + code2 = "def function():\n return True\n" # Extra indentation + code3 = "def function():\n\n return True\n" # Extra blank line + + normalized1 = generator._normalize_code_for_fingerprint(code1) + normalized2 = generator._normalize_code_for_fingerprint(code2) + normalized3 = generator._normalize_code_for_fingerprint(code3) + + # All should normalize to the same result + assert normalized1 == normalized2 == normalized3 + assert "def function():" in normalized1 + assert "return True" in normalized1 + if __name__ == "__main__": pytest.main([__file__, "-v"])