Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,22 @@ Findings are aligned with [CVSS 4.0](https://www.first.org/cvss/v4.0/specificati

For CRITICAL and HIGH severity findings, the exact score within the range is calculated based on the confidence level, providing granular risk assessment.

### Fingerprint Stability

Claude Code Security Review uses content-based fingerprinting to ensure security findings remain stable across scans:

- **Fingerprints based on actual code**: Uses the code content at the finding location, not LLM-generated labels
- **Stable across rescans**: The same vulnerability will maintain the same fingerprint even if Claude categorizes it differently between runs
- **Handles refactoring**: Normalizes whitespace to remain stable through code formatting changes (prettier, black, gofmt, etc.)
- **Graceful degradation**: Falls back to file:line fingerprinting if code cannot be read

This ensures that:
- Findings persist correctly in GitHub Code Scanning between scans
- The same vulnerability won't be incorrectly marked as "fixed" and then reappear
- Severity changes from LLM variations won't create duplicate findings

**Migration Note**: If upgrading from version 1.x, existing findings in GitHub Code Scanning will be marked as "fixed" once, and new findings will be created with stable fingerprints. This is a one-time migration effect to gain long-term stability.

### Viewing Results

Once uploaded, findings appear in:
Expand Down
104 changes: 98 additions & 6 deletions claudecode/sarif_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

import json
import hashlib
from typing import Dict, Any, List
import os
import re
from typing import Dict, Any, List, Tuple
from pathlib import Path


Expand Down Expand Up @@ -43,6 +45,7 @@ def __init__(self, tool_name: str = "Claude Code Security Review",
self.tool_name = tool_name
self.tool_version = tool_version
self.repo_root = Path(repo_root) if repo_root else Path.cwd()
self._file_cache = {} # Cache file contents to avoid repeated I/O

def _calculate_severity_score(self, severity: str, confidence: float = None) -> float:
"""
Expand Down Expand Up @@ -121,22 +124,111 @@ def _get_relative_path(self, file_path: str) -> str:
# If path is already relative or can't be made relative, return as-is
return file_path

def _read_code_snippet(self, file_path: str, line_number: int,
context_lines: int = 2) -> Tuple[bool, str]:
"""
Read code snippet around a specific line for fingerprinting.

Args:
file_path: Path to the file (relative or absolute)
line_number: Line number of the finding (1-indexed)
context_lines: Number of lines before/after to include

Returns:
Tuple of (success, code_snippet)
"""
cache_key = file_path

# Check cache first
if cache_key not in self._file_cache:
try:
# Use REPO_PATH if set, otherwise use repo_root
repo_path = os.environ.get('REPO_PATH')
if repo_path:
full_path = Path(repo_path) / file_path
else:
full_path = self.repo_root / file_path

if not full_path.exists() or not full_path.is_file():
self._file_cache[cache_key] = None
return False, ""

# Read and cache entire file
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
self._file_cache[cache_key] = f.readlines()
except Exception:
self._file_cache[cache_key] = None
return False, ""

lines = self._file_cache[cache_key]
if lines is None:
return False, ""

# Calculate line range (convert 1-indexed to 0-indexed)
start_line = max(0, line_number - 1 - context_lines)
end_line = min(len(lines), line_number + context_lines)

# Extract and return snippet
snippet_lines = lines[start_line:end_line]
return True, ''.join(snippet_lines)

def _normalize_code_for_fingerprint(self, code: str) -> str:
"""
Normalize code for stable fingerprinting.

Removes formatting variations while preserving code structure.
Handles whitespace changes from auto-formatters (prettier, black, gofmt).

Args:
code: Raw code snippet

Returns:
Normalized code string
"""
lines = code.split('\n')
normalized_lines = []

for line in lines:
# Strip leading/trailing whitespace
stripped = line.strip()
if stripped: # Skip empty lines
# Normalize internal whitespace to single spaces
normalized = re.sub(r'\s+', ' ', stripped)
normalized_lines.append(normalized)

return '\n'.join(normalized_lines)

def _generate_fingerprint(self, finding: Dict[str, Any]) -> str:
"""
Generate a stable fingerprint for finding deduplication.

Uses a shorter fingerprint format to avoid conflicts with GitHub's
CodeQL fingerprint calculation.
Uses file path + code content for stability across LLM runs.
Falls back to file:line if code cannot be read.

Args:
finding: Finding dictionary

Returns:
First 16 characters of SHA-256 hash as fingerprint
"""
# Create fingerprint from file, line, and category
fingerprint_string = f"{finding.get('file', '')}:{finding.get('line', 0)}:{finding.get('category', '')}"
# Use first 16 chars of hash to match GitHub's format expectations
file_path = finding.get('file', '')
line_number = finding.get('line', 0)

# Try to read code snippet for content-based fingerprint
success, code_snippet = self._read_code_snippet(file_path, line_number)

if success and code_snippet:
# Content-based fingerprint (preferred)
normalized_path = self._get_relative_path(file_path)
normalized_code = self._normalize_code_for_fingerprint(code_snippet)
fingerprint_string = f"{normalized_path}:{normalized_code}"
else:
# Fallback to file:line if code cannot be read
# This handles cases where files are moved/deleted after scanning
normalized_path = self._get_relative_path(file_path)
fingerprint_string = f"{normalized_path}:{line_number}"

# Hash and truncate to 16 characters
full_hash = hashlib.sha256(fingerprint_string.encode()).hexdigest()
return full_hash[:16]

Expand Down
185 changes: 185 additions & 0 deletions claudecode/test_sarif_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,191 @@ def test_case_insensitive_severity(self, generator):

assert score_upper == score_lower == score_mixed

def test_fingerprint_stability_across_categories(self, generator, tmp_path):
"""Test that same code with different categorization produces same fingerprint."""
# Create a test file
test_file = tmp_path / "test.py"
test_file.write_text("def vulnerable_function():\n return True\n")

# Set REPO_PATH for the test
import os
original_repo_path = os.environ.get('REPO_PATH')
os.environ['REPO_PATH'] = str(tmp_path)

try:
# Same file and line, different categories
finding1 = {
"file": "test.py",
"line": 1,
"category": "authentication_bypass",
"severity": "CRITICAL"
}
finding2 = {
"file": "test.py",
"line": 1,
"category": "missing_auth_check",
"severity": "HIGH"
}

fingerprint1 = generator._generate_fingerprint(finding1)
fingerprint2 = generator._generate_fingerprint(finding2)

# Fingerprints should be identical despite different categories
assert fingerprint1 == fingerprint2
finally:
# Restore original REPO_PATH
if original_repo_path:
os.environ['REPO_PATH'] = original_repo_path
elif 'REPO_PATH' in os.environ:
del os.environ['REPO_PATH']

def test_fingerprint_changes_with_code_modification(self, generator, tmp_path):
"""Test that different code produces different fingerprint."""
# Create test files with different content
test_file1 = tmp_path / "test1.py"
test_file1.write_text("def function1():\n return True\n")

test_file2 = tmp_path / "test2.py"
test_file2.write_text("def function2():\n return False\n")

import os
original_repo_path = os.environ.get('REPO_PATH')
os.environ['REPO_PATH'] = str(tmp_path)

try:
finding1 = {"file": "test1.py", "line": 1, "category": "vuln"}
finding2 = {"file": "test2.py", "line": 1, "category": "vuln"}

fingerprint1 = generator._generate_fingerprint(finding1)
fingerprint2 = generator._generate_fingerprint(finding2)

# Different code should produce different fingerprints
assert fingerprint1 != fingerprint2
finally:
if original_repo_path:
os.environ['REPO_PATH'] = original_repo_path
elif 'REPO_PATH' in os.environ:
del os.environ['REPO_PATH']

def test_fingerprint_whitespace_normalization(self, generator, tmp_path):
"""Test that whitespace-only changes don't affect fingerprint."""
# Create files with same code but different whitespace
test_file1 = tmp_path / "test1.py"
test_file1.write_text("def function():\n return True\n")

test_file2 = tmp_path / "test2.py"
test_file2.write_text("def function():\n return True\n") # Extra indentation

import os
original_repo_path = os.environ.get('REPO_PATH')
os.environ['REPO_PATH'] = str(tmp_path)

try:
finding1 = {"file": "test1.py", "line": 2, "category": "vuln"}
finding2 = {"file": "test2.py", "line": 2, "category": "vuln"}

fingerprint1 = generator._generate_fingerprint(finding1)
fingerprint2 = generator._generate_fingerprint(finding2)

# Fingerprints should be identical despite whitespace differences
assert fingerprint1 == fingerprint2
finally:
if original_repo_path:
os.environ['REPO_PATH'] = original_repo_path
elif 'REPO_PATH' in os.environ:
del os.environ['REPO_PATH']

def test_fingerprint_fallback_on_file_read_error(self, generator):
"""Test fallback to file:line when code cannot be read."""
# Finding for non-existent file
finding = {
"file": "nonexistent.py",
"line": 42,
"category": "vuln"
}

fingerprint = generator._generate_fingerprint(finding)

# Should still generate a fingerprint (fallback mode)
assert len(fingerprint) == 16
assert isinstance(fingerprint, str)

def test_fingerprint_line_shift_stability(self, generator, tmp_path):
"""Test that line shifts within context window preserve fingerprint."""
# Create a test file with code
test_file = tmp_path / "test.py"
test_file.write_text("# Comment\ndef function():\n return True\n# Another comment\n")

import os
original_repo_path = os.environ.get('REPO_PATH')
os.environ['REPO_PATH'] = str(tmp_path)

try:
# Same function but reported at slightly different lines within context window
finding1 = {"file": "test.py", "line": 2, "category": "vuln"}
finding2 = {"file": "test.py", "line": 3, "category": "vuln"}

fingerprint1 = generator._generate_fingerprint(finding1)
fingerprint2 = generator._generate_fingerprint(finding2)

# With context_lines=2, both should capture overlapping code
# They may be different but should be stable across runs
assert len(fingerprint1) == 16
assert len(fingerprint2) == 16
finally:
if original_repo_path:
os.environ['REPO_PATH'] = original_repo_path
elif 'REPO_PATH' in os.environ:
del os.environ['REPO_PATH']

def test_fingerprint_file_caching(self, generator, tmp_path):
"""Test that file cache works correctly for multiple findings."""
test_file = tmp_path / "test.py"
test_file.write_text("line1\nline2\nline3\nline4\nline5\n")

import os
original_repo_path = os.environ.get('REPO_PATH')
os.environ['REPO_PATH'] = str(tmp_path)

try:
# Multiple findings in the same file
finding1 = {"file": "test.py", "line": 2, "category": "vuln1"}
finding2 = {"file": "test.py", "line": 4, "category": "vuln2"}

# First call should cache the file
fingerprint1 = generator._generate_fingerprint(finding1)
assert "test.py" in generator._file_cache

# Second call should use cached content
fingerprint2 = generator._generate_fingerprint(finding2)

# Both should generate valid fingerprints
assert len(fingerprint1) == 16
assert len(fingerprint2) == 16
# They should be different (different line numbers)
assert fingerprint1 != fingerprint2
finally:
if original_repo_path:
os.environ['REPO_PATH'] = original_repo_path
elif 'REPO_PATH' in os.environ:
del os.environ['REPO_PATH']

def test_code_normalization(self, generator):
"""Test code normalization for fingerprinting."""
# Test various whitespace scenarios
code1 = "def function():\n return True\n"
code2 = "def function():\n return True\n" # Extra indentation
code3 = "def function():\n\n return True\n" # Extra blank line

normalized1 = generator._normalize_code_for_fingerprint(code1)
normalized2 = generator._normalize_code_for_fingerprint(code2)
normalized3 = generator._normalize_code_for_fingerprint(code3)

# All should normalize to the same result
assert normalized1 == normalized2 == normalized3
assert "def function():" in normalized1
assert "return True" in normalized1


if __name__ == "__main__":
pytest.main([__file__, "-v"])