Skip to content

Commit 17f9363

Browse files
feat(ai_hook): reduce coupling between verticals
1 parent 7474d2b commit 17f9363

File tree

11 files changed

+504
-447
lines changed

11 files changed

+504
-447
lines changed

.importlinter

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ type = layers
1010
layers =
1111
ggshield.__main__
1212
ggshield.cmd.auth | ggshield.cmd.config | ggshield.cmd.hmsl | ggshield.cmd.honeytoken | ggshield.cmd.install | ggshield.cmd.plugin | ggshield.cmd.quota | ggshield.cmd.secret | ggshield.cmd.status | ggshield.cmd.utils
13-
ggshield.verticals.auth | ggshield.verticals.hmsl | ggshield.verticals.secret
13+
ggshield.verticals.ai | ggshield.verticals.auth | ggshield.verticals.hmsl | ggshield.verticals.secret
1414
ggshield.core
1515
click | ggshield.utils | pygitguardian
1616
ignore_imports =
@@ -33,6 +33,7 @@ source_modules =
3333
ggshield.cmd.status
3434
ggshield.cmd.utils
3535
forbidden_modules =
36+
ggshield.verticals.ai
3637
ggshield.verticals.auth
3738
ggshield.verticals.hmsl
3839
ggshield.verticals.secret
@@ -46,6 +47,7 @@ ignore_imports =
4647
ggshield.cmd.hmsl.** -> ggshield.verticals.hmsl.**
4748
ggshield.cmd.honeytoken.** -> ggshield.verticals.honeytoken
4849
ggshield.cmd.honeytoken.** -> ggshield.verticals.honeytoken.**
50+
ggshield.cmd.install -> ggshield.verticals.ai.installation
4951
ggshield.cmd.install.** -> ggshield.verticals.install
5052
ggshield.cmd.install.** -> ggshield.verticals.install.**
5153
ggshield.cmd.plugin.** -> ggshield.core.plugin
@@ -54,6 +56,7 @@ ignore_imports =
5456
ggshield.cmd.quota.** -> ggshield.verticals.quota.**
5557
ggshield.cmd.secret.** -> ggshield.verticals.secret
5658
ggshield.cmd.secret.** -> ggshield.verticals.secret.**
59+
ggshield.cmd.secret.scan.ai_hook -> ggshield.verticals.ai.hooks
5760
ggshield.cmd.status.** -> ggshield.verticals.status
5861
ggshield.cmd.status.** -> ggshield.verticals.status.**
5962
ggshield.cmd.utils.** -> ggshield.verticals.utils

ggshield/cmd/secret/scan/ai_hook.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from ggshield.core import ui
1111
from ggshield.core.client import create_client_from_config
1212
from ggshield.core.scan import ScanContext, ScanMode
13+
from ggshield.verticals.ai.hooks import AIHookScanner
1314
from ggshield.verticals.secret import SecretScanner
14-
from ggshield.verticals.secret.ai_hook import AIHookScanner
1515

1616

1717
MAX_READ_SIZE = 1024 * 1024 * 10 # We restrict stdin read to 10MB

ggshield/core/scan/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .scan_context import ScanContext
44
from .scan_mode import ScanMode
55
from .scannable import DecodeError, NonSeekableFileError, Scannable, StringScannable
6+
from .scanner import ResultsProtocol, ScannerProtocol, SecretProtocol
67

78

89
__all__ = [
@@ -11,8 +12,11 @@
1112
"DecodeError",
1213
"File",
1314
"NonSeekableFileError",
15+
"ResultsProtocol",
1416
"ScanContext",
1517
"ScanMode",
1618
"Scannable",
19+
"ScannerProtocol",
20+
"SecretProtocol",
1721
"StringScannable",
1822
]

ggshield/core/scan/scanner.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Protocols for SecretScanner and its results,
3+
so that other verticals can use the scanner if they are provided one.
4+
"""
5+
6+
from collections.abc import Sequence
7+
from typing import Iterable, Optional, Protocol
8+
9+
from pygitguardian.models import Match
10+
11+
from ggshield.core.scanner_ui import ScannerUI
12+
13+
from . import Scannable
14+
15+
16+
class SecretProtocol(Protocol):
17+
"""Structural protocol describing a detected secret.
18+
19+
Members are declared as read-only properties rather than getter methods.
20+
"""
21+
22+
@property
23+
def detector_display_name(self) -> str: ...
24+
25+
@property
26+
def validity(self) -> str: ...
27+
28+
@property
29+
def matches(self) -> Sequence[Match]: ...
30+
31+
32+
class ResultProtocol(Protocol):
33+
@property
34+
def secrets(self) -> Sequence[SecretProtocol]: ...
35+
36+
37+
class ResultsProtocol(Protocol):
38+
@property
39+
def results(self) -> Sequence[ResultProtocol]: ...
40+
41+
42+
class ScannerProtocol(Protocol):
43+
"""Protocol for scanners."""
44+
45+
def scan(
46+
self,
47+
files: Iterable[Scannable],
48+
scanner_ui: ScannerUI,
49+
scan_threads: Optional[int] = None,
50+
) -> ResultsProtocol: ...

ggshield/verticals/ai/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from .agents import AGENTS
2+
from .hooks import AIHookScanner
3+
from .installation import install_hooks
4+
5+
6+
__all__ = [
7+
"AGENTS",
8+
"AIHookScanner",
9+
"install_hooks",
10+
]

ggshield/verticals/ai/hooks.py

Lines changed: 174 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,16 @@
33
import re
44
from typing import Any, Dict, List, Sequence, Set
55

6-
from ggshield.verticals.ai.agents import Claude, Copilot, Cursor
6+
from notifypy import Notify
77

8-
from .models import Agent, EventType, HookPayload, Tool
8+
from ggshield.core.filter import censor_match
9+
from ggshield.core.scan import ScannerProtocol
10+
from ggshield.core.scan import SecretProtocol as Secret
11+
from ggshield.core.scanner_ui import create_message_only_scanner_ui
12+
from ggshield.core.text_utils import pluralize, translate_validity
13+
14+
from .agents import Claude, Copilot, Cursor
15+
from .models import Agent, EventType, HookPayload, HookResult, Tool
916

1017

1118
HOOK_NAME_TO_EVENT_TYPE = {
@@ -163,3 +170,168 @@ def _parse_user_prompt(
163170
)
164171
)
165172
return payloads
173+
174+
175+
class AIHookScanner:
176+
"""AI hook scanner.
177+
178+
It is called with the payload of a hook event.
179+
Note that rather than having a base class with common methods and a subclass per supported AI tool,
180+
we instead have a single class which detects which protocol to use.
181+
This is because some tools sloppily support hooks from others. For instance,
182+
Cursor will call hooks defined in the Claude Code format, but send payload in its own format.
183+
So we can't assume which tool will call us based on the command line/hook configuration only.
184+
185+
Raises:
186+
ValueError: If the input is not valid.
187+
"""
188+
189+
def __init__(self, scanner: ScannerProtocol):
190+
self.scanner = scanner
191+
192+
def scan(self, content: str) -> int:
193+
"""Scan the content, print the result and return the exit code."""
194+
195+
payloads = parse_hook_input(content)
196+
result = self._scan_payloads(payloads)
197+
payload = result.payload
198+
199+
# Special case: in post-tool use, the action is already done: at least notify the user
200+
if result.block and payload.event_type == EventType.POST_TOOL_USE:
201+
self._send_secret_notification(
202+
result.nbr_secrets,
203+
payload.tool or Tool.OTHER,
204+
payload.agent.display_name,
205+
)
206+
207+
return payload.agent.output_result(result)
208+
209+
def _scan_payloads(self, payloads: List[HookPayload]) -> HookResult:
210+
"""Scan payloads for secrets using the injected scanner.
211+
212+
Returns:
213+
The result of the first blocking payload, or a non-blocking result.
214+
Raises a ValueError if the list is empty (we must have at least one to emit a result).
215+
"""
216+
if not payloads:
217+
raise ValueError("Error: no payloads to scan")
218+
for payload in payloads:
219+
result = self._scan_content(payload)
220+
if result.block:
221+
return result
222+
return HookResult.allow(payloads[0])
223+
224+
def _scan_content(
225+
self,
226+
payload: HookPayload,
227+
) -> HookResult:
228+
"""Scan content for secrets using the injected scanner."""
229+
# Short path: if there is no content, no need to do an API call
230+
if payload.empty:
231+
return HookResult.allow(payload)
232+
233+
with create_message_only_scanner_ui() as scanner_ui:
234+
results = self.scanner.scan([payload.scannable], scanner_ui=scanner_ui)
235+
# Collect all secrets from results
236+
secrets: List[Secret] = []
237+
for result in results.results:
238+
secrets.extend(result.secrets)
239+
240+
if not secrets:
241+
return HookResult.allow(payload)
242+
243+
message = self._message_from_secrets(
244+
secrets,
245+
payload,
246+
escape_markdown=True,
247+
)
248+
return HookResult(
249+
block=True,
250+
message=message,
251+
nbr_secrets=len(secrets),
252+
payload=payload,
253+
)
254+
255+
@staticmethod
256+
def _message_from_secrets(
257+
secrets: List[Secret],
258+
payload: HookPayload,
259+
escape_markdown: bool = False,
260+
) -> str:
261+
"""
262+
Format detected secrets into a user-friendly message.
263+
264+
Args:
265+
secrets: List of detected secrets
266+
payload: Hook payload whose tool and event type select the advice appended after the secrets list
267+
escape_markdown: If True, replace asterisks in the censored matches with bullets to prevent markdown interpretation
268+
269+
Returns:
270+
Formatted message describing the detected secrets
271+
"""
272+
count = len(secrets)
273+
header = f"**🚨 Detected {count} {pluralize('secret', count)} 🚨**"
274+
275+
secret_lines = []
276+
for secret in secrets:
277+
validity = translate_validity(secret.validity).lower()
278+
if validity == "valid":
279+
validity = f"**{validity}**"
280+
match_str = ", ".join(censor_match(m) for m in secret.matches)
281+
if escape_markdown:
282+
match_str = match_str.replace("*", "•")
283+
secret_lines.append(
284+
f" - {secret.detector_display_name} ({validity}): {match_str}"
285+
)
286+
287+
if payload.tool == Tool.BASH:
288+
if payload.event_type == EventType.POST_TOOL_USE:
289+
message = "Secrets detected in the command output."
290+
else:
291+
message = (
292+
"Please remove the secrets from the command before executing it. "
293+
"Consider using environment variables or a secrets manager instead."
294+
)
295+
elif payload.tool == Tool.READ:
296+
message = f"Please remove the secrets from {payload.identifier} before reading it."
297+
elif payload.event_type == EventType.USER_PROMPT:
298+
message = "Please remove the secrets from your prompt before submitting."
299+
else:
300+
message = (
301+
"Please remove the secrets from the tool input before executing. "
302+
"Consider using environment variables or a secrets manager instead."
303+
)
304+
305+
secrets_block = "\n".join(secret_lines)
306+
return f"{header}\n{secrets_block}\n\n{message}"
307+
308+
@staticmethod
309+
def _send_secret_notification(
310+
nbr_secrets: int, tool: Tool, agent_name: str
311+
) -> None:
312+
"""
313+
Send desktop notification when secrets are detected.
314+
315+
Args:
316+
nbr_secrets: Number of detected secrets
317+
tool: Tool whose use exposed the secrets (selects the wording of the notification)
318+
agent_name: Display name of the agent that got access to the secrets
319+
"""
320+
source = "using a tool"
321+
if tool == Tool.READ:
322+
source = "reading a file"
323+
elif tool == Tool.BASH:
324+
source = "running a command"
325+
notification = Notify()
326+
notification.title = "ggshield - Secrets Detected"
327+
notification.message = (
328+
f"{agent_name} got access to {nbr_secrets}"
329+
f" {pluralize('secret', nbr_secrets)} by {source}"
330+
)
331+
notification.application_name = "ggshield"
332+
try:
333+
notification.send()
334+
except Exception:
335+
# This is best effort, we don't want to propagate an error
336+
# if the notification fails.
337+
pass

ggshield/verticals/ai/installation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ def install_hooks(
8080
# Report what happened
8181
styled_path = click.style(settings_path, fg="yellow", bold=True)
8282
if stats.added == 0 and stats.already_present > 0:
83-
click.echo(f"{agent.name} hooks already installed in {styled_path}")
83+
click.echo(f"{agent.display_name} hooks already installed in {styled_path}")
8484
elif stats.added > 0 and stats.already_present > 0:
85-
click.echo(f"{agent.name} hooks updated in {styled_path}")
85+
click.echo(f"{agent.display_name} hooks updated in {styled_path}")
8686
else:
87-
click.echo(f"{agent.name} hooks successfully added in {styled_path}")
87+
click.echo(f"{agent.display_name} hooks successfully added in {styled_path}")
8888

8989
return 0
9090

0 commit comments

Comments
 (0)