diff --git a/Gradata/README.md b/Gradata/README.md index 0f4d1dd5..ac2d5c1d 100644 --- a/Gradata/README.md +++ b/Gradata/README.md @@ -131,6 +131,34 @@ agent config, run the offline smoke script from a source checkout: PYTHONPATH=src python examples/offline_quickstart_smoke.py ``` +## Opt-in telemetry and privacy + +Telemetry is off by default. On first interactive `gradata init`, Gradata asks +whether you want to send anonymous usage pings. Your choice is stored in +`~/.gradata/config.toml`: + +```toml +[telemetry] +enabled = "true" # or "false" +``` + +If enabled, Gradata sends only: + +- event name (`brain_initialized`, `first_correction_captured`, `first_graduation`, `first_hook_installed`, `wau_ping`) +- anonymous `user_id` (`sha256` of a machine-local seed) +- UTC timestamp +- SDK version + +It never sends code, file paths, lesson/correction text, prompts, emails, names, +stack traces, environment variables, or raw IP addresses. Set +`GRADATA_TELEMETRY=0` to disable telemetry for any session, even if you opted in. +For dogfood metrics, `wau_ping` fires once on each agent session start and powers +weekly active user reporting: + +```bash +gradata telemetry wau +``` + ## Bring your own API key Gradata defaults to `CLIProvider`, which reuses your installed Claude Code, Codex, or Gemini CLI. If you want clearer API terms, do not want to install a CLI, or want lower call latency, configure Gradata to call your own Anthropic, OpenAI, or Google key directly. diff --git a/Gradata/src/gradata/_telemetry.py b/Gradata/src/gradata/_telemetry.py index 4ad24829..0fd4cea9 100644 --- a/Gradata/src/gradata/_telemetry.py +++ b/Gradata/src/gradata/_telemetry.py @@ -84,15 +84,17 @@ def _config_path() -> Path: return _config_dir() / _CONFIG_FILENAME -# The exhaustive set of activation events. Adding a new one here is the -# only place you need to touch — the prompt copy and the docs reference -# this tuple, the backend schema just validates string length. +# The exhaustive set of anonymous telemetry events. Activation events use +# send_once(); recurring heartbeat events (currently wau_ping) use explicit +# sender functions so they can fire once per session when the user opted in. ACTIVATION_EVENTS: Final[tuple[str, ...]] = ( "brain_initialized", "first_correction_captured", "first_graduation", "first_hook_installed", ) +HEARTBEAT_EVENTS: Final[tuple[str, ...]] = ("wau_ping",) +TELEMETRY_EVENTS: Final[tuple[str, ...]] = ACTIVATION_EVENTS + HEARTBEAT_EVENTS ActivationEvent = Literal[ "brain_initialized", @@ -100,6 +102,13 @@ def _config_path() -> Path: "first_graduation", "first_hook_installed", ] +TelemetryEventName = Literal[ + "brain_initialized", + "first_correction_captured", + "first_graduation", + "first_hook_installed", + "wau_ping", +] # ── Config I/O ──────────────────────────────────────────────────────── @@ -258,11 +267,11 @@ def _build_payload(event: str) -> dict[str, str]: } -def _post(payload: dict[str, str], timeout: float = 3.0) -> bool: +def _post(payload: dict[str, str], timeout: float = 3.0, endpoint: str | None = None) -> bool: """Best-effort POST. Never raises. Returns True on 2xx.""" body = json.dumps(payload).encode("utf-8") req = urllib.request.Request( - _endpoint(), + endpoint or _endpoint(), data=body, headers={"Content-Type": "application/json"}, method="POST", @@ -276,13 +285,13 @@ def _post(payload: dict[str, str], timeout: float = 3.0) -> bool: def send_event(event: str, *, blocking: bool = False) -> None: - """Fire an activation event if the user opted in. + """Fire an anonymous telemetry event if the user opted in. Runs in a background thread by default so it never blocks the user. Pass ``blocking=True`` in tests. """ - if event not in ACTIVATION_EVENTS: - raise ValueError(f"Unknown activation event: {event!r}") + if event not in TELEMETRY_EVENTS: + raise ValueError(f"Unknown telemetry event: {event!r}") if not is_enabled(): return payload = _build_payload(event) @@ -295,6 +304,56 @@ def send_event(event: str, *, blocking: bool = False) -> None: thread.start() +def _ping_endpoint() -> str: + endpoint = _endpoint() + if endpoint.rstrip("/").endswith("/telemetry/event"): + return endpoint.rstrip("/")[: -len("/telemetry/event")] + "/telemetry/ping" + return endpoint + + +def send_session_ping(*, blocking: bool = False) -> None: + """Best-effort anonymous WAU heartbeat for a session start. + + Payload is the same no-PII four-field shape as activation telemetry, with + ``event='wau_ping'``. Default is off unless the user opted in. + """ + if not is_enabled(): + return + payload = _build_payload("wau_ping") + ping_endpoint = _ping_endpoint() + + if blocking: + _post(payload, endpoint=ping_endpoint) + return + + thread = threading.Thread(target=_post, args=(payload,), kwargs={"endpoint": ping_endpoint}, daemon=True) + thread.start() + + +def fetch_wau(timeout: float = 3.0) -> dict[str, object]: + """Fetch live WAU aggregate from the telemetry endpoint. + + Uses the public aggregate endpoint; returns a small error dict instead of + raising so ``gradata telemetry wau`` never crashes because metrics are down. + """ + endpoint = _endpoint().rstrip("/") + if endpoint.endswith("/telemetry/event"): + endpoint = endpoint[: -len("/telemetry/event")] + elif endpoint.endswith("/telemetry/ping"): + endpoint = endpoint[: -len("/telemetry/ping")] + url = endpoint + "/telemetry/wau" + req = urllib.request.Request(url, headers={"User-Agent": "gradata-telemetry/1.0"}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + body = resp.read().decode("utf-8") + data = json.loads(body) + if isinstance(data, dict): + return data + except (urllib.error.URLError, TimeoutError, OSError, json.JSONDecodeError) as exc: + logger.debug("telemetry WAU fetch failed: %s", exc) + return {"wau": 0, "error": "unavailable"} + + # ── First-fire guard (activation events fire once per machine) ──────── def _event_flag_key(event: str) -> str: return f"telemetry.fired_{event}" diff --git a/Gradata/src/gradata/cli.py b/Gradata/src/gradata/cli.py index 34fe147d..2cec6328 100644 --- a/Gradata/src/gradata/cli.py +++ b/Gradata/src/gradata/cli.py @@ -132,6 +132,24 @@ def cmd_stats(args): print(f" Has embeddings: {stats['has_embeddings']}") +def cmd_telemetry(args): + """Telemetry visibility commands.""" + from gradata import _telemetry + + if args.telemetry_cmd == "wau": + data = _telemetry.fetch_wau() + if args.json: + print(json.dumps(data, indent=2, sort_keys=True)) + return + print(f"WAU: {data.get('wau', 0)}") + if data.get("week_start"): + print(f"Week start: {data['week_start']}") + if data.get("error"): + print(f"Status: {data['error']}") + return + raise SystemExit("unknown telemetry command") + + def cmd_status(args): """Single human-readable summary of brain health. @@ -1988,6 +2006,12 @@ def main(): # stats sub.add_parser("stats", help="Brain statistics") + # telemetry + p_telemetry = sub.add_parser("telemetry", help="Anonymous opt-in telemetry commands") + telemetry_sub = p_telemetry.add_subparsers(dest="telemetry_cmd", required=True) + p_wau = telemetry_sub.add_parser("wau", help="Show live weekly active user count") + p_wau.add_argument("--json", action="store_true", help="Output raw aggregate JSON") + # status (umbrella health check: stats + daemon + cloud + convergence) sub.add_parser("status", help="Single-page brain/daemon/cloud summary") @@ -2347,6 +2371,7 @@ def main(): "embed": cmd_embed, "manifest": cmd_manifest, "stats": cmd_stats, + "telemetry": cmd_telemetry, "status": cmd_status, "audit": cmd_audit, "sync": cmd_sync, diff --git a/Gradata/src/gradata/hooks/session_boot.py b/Gradata/src/gradata/hooks/session_boot.py index b2910670..0cc6ddc1 100644 --- a/Gradata/src/gradata/hooks/session_boot.py +++ b/Gradata/src/gradata/hooks/session_boot.py @@ -54,6 +54,13 @@ def main(_data: dict) -> dict | None: if not db_path.is_file(): return None + try: + from gradata import _telemetry + + _telemetry.send_session_ping() + except Exception as e: + _log.debug("session telemetry skipped: %s", e) + try: from gradata._events import emit from gradata._file_lock import platform_lock diff --git a/Gradata/tests/test_telemetry.py b/Gradata/tests/test_telemetry.py index 82b7429c..949a0338 100644 --- a/Gradata/tests/test_telemetry.py +++ b/Gradata/tests/test_telemetry.py @@ -136,6 +136,26 @@ def test_posts_when_enabled(self): payload = post.call_args[0][0] assert payload["event"] == "brain_initialized" + def test_session_ping_posts_wau_to_ping_endpoint(self, monkeypatch): + _telemetry.set_enabled(True) + monkeypatch.setenv(_telemetry.ENV_ENDPOINT, "https://api.example.com/telemetry/event") + with patch.object(_telemetry, "_post", return_value=True) as post: + _telemetry.send_session_ping(blocking=True) + post.assert_called_once() + assert post.call_args[0][0]["event"] == "wau_ping" + assert _telemetry._ping_endpoint() == "https://api.example.com/telemetry/ping" + + def test_session_ping_noop_when_disabled(self): + with patch.object(_telemetry, "_post") as post: + _telemetry.send_session_ping(blocking=True) + post.assert_not_called() + + def test_fetch_wau_returns_error_dict_on_failure(self, monkeypatch): + monkeypatch.setenv(_telemetry.ENV_ENDPOINT, "http://127.0.0.1:9/telemetry/event") + data = _telemetry.fetch_wau(timeout=0.01) + assert data["wau"] == 0 + assert data["error"] == "unavailable" + def test_respects_kill_switch(self, monkeypatch): _telemetry.set_enabled(True) monkeypatch.setenv(_telemetry.ENV_KILL_SWITCH, "0")