diff --git a/Cargo.lock b/Cargo.lock
index 07da69b4..fedc8caf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1442,6 +1442,7 @@ dependencies = [
  "heed3",
  "helix-db",
  "helix-metrics",
+ "hex",
  "indicatif 0.18.3",
  "iota",
  "open",
@@ -1452,6 +1453,7 @@ dependencies = [
  "self_update",
  "serde",
  "serde_json",
+ "sha2",
  "tempfile",
  "tokio",
  "tokio-tungstenite",
diff --git a/helix-cli/Cargo.toml b/helix-cli/Cargo.toml
index 13563a9a..d8a1b62f 100644
--- a/helix-cli/Cargo.toml
+++ b/helix-cli/Cargo.toml
@@ -31,6 +31,8 @@ indicatif = "0.18.3"
 webbrowser = "1.0"
 heed3 = "0.22.0"
 open = "5.3"
+sha2 = "0.10"
+hex = "0.4"
 ratatui = "0.29"
 crossterm = "0.28"
 
diff --git a/helix-cli/src/commands/metrics.rs b/helix-cli/src/commands/metrics.rs
index d0820413..83492d04 100644
--- a/helix-cli/src/commands/metrics.rs
+++ b/helix-cli/src/commands/metrics.rs
@@ -4,6 +4,7 @@ use crate::{
     MetricsAction,
     metrics_sender::{MetricsLevel, load_metrics_config, save_metrics_config},
     output,
+    utils::print_field,
 };
 use color_eyre::owo_colors::OwoColorize;
 use eyre::Result;
@@ -84,6 +85,10 @@ async fn show_metrics_status() -> Result<()> {
         println!(" {}: {user_id}", "User ID".bright_white().bold());
     }
 
+    if let Some(device_id) = &config.device_id {
+        print_field("Device ID", device_id);
+    }
+
     let last_updated = std::time::UNIX_EPOCH + std::time::Duration::from_secs(config.last_updated);
     if let Ok(datetime) = last_updated.duration_since(std::time::UNIX_EPOCH) {
         println!(
diff --git a/helix-cli/src/docker.rs b/helix-cli/src/docker.rs
index 80ae4083..cee12b09 100644
--- a/helix-cli/src/docker.rs
+++ b/helix-cli/src/docker.rs
@@ -144,6 +144,11 @@ impl<'a> DockerManager<'a> {
             env_vars.push(format!("GEMINI_API_KEY={gemini_key}"));
         }
 
+        // Pass device ID to container for metrics correlation
+        if let Some(device_id) = crate::metrics_sender::get_device_id() {
+            env_vars.push(format!("HELIX_DEVICE_ID={device_id}"));
+        }
+
         env_vars
     }
 
diff --git a/helix-cli/src/metrics_sender.rs b/helix-cli/src/metrics_sender.rs
index 3c4fd0f4..52ee1091 100644
--- a/helix-cli/src/metrics_sender.rs
+++ b/helix-cli/src/metrics_sender.rs
@@ -8,6 +8,7 @@ use helix_metrics::events::{
 };
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
 use std::{
     fs::{self, File, OpenOptions},
     io::{BufWriter, Write},
@@ -32,6 +33,7 @@ pub struct MetricsConfig {
     pub user_id: Option<&'static str>,
     pub email: Option<&'static str>,
     pub name: Option<&'static str>,
+    pub device_id: Option<&'static str>,
     pub last_updated: u64,
     pub install_event_sent: bool,
 }
@@ -43,6 +45,7 @@ impl Default for MetricsConfig {
             user_id: None,
             email: None,
             name: None,
+            device_id: get_device_id(),
             last_updated: std::time::SystemTime::now()
                 .duration_since(std::time::UNIX_EPOCH)
                 .unwrap()
@@ -59,6 +62,7 @@ impl MetricsConfig {
             user_id,
             email: None,
             name: None,
+            device_id: get_device_id(),
             last_updated: std::time::SystemTime::now()
                 .duration_since(std::time::UNIX_EPOCH)
                 .unwrap()
@@ -246,6 +250,7 @@ impl MetricsSender {
             event_data: EventData::CliInstall,
             user_id: get_user_id(),
             email: get_email(),
+            device_id: get_device_id(),
             timestamp: get_current_timestamp(),
         };
         self.send_event(event);
@@ -278,6 +283,7 @@ impl MetricsSender {
             }),
             user_id: get_user_id(),
             email: get_email(),
+            device_id: get_device_id(),
             timestamp: get_current_timestamp(),
         };
         self.send_event(event);
@@ -305,6 +311,7 @@ impl MetricsSender {
             }),
             user_id: get_user_id(),
             email: get_email(),
+            device_id: get_device_id(),
             timestamp: get_current_timestamp(),
         };
         self.send_event(event);
@@ -332,6 +339,7 @@ impl MetricsSender {
             }),
             user_id: get_user_id(),
             email: get_email(),
+            device_id: get_device_id(),
             timestamp: get_current_timestamp(),
         };
         self.send_event(event);
@@ -359,6 +367,7 @@ impl MetricsSender {
             }),
             user_id: get_user_id(),
             email: get_email(),
+            device_id: get_device_id(),
             timestamp: get_current_timestamp(),
         };
         self.send_event(event);
@@ -387,6 +396,7 @@ impl MetricsSender {
             }),
             user_id: get_user_id(),
             email: get_email(),
+            device_id: get_device_id(),
             timestamp: get_current_timestamp(),
         };
         self.send_event(event);
@@ -411,3 +421,200 @@ fn get_current_timestamp() -> u64 {
         .unwrap()
         .as_secs()
 }
+
+/// Get a deterministic device ID derived from the machine's unique identifier.
+/// This ID is stable across CLI reinstalls and file deletions.
+///
+/// The ID is computed on first use and cached for the rest of the process.
+/// Returns `None` when no machine identifier could be read.
+pub fn get_device_id() -> Option<&'static str> {
+    use std::sync::LazyLock;
+    static DEVICE_ID: LazyLock<Option<&'static str>> = LazyLock::new(|| {
+        get_machine_id()
+            .map(|id| hash_to_device_id(&id))
+            .map(|s| -> &'static str { s.leak() })
+    });
+    *DEVICE_ID
+}
+
+/// Hash the machine ID to create a privacy-preserving device identifier.
+fn hash_to_device_id(machine_id: &str) -> String {
+    let mut hasher = Sha256::new();
+    hasher.update(b"helix-device-id:");
+    hasher.update(machine_id.as_bytes());
+    let result = hasher.finalize();
+    // Use first 16 bytes (32 hex chars) for a shorter but still unique ID
+    hex::encode(&result[..16])
+}
+
+/// Get the machine's unique identifier (platform-specific).
+#[cfg(target_os = "macos")]
+fn get_machine_id() -> Option<String> {
+    // macOS: Use IOPlatformUUID from IOKit
+    use std::process::Command;
+    Command::new("ioreg")
+        .args(["-rd1", "-c", "IOPlatformExpertDevice"])
+        .output()
+        .ok()
+        .and_then(|output| {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            stdout
+                .lines()
+                .find(|line| line.contains("IOPlatformUUID"))
+                .and_then(|line| line.split('"').nth(3).map(|s| s.to_string()))
+        })
+}
+
+/// Linux: read the machine ID from `/etc/machine-id`, falling back to
+/// `/var/lib/dbus/machine-id`.
+///
+/// A file that cannot be read, or that is blank after trimming, is skipped:
+/// hashing an empty string would give every such machine the same device ID.
+#[cfg(target_os = "linux")]
+fn get_machine_id() -> Option<String> {
+    ["/etc/machine-id", "/var/lib/dbus/machine-id"]
+        .iter()
+        .filter_map(|path| fs::read_to_string(path).ok())
+        .map(|contents| contents.trim().to_string())
+        .find(|id| !id.is_empty())
+}
+
+#[cfg(target_os = "windows")]
+fn get_machine_id() -> Option<String> {
+    // Windows: Read MachineGuid from registry
+    use std::process::Command;
+    Command::new("reg")
+        .args([
+            "query",
+            r"HKLM\SOFTWARE\Microsoft\Cryptography",
+            "/v",
+            "MachineGuid",
+        ])
+        .output()
+        .ok()
+        .and_then(|output| {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            stdout
+                .lines()
+                .find(|line| line.contains("MachineGuid"))
+                .and_then(|line| line.split_whitespace().last())
+                .map(|s| s.to_string())
+        })
+}
+
+#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
+fn get_machine_id() -> Option<String> {
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_get_machine_id_returns_value() {
+        // Machine ID should be available on macOS, Linux, and Windows
+        let machine_id = get_machine_id();
+
+        #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
+        {
+            assert!(
+                machine_id.is_some(),
+                "Machine ID should be available on this platform"
+            );
+            let id = machine_id.unwrap();
+            assert!(!id.is_empty(), "Machine ID should not be empty");
+            println!("Machine ID: {}", id);
+        }
+
+        #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
+        {
+            // On unsupported platforms, it's expected to be None
+            assert!(machine_id.is_none());
+        }
+    }
+
+    #[test]
+    fn test_hash_to_device_id_produces_consistent_hash() {
+        let machine_id = "test-machine-id-12345";
+        let hash1 = hash_to_device_id(machine_id);
+        let hash2 = hash_to_device_id(machine_id);
+
+        assert_eq!(hash1, hash2, "Same input should produce same hash");
+        assert_eq!(
+            hash1.len(),
+            32,
+            "Hash should be 32 hex characters (16 bytes)"
+        );
+
+        // Verify it's valid hex
+        assert!(
+            hash1.chars().all(|c| c.is_ascii_hexdigit()),
+            "Hash should only contain hex digits"
+        );
+    }
+
+    #[test]
+    fn test_hash_to_device_id_different_inputs_produce_different_hashes() {
+        let hash1 = hash_to_device_id("machine-id-1");
+        let hash2 = hash_to_device_id("machine-id-2");
+
+        assert_ne!(
+            hash1, hash2,
+            "Different inputs should produce different hashes"
+        );
+    }
+
+    #[test]
+    fn test_get_device_id_is_deterministic() {
+        // Get device ID twice - should be the same
+        let device_id1 = get_device_id();
+        let device_id2 = get_device_id();
+
+        #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
+        {
+            assert!(device_id1.is_some(), "Device ID should be available");
+            assert!(device_id2.is_some(), "Device ID should be available");
+            assert_eq!(
+                device_id1.unwrap(),
+                device_id2.unwrap(),
+                "Device ID should be deterministic"
+            );
+            println!("Device ID: {}", device_id1.unwrap());
+        }
+    }
+
+    #[test]
+    fn test_device_id_format() {
+        let device_id = get_device_id();
+
+        #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
+        {
+            let id = device_id.expect("Device ID should be available");
+            assert_eq!(id.len(), 32, "Device ID should be 32 characters");
+            assert!(
+                id.chars().all(|c| c.is_ascii_hexdigit()),
+                "Device ID should only contain hex digits"
+            );
+        }
+    }
+
+    #[test]
+    fn test_hash_includes_salt() {
+        // The hash function includes a salt "helix-device-id:"
+        // This ensures different apps using machine ID get different hashes
+        let machine_id = "same-machine-id";
+
+        // Direct SHA256 of machine_id without salt would be different
+        let mut hasher = Sha256::new();
+        hasher.update(machine_id.as_bytes());
+        let direct_hash = hex::encode(&hasher.finalize()[..16]);
+
+        let salted_hash = hash_to_device_id(machine_id);
+
+        assert_ne!(
+            direct_hash, salted_hash,
+            "Salted hash should differ from unsalted"
+        );
+    }
+}
diff --git a/metrics/src/events.rs b/metrics/src/events.rs
index 75406603..3a5c4c9f 100644
--- a/metrics/src/events.rs
+++ b/metrics/src/events.rs
@@ -51,6 +51,7 @@ pub struct RawEvent {
     pub event_data: D,
     pub user_id: Option<&'static str>,
     pub email: Option<&'static str>,
+    pub device_id: Option<&'static str>,
     pub timestamp: u64,
 }
 
diff --git a/metrics/src/lib.rs b/metrics/src/lib.rs
index 2277af31..2a88289f 100644
--- a/metrics/src/lib.rs
+++ b/metrics/src/lib.rs
@@ -47,6 +47,15 @@ pub static METRICS_ENABLED: LazyLock = LazyLock::new(|| {
     true
 });
 
+/// Device ID passed from CLI when deploying container (for metrics correlation).
+/// `None` when the `HELIX_DEVICE_ID` variable is unset, unreadable, or empty.
+pub static HELIX_DEVICE_ID: LazyLock<Option<&'static str>> = LazyLock::new(|| {
+    std::env::var("HELIX_DEVICE_ID")
+        .ok()
+        .filter(|s| !s.is_empty())
+        .map(|s| -> &'static str { s.leak() })
+});
+
 pub const METRICS_URL: &str = "https://logs.helix-db.com/v2";
 
 // Thread-local buffer for events
@@ -213,6 +222,7 @@ fn create_raw_event(
             .expect("Failed to get system time")
             .as_secs(),
         email: None,
+        device_id: *HELIX_DEVICE_ID,
     }
 }
 