From 89b60996cd3aec6ad3d2d6fefa891817ff8d6517 Mon Sep 17 00:00:00 2001 From: xav-db Date: Fri, 12 Dec 2025 22:23:36 +0000 Subject: [PATCH 1/5] implementing device id usage for anonymous unique user metrics --- Cargo.lock | 2 + helix-cli/Cargo.toml | 2 + helix-cli/src/commands/metrics.rs | 4 + helix-cli/src/metrics_sender.rs | 191 ++++++++++++++++++++++++++++++ metrics/src/events.rs | 1 + metrics/src/lib.rs | 3 +- 6 files changed, 201 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1582435d..a9111d2fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1374,6 +1374,7 @@ dependencies = [ "heed3", "helix-db", "helix-metrics", + "hex", "indicatif 0.18.3", "iota", "open", @@ -1383,6 +1384,7 @@ dependencies = [ "self_update", "serde", "serde_json", + "sha2", "tempfile", "tokio", "tokio-tungstenite", diff --git a/helix-cli/Cargo.toml b/helix-cli/Cargo.toml index 1b3476e62..b59bc3598 100644 --- a/helix-cli/Cargo.toml +++ b/helix-cli/Cargo.toml @@ -30,6 +30,8 @@ indicatif = "0.18.3" webbrowser = "1.0" heed3 = "0.22.0" open = "5.3" +sha2 = "0.10" +hex = "0.4" [dev-dependencies] tempfile = "3.23.0" diff --git a/helix-cli/src/commands/metrics.rs b/helix-cli/src/commands/metrics.rs index 51dbf68df..71f95aff5 100644 --- a/helix-cli/src/commands/metrics.rs +++ b/helix-cli/src/commands/metrics.rs @@ -79,6 +79,10 @@ async fn show_metrics_status() -> Result<()> { print_field("User ID", user_id); } + if let Some(device_id) = &config.device_id { + print_field("Device ID", device_id); + } + let last_updated = std::time::UNIX_EPOCH + std::time::Duration::from_secs(config.last_updated); if let Ok(datetime) = last_updated.duration_since(std::time::UNIX_EPOCH) { print_field( diff --git a/helix-cli/src/metrics_sender.rs b/helix-cli/src/metrics_sender.rs index d184b4d23..3bfe7bf8b 100644 --- a/helix-cli/src/metrics_sender.rs +++ b/helix-cli/src/metrics_sender.rs @@ -8,10 +8,12 @@ use helix_metrics::events::{ }; use reqwest::Client; use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; use std::{ fs::{self, File, OpenOptions}, io::{BufWriter, Write}, path::PathBuf, + process::Command, }; use tokio::task::JoinHandle; @@ -32,6 +34,7 @@ pub struct MetricsConfig { pub user_id: Option<&'static str>, pub email: Option<&'static str>, pub name: Option<&'static str>, + pub device_id: Option<&'static str>, pub last_updated: u64, pub install_event_sent: bool, } @@ -43,6 +46,7 @@ impl Default for MetricsConfig { user_id: None, email: None, name: None, + device_id: get_device_id(), last_updated: std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap() @@ -59,6 +63,7 @@ impl MetricsConfig { user_id, email: None, name: None, + device_id: get_device_id(), last_updated: std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap() @@ -246,6 +251,7 @@ impl MetricsSender { event_data: EventData::CliInstall, user_id: get_user_id(), email: get_email(), + device_id: get_device_id(), timestamp: get_current_timestamp(), }; self.send_event(event); @@ -278,6 +284,7 @@ impl MetricsSender { }), user_id: get_user_id(), email: get_email(), + device_id: get_device_id(), timestamp: get_current_timestamp(), }; self.send_event(event); @@ -305,6 +312,7 @@ impl MetricsSender { }), user_id: get_user_id(), email: get_email(), + device_id: get_device_id(), timestamp: get_current_timestamp(), }; self.send_event(event); @@ -332,6 +340,7 @@ impl MetricsSender { }), user_id: get_user_id(), email: get_email(), + device_id: get_device_id(), timestamp: get_current_timestamp(), }; self.send_event(event); @@ -359,6 +368,7 @@ impl MetricsSender { }), user_id: get_user_id(), email: get_email(), + device_id: get_device_id(), timestamp: get_current_timestamp(), }; self.send_event(event); @@ -387,6 +397,7 @@ impl MetricsSender { }), user_id: get_user_id(), email: get_email(), + device_id: get_device_id(), timestamp: get_current_timestamp(), }; self.send_event(event); @@ -411,3 +422,183 @@ fn get_current_timestamp() -> u64 { .unwrap() .as_secs() } + +/// Get a deterministic device ID derived from the machine's unique identifier. +/// This ID is stable across CLI reinstalls and file deletions. +fn get_device_id() -> Option<&'static str> { + get_machine_id() + .map(|id| hash_to_device_id(&id)) + .map(|s| -> &'static str { s.leak() }) +} + +/// Hash the machine ID to create a privacy-preserving device identifier. +fn hash_to_device_id(machine_id: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(b"helix-device-id:"); + hasher.update(machine_id.as_bytes()); + let result = hasher.finalize(); + // Use first 16 bytes (32 hex chars) for a shorter but still unique ID + hex::encode(&result[..16]) +} + +/// Get the machine's unique identifier (platform-specific). +#[cfg(target_os = "macos")] +fn get_machine_id() -> Option { + // macOS: Use IOPlatformUUID from IOKit + Command::new("ioreg") + .args(["-rd1", "-c", "IOPlatformExpertDevice"]) + .output() + .ok() + .and_then(|output| { + let stdout = String::from_utf8_lossy(&output.stdout); + stdout + .lines() + .find(|line| line.contains("IOPlatformUUID")) + .and_then(|line| { + line.split('"') + .nth(3) + .map(|s| s.to_string()) + }) + }) +} + +#[cfg(target_os = "linux")] +fn get_machine_id() -> Option { + // Linux: Read from /etc/machine-id or /var/lib/dbus/machine-id + fs::read_to_string("/etc/machine-id") + .or_else(|_| fs::read_to_string("/var/lib/dbus/machine-id")) + .ok() + .map(|s| s.trim().to_string()) +} + +#[cfg(target_os = "windows")] +fn get_machine_id() -> Option { + // Windows: Read MachineGuid from registry + Command::new("reg") + .args([ + "query", + r"HKLM\SOFTWARE\Microsoft\Cryptography", + "/v", + "MachineGuid", + ]) + .output() + .ok() + .and_then(|output| { + let stdout = String::from_utf8_lossy(&output.stdout); + stdout + .lines() + .find(|line| line.contains("MachineGuid")) + .and_then(|line| line.split_whitespace().last()) + .map(|s| s.to_string()) + }) +} + +#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] +fn get_machine_id() -> Option { + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_machine_id_returns_value() { + // Machine ID should be available on macOS, Linux, and Windows + let machine_id = get_machine_id(); + + #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))] + { + assert!( + machine_id.is_some(), + "Machine ID should be available on this platform" + ); + let id = machine_id.unwrap(); + assert!(!id.is_empty(), "Machine ID should not be empty"); + println!("Machine ID: {}", id); + } + + #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] + { + // On unsupported platforms, it's expected to be None + assert!(machine_id.is_none()); + } + } + + #[test] + fn test_hash_to_device_id_produces_consistent_hash() { + let machine_id = "test-machine-id-12345"; + let hash1 = hash_to_device_id(machine_id); + let hash2 = hash_to_device_id(machine_id); + + assert_eq!(hash1, hash2, "Same input should produce same hash"); + assert_eq!(hash1.len(), 32, "Hash should be 32 hex characters (16 bytes)"); + + // Verify it's valid hex + assert!( + hash1.chars().all(|c| c.is_ascii_hexdigit()), + "Hash should only contain hex digits" + ); + } + + #[test] + fn test_hash_to_device_id_different_inputs_produce_different_hashes() { + let hash1 = hash_to_device_id("machine-id-1"); + let hash2 = hash_to_device_id("machine-id-2"); + + assert_ne!(hash1, hash2, "Different inputs should produce different hashes"); + } + + #[test] + fn test_get_device_id_is_deterministic() { + // Get device ID twice - should be the same + let device_id1 = get_device_id(); + let device_id2 = get_device_id(); + + #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))] + { + assert!(device_id1.is_some(), "Device ID should be available"); + assert!(device_id2.is_some(), "Device ID should be available"); + assert_eq!( + device_id1.unwrap(), + device_id2.unwrap(), + "Device ID should be deterministic" + ); + println!("Device ID: {}", device_id1.unwrap()); + } + } + + #[test] + fn test_device_id_format() { + let device_id = get_device_id(); + + #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))] + { + let id = device_id.expect("Device ID should be available"); + assert_eq!(id.len(), 32, "Device ID should be 32 characters"); + assert!( + id.chars().all(|c| c.is_ascii_hexdigit()), + "Device ID should only contain hex digits" + ); + } + } + + #[test] + fn test_hash_includes_salt() { + // The hash function includes a salt "helix-device-id:" + // This ensures different apps using machine ID get different hashes + let machine_id = "same-machine-id"; + + // Direct SHA256 of machine_id without salt would be different + let mut hasher = Sha256::new(); + hasher.update(machine_id.as_bytes()); + let direct_hash = hex::encode(&hasher.finalize()[..16]); + + let salted_hash = hash_to_device_id(machine_id); + + assert_ne!( + direct_hash, salted_hash, + "Salted hash should differ from unsalted" + ); + } +} diff --git a/metrics/src/events.rs b/metrics/src/events.rs index 27cd145d8..e8e135898 100644 --- a/metrics/src/events.rs +++ b/metrics/src/events.rs @@ -51,6 +51,7 @@ pub struct RawEvent { pub event_data: D, pub user_id: Option<&'static str>, pub email: Option<&'static str>, + pub device_id: Option<&'static str>, pub timestamp: u64, } diff --git a/metrics/src/lib.rs b/metrics/src/lib.rs index ad6039f47..d83bc245e 100644 --- a/metrics/src/lib.rs +++ b/metrics/src/lib.rs @@ -213,6 +213,7 @@ fn create_raw_event( .expect("Failed to get system time") .as_secs(), email: None, + device_id: None, } } @@ -602,7 +603,6 @@ mod tests { // Channel should have fewer or equal batches let _final_count = METRICS_STATE.events_rx.len(); - } } @@ -691,5 +691,4 @@ mod tests { assert!(json_str.starts_with('[')); assert!(json_str.ends_with(']')); } - } From cb382978f00b7d38e91d6ac8b411ed23106e66fb Mon Sep 17 00:00:00 2001 From: xav-db Date: Fri, 12 Dec 2025 23:00:35 +0000 Subject: [PATCH 2/5] fixing so device id is passed to docker as env var --- helix-cli/src/docker.rs | 5 +++++ helix-cli/src/metrics_sender.rs | 2 +- metrics/src/lib.rs | 9 ++++++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/helix-cli/src/docker.rs b/helix-cli/src/docker.rs index fbfa237da..dd3dd4133 100644 --- a/helix-cli/src/docker.rs +++ b/helix-cli/src/docker.rs @@ -117,6 +117,11 @@ impl<'a> DockerManager<'a> { env_vars.push(format!("GEMINI_API_KEY={gemini_key}")); } + // Pass device ID to container for metrics correlation + if let Some(device_id) = crate::metrics_sender::get_device_id() { + env_vars.push(format!("HELIX_DEVICE_ID={device_id}")); + } + env_vars } diff --git a/helix-cli/src/metrics_sender.rs b/helix-cli/src/metrics_sender.rs index 3bfe7bf8b..5d5a7f1a8 100644 --- a/helix-cli/src/metrics_sender.rs +++ b/helix-cli/src/metrics_sender.rs @@ -425,7 +425,7 @@ fn get_current_timestamp() -> u64 { /// Get a deterministic device ID derived from the machine's unique identifier. /// This ID is stable across CLI reinstalls and file deletions. -fn get_device_id() -> Option<&'static str> { +pub fn get_device_id() -> Option<&'static str> { get_machine_id() .map(|id| hash_to_device_id(&id)) .map(|s| -> &'static str { s.leak() }) diff --git a/metrics/src/lib.rs b/metrics/src/lib.rs index d83bc245e..36dd27765 100644 --- a/metrics/src/lib.rs +++ b/metrics/src/lib.rs @@ -47,6 +47,13 @@ pub static METRICS_ENABLED: LazyLock = LazyLock::new(|| { true }); +/// Device ID passed from CLI when deploying container (for metrics correlation) +pub static HELIX_DEVICE_ID: LazyLock> = LazyLock::new(|| { + std::env::var("HELIX_DEVICE_ID") + .ok() + .map(|s| -> &'static str { s.leak() }) +}); + pub const METRICS_URL: &str = "https://logs.helix-db.com/v2"; // Thread-local buffer for events @@ -213,7 +220,7 @@ fn create_raw_event( .expect("Failed to get system time") .as_secs(), email: None, - device_id: None, + device_id: *HELIX_DEVICE_ID, } } From 2a472a78d292b3665c2f12fdd8fe7b3b305eefc4 Mon Sep 17 00:00:00 2001 From: xav-db Date: Mon, 12 Jan 2026 09:54:15 +0000 Subject: [PATCH 3/5] clippy fix --- helix-cli/src/metrics_sender.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/helix-cli/src/metrics_sender.rs b/helix-cli/src/metrics_sender.rs index 5d5a7f1a8..e1a6751f0 100644 --- a/helix-cli/src/metrics_sender.rs +++ b/helix-cli/src/metrics_sender.rs @@ -1,6 +1,6 @@ use chrono::{Local, NaiveDate}; use dirs::home_dir; -use eyre::{eyre, OptionExt, Result}; +use eyre::{OptionExt, Result, eyre}; use flume::{Receiver, Sender, unbounded}; use helix_metrics::events::{ CompileEvent, DeployCloudEvent, DeployLocalEvent, EventData, EventType, RawEvent, @@ -13,7 +13,6 @@ use std::{ fs::{self, File, OpenOptions}, io::{BufWriter, Write}, path::PathBuf, - process::Command, }; use tokio::task::JoinHandle; @@ -445,6 +444,7 @@ fn hash_to_device_id(machine_id: &str) -> String { #[cfg(target_os = "macos")] fn get_machine_id() -> Option { // macOS: Use IOPlatformUUID from IOKit + use std::process::Command; Command::new("ioreg") .args(["-rd1", "-c", "IOPlatformExpertDevice"]) .output() @@ -454,11 +454,7 @@ fn get_machine_id() -> Option { stdout .lines() .find(|line| line.contains("IOPlatformUUID")) - .and_then(|line| { - line.split('"') - .nth(3) - .map(|s| s.to_string()) - }) + .and_then(|line| line.split('"').nth(3).map(|s| s.to_string())) }) } @@ -474,6 +470,7 @@ fn get_machine_id() -> Option { #[cfg(target_os = "windows")] fn get_machine_id() -> Option { // Windows: Read MachineGuid from registry + use std::process::Command; Command::new("reg") .args([ "query", @@ -532,7 +529,11 @@ mod tests { let hash2 = hash_to_device_id(machine_id); assert_eq!(hash1, hash2, "Same input should produce same hash"); - assert_eq!(hash1.len(), 32, "Hash should be 32 hex characters (16 bytes)"); + assert_eq!( + hash1.len(), + 32, + "Hash should be 32 hex characters (16 bytes)" + ); // Verify it's valid hex assert!( @@ -546,7 +547,10 @@ mod tests { let hash1 = hash_to_device_id("machine-id-1"); let hash2 = hash_to_device_id("machine-id-2"); - assert_ne!(hash1, hash2, "Different inputs should produce different hashes"); + assert_ne!( + hash1, hash2, + "Different inputs should produce different hashes" + ); } #[test] From 537eaaae471a5c29b0faaed490c2c7c8342c4573 Mon Sep 17 00:00:00 2001 From: xav-db Date: Tue, 27 Jan 2026 09:06:37 +0000 Subject: [PATCH 4/5] fix: wrap get_device_id() in LazyLock to prevent repeated memory leak. The .leak() call now only executes once via LazyLock, ensuring the device ID string is leaked exactly once rather than on every call. --- helix-cli/src/metrics_sender.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/helix-cli/src/metrics_sender.rs b/helix-cli/src/metrics_sender.rs index e1a6751f0..52ee10911 100644 --- a/helix-cli/src/metrics_sender.rs +++ b/helix-cli/src/metrics_sender.rs @@ -425,9 +425,13 @@ fn get_current_timestamp() -> u64 { /// Get a deterministic device ID derived from the machine's unique identifier. /// This ID is stable across CLI reinstalls and file deletions. pub fn get_device_id() -> Option<&'static str> { - get_machine_id() - .map(|id| hash_to_device_id(&id)) - .map(|s| -> &'static str { s.leak() }) + use std::sync::LazyLock; + static DEVICE_ID: LazyLock> = LazyLock::new(|| { + get_machine_id() + .map(|id| hash_to_device_id(&id)) + .map(|s| -> &'static str { s.leak() }) + }); + *DEVICE_ID } /// Hash the machine ID to create a privacy-preserving device identifier. From 6dca735df056e0099f0dac7281a1b752a6b4dfcb Mon Sep 17 00:00:00 2001 From: xav-db Date: Thu, 29 Jan 2026 08:42:09 +0000 Subject: [PATCH 5/5] clippy checks --- helix-cli/src/commands/metrics.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/helix-cli/src/commands/metrics.rs b/helix-cli/src/commands/metrics.rs index 8e0aa6d54..83492d049 100644 --- a/helix-cli/src/commands/metrics.rs +++ b/helix-cli/src/commands/metrics.rs @@ -4,6 +4,7 @@ use crate::{ MetricsAction, metrics_sender::{MetricsLevel, load_metrics_config, save_metrics_config}, output, + utils::print_field, }; use color_eyre::owo_colors::OwoColorize; use eyre::Result;