-
-
Notifications
You must be signed in to change notification settings - Fork 216
impr (cli): Device id for uniqueness #797
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
89b6099
cb38297
2a472a7
537eaaa
ba42146
6dca735
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ use helix_metrics::events::{ | |
| }; | ||
| use reqwest::Client; | ||
| use serde::{Deserialize, Serialize}; | ||
| use sha2::{Digest, Sha256}; | ||
| use std::{ | ||
| fs::{self, File, OpenOptions}, | ||
| io::{BufWriter, Write}, | ||
|
|
@@ -32,6 +33,7 @@ pub struct MetricsConfig { | |
| pub user_id: Option<&'static str>, | ||
| pub email: Option<&'static str>, | ||
| pub name: Option<&'static str>, | ||
| pub device_id: Option<&'static str>, | ||
| pub last_updated: u64, | ||
| pub install_event_sent: bool, | ||
| } | ||
|
|
@@ -43,6 +45,7 @@ impl Default for MetricsConfig { | |
| user_id: None, | ||
| email: None, | ||
| name: None, | ||
| device_id: get_device_id(), | ||
| last_updated: std::time::SystemTime::now() | ||
| .duration_since(std::time::UNIX_EPOCH) | ||
| .unwrap() | ||
|
|
@@ -59,6 +62,7 @@ impl MetricsConfig { | |
| user_id, | ||
| email: None, | ||
| name: None, | ||
| device_id: get_device_id(), | ||
| last_updated: std::time::SystemTime::now() | ||
| .duration_since(std::time::UNIX_EPOCH) | ||
| .unwrap() | ||
|
|
@@ -246,6 +250,7 @@ impl MetricsSender { | |
| event_data: EventData::CliInstall, | ||
| user_id: get_user_id(), | ||
| email: get_email(), | ||
| device_id: get_device_id(), | ||
| timestamp: get_current_timestamp(), | ||
| }; | ||
| self.send_event(event); | ||
|
|
@@ -278,6 +283,7 @@ impl MetricsSender { | |
| }), | ||
| user_id: get_user_id(), | ||
| email: get_email(), | ||
| device_id: get_device_id(), | ||
| timestamp: get_current_timestamp(), | ||
| }; | ||
| self.send_event(event); | ||
|
|
@@ -305,6 +311,7 @@ impl MetricsSender { | |
| }), | ||
| user_id: get_user_id(), | ||
| email: get_email(), | ||
| device_id: get_device_id(), | ||
| timestamp: get_current_timestamp(), | ||
| }; | ||
| self.send_event(event); | ||
|
|
@@ -332,6 +339,7 @@ impl MetricsSender { | |
| }), | ||
| user_id: get_user_id(), | ||
| email: get_email(), | ||
| device_id: get_device_id(), | ||
| timestamp: get_current_timestamp(), | ||
| }; | ||
| self.send_event(event); | ||
|
|
@@ -359,6 +367,7 @@ impl MetricsSender { | |
| }), | ||
| user_id: get_user_id(), | ||
| email: get_email(), | ||
| device_id: get_device_id(), | ||
| timestamp: get_current_timestamp(), | ||
| }; | ||
| self.send_event(event); | ||
|
|
@@ -387,6 +396,7 @@ impl MetricsSender { | |
| }), | ||
| user_id: get_user_id(), | ||
| email: get_email(), | ||
| device_id: get_device_id(), | ||
| timestamp: get_current_timestamp(), | ||
| }; | ||
| self.send_event(event); | ||
|
|
@@ -411,3 +421,192 @@ fn get_current_timestamp() -> u64 { | |
| .unwrap() | ||
| .as_secs() | ||
| } | ||
|
|
||
| /// Get a deterministic device ID derived from the machine's unique identifier. | ||
| /// This ID is stable across CLI reinstalls and file deletions. | ||
| pub fn get_device_id() -> Option<&'static str> { | ||
| use std::sync::LazyLock; | ||
| static DEVICE_ID: LazyLock<Option<&'static str>> = LazyLock::new(|| { | ||
| get_machine_id() | ||
| .map(|id| hash_to_device_id(&id)) | ||
| .map(|s| -> &'static str { s.leak() }) | ||
| }); | ||
| *DEVICE_ID | ||
| } | ||
|
|
||
| /// Hash the machine ID to create a privacy-preserving device identifier. | ||
| fn hash_to_device_id(machine_id: &str) -> String { | ||
| let mut hasher = Sha256::new(); | ||
| hasher.update(b"helix-device-id:"); | ||
| hasher.update(machine_id.as_bytes()); | ||
| let result = hasher.finalize(); | ||
| // Use first 16 bytes (32 hex chars) for a shorter but still unique ID | ||
| hex::encode(&result[..16]) | ||
| } | ||
|
|
||
| /// Get the machine's unique identifier (platform-specific). | ||
| #[cfg(target_os = "macos")] | ||
| fn get_machine_id() -> Option<String> { | ||
| // macOS: Use IOPlatformUUID from IOKit | ||
| use std::process::Command; | ||
| Command::new("ioreg") | ||
| .args(["-rd1", "-c", "IOPlatformExpertDevice"]) | ||
| .output() | ||
| .ok() | ||
| .and_then(|output| { | ||
| let stdout = String::from_utf8_lossy(&output.stdout); | ||
| stdout | ||
| .lines() | ||
| .find(|line| line.contains("IOPlatformUUID")) | ||
| .and_then(|line| line.split('"').nth(3).map(|s| s.to_string())) | ||
| }) | ||
| } | ||
|
|
||
| #[cfg(target_os = "linux")] | ||
| fn get_machine_id() -> Option<String> { | ||
| // Linux: Read from /etc/machine-id or /var/lib/dbus/machine-id | ||
| fs::read_to_string("/etc/machine-id") | ||
| .or_else(|_| fs::read_to_string("/var/lib/dbus/machine-id")) | ||
| .ok() | ||
| .map(|s| s.trim().to_string()) | ||
| } | ||
|
|
||
| #[cfg(target_os = "windows")] | ||
| fn get_machine_id() -> Option<String> { | ||
| // Windows: Read MachineGuid from registry | ||
| use std::process::Command; | ||
| Command::new("reg") | ||
| .args([ | ||
| "query", | ||
| r"HKLM\SOFTWARE\Microsoft\Cryptography", | ||
| "/v", | ||
| "MachineGuid", | ||
| ]) | ||
| .output() | ||
| .ok() | ||
| .and_then(|output| { | ||
| let stdout = String::from_utf8_lossy(&output.stdout); | ||
| stdout | ||
| .lines() | ||
| .find(|line| line.contains("MachineGuid")) | ||
| .and_then(|line| line.split_whitespace().last()) | ||
| .map(|s| s.to_string()) | ||
| }) | ||
| } | ||
|
|
||
| #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] | ||
| fn get_machine_id() -> Option<String> { | ||
| None | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
|
|
||
| #[test] | ||
| fn test_get_machine_id_returns_value() { | ||
| // Machine ID should be available on macOS, Linux, and Windows | ||
| let machine_id = get_machine_id(); | ||
|
|
||
| #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))] | ||
| { | ||
| assert!( | ||
| machine_id.is_some(), | ||
| "Machine ID should be available on this platform" | ||
| ); | ||
| let id = machine_id.unwrap(); | ||
| assert!(!id.is_empty(), "Machine ID should not be empty"); | ||
| println!("Machine ID: {}", id); | ||
| } | ||
|
|
||
| #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))] | ||
| { | ||
| // On unsupported platforms, it's expected to be None | ||
| assert!(machine_id.is_none()); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_hash_to_device_id_produces_consistent_hash() { | ||
| let machine_id = "test-machine-id-12345"; | ||
| let hash1 = hash_to_device_id(machine_id); | ||
| let hash2 = hash_to_device_id(machine_id); | ||
|
|
||
| assert_eq!(hash1, hash2, "Same input should produce same hash"); | ||
| assert_eq!( | ||
| hash1.len(), | ||
| 32, | ||
| "Hash should be 32 hex characters (16 bytes)" | ||
| ); | ||
|
|
||
| // Verify it's valid hex | ||
| assert!( | ||
| hash1.chars().all(|c| c.is_ascii_hexdigit()), | ||
| "Hash should only contain hex digits" | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_hash_to_device_id_different_inputs_produce_different_hashes() { | ||
| let hash1 = hash_to_device_id("machine-id-1"); | ||
| let hash2 = hash_to_device_id("machine-id-2"); | ||
|
|
||
| assert_ne!( | ||
| hash1, hash2, | ||
| "Different inputs should produce different hashes" | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_get_device_id_is_deterministic() { | ||
| // Get device ID twice - should be the same | ||
| let device_id1 = get_device_id(); | ||
| let device_id2 = get_device_id(); | ||
|
|
||
| #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))] | ||
| { | ||
| assert!(device_id1.is_some(), "Device ID should be available"); | ||
| assert!(device_id2.is_some(), "Device ID should be available"); | ||
| assert_eq!( | ||
| device_id1.unwrap(), | ||
| device_id2.unwrap(), | ||
| "Device ID should be deterministic" | ||
|
Comment on lines
+557
to
+573
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test calls Additionally, the assertion on line 562-565 compares two leaked pointers. After the fix, both calls will return the same cached static reference, so the test will still pass correctly. Prompt To Fix With AIThis is a comment left during a code review.
Path: helix-cli/src/metrics_sender.rs
Line: 553:569
Comment:
This test calls `get_device_id()` twice (lines 555-556), which currently causes two memory leaks due to the bug in `get_device_id()`. Once that function is fixed with a `LazyLock` pattern, the test will work correctly without leaking memory.
Additionally, the assertion on line 562-565 compares two leaked pointers. After the fix, both calls will return the same cached static reference, so the test will still pass correctly.
How can I resolve this? If you propose a fix, please make it concise. |
||
| ); | ||
| println!("Device ID: {}", device_id1.unwrap()); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_device_id_format() { | ||
| let device_id = get_device_id(); | ||
|
|
||
| #[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))] | ||
| { | ||
| let id = device_id.expect("Device ID should be available"); | ||
| assert_eq!(id.len(), 32, "Device ID should be 32 characters"); | ||
| assert!( | ||
| id.chars().all(|c| c.is_ascii_hexdigit()), | ||
| "Device ID should only contain hex digits" | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_hash_includes_salt() { | ||
| // The hash function includes a salt "helix-device-id:" | ||
| // This ensures different apps using machine ID get different hashes | ||
| let machine_id = "same-machine-id"; | ||
|
|
||
| // Direct SHA256 of machine_id without salt would be different | ||
| let mut hasher = Sha256::new(); | ||
| hasher.update(machine_id.as_bytes()); | ||
| let direct_hash = hex::encode(&hasher.finalize()[..16]); | ||
|
|
||
| let salted_hash = hash_to_device_id(machine_id); | ||
|
|
||
| assert_ne!( | ||
| direct_hash, salted_hash, | ||
| "Salted hash should differ from unsalted" | ||
| ); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Windows registry parsing uses
.split_whitespace().last()which assumes a specific output format from thereg querycommand. While this should work for most cases, there's a potential issue if the registry value contains spaces or if the output format changes.Consider a more robust parsing approach:
The typical output is:
A safer approach would be to:
However, since this is used across the Windows ecosystem and the current parsing should work for the standard format, this is a minor concern. The
.ok()wrapper ensures that parsing failures returnNonerather than panicking, which is good error handling.Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!
Prompt To Fix With AI