Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions helix-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ indicatif = "0.18.3"
webbrowser = "1.0"
heed3 = "0.22.0"
open = "5.3"
sha2 = "0.10"
hex = "0.4"
ratatui = "0.29"
crossterm = "0.28"

Expand Down
5 changes: 5 additions & 0 deletions helix-cli/src/commands/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::{
MetricsAction,
metrics_sender::{MetricsLevel, load_metrics_config, save_metrics_config},
output,
utils::print_field,
};
use color_eyre::owo_colors::OwoColorize;
use eyre::Result;
Expand Down Expand Up @@ -84,6 +85,10 @@ async fn show_metrics_status() -> Result<()> {
println!(" {}: {user_id}", "User ID".bright_white().bold());
}

if let Some(device_id) = &config.device_id {
print_field("Device ID", device_id);
}

let last_updated = std::time::UNIX_EPOCH + std::time::Duration::from_secs(config.last_updated);
if let Ok(datetime) = last_updated.duration_since(std::time::UNIX_EPOCH) {
println!(
Expand Down
5 changes: 5 additions & 0 deletions helix-cli/src/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ impl<'a> DockerManager<'a> {
env_vars.push(format!("GEMINI_API_KEY={gemini_key}"));
}

// Pass device ID to container for metrics correlation
if let Some(device_id) = crate::metrics_sender::get_device_id() {
env_vars.push(format!("HELIX_DEVICE_ID={device_id}"));
}

env_vars
}

Expand Down
199 changes: 199 additions & 0 deletions helix-cli/src/metrics_sender.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use helix_metrics::events::{
};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::{
fs::{self, File, OpenOptions},
io::{BufWriter, Write},
Expand All @@ -32,6 +33,7 @@ pub struct MetricsConfig {
pub user_id: Option<&'static str>,
pub email: Option<&'static str>,
pub name: Option<&'static str>,
pub device_id: Option<&'static str>,
pub last_updated: u64,
pub install_event_sent: bool,
}
Expand All @@ -43,6 +45,7 @@ impl Default for MetricsConfig {
user_id: None,
email: None,
name: None,
device_id: get_device_id(),
last_updated: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
Expand All @@ -59,6 +62,7 @@ impl MetricsConfig {
user_id,
email: None,
name: None,
device_id: get_device_id(),
last_updated: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
Expand Down Expand Up @@ -246,6 +250,7 @@ impl MetricsSender {
event_data: EventData::CliInstall,
user_id: get_user_id(),
email: get_email(),
device_id: get_device_id(),
timestamp: get_current_timestamp(),
};
self.send_event(event);
Expand Down Expand Up @@ -278,6 +283,7 @@ impl MetricsSender {
}),
user_id: get_user_id(),
email: get_email(),
device_id: get_device_id(),
timestamp: get_current_timestamp(),
};
self.send_event(event);
Expand Down Expand Up @@ -305,6 +311,7 @@ impl MetricsSender {
}),
user_id: get_user_id(),
email: get_email(),
device_id: get_device_id(),
timestamp: get_current_timestamp(),
};
self.send_event(event);
Expand Down Expand Up @@ -332,6 +339,7 @@ impl MetricsSender {
}),
user_id: get_user_id(),
email: get_email(),
device_id: get_device_id(),
timestamp: get_current_timestamp(),
};
self.send_event(event);
Expand Down Expand Up @@ -359,6 +367,7 @@ impl MetricsSender {
}),
user_id: get_user_id(),
email: get_email(),
device_id: get_device_id(),
timestamp: get_current_timestamp(),
};
self.send_event(event);
Expand Down Expand Up @@ -387,6 +396,7 @@ impl MetricsSender {
}),
user_id: get_user_id(),
email: get_email(),
device_id: get_device_id(),
timestamp: get_current_timestamp(),
};
self.send_event(event);
Expand All @@ -411,3 +421,192 @@ fn get_current_timestamp() -> u64 {
.unwrap()
.as_secs()
}

/// Get a deterministic device ID derived from the machine's unique identifier.
/// This ID is stable across CLI reinstalls and file deletions.
pub fn get_device_id() -> Option<&'static str> {
use std::sync::LazyLock;
static DEVICE_ID: LazyLock<Option<&'static str>> = LazyLock::new(|| {
get_machine_id()
.map(|id| hash_to_device_id(&id))
.map(|s| -> &'static str { s.leak() })
});
*DEVICE_ID
}

/// Hash the machine ID to create a privacy-preserving device identifier.
fn hash_to_device_id(machine_id: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(b"helix-device-id:");
hasher.update(machine_id.as_bytes());
let result = hasher.finalize();
// Use first 16 bytes (32 hex chars) for a shorter but still unique ID
hex::encode(&result[..16])
}

/// Get the machine's unique identifier (platform-specific).
#[cfg(target_os = "macos")]
fn get_machine_id() -> Option<String> {
// macOS: Use IOPlatformUUID from IOKit
use std::process::Command;
Command::new("ioreg")
.args(["-rd1", "-c", "IOPlatformExpertDevice"])
.output()
.ok()
.and_then(|output| {
let stdout = String::from_utf8_lossy(&output.stdout);
stdout
.lines()
.find(|line| line.contains("IOPlatformUUID"))
.and_then(|line| line.split('"').nth(3).map(|s| s.to_string()))
})
}

#[cfg(target_os = "linux")]
fn get_machine_id() -> Option<String> {
// Linux: Read from /etc/machine-id or /var/lib/dbus/machine-id
fs::read_to_string("/etc/machine-id")
.or_else(|_| fs::read_to_string("/var/lib/dbus/machine-id"))
.ok()
.map(|s| s.trim().to_string())
}

#[cfg(target_os = "windows")]
fn get_machine_id() -> Option<String> {
// Windows: Read MachineGuid from registry
use std::process::Command;
Command::new("reg")
.args([
"query",
r"HKLM\SOFTWARE\Microsoft\Cryptography",
"/v",
"MachineGuid",
])
.output()
.ok()
.and_then(|output| {
let stdout = String::from_utf8_lossy(&output.stdout);
stdout
.lines()
.find(|line| line.contains("MachineGuid"))
.and_then(|line| line.split_whitespace().last())
.map(|s| s.to_string())
})
}

#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
fn get_machine_id() -> Option<String> {
Comment on lines +478 to +498
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Windows registry parsing uses .split_whitespace().last() which assumes a specific output format from the reg query command. While this should work for most cases, there's a potential issue if the registry value contains spaces or if the output format changes.

Consider a more robust parsing approach:

The typical output is:

    MachineGuid    REG_SZ    {GUID}

A safer approach would be to:

  1. Find the line containing "MachineGuid"
  2. Split by "REG_SZ" (or other type indicators)
  3. Take the part after the type and trim it

However, since this is used across the Windows ecosystem and the current parsing should work for the standard format, this is a minor concern. The .ok() wrapper ensures that parsing failures return None rather than panicking, which is good error handling.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Prompt To Fix With AI
This is a comment left during a code review.
Path: helix-cli/src/metrics_sender.rs
Line: 474:494

Comment:
The Windows registry parsing uses `.split_whitespace().last()` which assumes a specific output format from the `reg query` command. While this should work for most cases, there's a potential issue if the registry value contains spaces or if the output format changes.

**Consider a more robust parsing approach:**

The typical output is:
```
    MachineGuid    REG_SZ    {GUID}
```

A safer approach would be to:
1. Find the line containing "MachineGuid"
2. Split by "REG_SZ" (or other type indicators)
3. Take the part after the type and trim it

However, since this is used across the Windows ecosystem and the current parsing should work for the standard format, this is a minor concern. The `.ok()` wrapper ensures that parsing failures return `None` rather than panicking, which is good error handling.

<sub>Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!</sub>

How can I resolve this? If you propose a fix, please make it concise.

None
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_get_machine_id_returns_value() {
// Machine ID should be available on macOS, Linux, and Windows
let machine_id = get_machine_id();

#[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
{
assert!(
machine_id.is_some(),
"Machine ID should be available on this platform"
);
let id = machine_id.unwrap();
assert!(!id.is_empty(), "Machine ID should not be empty");
println!("Machine ID: {}", id);
}

#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
{
// On unsupported platforms, it's expected to be None
assert!(machine_id.is_none());
}
}

#[test]
fn test_hash_to_device_id_produces_consistent_hash() {
let machine_id = "test-machine-id-12345";
let hash1 = hash_to_device_id(machine_id);
let hash2 = hash_to_device_id(machine_id);

assert_eq!(hash1, hash2, "Same input should produce same hash");
assert_eq!(
hash1.len(),
32,
"Hash should be 32 hex characters (16 bytes)"
);

// Verify it's valid hex
assert!(
hash1.chars().all(|c| c.is_ascii_hexdigit()),
"Hash should only contain hex digits"
);
}

#[test]
fn test_hash_to_device_id_different_inputs_produce_different_hashes() {
let hash1 = hash_to_device_id("machine-id-1");
let hash2 = hash_to_device_id("machine-id-2");

assert_ne!(
hash1, hash2,
"Different inputs should produce different hashes"
);
}

#[test]
fn test_get_device_id_is_deterministic() {
// Get device ID twice - should be the same
let device_id1 = get_device_id();
let device_id2 = get_device_id();

#[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
{
assert!(device_id1.is_some(), "Device ID should be available");
assert!(device_id2.is_some(), "Device ID should be available");
assert_eq!(
device_id1.unwrap(),
device_id2.unwrap(),
"Device ID should be deterministic"
Comment on lines +557 to +573
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test calls get_device_id() twice (lines 555-556), which currently causes two memory leaks due to the bug in get_device_id(). Once that function is fixed with a LazyLock pattern, the test will work correctly without leaking memory.

Additionally, the assertion on line 562-565 compares two leaked pointers. After the fix, both calls will return the same cached static reference, so the test will still pass correctly.

Prompt To Fix With AI
This is a comment left during a code review.
Path: helix-cli/src/metrics_sender.rs
Line: 553:569

Comment:
This test calls `get_device_id()` twice (lines 555-556), which currently causes two memory leaks due to the bug in `get_device_id()`. Once that function is fixed with a `LazyLock` pattern, the test will work correctly without leaking memory.

Additionally, the assertion on line 562-565 compares two leaked pointers. After the fix, both calls will return the same cached static reference, so the test will still pass correctly.

How can I resolve this? If you propose a fix, please make it concise.

);
println!("Device ID: {}", device_id1.unwrap());
}
}

#[test]
fn test_device_id_format() {
let device_id = get_device_id();

#[cfg(any(target_os = "macos", target_os = "linux", target_os = "windows"))]
{
let id = device_id.expect("Device ID should be available");
assert_eq!(id.len(), 32, "Device ID should be 32 characters");
assert!(
id.chars().all(|c| c.is_ascii_hexdigit()),
"Device ID should only contain hex digits"
);
}
}

#[test]
fn test_hash_includes_salt() {
// The hash function includes a salt "helix-device-id:"
// This ensures different apps using machine ID get different hashes
let machine_id = "same-machine-id";

// Direct SHA256 of machine_id without salt would be different
let mut hasher = Sha256::new();
hasher.update(machine_id.as_bytes());
let direct_hash = hex::encode(&hasher.finalize()[..16]);

let salted_hash = hash_to_device_id(machine_id);

assert_ne!(
direct_hash, salted_hash,
"Salted hash should differ from unsalted"
);
}
}
1 change: 1 addition & 0 deletions metrics/src/events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub struct RawEvent<D: Serialize + std::fmt::Debug + Clone> {
pub event_data: D,
pub user_id: Option<&'static str>,
pub email: Option<&'static str>,
pub device_id: Option<&'static str>,
pub timestamp: u64,
}

Expand Down
8 changes: 8 additions & 0 deletions metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ pub static METRICS_ENABLED: LazyLock<bool> = LazyLock::new(|| {
true
});

/// Device ID passed from CLI when deploying container (for metrics correlation)
pub static HELIX_DEVICE_ID: LazyLock<Option<&'static str>> = LazyLock::new(|| {
std::env::var("HELIX_DEVICE_ID")
.ok()
.map(|s| -> &'static str { s.leak() })
});

pub const METRICS_URL: &str = "https://logs.helix-db.com/v2";

// Thread-local buffer for events
Expand Down Expand Up @@ -213,6 +220,7 @@ fn create_raw_event(
.expect("Failed to get system time")
.as_secs(),
email: None,
device_id: *HELIX_DEVICE_ID,
}
}

Expand Down
Loading