Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 3 additions & 15 deletions helix-db/src/helix_engine/tests/traversal_tests/upsert_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ fn test_upsert_v_creates_new_vector_when_none_exists() {
}

#[test]
fn test_upsert_v_creates_vector_with_default_data_when_none_provided() {
fn test_upsert_v_rejects_empty_vector_data() {
let (_temp_dir, storage) = setup_test_db();
let arena = Bump::new();
let mut txn = storage.graph_env.write_txn().unwrap();
Expand All @@ -542,21 +542,9 @@ fn test_upsert_v_creates_vector_with_default_data_when_none_provided() {
&arena,
)
.upsert_v(&[], "placeholder", &[("status", Value::from("pending"))])
.collect::<Result<Vec<_>, _>>()
.unwrap();
.collect::<Result<Vec<_>, _>>();

assert_eq!(result.len(), 1);
if let TraversalValue::Vector(vector) = &result[0] {
assert_eq!(vector.label, "placeholder");
assert!(vector.data.is_empty()); // Default empty data
assert_eq!(
vector.get_property("status").unwrap(),
&Value::from("pending")
);
} else {
panic!("Expected vector");
}
txn.commit().unwrap();
assert!(result.is_err());
}

#[test]
Expand Down
87 changes: 84 additions & 3 deletions helix-db/src/helix_engine/tests/vector_tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::helix_engine::vector_core::vector_distance::{MAX_DISTANCE, MIN_DISTANCE, ORTHOGONAL};
use crate::helix_engine::vector_core::vector_distance::{
cosine_similarity, MAX_DISTANCE, MIN_DISTANCE, ORTHOGONAL,
};

use crate::helix_engine::vector_core::vector::HVector;
use bumpalo::Bump;
Expand Down Expand Up @@ -36,8 +38,8 @@ fn test_hvector_distance_min() {
#[test]
fn test_hvector_distance_max() {
let arena = Bump::new();
let v1 = alloc_vector(&arena, &[0.0, 0.0]);
let v2 = alloc_vector(&arena, &[3.0, 4.0]);
let v1 = alloc_vector(&arena, &[-1.0, -2.0, -3.0]);
let v2 = alloc_vector(&arena, &[1.0, 2.0, 3.0]);
let distance = v1.distance_to(&v2).unwrap();
assert_eq!(distance, MAX_DISTANCE);
}
Expand Down Expand Up @@ -99,3 +101,82 @@ fn test_hvector_cosine_similarity() {
let similarity = v1.distance_to(&v2).unwrap();
assert!((similarity - (1.0 - 0.9746318461970762)).abs() < 1e-9);
}

/// Checks that `cosine_similarity` reports an error when the first operand
/// is an all-zero vector and the second is non-zero.
#[test]
fn test_cosine_similarity_zero_vector_returns_error() {
    let zero = [0.0_f64; 3];
    let non_zero = [1.0_f64, 2.0, 3.0];

    let outcome = cosine_similarity(&zero, &non_zero);

    assert!(outcome.is_err());
}

/// Checks that `cosine_similarity` reports an error when both operands are
/// all-zero vectors of the same length.
#[test]
fn test_cosine_similarity_both_zero_vectors_returns_error() {
    let lhs = [0.0_f64; 2];
    let rhs = [0.0_f64; 2];

    let outcome = cosine_similarity(&lhs, &rhs);

    assert!(outcome.is_err());
}

/// Checks that `cosine_similarity` reports an error when both operands are
/// empty slices.
#[test]
fn test_cosine_similarity_empty_vectors_returns_error() {
    let lhs: [f64; 0] = [];
    let rhs: [f64; 0] = [];

    let outcome = cosine_similarity(&lhs, &rhs);

    assert!(outcome.is_err());
}

/// Checks that `cosine_similarity` reports an error when only the first
/// operand is an empty slice.
#[test]
fn test_cosine_similarity_one_empty_vector_returns_error() {
    let empty: [f64; 0] = [];
    let populated = [1.0_f64, 2.0];

    let outcome = cosine_similarity(&empty, &populated);

    assert!(outcome.is_err());
}

/// Checks that `cosine_similarity` reports an error when the operands have
/// different lengths (2 elements versus 3).
#[test]
fn test_cosine_similarity_dimension_mismatch_returns_error() {
    let two_dims = [1.0_f64, 2.0];
    let three_dims = [1.0_f64, 2.0, 3.0];

    let outcome = cosine_similarity(&two_dims, &three_dims);

    assert!(outcome.is_err());
}

/// Checks that two vectors with identical components yield a similarity of
/// 1.0, within a tolerance of 1e-10.
#[test]
fn test_cosine_similarity_identical_vectors() {
    let lhs = [1.0_f64, 2.0, 3.0];
    let rhs = [1.0_f64, 2.0, 3.0];

    let similarity = cosine_similarity(&lhs, &rhs).unwrap();
    let deviation = (similarity - 1.0).abs();

    assert!(deviation < 1e-10);
}

/// Checks that a vector and its component-wise negation yield a similarity
/// of -1.0, within a tolerance of 1e-10.
#[test]
fn test_cosine_similarity_opposite_vectors() {
    let forward = [1.0_f64, 2.0, 3.0];
    let backward = [-1.0_f64, -2.0, -3.0];

    let similarity = cosine_similarity(&forward, &backward).unwrap();
    let deviation = (similarity - (-1.0)).abs();

    assert!(deviation < 1e-10);
}

/// Checks that the two unit axis vectors in 2-D yield a similarity of 0.0,
/// within a tolerance of 1e-10.
#[test]
fn test_cosine_similarity_orthogonal_vectors() {
    let x_axis = [1.0_f64, 0.0];
    let y_axis = [0.0_f64, 1.0];

    let similarity = cosine_similarity(&x_axis, &y_axis).unwrap();

    assert!(similarity.abs() < 1e-10);
}

/// Checks that two one-element vectors with positive components yield a
/// similarity of 1.0, within a tolerance of 1e-10.
#[test]
fn test_cosine_similarity_single_element() {
    let lhs = [5.0_f64];
    let rhs = [3.0_f64];

    let similarity = cosine_similarity(&lhs, &rhs).unwrap();
    let deviation = (similarity - 1.0).abs();

    assert!(deviation < 1e-10);
}

/// Checks that `cosine_similarity` succeeds on two 1024-element vectors
/// (sine and cosine of the indices 0..1024) and that the returned value lies
/// in the closed interval [-1, 1].
#[test]
fn test_cosine_similarity_large_dimensions() {
    // Build both inputs in a single pass over the index range.
    let (sines, cosines): (Vec<f64>, Vec<f64>) = (0..1024)
        .map(|i| {
            let x = i as f64;
            (x.sin(), x.cos())
        })
        .unzip();

    let outcome = cosine_similarity(&sines, &cosines);
    assert!(outcome.is_ok());

    let sim = outcome.unwrap();
    assert!((-1.0..=1.0).contains(&sim));
}

/// Checks that `distance_to` reports an error when the receiver is a vector
/// allocated from all-zero data and the argument is non-zero.
#[test]
fn test_hvector_distance_zero_vector_returns_error() {
    let arena = Bump::new();
    let zero = alloc_vector(&arena, &[0.0, 0.0]);
    let non_zero = alloc_vector(&arena, &[3.0, 4.0]);

    let outcome = zero.distance_to(&non_zero);

    assert!(outcome.is_err());
}

/// Checks that `cosine_similarity` reports an error when the first operand's
/// only non-zero component is a tenth of `f64::EPSILON`.
#[test]
fn test_cosine_similarity_near_zero_magnitude_returns_error() {
    let tiny = 0.1 * f64::EPSILON;
    let almost_zero = [tiny, 0.0];
    let regular = [1.0_f64, 2.0];

    let outcome = cosine_similarity(&almost_zero, &regular);

    assert!(outcome.is_err());
}
30 changes: 15 additions & 15 deletions helix-db/src/helix_engine/vector_core/vector_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,18 +288,10 @@ impl VectorCore {

neighbor.set_distance(neighbor.distance_to(query)?);

/*
let passes_filters = match filter {
Some(filter_slice) => filter_slice.iter().all(|f| f(&neighbor, txn)),
None => true,
};

if passes_filters {
result.push(neighbor);
}
*/

if filter.is_none() || filter.unwrap().iter().all(|f| f(&neighbor, txn)) {
if filter
.as_ref()
.map_or(true, |f| f.iter().all(|f| f(&neighbor, txn)))
{
result.push(neighbor);
}
}
Expand Down Expand Up @@ -458,7 +450,7 @@ impl VectorCore {
let (key, _) = result?;

// Extract id from the key: v: (2 bytes) + id (16 bytes) + level (8 bytes)
if key.len() < VECTOR_PREFIX.len() + 16 {
if key.len() < VECTOR_PREFIX.len() + 16 + 8 {
continue; // Skip malformed keys
}

Expand Down Expand Up @@ -505,6 +497,10 @@ impl HNSW for VectorCore {
'db: 'arena,
'arena: 'txn,
{
if query.is_empty() {
return Err(VectorError::InvalidVectorData);
}

let query = HVector::from_slice(label, 0, query);
// let temp_arena = bumpalo::Bump::new();

Expand Down Expand Up @@ -572,6 +568,10 @@ impl HNSW for VectorCore {
'db: 'arena,
'arena: 'txn,
{
if data.is_empty() {
return Err(VectorError::InvalidVectorData);
}

let new_level = self.get_new_level();

let mut query = HVector::from_slice(label, 0, data);
Expand All @@ -597,7 +597,7 @@ impl HNSW for VectorCore {
let mut nearest =
self.search_level::<F>(txn, label, &query, &mut curr_ep, 1, level, None, arena)?;
curr_ep = nearest.pop().ok_or(VectorError::VectorCoreError(
"emtpy search result".to_string(),
"empty search result".to_string(),
))?;
}

Expand All @@ -613,7 +613,7 @@ impl HNSW for VectorCore {
arena,
)?;
curr_ep = *nearest.peek().ok_or(VectorError::VectorCoreError(
"emtpy search result".to_string(),
"empty search result".to_string(),
))?;

let neighbors =
Expand Down
18 changes: 13 additions & 5 deletions helix-db/src/helix_engine/vector_core/vector_distance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ pub fn cosine_similarity(from: &[f64], to: &[f64]) -> Result<f64, VectorError> {
let len = from.len();
let other_len = to.len();

if len == 0 || other_len == 0 {
return Err(VectorError::InvalidVectorData);
}

if len != other_len {
println!("mis-match in vector dimensions!\n{len} != {other_len}");
return Err(VectorError::InvalidVectorLength);
}
//debug_assert_eq!(len, other.data.len(), "Vectors must have the same length");

#[cfg(target_feature = "avx2")]
{
Expand Down Expand Up @@ -78,11 +80,17 @@ pub fn cosine_similarity(from: &[f64], to: &[f64]) -> Result<f64, VectorError> {
magnitude_b += b_val * b_val;
}

if magnitude_a.abs() == 0.0 || magnitude_b.abs() == 0.0 {
return Ok(-1.0);
if magnitude_a < f64::EPSILON || magnitude_b < f64::EPSILON {
return Err(VectorError::InvalidVectorData);
}

let similarity = dot_product / (magnitude_a.sqrt() * magnitude_b.sqrt());

if similarity.is_nan() || similarity.is_infinite() {
return Err(VectorError::InvalidVectorData);
}

Ok(dot_product / (magnitude_a.sqrt() * magnitude_b.sqrt()))
Ok(similarity)
}

// SIMD implementation using AVX2 (256-bit vectors)
Expand Down
Loading