Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 38 additions & 22 deletions laurus/tests/vector_field_routing_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,18 @@ async fn test_query_vector_fields_routes() {
.search(request(Some(vec!["img_vec".into()]), None))
.unwrap();
let ids = doc_ids(&results);
// Routing invariant (deterministic): the result contains only img_vec docs
// and never leaks txt_vec docs. Whether *every* img_vec doc comes back is
// an approximate-recall property of the randomized HNSW graph, so we assert
// the routing guarantee (non-empty + subset) rather than exact recall, which
// is flaky depending on the environment (Issue #773).
assert!(
ids.contains(&1) && ids.contains(&2),
"img_vec docs present: {ids:?}"
!ids.is_empty(),
"routing must return at least one hit: {ids:?}"
);
assert!(
!ids.contains(&3) && !ids.contains(&4),
"txt_vec docs absent: {ids:?}"
ids.iter().all(|id| *id == 1 || *id == 2),
"only img_vec docs (1, 2) may be returned: {ids:?}"
);
}

Expand All @@ -156,13 +161,15 @@ async fn test_params_fields_exact() {
))
.unwrap();
let ids = doc_ids(&results);
// Routing invariant: only txt_vec docs may be returned (see #773 for why we
// assert the routing guarantee instead of exact recall).
assert!(
ids.contains(&3) && ids.contains(&4),
"txt_vec docs present: {ids:?}"
!ids.is_empty(),
"routing must return at least one hit: {ids:?}"
);
assert!(
!ids.contains(&1) && !ids.contains(&2),
"img_vec docs absent: {ids:?}"
ids.iter().all(|id| *id == 3 || *id == 4),
"only txt_vec docs (3, 4) may be returned: {ids:?}"
);
}

Expand All @@ -177,13 +184,14 @@ async fn test_params_fields_prefix() {
))
.unwrap();
let ids = doc_ids(&results);
// Routing invariant: only img_vec docs may be returned (see #773).
assert!(
ids.contains(&1) && ids.contains(&2),
"img_vec docs present: {ids:?}"
!ids.is_empty(),
"routing must return at least one hit: {ids:?}"
);
assert!(
!ids.contains(&3) && !ids.contains(&4),
"txt_vec docs absent: {ids:?}"
ids.iter().all(|id| *id == 1 || *id == 2),
"only img_vec docs (1, 2) may be returned: {ids:?}"
);
}

Expand All @@ -193,12 +201,18 @@ async fn test_no_fields_searches_all() {
// No field selector anywhere → all fields searched (regression guard).
let results = store.search(request(None, None)).unwrap();
let ids = doc_ids(&results);
for id in [1, 2, 3, 4] {
assert!(
ids.contains(&id),
"doc {id} should be present with no field filter: {ids:?}"
);
}
// Both fields must be represented, proving neither was skipped. We require at
// least one doc from each field (1 or 2 for img_vec, 3 or 4 for txt_vec). The
// docs identical to the query (doc 1 in img_vec, doc 3 in txt_vec) are the
// nearest in their field and reliably returned, so this holds in practice;
// requiring all four docs would instead assert exact recall on a randomized
// HNSW graph (Issue #773).
assert!(
ids.iter().any(|id| *id == 1 || *id == 2),
"img_vec field must be represented: {ids:?}"
);
assert!(
ids.iter().any(|id| *id == 3 || *id == 4),
"txt_vec field must be represented: {ids:?}"
);
}

#[tokio::test(flavor = "multi_thread")]
Expand All @@ -212,12 +226,14 @@ async fn test_per_query_overrides_params() {
))
.unwrap();
let ids = doc_ids(&results);
// Per-query img_vec wins → only img_vec docs may be returned, never the
// request-level txt_vec docs (see #773 for the routing-vs-recall rationale).
assert!(
ids.contains(&1) && ids.contains(&2),
"per-query img_vec wins: {ids:?}"
!ids.is_empty(),
"routing must return at least one hit: {ids:?}"
);
assert!(
!ids.contains(&3) && !ids.contains(&4),
"txt_vec excluded: {ids:?}"
ids.iter().all(|id| *id == 1 || *id == 2),
"per-query img_vec wins, only docs (1, 2) may be returned: {ids:?}"
);
}