Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions helix-db/src/helix_engine/tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@
- `test_openai_embedding_success` - Tests OpenAI embedding success
- `test_gemini_embedding_success` - Tests Gemini embedding success
- `test_gemini_embedding_with_task_type` - Tests Gemini embedding with task types
- `test_minimax_embedding_success` - Tests MiniMax embedding success
- `test_minimax_embedding_with_query_type` - Tests MiniMax embedding with query type
- `test_local_embedding_success` - Tests local embedding success
- `test_local_embedding_invalid_url` - Tests invalid URL handling for local embeddings

Expand Down
97 changes: 97 additions & 0 deletions helix-db/src/helix_gateway/embedding_providers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ pub enum EmbeddingProvider {
resource_name: String,
deployment_id: String,
},
MiniMax {
embedding_type: String,
},
Local,
}

Expand Down Expand Up @@ -85,6 +88,13 @@ impl EmbeddingModelImpl {
.ok_or_else(|| GraphError::from("AZURE_OPENAI_API_KEY not set"))?;
Some(key)
}
EmbeddingProvider::MiniMax { .. } => {
let key = api_key
.map(String::from)
.or_else(|| env::var("MINIMAX_API_KEY").ok())
.ok_or_else(|| GraphError::from("MINIMAX_API_KEY not set"))?;
Some(key)
}
EmbeddingProvider::Local => None,
};

Expand Down Expand Up @@ -158,6 +168,21 @@ impl EmbeddingModelImpl {
model_name.to_string(),
))
}
Some(m) if m.starts_with("minimax:") => {
let parts: Vec<&str> = m.splitn(2, ':').collect();
let model_and_type = parts.get(1).unwrap_or(&"embo-01");
let (model_name, embedding_type) = if model_and_type.contains(':') {
let type_parts: Vec<&str> = model_and_type.splitn(2, ':').collect();
(
type_parts[0].to_string(),
type_parts.get(1).unwrap_or(&"db").to_string(),
)
} else {
(model_and_type.to_string(), "db".to_string())
};

Ok((EmbeddingProvider::MiniMax { embedding_type }, model_name))
}
Comment on lines +171 to +185
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Dead unwrap_or fallback — default "embo-01" is never applied

Because the match guard already requires m.starts_with("minimax:"), calling m.splitn(2, ':') will always produce at least two parts: ["minimax", "<rest>"]. This means parts.get(1) always returns Some(...) — even Some("") when the input is "minimax:". The .unwrap_or(&"embo-01") fallback is therefore dead code and the intended default is never applied.

Concretely, "minimax:" results in model_name = "" (not "embo-01"), which will be sent verbatim to the MiniMax API and cause a server-side error rather than a clean default.

The same dead-fallback pattern exists in the Gemini arm (line 125), but this PR introduces it again for MiniMax.

A clean fix is to use strip_prefix and treat an empty suffix as the default:

Suggested change
Some(m) if m.starts_with("minimax:") => {
let parts: Vec<&str> = m.splitn(2, ':').collect();
let model_and_type = parts.get(1).unwrap_or(&"embo-01");
let (model_name, embedding_type) = if model_and_type.contains(':') {
let type_parts: Vec<&str> = model_and_type.splitn(2, ':').collect();
(
type_parts[0].to_string(),
type_parts.get(1).unwrap_or(&"db").to_string(),
)
} else {
(model_and_type.to_string(), "db".to_string())
};
Ok((EmbeddingProvider::MiniMax { embedding_type }, model_name))
}
Some(m) if m.starts_with("minimax:") => {
let suffix = m.strip_prefix("minimax:").unwrap_or("embo-01");
let suffix = if suffix.is_empty() { "embo-01" } else { suffix };
let (model_name, embedding_type) = if suffix.contains(':') {
let type_parts: Vec<&str> = suffix.splitn(2, ':').collect();
(
type_parts[0].to_string(),
type_parts.get(1).unwrap_or(&"db").to_string(),
)
} else {
(suffix.to_string(), "db".to_string())
};
Ok((EmbeddingProvider::MiniMax { embedding_type }, model_name))
}

Some("local") => Ok((EmbeddingProvider::Local, "local".to_string())),

Some(_) => Ok((
Expand Down Expand Up @@ -365,6 +390,77 @@ impl EmbeddingModel for EmbeddingModelImpl {
Ok(embedding)
}

EmbeddingProvider::MiniMax { embedding_type } => {
let api_key = self.api_key.as_ref().ok_or_else(|| {
GraphError::EmbeddingError("MiniMax API key not set".to_string())
})?;

let response = self
.client
.post("https://api.minimax.io/v1/embeddings")
.header("Authorization", format!("Bearer {api_key}"))
.header("Content-Type", "application/json")
.json(&json!({
"model": &self.model,
"texts": [text],
"type": embedding_type
}))
.send()
.await
.map_err(|e| {
GraphError::EmbeddingError(format!(
"Failed to send request to MiniMax: {e}"
))
})?;

let status = response.status();
let text_response = response.text().await.map_err(|e| {
GraphError::EmbeddingError(format!("Failed to read MiniMax response: {e}"))
})?;

if !status.is_success() {
return Err(parse_api_error("MiniMax", status.as_u16(), &text_response));
}

let response =
sonic_rs::from_str::<sonic_rs::Value>(&text_response).map_err(|e| {
GraphError::EmbeddingError(format!("Failed to parse MiniMax response: {e}"))
})?;

// MiniMax returns HTTP 200 even for errors (e.g. rate limits);
// check base_resp.status_code for the real status.
if let Some(code) = response["base_resp"]["status_code"].as_i64() {
if code != 0 {
let msg = response["base_resp"]["status_msg"]
.as_str()
.unwrap_or("unknown error");
return Err(GraphError::EmbeddingError(format!(
"MiniMax embedding API error ({}): {}",
code, msg
)));
}
}

let embedding = response["vectors"][0]
.as_array()
.ok_or_else(|| {
GraphError::EmbeddingError(
"Invalid embedding format in MiniMax response".to_string(),
)
})?
.iter()
.map(|v| {
v.as_f64().ok_or_else(|| {
GraphError::EmbeddingError(
"Invalid float value in embedding".to_string(),
)
})
})
.collect::<Result<Vec<f64>, GraphError>>()?;

Ok(embedding)
}

EmbeddingProvider::Local => {
let url = self.url.as_ref().ok_or_else(|| {
GraphError::EmbeddingError("Local embedding URL not set".to_string())
Expand Down Expand Up @@ -451,6 +547,7 @@ pub fn get_embedding_model(
/// let query = embed!("Hello, world!");
/// let embedding = embed!("Hello, world!", "text-embedding-ada-002");
/// let embedding = embed!("Hello, world!", "gemini:gemini-embedding-001:SEMANTIC_SIMILARITY");
/// let embedding = embed!("Hello, world!", "minimax:embo-01");
/// let embedding = embed!("Hello, world!", "text-embedding-ada-002", "http://localhost:8699/embed");
/// ```
macro_rules! embed {
Expand Down
86 changes: 86 additions & 0 deletions helix-db/src/helix_gateway/tests/embedding_providers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,32 @@ fn test_local_embedding_success() {
println!("embedding: {:?}", embedding);
}

#[tokio::test]
#[ignore] // Requires API key and network
async fn test_minimax_embedding_success() {
let model = get_embedding_model(None, Some("minimax:embo-01"), None).unwrap();
let result = model.fetch_embedding_async("test text").await;
assert!(
result.is_ok(),
"MiniMax embedding failed: {:?}",
result.err()
);
let embedding = result.unwrap();
assert_eq!(embedding.len(), 1536); // embo-01 produces 1536-dimensional embeddings
println!("embedding: {embedding:?}");
}

#[tokio::test]
#[ignore] // Requires API key and network
async fn test_minimax_embedding_with_query_type() {
let model = get_embedding_model(None, Some("minimax:embo-01:query"), None).unwrap();
let result = model.fetch_embedding_async("search query text").await;
assert!(result.is_ok());
let embedding = result.unwrap();
assert_eq!(embedding.len(), 1536);
println!("embedding: {embedding:?}");
}

#[test]
fn test_local_embedding_invalid_url() {
let model = get_embedding_model(None, Some("local"), Some("invalid_url"));
Expand Down Expand Up @@ -163,6 +189,66 @@ fn test_parse_gemini_provider_empty_model() {
assert_eq!(model, ""); // Returns empty string when no model specified after colon
}

#[test]
fn test_parse_minimax_provider_with_model() {
let result = EmbeddingModelImpl::parse_provider_and_model(Some("minimax:embo-01"));
assert!(result.is_ok());
let (provider, model) = result.unwrap();
match provider {
EmbeddingProvider::MiniMax { embedding_type } => {
assert_eq!(embedding_type, "db");
}
_ => panic!("Expected MiniMax provider"),
}
assert_eq!(model, "embo-01");
}

#[test]
fn test_parse_minimax_provider_with_type() {
let result = EmbeddingModelImpl::parse_provider_and_model(Some("minimax:embo-01:query"));
assert!(result.is_ok());
let (provider, model) = result.unwrap();
match provider {
EmbeddingProvider::MiniMax { embedding_type } => {
assert_eq!(embedding_type, "query");
}
_ => panic!("Expected MiniMax provider"),
}
assert_eq!(model, "embo-01");
}

#[test]
fn test_parse_minimax_provider_empty_model() {
let result = EmbeddingModelImpl::parse_provider_and_model(Some("minimax:"));
assert!(result.is_ok());
let (provider, model) = result.unwrap();
match provider {
EmbeddingProvider::MiniMax { embedding_type } => {
assert_eq!(embedding_type, "db");
}
_ => panic!("Expected MiniMax provider"),
}
assert_eq!(model, "");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 test_parse_minimax_provider_empty_model asserts empty model rather than the documented default

This test asserts model == "" for input "minimax:". Combined with the dead unwrap_or(&"embo-01") in the parser, this test documents and entrenches the broken behaviour: a user who types "minimax:" expecting the default model will silently get an empty model string sent to the API. If the dead-fallback bug in the parser is fixed, this test should be updated to assert model == "embo-01".

Suggested change
assert_eq!(model, "");
assert_eq!(model, "embo-01"); // defaults to embo-01 when no model specified

}

#[test]
fn test_new_minimax_without_api_key_fails() {
unsafe {
std::env::remove_var("MINIMAX_API_KEY");
}
let result = EmbeddingModelImpl::new(None, Some("minimax:embo-01"), None);
assert!(result.is_err());
}

#[test]
fn test_new_minimax_with_api_key() {
let result = EmbeddingModelImpl::new(Some("test-key"), Some("minimax:embo-01"), None);
assert!(result.is_ok());
let model = result.unwrap();
assert!(matches!(model.provider, EmbeddingProvider::MiniMax { .. }));
assert_eq!(model.model, "embo-01");
}

#[test]
fn test_parse_local_provider() {
let result = EmbeddingModelImpl::parse_provider_and_model(Some("local"));
Expand Down