diff --git a/helix-db/src/helix_engine/tests/traversal_tests/upsert_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/upsert_tests.rs index 5288cbb9..1a0d74c0 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/upsert_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/upsert_tests.rs @@ -540,6 +540,112 @@ fn test_upsert_e_updates_existing_edge_with_properties() { txn.commit().unwrap(); } +#[test] +fn test_upsert_e_noop_when_all_properties_identical() { + let (_temp_dir, storage) = setup_test_db(); + let arena = Bump::new(); + let mut txn = storage.graph_env.write_txn().unwrap(); + + let node1 = G::new_mut(&storage, &arena, &mut txn) + .add_n("person", None, None) + .collect::, _>>() + .unwrap()[0] + .id(); + let node2 = G::new_mut(&storage, &arena, &mut txn) + .add_n("person", None, None) + .collect::, _>>() + .unwrap()[0] + .id(); + + let existing_edge = G::new_mut(&storage, &arena, &mut txn) + .add_edge( + "relates", + props_option(&arena, props!("kind" => "friend", "weight" => 2)), + node1, + node2, + false, + false, + ) + .collect::, _>>() + .unwrap() + .into_iter() + .next() + .unwrap(); + + let edge_id = if let TraversalValue::Edge(ref e) = existing_edge { + e.id + } else { + panic!("Expected edge"); + }; + + let result = G::new_mut_from_iter(&storage, &mut txn, std::iter::once(existing_edge), &arena) + .upsert_e( + "relates", + node1, + node2, + &[("kind", Value::from("friend")), ("weight", Value::from(2))], + ) + .collect::, _>>() + .unwrap(); + + assert_eq!(result.len(), 1); + if let TraversalValue::Edge(edge) = &result[0] { + assert_eq!(edge.id, edge_id); + assert_eq!(edge.label, "relates"); + assert_eq!(edge.get_property("kind").unwrap(), &Value::from("friend")); + assert_eq!(edge.get_property("weight").unwrap(), &Value::from(2)); + } else { + panic!("Expected edge"); + } + txn.commit().unwrap(); +} + +#[test] +fn test_upsert_e_noop_when_no_properties_and_empty_props() { + let (_temp_dir, storage) = setup_test_db(); + let arena = Bump::new(); + let mut txn = storage.graph_env.write_txn().unwrap(); + + let node1 = G::new_mut(&storage, &arena, &mut txn) + .add_n("person", None, None) + .collect::, _>>() + .unwrap()[0] + .id(); + let node2 = G::new_mut(&storage, &arena, &mut txn) + .add_n("person", None, None) + .collect::, _>>() + .unwrap()[0] + .id(); + + let existing_edge = G::new_mut(&storage, &arena, &mut txn) + .add_edge("plain", None, node1, node2, false, false) + .collect::, _>>() + .unwrap() + .into_iter() + .next() + .unwrap(); + + let edge_id = if let TraversalValue::Edge(ref e) = existing_edge { + e.id + } else { + panic!("Expected edge"); + }; + + let result = G::new_mut_from_iter(&storage, &mut txn, std::iter::once(existing_edge), &arena) + .upsert_e("plain", node1, node2, &[]) + .collect::, _>>() + .unwrap(); + + assert_eq!(result.len(), 1); + if let TraversalValue::Edge(edge) = &result[0] { + assert_eq!(edge.id, edge_id); + assert!(edge.properties.is_none()); + } else { + panic!("Expected edge"); + } + txn.commit().unwrap(); +} + #[test] fn test_upsert_e_with_defaults_applies_on_create_and_explicit_wins() { let (_temp_dir, storage) = setup_test_db(); @@ -896,6 +1002,87 @@ fn test_upsert_v_updates_existing_vector_with_properties() { txn.commit().unwrap(); } +#[test] +fn test_upsert_v_noop_when_all_properties_identical() { + let (_temp_dir, storage) = setup_test_db(); + let arena = Bump::new(); + let mut txn = storage.graph_env.write_txn().unwrap(); + + let query = [0.1_f64, 0.2, 0.3]; + let existing_vector = G::new_mut(&storage, &arena, &mut txn) + .insert_v::( + &query, + "embedding", + props_option( + &arena, + props!("model" => "gpt3", "version" => 1, "accuracy" => 0.95), + ), + ) + .collect::, _>>() + .unwrap() + .into_iter() + .next() + .unwrap(); + + let vector_id = existing_vector.id(); + + let result = G::new_mut_from_iter(&storage, &mut txn, std::iter::once(existing_vector), &arena) + .upsert_v( + &query, + "embedding", + &[ + ("model", Value::from("gpt3")), + ("version", Value::from(1)), + ("accuracy", Value::from(0.95)), + ], + ) + .collect::, _>>() + .unwrap(); + + assert_eq!(result.len(), 1); + if let TraversalValue::Vector(vector) = &result[0] { + assert_eq!(vector.id, vector_id); + assert_eq!(vector.get_property("model").unwrap(), &Value::from("gpt3")); + assert_eq!(vector.get_property("version").unwrap(), &Value::from(1)); + assert_eq!(vector.get_property("accuracy").unwrap(), &Value::from(0.95)); + } else { + panic!("Expected vector"); + } + txn.commit().unwrap(); +} + +#[test] +fn test_upsert_v_noop_when_no_properties_and_empty_props() { + let (_temp_dir, storage) = setup_test_db(); + let arena = Bump::new(); + let mut txn = storage.graph_env.write_txn().unwrap(); + + let query = [0.5_f64, 0.6, 0.7]; + let existing_vector = G::new_mut(&storage, &arena, &mut txn) + .insert_v::(&query, "embedding", None) + .collect::, _>>() + .unwrap() + .into_iter() + .next() + .unwrap(); + + let vector_id = existing_vector.id(); + + let result = G::new_mut_from_iter(&storage, &mut txn, std::iter::once(existing_vector), &arena) + .upsert_v(&query, "embedding", &[]) + .collect::, _>>() + .unwrap(); + + assert_eq!(result.len(), 1); + if let TraversalValue::Vector(vector) = &result[0] { + assert_eq!(vector.id, vector_id); + assert!(vector.properties.is_none()); + } else { + panic!("Expected vector"); + } + txn.commit().unwrap(); +} + #[test] fn test_upsert_v_with_defaults_applies_on_create_and_explicit_wins() { let (_temp_dir, storage) = setup_test_db(); diff --git a/helix-db/src/helix_engine/traversal_core/ops/util/upsert.rs b/helix-db/src/helix_engine/traversal_core/ops/util/upsert.rs index af300541..d1e9d439 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/util/upsert.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/util/upsert.rs @@ -228,7 +228,9 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE &old_serialized, &node.id, )?; - if let Err(e) = update_secondary_index(index, self.txn, k, node.id, v) { + if let Err(e) = + update_secondary_index(index, self.txn, k, node.id, v) + { // Restore the old index entry since the new one failed let _ = db.put(self.txn, &old_serialized, &node.id); return Err(e); @@ -420,6 +422,10 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE // Update existing edge - merge properties match edge.properties { None => { + if props.is_empty() { + return Ok(TraversalValue::Edge(edge)); + } + let map = ImmutablePropertiesMap::new( props.len(), props.iter().map(|(k, v)| (*k, v.clone())), @@ -428,6 +434,10 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE edge.properties = Some(map); } Some(old) => { + if props.iter().all(|(k, v)| old.get(k) == Some(v)) { + return Ok(TraversalValue::Edge(edge)); + } + let diff: Vec<_> = props .iter() .filter(|(k, _)| !old.iter().map(|(old_k, _)| old_k).contains(k)) @@ -553,6 +563,10 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE Some(Ok(TraversalValue::Vector(mut vector))) => { match vector.properties { None => { + if props.is_empty() { + return Ok(TraversalValue::Vector(vector)); + } + // Insert secondary indices for (k, v) in props.iter() { let Some((db, secondary_index)) = @@ -590,6 +604,10 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE vector.properties = Some(map); } Some(old) => { + if props.iter().all(|(k, v)| old.get(k) == Some(v)) { + return Ok(TraversalValue::Vector(vector)); + } + for (k, v) in props.iter() { let Some((db, secondary_index)) = self.storage.secondary_indices.get(*k) @@ -748,135 +766,141 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE match vector.properties { None => { - // Insert secondary indices - for (k, v) in props.iter() { - let Some((db, secondary_index)) = - self.storage.secondary_indices.get(*k) - else { - continue; - }; - - let v_serialized = bincode::serialize(v)?; - match secondary_index { - crate::helix_engine::types::SecondaryIndex::Unique(_) => db - .put_with_flags( - self.txn, - PutFlags::NO_OVERWRITE, - &v_serialized, - &vector.id, - ) - .map_err(|_| GraphError::DuplicateKey(k.to_string()))?, - crate::helix_engine::types::SecondaryIndex::Index(_) => { - db.put(self.txn, &v_serialized, &vector.id)? - } - crate::helix_engine::types::SecondaryIndex::None => { - unreachable!() + if !props.is_empty() { + // Insert secondary indices + for (k, v) in props.iter() { + let Some((db, secondary_index)) = + self.storage.secondary_indices.get(*k) + else { + continue; + }; + + let v_serialized = bincode::serialize(v)?; + match secondary_index { + crate::helix_engine::types::SecondaryIndex::Unique(_) => db + .put_with_flags( + self.txn, + PutFlags::NO_OVERWRITE, + &v_serialized, + &vector.id, + ) + .map_err(|_| GraphError::DuplicateKey(k.to_string()))?, + crate::helix_engine::types::SecondaryIndex::Index(_) => { + db.put(self.txn, &v_serialized, &vector.id)? + } + crate::helix_engine::types::SecondaryIndex::None => { + unreachable!() + } } } - } - // Create properties map and insert node - let map = ImmutablePropertiesMap::new( - props.len(), - props.iter().map(|(k, v)| (*k, v.clone())), - self.arena, - ); + // Create properties map and insert node + let map = ImmutablePropertiesMap::new( + props.len(), + props.iter().map(|(k, v)| (*k, v.clone())), + self.arena, + ); - vector.properties = Some(map); + vector.properties = Some(map); + } } Some(old) => { - for (k, v) in props.iter() { - let Some((db, secondary_index)) = - self.storage.secondary_indices.get(*k) - else { - continue; - }; - - // delete secondary indexes for the props changed - let Some(old_value) = old.get(k) else { - continue; - }; - - let old_serialized = bincode::serialize(old_value)?; - db.delete_one_duplicate(self.txn, &old_serialized, &vector.id)?; - - // create new secondary indexes for the props changed - let v_serialized = bincode::serialize(v)?; - match secondary_index { - crate::helix_engine::types::SecondaryIndex::Unique(_) => db - .put_with_flags( - self.txn, - PutFlags::NO_OVERWRITE, - &v_serialized, - &vector.id, - ) - .map_err(|_| GraphError::DuplicateKey(k.to_string()))?, - crate::helix_engine::types::SecondaryIndex::Index(_) => { - db.put(self.txn, &v_serialized, &vector.id)? - } - crate::helix_engine::types::SecondaryIndex::None => { - unreachable!() + if !props.iter().all(|(k, v)| old.get(k) == Some(v)) { + for (k, v) in props.iter() { + let Some((db, secondary_index)) = + self.storage.secondary_indices.get(*k) + else { + continue; + }; + + // delete secondary indexes for the props changed + let Some(old_value) = old.get(k) else { + continue; + }; + + let old_serialized = bincode::serialize(old_value)?; + db.delete_one_duplicate(self.txn, &old_serialized, &vector.id)?; + + // create new secondary indexes for the props changed + let v_serialized = bincode::serialize(v)?; + match secondary_index { + crate::helix_engine::types::SecondaryIndex::Unique(_) => db + .put_with_flags( + self.txn, + PutFlags::NO_OVERWRITE, + &v_serialized, + &vector.id, + ) + .map_err(|_| GraphError::DuplicateKey(k.to_string()))?, + crate::helix_engine::types::SecondaryIndex::Index(_) => { + db.put(self.txn, &v_serialized, &vector.id)? + } + crate::helix_engine::types::SecondaryIndex::None => { + unreachable!() + } } } - } - - let diff: Vec<_> = props - .iter() - .filter(|(k, _)| !old.iter().map(|(old_k, _)| old_k).contains(k)) - .cloned() - .collect(); - // Add secondary indices for NEW properties (not in old) - for (k, v) in &diff { - let Some((db, secondary_index)) = - self.storage.secondary_indices.get(*k) - else { - continue; - }; - - let v_serialized = bincode::serialize(v)?; - match secondary_index { - crate::helix_engine::types::SecondaryIndex::Unique(_) => db - .put_with_flags( - self.txn, - PutFlags::NO_OVERWRITE, - &v_serialized, - &vector.id, - ) - .map_err(|_| GraphError::DuplicateKey(k.to_string()))?, - crate::helix_engine::types::SecondaryIndex::Index(_) => { - db.put(self.txn, &v_serialized, &vector.id)? - } - crate::helix_engine::types::SecondaryIndex::None => { - unreachable!() + let diff: Vec<_> = props + .iter() + .filter(|(k, _)| { + !old.iter().map(|(old_k, _)| old_k).contains(k) + }) + .cloned() + .collect(); + + // Add secondary indices for NEW properties (not in old) + for (k, v) in &diff { + let Some((db, secondary_index)) = + self.storage.secondary_indices.get(*k) + else { + continue; + }; + + let v_serialized = bincode::serialize(v)?; + match secondary_index { + crate::helix_engine::types::SecondaryIndex::Unique(_) => db + .put_with_flags( + self.txn, + PutFlags::NO_OVERWRITE, + &v_serialized, + &vector.id, + ) + .map_err(|_| GraphError::DuplicateKey(k.to_string()))?, + crate::helix_engine::types::SecondaryIndex::Index(_) => { + db.put(self.txn, &v_serialized, &vector.id)? + } + crate::helix_engine::types::SecondaryIndex::None => { + unreachable!() + } } } - } - - // find out how many new properties we'll need space for - let len_diff = diff.len(); - - let merged = old - .iter() - .map(|(old_k, old_v)| { - props - .iter() - .find_map(|(k, v)| old_k.eq(*k).then_some(v)) - .map_or_else( - || (old_k, old_v.clone()), - |v| (old_k, v.clone()), - ) - }) - .chain(diff); - // make new props, updated by current props - let new_map = ImmutablePropertiesMap::new( - old.len() + len_diff, - merged, - self.arena, - ); + // find out how many new properties we'll need space for + let len_diff = diff.len(); + + let merged = old + .iter() + .map(|(old_k, old_v)| { + props + .iter() + .find_map(|(k, v)| old_k.eq(*k).then_some(v)) + .map_or_else( + || (old_k, old_v.clone()), + |v| (old_k, v.clone()), + ) + }) + .chain(diff); + + // make new props, updated by current props + let new_map = ImmutablePropertiesMap::new( + old.len() + len_diff, + merged, + self.arena, + ); - vector.properties = Some(new_map); + vector.properties = Some(new_map); + } } }