From c4df02c7a4b2f7d4a6ca04552c5201a06af15058 Mon Sep 17 00:00:00 2001 From: MesTTo Date: Tue, 23 Jun 2026 18:56:04 +1000 Subject: [PATCH] Add relation arrangement sidecar --- kernel/src/arrangements.rs | 276 +++++++++++++++++++++++++++++++++++++ kernel/src/lib.rs | 1 + 2 files changed, 277 insertions(+) create mode 100644 kernel/src/arrangements.rs diff --git a/kernel/src/arrangements.rs b/kernel/src/arrangements.rs new file mode 100644 index 00000000..194d4fd3 --- /dev/null +++ b/kernel/src/arrangements.rs @@ -0,0 +1,276 @@ +use std::collections::BTreeMap; + +use crate::term_identity::{FactId, TermId, TermIdentitySidecar, TermKind}; + +/// Physical argument-order sidecar for relation-like facts. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ArrangementDescriptor { + /// Relation/functor term at child position 0. + pub relation: TermId, + /// Number of relation arguments, excluding the relation/functor child. + pub argument_count: u8, + /// Argument positions used as the key, zero-based after the relation child. + pub key_order: Box<[u8]>, +} + +impl ArrangementDescriptor { + /// Creates a descriptor after validating the key order. + pub fn new( + relation: TermId, + argument_count: u8, + key_order: impl Into>, + ) -> Result { + let key_order = key_order.into(); + validate_key_order(argument_count, &key_order)?; + + Ok(Self { + relation, + argument_count, + key_order, + }) + } +} + +/// One arranged fact row. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ArrangementRow { + /// Complete fact identity. + pub fact: FactId, + /// Root term for the fact. + pub root: TermId, +} + +/// Snapshot-local arrangement index. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ArrangementIndex { + descriptor: ArrangementDescriptor, + rows_by_key: BTreeMap, Vec>, + stats: ArrangementStats, +} + +/// Counters for arrangement construction and lookup. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub struct ArrangementStats { + /// Complete fact roots inspected during build. + pub facts_scanned: usize, + /// Facts whose root matched the relation and arity. + pub rows: usize, + /// Distinct arranged keys. + pub distinct_keys: usize, + /// Rows returned by prefix lookups. + pub prefix_rows_returned: usize, + /// Prefix lookup calls. + pub prefix_lookups: usize, +} + +/// Errors from arrangement construction. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ArrangementError { + /// The relation term is absent from the term sidecar. + UnknownRelation { relation: TermId }, + /// A key position is outside the declared argument count. + InvalidKeyPosition { position: u8, argument_count: u8 }, + /// Duplicate key positions would not define a useful arrangement order. + DuplicateKeyPosition { position: u8 }, + /// A fact referenced a missing term record. + UnknownTerm { term: TermId }, + /// Encoded arity would overflow when adding the relation/functor child. + ArityOverflow { argument_count: u8 }, +} + +impl ArrangementIndex { + /// Builds a derived arrangement from a term snapshot. + pub fn build( + sidecar: &TermIdentitySidecar, + descriptor: ArrangementDescriptor, + ) -> Result { + if sidecar.get_term(descriptor.relation).is_none() { + return Err(ArrangementError::UnknownRelation { + relation: descriptor.relation, + }); + } + + let encoded_arity = + descriptor + .argument_count + .checked_add(1) + .ok_or(ArrangementError::ArityOverflow { + argument_count: descriptor.argument_count, + })?; + let mut stats = ArrangementStats::default(); + let mut rows_by_key: BTreeMap, Vec> = BTreeMap::new(); + + for fact in sidecar.facts() { + stats.facts_scanned += 1; + let Some(root) = sidecar.get_term(fact.root) else { + return Err(ArrangementError::UnknownTerm { term: fact.root }); + }; + if root.kind + != (TermKind::Application { + arity: encoded_arity, + }) + { + continue; + } + let children = root.children(); + if children.first().copied() != Some(descriptor.relation) { + continue; + } + + let key = descriptor + .key_order + .iter() + .map(|&position| children[usize::from(position) + 1]) + .collect::>() + .into_boxed_slice(); + rows_by_key.entry(key).or_default().push(ArrangementRow { + fact: fact.id, + root: fact.root, + }); + stats.rows += 1; + } + + stats.distinct_keys = rows_by_key.len(); + Ok(Self { + descriptor, + rows_by_key, + stats, + }) + } + + /// Arrangement descriptor. + pub fn descriptor(&self) -> &ArrangementDescriptor { + &self.descriptor + } + + /// Returns counters accumulated by build and lookup calls. + pub fn stats(&self) -> ArrangementStats { + self.stats + } + + /// Returns all rows whose arranged key starts with `prefix`. + pub fn seek_prefix(&mut self, prefix: &[TermId]) -> Vec { + self.stats.prefix_lookups += 1; + + let mut rows = Vec::new(); + for (key, key_rows) in self.rows_by_key.range(prefix.to_vec().into_boxed_slice()..) { + if !key.starts_with(prefix) { + break; + } + rows.extend_from_slice(key_rows); + } + + self.stats.prefix_rows_returned += rows.len(); + rows + } + + /// Exact arranged-key lookup. + pub fn get_exact(&self, key: &[TermId]) -> &[ArrangementRow] { + self.rows_by_key.get(key).map(Vec::as_slice).unwrap_or(&[]) + } +} + +fn validate_key_order(argument_count: u8, key_order: &[u8]) -> Result<(), ArrangementError> { + let mut seen = 0u64; + for &position in key_order { + if position >= argument_count { + return Err(ArrangementError::InvalidKeyPosition { + position, + argument_count, + }); + } + let bit = 1u64 << position; + if seen & bit != 0 { + return Err(ArrangementError::DuplicateKeyPosition { position }); + } + seen |= bit; + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::space::Space; + use crate::term_identity::TermIdentitySidecar; + use std::collections::BTreeSet; + + fn encoded_roots( + sidecar: &TermIdentitySidecar, + rows: impl IntoIterator, + ) -> BTreeSet> { + rows.into_iter() + .map(|row| sidecar.get_term(row.root).unwrap().encoded().to_vec()) + .collect() + } + + fn encoded_expr(space: &mut Space, expr: &'static str) -> Vec { + let expr = crate::expr!(space, expr); + unsafe { expr.span().as_ref().unwrap() }.to_vec() + } + + #[test] + fn suffix_bound_arrangement_matches_product_query_roots() { + let mut space = Space::new(); + space + .add_all_sexpr( + br#" +(edge Alice Bob) +(edge Carol Bob) +(edge Alice Dave) +(edge Eve Frank) +(note Bob Alice) +"#, + ) + .unwrap(); + + let mut sidecar = TermIdentitySidecar::new(); + sidecar.extend_from_pathmap(&space.btm).unwrap(); + let edge = sidecar + .term_id_for_encoded(&encoded_expr(&mut space, "edge")) + .unwrap(); + let bob = sidecar + .term_id_for_encoded(&encoded_expr(&mut space, "Bob")) + .unwrap(); + let alice = sidecar + .term_id_for_encoded(&encoded_expr(&mut space, "Alice")) + .unwrap(); + + let descriptor = ArrangementDescriptor::new(edge, 2, [1, 0]).unwrap(); + let mut arrangement = ArrangementIndex::build(&sidecar, descriptor).unwrap(); + let bob_rows = arrangement.seek_prefix(&[bob]); + let exact_rows = arrangement.get_exact(&[bob, alice]); + + let product_pattern = crate::expr!(space, "[2] , [3] edge $ Bob"); + let mut product_roots = BTreeSet::new(); + let product_count = Space::query_multi(&space.btm, product_pattern, |_, loc| { + let span = unsafe { loc.span().as_ref().unwrap() }; + product_roots.insert(span.to_vec()); + true + }); + + assert_eq!(product_count, 2); + assert_eq!(encoded_roots(&sidecar, bob_rows), product_roots); + assert_eq!(exact_rows.len(), 1); + + let stats = arrangement.stats(); + assert_eq!(stats.rows, 4); + assert_eq!(stats.prefix_lookups, 1); + assert_eq!(stats.prefix_rows_returned, 2); + } + + #[test] + fn descriptor_rejects_invalid_key_orders() { + assert_eq!( + ArrangementDescriptor::new(TermId(0), 2, [2]), + Err(ArrangementError::InvalidKeyPosition { + position: 2, + argument_count: 2, + }) + ); + assert_eq!( + ArrangementDescriptor::new(TermId(0), 2, [1, 1]), + Err(ArrangementError::DuplicateKeyPosition { position: 1 }) + ); + } +} diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index e020d925..38ee377e 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -11,6 +11,7 @@ mod pure; pub mod term_identity; pub mod binding_env; pub mod pattern_relations; +pub mod arrangements; #[doc(hidden)] pub use mork_expr as __mork_expr;