diff --git a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs index 1dc2b95be8d..fb9180ae1ac 100644 --- a/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs +++ b/libs/@local/hashql/compiletest/src/suite/mir_pass_transform_post_inline.rs @@ -54,7 +54,7 @@ pub(crate) fn mir_pass_transform_post_inline<'heap>( diagnostics: DiagnosticIssues::new(), }; - let mut pass = PostInline::new_in(heap, &mut scratch); + let mut pass = PostInline::new_in(&mut scratch); let _: Changed = pass.run( &mut context, &mut GlobalTransformState::new_in(&bodies, heap), diff --git a/libs/@local/hashql/core/src/id/bit_vec/finite.rs b/libs/@local/hashql/core/src/id/bit_vec/finite.rs index 2490383981b..48472ffc9bc 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/finite.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/finite.rs @@ -11,7 +11,6 @@ //! [`DenseBitSet`]: super::DenseBitSet #![expect( clippy::cast_possible_truncation, - clippy::cast_lossless, reason = "Integral conversions in macro expansions may truncate or widen depending on target \ type" )] @@ -19,7 +18,7 @@ use core::{ fmt::{self, Debug}, hash::{Hash, Hasher}, - marker::PhantomData, + marker::{Destruct, PhantomData}, ops::{BitAnd, BitAndAssign, BitOrAssign, Not, RangeBounds, Shl, Shr, Sub}, }; @@ -34,17 +33,17 @@ use crate::id::{Id, bit_vec::inclusive_start_end}; /// /// The "integral" in the name refers to the mathematical concept of integers, distinguishing /// these types from other potential backing stores like arrays of integers. 
-pub trait FiniteBitSetIntegral: +pub const trait FiniteBitSetIntegral: Copy + Clone + Hash - + BitAnd - + BitOrAssign - + BitAndAssign - + Shl - + Shr - + Sub - + Not + + const BitAnd + + const BitOrAssign + + const BitAndAssign + + const Shl + + const Shr + + const Sub + + const Not + const PartialEq + fmt::Binary { @@ -66,7 +65,7 @@ pub trait FiniteBitSetIntegral: const ZERO: Self; /// Converts an [`Id`] to this integral type. - fn from_id(id: I) -> Self; + fn from_id(id: I) -> Self; /// Converts a `usize` to this integral type. fn from_usize(value: usize) -> Self; @@ -86,14 +85,14 @@ macro_rules! impl_trait { $(impl_trait!(@impl $integral);)* }; (@impl $integral:ty) => { - impl FiniteBitSetIntegral for $integral { + impl const FiniteBitSetIntegral for $integral { const EMPTY: Self = Self::MIN; const FILLED: Self = Self::MAX; const MAX_DOMAIN_SIZE: u32 = <$integral>::BITS; const ONE: Self = 1; const ZERO: Self = 0; - fn from_id(id: I) -> Self { + fn from_id(id: I) -> Self { id.as_u32() as Self } @@ -207,7 +206,11 @@ impl FiniteBitSet { /// /// Panics if `index` is out of bounds for the underlying integral type. #[inline] - pub fn insert(&mut self, index: I) { + pub const fn insert(&mut self, index: I) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { assert!(index.as_u32() < T::MAX_DOMAIN_SIZE); self.store |= T::ONE << T::from_id(index); @@ -221,8 +224,13 @@ impl FiniteBitSet { /// /// Panics if the range end exceeds the capacity of the underlying integral type. 
#[inline] - pub fn insert_range(&mut self, bounds: impl RangeBounds) { - let Some((start, end)) = inclusive_start_end(bounds, T::MAX_DOMAIN_SIZE as usize) else { + pub const fn insert_range(&mut self, bounds: R, domain_size: usize) + where + R: [const] RangeBounds + [const] Destruct, + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { + let Some((start, end)) = inclusive_start_end(bounds, domain_size) else { return; }; @@ -240,7 +248,11 @@ impl FiniteBitSet { /// /// Panics if `index` is out of bounds for the underlying integral type. #[inline] - pub fn remove(&mut self, index: I) { + pub const fn remove(&mut self, index: I) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { assert!(index.as_u32() < T::MAX_DOMAIN_SIZE); self.store &= !(T::ONE << T::from_id(index)); @@ -252,7 +264,11 @@ impl FiniteBitSet { /// /// Panics if `index` is out of bounds for the underlying integral type. #[inline] - pub fn set(&mut self, index: I, value: bool) { + pub const fn set(&mut self, index: I, value: bool) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { if value { self.insert(index); } else { @@ -265,7 +281,11 @@ impl FiniteBitSet { /// Returns `false` if `index` is out of bounds (rather than panicking). #[inline] #[must_use] - pub fn contains(&self, index: I) -> bool { + pub const fn contains(&self, index: I) -> bool + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { if index.as_u32() >= T::MAX_DOMAIN_SIZE { false } else { @@ -273,6 +293,28 @@ impl FiniteBitSet { } } + /// Flips all bits within the domain, turning set bits off and unset bits on. + /// + /// # Panics + /// + /// Panics if `domain_size` is greater than `T::MAX_DOMAIN_SIZE`. 
+ #[inline] + pub const fn negate(&mut self, domain_size: u32) + where + I: [const] Id, + T: [const] FiniteBitSetIntegral, + { + assert!(domain_size <= T::MAX_DOMAIN_SIZE); + + let mask = if domain_size == T::MAX_DOMAIN_SIZE { + !T::EMPTY + } else { + (T::ONE << T::from_u32(domain_size)) - T::ONE + }; + + self.store = !self.store & mask; + } + /// Returns an iterator over the indices of set bits. #[inline] pub fn iter(&self) -> FiniteBitIter { @@ -416,7 +458,7 @@ mod tests { #[test] fn remove() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); set.remove(TestId::from_usize(0)); set.remove(TestId::from_usize(7)); @@ -442,7 +484,7 @@ mod tests { fn insert_range_basic() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(2)..TestId::from_usize(5)); + set.insert_range(TestId::from_usize(2)..TestId::from_usize(5), 8); assert!(!set.contains(TestId::from_usize(0))); assert!(!set.contains(TestId::from_usize(1))); @@ -457,7 +499,7 @@ mod tests { fn insert_range_inclusive() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(2)..=TestId::from_usize(5)); + set.insert_range(TestId::from_usize(2)..=TestId::from_usize(5), 8); assert!(set.contains(TestId::from_usize(2))); assert!(set.contains(TestId::from_usize(5))); @@ -468,7 +510,7 @@ mod tests { fn insert_range_full() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); assert_eq!(set.len(), 8); assert_eq!(set.into_inner(), u8::MAX); @@ -478,7 +520,7 @@ mod tests { fn insert_range_empty() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(5)..TestId::from_usize(5)); + 
set.insert_range(TestId::from_usize(5)..TestId::from_usize(5), 8); assert!(set.is_empty()); } @@ -516,7 +558,7 @@ mod tests { #[test] fn clear() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); set.clear(); assert!(set.is_empty()); } @@ -524,7 +566,7 @@ mod tests { #[test] fn contains_out_of_bounds_returns_false() { let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); - set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); assert!(!set.contains(TestId::from_usize(100))); } @@ -538,19 +580,19 @@ mod tests { #[test] fn different_integral_types() { let mut set8: FiniteBitSet = FiniteBitSet::new_empty(8); - set8.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + set8.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); assert_eq!(set8.len(), 8); let mut set16: FiniteBitSet = FiniteBitSet::new_empty(16); - set16.insert_range(TestId::from_usize(0)..=TestId::from_usize(15)); + set16.insert_range(TestId::from_usize(0)..=TestId::from_usize(15), 16); assert_eq!(set16.len(), 16); let mut set64: FiniteBitSet = FiniteBitSet::new_empty(64); - set64.insert_range(TestId::from_usize(0)..=TestId::from_usize(63)); + set64.insert_range(TestId::from_usize(0)..=TestId::from_usize(63), 64); assert_eq!(set64.len(), 64); let mut set128: FiniteBitSet = FiniteBitSet::new_empty(128); - set128.insert_range(TestId::from_usize(0)..=TestId::from_usize(127)); + set128.insert_range(TestId::from_usize(0)..=TestId::from_usize(127), 128); assert_eq!(set128.len(), 128); } @@ -561,7 +603,7 @@ mod tests { for start in 0..bits.min(8) { for end in start..bits.min(16) { let mut set: FiniteBitSet = FiniteBitSet::new_empty(64); - set.insert_range(TestId::from_u32(start)..=TestId::from_u32(end.min(63))); + 
set.insert_range(TestId::from_u32(start)..=TestId::from_u32(end.min(63)), 64); for i in 0..64 { let expected = i >= start && i <= end.min(63); @@ -611,7 +653,7 @@ mod tests { #[test] fn subtract_removes_bits() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); b.insert(TestId::from_usize(1)); @@ -632,10 +674,10 @@ mod tests { #[test] fn subtract_disjoint_sets() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); - b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7)); + b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7), 8); assert!(!a.subtract(&b)); assert_eq!(a.len(), 4); @@ -644,10 +686,10 @@ mod tests { #[test] fn intersect_keeps_common_bits() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(4), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); - b.insert_range(TestId::from_usize(2)..=TestId::from_usize(6)); + b.insert_range(TestId::from_usize(2)..=TestId::from_usize(6), 8); assert!(a.intersect(&b)); assert!(!a.contains(TestId::from_usize(0))); @@ -666,10 +708,10 @@ mod tests { #[test] fn intersect_disjoint_sets() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(3), 8); let mut b: FiniteBitSet = FiniteBitSet::new_empty(8); - b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7)); + b.insert_range(TestId::from_usize(4)..=TestId::from_usize(7), 8); assert!(a.intersect(&b)); assert!(a.is_empty()); 
@@ -678,11 +720,91 @@ mod tests { #[test] fn intersect_with_empty() { let mut a: FiniteBitSet = FiniteBitSet::new_empty(8); - a.insert_range(TestId::from_usize(0)..=TestId::from_usize(7)); + a.insert_range(TestId::from_usize(0)..=TestId::from_usize(7), 8); let b: FiniteBitSet = FiniteBitSet::new_empty(8); assert!(a.intersect(&b)); assert!(a.is_empty()); } + + #[test] + fn negate_empty_set() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(4); + set.negate(4); + + assert!(set.contains(TestId::from_usize(0))); + assert!(set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(2))); + assert!(set.contains(TestId::from_usize(3))); + assert!(!set.contains(TestId::from_usize(4))); + assert_eq!(set.len(), 4); + } + + #[test] + fn negate_full_set() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(4); + set.insert_range(TestId::from_usize(0)..=TestId::from_usize(3), 4); + set.negate(4); + + assert!(set.is_empty()); + } + + #[test] + fn negate_partial_set() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(1)); + set.insert(TestId::from_usize(3)); + set.insert(TestId::from_usize(5)); + set.negate(8); + + assert!(set.contains(TestId::from_usize(0))); + assert!(!set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(2))); + assert!(!set.contains(TestId::from_usize(3))); + assert!(set.contains(TestId::from_usize(4))); + assert!(!set.contains(TestId::from_usize(5))); + assert!(set.contains(TestId::from_usize(6))); + assert!(set.contains(TestId::from_usize(7))); + assert_eq!(set.len(), 5); + } + + #[test] + fn negate_masks_above_domain() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(3); + set.negate(3); + + assert!(set.contains(TestId::from_usize(0))); + assert!(set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(2))); + assert!(!set.contains(TestId::from_usize(3))); + assert_eq!(set.len(), 3); + } + + #[test] + fn negate_is_involution() { 
+ let mut set: FiniteBitSet = FiniteBitSet::new_empty(6); + set.insert(TestId::from_usize(2)); + set.insert(TestId::from_usize(4)); + + let original = set; + set.negate(6); + set.negate(6); + + assert_eq!(set, original); + } + + #[test] + fn negate_full_width() { + let mut set: FiniteBitSet = FiniteBitSet::new_empty(8); + set.insert(TestId::from_usize(0)); + set.insert(TestId::from_usize(7)); + set.negate(8); + + assert!(!set.contains(TestId::from_usize(0))); + assert!(set.contains(TestId::from_usize(1))); + assert!(set.contains(TestId::from_usize(6))); + assert!(!set.contains(TestId::from_usize(7))); + assert_eq!(set.len(), 6); + } } diff --git a/libs/@local/hashql/core/src/id/bit_vec/mod.rs b/libs/@local/hashql/core/src/id/bit_vec/mod.rs index 095f67dcc25..bbc332ca59d 100644 --- a/libs/@local/hashql/core/src/id/bit_vec/mod.rs +++ b/libs/@local/hashql/core/src/id/bit_vec/mod.rs @@ -38,7 +38,7 @@ use alloc::rc::Rc; use core::{ fmt, iter, - marker::PhantomData, + marker::{Destruct, PhantomData}, ops::{Bound, Range, RangeBounds}, slice, }; @@ -78,32 +78,12 @@ const CHUNK_BITS: usize = CHUNK_WORDS * WORD_BITS; // 2048 bits type ChunkSize = u16; const _: () = assert!(CHUNK_BITS <= ChunkSize::MAX as usize); -pub trait BitRelations { +pub const trait BitRelations { fn union(&mut self, other: &Rhs) -> bool; fn subtract(&mut self, other: &Rhs) -> bool; fn intersect(&mut self, other: &Rhs) -> bool; } -#[inline] -fn inclusive_start_end(range: impl RangeBounds, domain: usize) -> Option<(usize, usize)> { - // Both start and end are inclusive. 
- let start = match range.start_bound().cloned() { - Bound::Included(start) => start.as_usize(), - Bound::Excluded(start) => start.as_usize() + 1, - Bound::Unbounded => 0, - }; - let end = match range.end_bound().cloned() { - Bound::Included(end) => end.as_usize(), - Bound::Excluded(end) => end.as_usize().checked_sub(1)?, - Bound::Unbounded => domain - 1, - }; - assert!(end < domain); - if start > end { - return None; - } - Some((start, end)) -} - /// A fixed-size bitset type with a dense representation. /// /// Note 1: Since this bitset is dense, if your domain is big, and/or relatively @@ -1589,3 +1569,32 @@ const fn max_bit(word: Word) -> usize { fn count_ones(words: &[Word]) -> usize { words.iter().map(|word| word.count_ones() as usize).sum() } + +#[inline] +const fn inclusive_start_end(range: R, domain: usize) -> Option<(usize, usize)> +where + T: [const] Id, + R: [const] RangeBounds + [const] Destruct, +{ + // Both start and end are inclusive. + let start = match range.start_bound().copied() { + Bound::Included(start) => start.as_usize(), + Bound::Excluded(start) => start.as_usize() + 1, + Bound::Unbounded => 0, + }; + let end = match range.end_bound().copied() { + Bound::Included(end) => end.as_usize(), + Bound::Excluded(end) => match end.as_usize().checked_sub(1) { + Some(end) => end, + None => return None, + }, + Bound::Unbounded => domain - 1, + }; + + assert!(end < domain); + if start > end { + return None; + } + + Some((start, end)) +} diff --git a/libs/@local/hashql/core/src/id/mod.rs b/libs/@local/hashql/core/src/id/mod.rs index 3a500e8429a..333dc42c41b 100644 --- a/libs/@local/hashql/core/src/id/mod.rs +++ b/libs/@local/hashql/core/src/id/mod.rs @@ -46,7 +46,7 @@ impl Display for IdError { /// /// Provides type safety for IDs of different domains (nodes, users, etc.) /// while maintaining a consistent conversion API. 
-pub trait Id: +pub const trait Id: Copy + PartialEq + Eq @@ -55,9 +55,9 @@ pub trait Id: + Hash + Debug + Display - + TryFrom - + TryFrom - + TryFrom + + [const] TryFrom + + [const] TryFrom + + [const] TryFrom + 'static { /// The maximum value this ID type can represent. @@ -75,7 +75,10 @@ pub trait Id: #[inline] #[must_use] fn from_u32(index: u32) -> Self { - Self::try_from(index).expect("Cannot create ID: value outside valid range") + match Self::try_from(index) { + Ok(id) => id, + Err(_) => panic!("Cannot create ID: value outside valid range"), + } } /// Creates an ID from a [`u64`] value. @@ -87,7 +90,10 @@ pub trait Id: #[inline] #[must_use] fn from_u64(index: u64) -> Self { - Self::try_from(index).expect("Cannot create ID: value outside valid range") + match Self::try_from(index) { + Ok(id) => id, + Err(_) => panic!("Cannot create ID: value outside valid range"), + } } /// Creates an ID from a [`usize`] value. @@ -99,7 +105,10 @@ pub trait Id: #[inline] #[must_use] fn from_usize(index: usize) -> Self { - Self::try_from(index).expect("Cannot create ID: value outside valid range") + match Self::try_from(index) { + Ok(id) => id, + Err(_) => panic!("Cannot create ID: value outside valid range"), + } } /// Converts this ID to a [`u32`] value. @@ -181,16 +190,16 @@ pub trait Id: /// } /// } /// ``` -pub trait HasId { +pub const trait HasId { type Id: Id; /// Returns the ID of this entity. 
fn id(&self) -> Self::Id; } -impl HasId for &T +impl const HasId for &T where - T: HasId, + T: [const] HasId, { type Id = T::Id; @@ -199,7 +208,7 @@ where } } -impl HasId for (I, T) +impl const HasId for (I, T) where I: Id, { diff --git a/libs/@local/hashql/core/src/lib.rs b/libs/@local/hashql/core/src/lib.rs index 5b2aa8711c9..98a62567f1b 100644 --- a/libs/@local/hashql/core/src/lib.rs +++ b/libs/@local/hashql/core/src/lib.rs @@ -17,8 +17,13 @@ // Library Features allocator_api, binary_heap_into_iter_sorted, + bound_copied, clone_from_ref, const_cmp, + const_convert, + const_destruct, + const_ops, + const_range, const_trait_impl, extend_one, get_disjoint_mut_helpers, diff --git a/libs/@local/hashql/macros/src/id/enum.rs b/libs/@local/hashql/macros/src/id/enum.rs index e3d868d6f0f..e7798207b7d 100644 --- a/libs/@local/hashql/macros/src/id/enum.rs +++ b/libs/@local/hashql/macros/src/id/enum.rs @@ -186,8 +186,10 @@ pub(super) fn expand_enum( fn prev(self) -> ::core::option::Option { let discriminant = self.into_discriminant(); - let prev = discriminant.checked_sub(1)?; - Self::try_from_discriminant(prev) + match discriminant.checked_sub(1) { + Some(prev) => Self::try_from_discriminant(prev), + None => None, + } } } }); @@ -196,7 +198,7 @@ pub(super) fn expand_enum( for int in [quote!(u32), quote!(u64), quote!(usize)] { output.extend(quote! { #[automatically_derived] - impl ::core::convert::TryFrom<#int> for #name { + impl #konst ::core::convert::TryFrom<#int> for #name { type Error = #krate::id::IdError; #[inline] @@ -218,7 +220,7 @@ pub(super) fn expand_enum( // 6. HasId impl output.extend(quote! 
{ #[automatically_derived] - impl #krate::id::HasId for #name { + impl #konst #krate::id::HasId for #name { type Id = Self; #[inline] diff --git a/libs/@local/hashql/mir/benches/execution.rs b/libs/@local/hashql/mir/benches/execution.rs index 37dbf67e6f4..2b9a6bbfc38 100644 --- a/libs/@local/hashql/mir/benches/execution.rs +++ b/libs/@local/hashql/mir/benches/execution.rs @@ -12,10 +12,7 @@ use hashql_mir::{ builder::body, def::DefIdSlice, intern::Interner, - pass::{ - Changed, GlobalAnalysisPass as _, TransformPass as _, - analysis::size_estimation::SizeEstimationAnalysis, transform::TraversalExtraction, - }, + pass::{GlobalAnalysisPass as _, analysis::size_estimation::SizeEstimationAnalysis}, }; use self::run::run_bencher; @@ -97,22 +94,12 @@ fn execution_analysis(criterion: &mut Criterion) { group.bench_function("simple", |bencher| { run_bencher(bencher, create_simple, |context, [body], scratch| { - let mut extraction = TraversalExtraction::new_in(&mut *scratch); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - scratch.reset(); - let mut size_analysis = SizeEstimationAnalysis::new_in(&*scratch); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(&*body))); let footprints = size_analysis.finish(); scratch.reset(); - let bodies = [Some(traversals)]; - let analysis = hashql_mir::pass::execution::ExecutionAnalysis { - traversals: DefIdSlice::from_raw(&bodies), footprints: &footprints, scratch: &mut *scratch, }; @@ -126,22 +113,12 @@ fn execution_analysis(criterion: &mut Criterion) { bencher, create_entity_projections, |context, [body], scratch| { - let mut extraction = TraversalExtraction::new_in(&mut *scratch); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - scratch.reset(); - let mut size_analysis = SizeEstimationAnalysis::new_in(&*scratch); 
size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(&*body))); let footprints = size_analysis.finish(); scratch.reset(); - let bodies = [Some(traversals)]; - let analysis = hashql_mir::pass::execution::ExecutionAnalysis { - traversals: DefIdSlice::from_raw(&bodies), footprints: &footprints, scratch: &mut *scratch, }; @@ -153,22 +130,12 @@ fn execution_analysis(criterion: &mut Criterion) { group.bench_function("diamond_cfg", |bencher| { run_bencher(bencher, create_diamond_cfg, |context, [body], scratch| { - let mut extraction = TraversalExtraction::new_in(&mut *scratch); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - scratch.reset(); - let mut size_analysis = SizeEstimationAnalysis::new_in(&scratch); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(&*body))); let footprints = size_analysis.finish(); scratch.reset(); - let bodies = [Some(traversals)]; - let analysis = hashql_mir::pass::execution::ExecutionAnalysis { - traversals: DefIdSlice::from_raw(&bodies), footprints: &footprints, scratch: &mut *scratch, }; diff --git a/libs/@local/hashql/mir/benches/interpret.rs b/libs/@local/hashql/mir/benches/interpret.rs index 0c62ba0eab1..cc65af23087 100644 --- a/libs/@local/hashql/mir/benches/interpret.rs +++ b/libs/@local/hashql/mir/benches/interpret.rs @@ -78,7 +78,7 @@ fn create_fibonacci_body<'heap>( let _: Changed = inline.run(&mut context, &mut state.as_mut(), bodies_mut); scratch.reset(); - let mut post = PostInline::new_in(context.heap, &mut scratch); + let mut post = PostInline::new_in(&mut scratch); let _: Changed = post.run(&mut context, &mut state.as_mut(), bodies_mut); scratch.reset(); diff --git a/libs/@local/hashql/mir/benches/transform.rs b/libs/@local/hashql/mir/benches/transform.rs index b99f2bcbef4..38d7e74e684 100644 --- a/libs/@local/hashql/mir/benches/transform.rs +++ b/libs/@local/hashql/mir/benches/transform.rs @@ 
-512,8 +512,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); @@ -528,8 +527,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); @@ -544,8 +542,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); @@ -560,8 +557,7 @@ fn pipeline(criterion: &mut Criterion) { changed |= Inline::new_in(InlineConfig::default(), &mut *scratch) .run(context, &mut state, bodies); scratch.reset(); - changed |= - PostInline::new_in(context.heap, &mut *scratch).run(context, &mut state, bodies); + changed |= PostInline::new_in(&mut *scratch).run(context, &mut state, bodies); scratch.reset(); changed }); diff --git a/libs/@local/hashql/mir/package.json b/libs/@local/hashql/mir/package.json index 575098ee79d..c4513eecab0 100644 --- a/libs/@local/hashql/mir/package.json +++ b/libs/@local/hashql/mir/package.json @@ -9,7 +9,7 @@ "fix:clippy": "just clippy --fix", "lint:clippy": "just clippy", "test:codspeed": "cargo codspeed run -p hashql-mir", - "test:miri": "cargo miri nextest run -- changed_bitor interpret::locals::tests pass::analysis::execution::cost", 
+ "test:miri": "cargo miri nextest run -- changed_bitor interpret::locals::tests pass::execution::block_partitioned_vec::tests pass::execution::cost::tests", "test:unit": "mise run test:unit @rust/hashql-mir" }, "dependencies": { diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs index df40960b7d3..bb4725f3fa0 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/lattice/impls.rs @@ -195,6 +195,80 @@ macro_rules! impl_bitset { impl_bitset!(DenseBitSet, ChunkedBitSet, MixedBitSet); +impl HasBottom<(T, U)> for (A, B) +where + A: HasBottom, + B: HasBottom, +{ + #[inline] + fn bottom(&self) -> (T, U) { + (self.0.bottom(), self.1.bottom()) + } + + #[inline] + fn is_bottom(&self, value: &(T, U)) -> bool { + self.0.is_bottom(&value.0) && self.1.is_bottom(&value.1) + } +} + +impl HasTop<(T, U)> for (A, B) +where + A: HasTop, + B: HasTop, +{ + #[inline] + fn top(&self) -> (T, U) { + (self.0.top(), self.1.top()) + } + + #[inline] + fn is_top(&self, value: &(T, U)) -> bool { + self.0.is_top(&value.0) && self.1.is_top(&value.1) + } +} + +impl JoinSemiLattice<(T, U)> for (A, B) +where + A: JoinSemiLattice, + B: JoinSemiLattice, +{ + #[inline] + fn join_owned(&self, mut lhs: (T, U), rhs: &(T, U)) -> (T, U) + where + (T, U): Sized, + { + self.0.join(&mut lhs.0, &rhs.0); + self.1.join(&mut lhs.1, &rhs.1); + lhs + } + + #[inline] + fn join(&self, lhs: &mut (T, U), rhs: &(T, U)) -> bool { + self.0.join(&mut lhs.0, &rhs.0) | self.1.join(&mut lhs.1, &rhs.1) + } +} + +impl MeetSemiLattice<(T, U)> for (A, B) +where + A: MeetSemiLattice, + B: MeetSemiLattice, +{ + #[inline] + fn meet_owned(&self, mut lhs: (T, U), rhs: &(T, U)) -> (T, U) + where + (T, U): Sized, + { + self.0.meet(&mut lhs.0, &rhs.0); + self.1.meet(&mut lhs.1, &rhs.1); + lhs + } + + #[inline] + fn meet(&self, lhs: &mut (T, U), rhs: &(T, U)) -> 
bool { + self.0.meet(&mut lhs.0, &rhs.0) | self.1.meet(&mut lhs.1, &rhs.1) + } +} + impl MeetSemiLattice for Reverse where U: JoinSemiLattice, @@ -246,10 +320,14 @@ where #[cfg(test)] mod tests { #![expect(clippy::min_ident_chars)] + use core::cmp::Reverse; + use hashql_core::id::{self, Id as _, bit_vec::DenseBitSet}; use super::{PowersetLattice, SaturatingSemiring, WrappingSemiring}; - use crate::pass::analysis::dataflow::lattice::laws::{assert_bounded_lattice, assert_semiring}; + use crate::pass::analysis::dataflow::lattice::laws::{ + assert_bounded_join_semilattice, assert_bounded_lattice, assert_semiring, + }; #[test] fn saturating_semiring_u32() { @@ -303,4 +381,94 @@ mod tests { assert_bounded_lattice(&lattice, a, b, c); } + + /// Tuple lattice with two `PowersetLattice`s of different domain sizes. + #[test] + fn tuple_lattice_different_domains() { + id::newtype!(struct Left(u32 is 0..=15)); + id::newtype!(struct Right(u32 is 0..=31)); + + let lattice = (PowersetLattice::new(16), PowersetLattice::new(32)); + + let mut left_a: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_b: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_c: DenseBitSet = DenseBitSet::new_empty(16); + + left_a.insert(Left::from_usize(0)); + left_a.insert(Left::from_usize(1)); + + left_b.insert(Left::from_usize(1)); + left_b.insert(Left::from_usize(2)); + + left_c.insert(Left::from_usize(2)); + left_c.insert(Left::from_usize(3)); + + let mut right_a: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_b: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_c: DenseBitSet = DenseBitSet::new_empty(32); + + right_a.insert(Right::from_usize(10)); + right_a.insert(Right::from_usize(11)); + + right_b.insert(Right::from_usize(11)); + right_b.insert(Right::from_usize(12)); + + right_c.insert(Right::from_usize(12)); + right_c.insert(Right::from_usize(13)); + + assert_bounded_lattice( + &lattice, + (left_a, right_a), + (left_b, right_b), + (left_c, right_c), + ); + } + + 
/// Tuple lattice with `Reverse` and `PowersetLattice`. + /// + /// Verifies the `(A, B)` combinator works with heterogeneous lattice structures + /// where one component uses intersection-as-join (the dual lattice). + #[test] + fn tuple_lattice_heterogeneous_structures() { + id::newtype!(struct Left(u32 is 0..=15)); + id::newtype!(struct Right(u32 is 0..=31)); + + let lattice = (Reverse(PowersetLattice::new(16)), PowersetLattice::new(32)); + + let mut left_a: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_b: DenseBitSet = DenseBitSet::new_empty(16); + let mut left_c: DenseBitSet = DenseBitSet::new_empty(16); + + left_a.insert(Left::from_usize(0)); + left_a.insert(Left::from_usize(1)); + left_a.insert(Left::from_usize(2)); + + left_b.insert(Left::from_usize(1)); + left_b.insert(Left::from_usize(2)); + left_b.insert(Left::from_usize(3)); + + left_c.insert(Left::from_usize(2)); + left_c.insert(Left::from_usize(3)); + left_c.insert(Left::from_usize(4)); + + let mut right_a: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_b: DenseBitSet = DenseBitSet::new_empty(32); + let mut right_c: DenseBitSet = DenseBitSet::new_empty(32); + + right_a.insert(Right::from_usize(10)); + right_a.insert(Right::from_usize(11)); + + right_b.insert(Right::from_usize(11)); + right_b.insert(Right::from_usize(12)); + + right_c.insert(Right::from_usize(12)); + right_c.insert(Right::from_usize(13)); + + assert_bounded_join_semilattice( + &lattice, + (left_a, right_a), + (left_b, right_b), + (left_c, right_c), + ); + } } diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs index df60f07649f..9bc0fe7b74e 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/mod.rs @@ -15,30 +15,28 @@ //! This module provides two liveness analyses: //! //! 
- [`LivenessAnalysis`]: Standard liveness following the gen/kill semantics above. -//! - [`TraversalLivenessAnalysis`]: Traversal-aware liveness that suppresses uses of a traversal -//! source when assigning to a known traversal destination. +//! - [`TraversalLivenessAnalysis`]: Tracks local liveness alongside per-vertex path liveness. //! //! ## Traversal-Aware Liveness //! -//! When performing traversal extraction, a source local (e.g., `entity`) may have multiple -//! partial projections extracted into separate destination locals (e.g., `entity.uuid`, -//! `entity.name`). Standard liveness would mark `entity` as live at every assignment to these -//! destinations, even though only the projections are needed. +//! In a [`GraphReadFilter`] body, the vertex local (`_1`) is an input representing a graph +//! vertex. Rather than tracking the vertex as a monolithic live value, this analysis resolves +//! each vertex projection to an [`EntityPath`] and records it in a [`TraversalPathBitSet`]. +//! The vertex local itself is never marked live in the local bitset. //! -//! [`TraversalLivenessAnalysis`] takes a [`Traversals`] reference and modifies the transfer -//! function: when an assignment's left-hand side is a full definition of a registered traversal -//! destination, uses of the traversal source on the right-hand side are *not* generated. +//! This allows edge cost computation to sum only the [`InformationRange`] of live paths, +//! rather than charging the full entity size at every edge where the vertex is used. //! //! ```text -//! // Given: traversals.source() = _1, traversals.contains(_2) = true //! bb0: -//! _2 = _1.uuid // Standard: gens _1. Traversal-aware: skips _1 (full def of _2) -//! _3 = _1.name // If _3 not in traversals: gens _1 normally +//! _2 = _1.metadata.archived // gens EntityPath::Archived in path bitset, _1 stays dead +//! _3 = _1.properties // gens EntityPath::Properties in path bitset, _1 stays dead +//! 
_4 = _1 // unresolvable: insert_all in path bitset, _1 stays dead //! return _2 //! ``` //! -//! This allows dead code elimination to remove the source local when all its uses are through -//! extracted traversals. +//! [`GraphReadFilter`]: crate::body::Source::GraphReadFilter +//! [`InformationRange`]: crate::pass::analysis::size_estimation::InformationRange //! //! # Example //! @@ -68,35 +66,43 @@ use crate::{ Body, local::Local, location::Location, - place::{DefUse, PlaceContext}, - statement::{Assign, Statement, StatementKind}, + place::{DefUse, Place, PlaceContext}, + statement::Statement, terminator::Terminator, }, - pass::transform::Traversals, - visit::Visitor, + pass::execution::{ + VertexType, + traversal::{EntityPath, TraversalLattice, TraversalPathBitSet}, + }, + visit::{self, Visitor}, }; -/// Traversal-aware liveness analysis. +/// Liveness analysis that tracks local liveness and per-vertex path liveness in parallel. /// -/// Extends standard liveness with special handling for traversal extraction. When the left-hand -/// side of an assignment is a full definition of a traversal destination, uses of the traversal -/// source on the right-hand side are suppressed (not added to the live set). +/// The domain is `(DenseBitSet, TraversalPathBitSet)`: +/// - The local bitset tracks which locals are live, with the vertex local excluded entirely. +/// - The path bitset tracks which vertex field paths are live (resolved via [`EntityPath`]). /// -/// This allows subsequent dead code elimination to remove the source local when its only uses -/// are through extracted traversal projections. -pub struct TraversalLivenessAnalysis<'ctx, 'heap> { - pub traversals: &'ctx Traversals<'heap>, +/// When the vertex is accessed through a resolvable projection (e.g., `_1.metadata.archived`), +/// the corresponding [`EntityPath`] is gen'd in the path bitset. 
When the projection cannot be +/// resolved (bare `_1` or unknown path), all paths are marked live via +/// [`TraversalPathBitSet::insert_all`]. +pub struct TraversalLivenessAnalysis { + pub vertex: VertexType, } -impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { - type Domain = DenseBitSet; - type Lattice = PowersetLattice; +impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis { + type Domain = (DenseBitSet, TraversalPathBitSet); + type Lattice = (PowersetLattice, TraversalLattice); type SwitchIntData = !; const DIRECTION: Direction = Direction::Backward; fn lattice_in(&self, body: &Body<'heap>, _: A) -> Self::Lattice { - PowersetLattice::new(body.local_decls.len()) + let locals = PowersetLattice::new(body.local_decls.len()); + let paths = TraversalLattice::new(self.vertex); + + (locals, paths) } fn initialize_boundary(&self, _: &Body<'heap>, _: &mut Self::Domain, _: A) { @@ -109,7 +115,9 @@ impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { params: Interned<'heap, [Local]>, state: &mut Self::Domain, ) { - Ok(()) = TraversalTransferFunction(state, None).visit_basic_block_params(location, params); + let (locals, paths) = state; + Ok(()) = + TraversalTransferFunction { locals, paths }.visit_basic_block_params(location, params); } fn transfer_statement( @@ -118,20 +126,8 @@ impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { statement: &Statement<'heap>, state: &mut Self::Domain, ) { - // This is the pattern that's exhibited by explicit traversal extraction, in particular. - // Meaning we skip any assignments to our local, as long as it is a `Def`, to the particular - // chosen source. 
- let skip_uses_of = if let StatementKind::Assign(Assign { lhs, rhs: _ }) = &statement.kind - && lhs.projections.is_empty() - && self.traversals.contains(lhs.local) - { - Some(self.traversals.source()) - } else { - None - }; - - Ok(()) = - TraversalTransferFunction(state, skip_uses_of).visit_statement(location, statement); + let (locals, paths) = state; + Ok(()) = TraversalTransferFunction { locals, paths }.visit_statement(location, statement); } fn transfer_terminator( @@ -140,11 +136,15 @@ impl<'heap> DataflowAnalysis<'heap> for TraversalLivenessAnalysis<'_, '_> { terminator: &Terminator<'heap>, state: &mut Self::Domain, ) { - Ok(()) = TraversalTransferFunction(state, None).visit_terminator(location, terminator); + let (locals, paths) = state; + Ok(()) = TraversalTransferFunction { locals, paths }.visit_terminator(location, terminator); } } -struct TraversalTransferFunction<'mir>(&'mir mut DenseBitSet, Option); +struct TraversalTransferFunction<'mir> { + locals: &'mir mut DenseBitSet, + paths: &'mir mut TraversalPathBitSet, +} impl Visitor<'_> for TraversalTransferFunction<'_> { type Result = Result<(), !>; @@ -154,16 +154,43 @@ impl Visitor<'_> for TraversalTransferFunction<'_> { return Ok(()); }; + if local == Local::VERTEX { + debug_assert_eq!( + def_use, + DefUse::Use, + "vertex local is immutable in GraphReadFilter bodies" + ); + return Ok(()); + } + match def_use { - // Full definition kills liveness - the variable gets a new value - DefUse::Def => self.0.remove(local), - // Partial definitions and uses generate liveness - the current value is needed - DefUse::Use if Some(local) == self.1 => false, - DefUse::PartialDef | DefUse::Use => self.0.insert(local), + DefUse::Def => self.locals.remove(local), + DefUse::PartialDef | DefUse::Use => self.locals.insert(local), }; Ok(()) } + + fn visit_place( + &mut self, + location: Location, + context: PlaceContext, + place: &Place<'_>, + ) -> Self::Result { + if place.local == Local::VERTEX && Some(DefUse::Use) == 
context.into_def_use() { + match self.paths { + TraversalPathBitSet::Entity(bitset) => { + if let Some((path, _)) = EntityPath::resolve(&place.projections) { + bitset.insert(path); + } else { + bitset.insert_all(); + } + } + } + } + + visit::r#ref::walk_place(self, location, context, place) + } } /// Computes liveness information for all locals in a MIR body. diff --git a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs index e6292ba3076..678a0d37af4 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/dataflow/liveness/tests.rs @@ -8,23 +8,22 @@ use hashql_core::{ heap::Heap, id::bit_vec::DenseBitSet, pretty::Formatter, - r#type::{TypeBuilder, TypeFormatter, TypeFormatterOptions, environment::Environment}, + symbol::sym, + r#type::{TypeFormatter, TypeFormatterOptions, environment::Environment}, }; use insta::{Settings, assert_snapshot}; use super::{LivenessAnalysis, TraversalLivenessAnalysis}; use crate::{ - body::{ - Body, - basic_block::BasicBlockId, - local::Local, - place::{FieldIndex, Place, ProjectionKind}, - }, + body::{Body, basic_block::BasicBlockId, local::Local}, builder::body, intern::Interner, pass::{ analysis::dataflow::framework::{DataflowAnalysis, DataflowResults, Direction}, - transform::Traversals, + execution::{ + VertexType, + traversal::{EntityPath, TraversalPathBitSet}, + }, }, pretty::TextFormatOptions, }; @@ -342,164 +341,177 @@ fn diamond_one_branch_uses() { // TraversalLivenessAnalysis Tests // ============================================================================= -#[track_caller] -fn assert_traversal_liveness<'heap>( - name: &'static str, - env: &Environment<'heap>, - body: &Body<'heap>, - traversals: &Traversals<'heap>, -) { - let analysis = TraversalLivenessAnalysis { traversals }; - let results = analysis.iterate_to_fixpoint(body); +fn traversal_liveness<'a>(body: 
&'a Body<'a>) -> DataflowResults<'a, TraversalLivenessAnalysis> { + let analysis = TraversalLivenessAnalysis { + vertex: VertexType::Entity, + }; + analysis.iterate_to_fixpoint(body) +} - let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut settings = Settings::clone_current(); - settings.set_snapshot_path(dir.join("tests/ui/pass/liveness")); - settings.set_prepend_module_to_snapshot(false); +fn entry_locals<'a>( + results: &'a DataflowResults<'a, TraversalLivenessAnalysis>, + block: BasicBlockId, +) -> &'a DenseBitSet { + &results.entry_states[block].0 +} - let _drop = settings.bind_to_scope(); +fn entry_paths<'a>( + results: &'a DataflowResults<'a, TraversalLivenessAnalysis>, + block: BasicBlockId, +) -> &'a TraversalPathBitSet { + &results.entry_states[block].1 +} - assert_snapshot!( - name, - format!( - "{}\n\n========\n\n{}", - format_body(env, body), - format_liveness(body, &results) - ) - ); +/// Vertex local (`_1`) is never marked live in the local bitset. +#[test] +fn vertex_excluded_from_local_bitset() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // _0 = env, _1 = vertex, _2 = props + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], props: ?; + @proj properties = vertex.properties: ?; + + bb0() { + props = load properties; + goto bb1(); + }, + bb1() { + return props; + } + }); + + let results = traversal_liveness(&body); + + // At bb1 entry, _2 (props) is live (used in return), vertex is not + let bb1_locals = entry_locals(&results, BasicBlockId::new(1)); + assert!(bb1_locals.contains(Local::new(2))); + assert!(!bb1_locals.contains(Local::VERTEX)); + + // At bb0 entry, _2 is killed by its definition, vertex is never live + let bb0_locals = entry_locals(&results, BasicBlockId::new(0)); + assert!(!bb0_locals.contains(Local::VERTEX)); + assert!(!bb0_locals.contains(Local::new(2))); } -/// Assigning to a traversal destination does not mark the source as live. +/// Vertex field accesses are recorded as `EntityPaths` in the path bitset. #[test] -fn traversal_assignment_skips_source() { +fn vertex_access_records_entity_path() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - // _0 = env, _1 = source, _2 = traversal destination, _3 = result - let body = body!(interner, env; fn@0/2 -> Int { - decl env: (), source: (Int, Int), dest: Int; - @proj source_0 = source.0: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], props: ?; + @proj properties = vertex.properties: ?; bb0() { - dest = load source_0; - return dest; + props = load properties; + return props; } }); - // source = _1, destinations = {_2} - let source = Local::new(1); - let dest = Local::new(2); - let mut traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); - // The projection type is Int (the element type of the tuple) - traversals.insert( - dest, - Place::local(source).project( - &interner, - TypeBuilder::synthetic(&env).integer(), - ProjectionKind::Field(FieldIndex::new(0)), - ), - ); + let results = traversal_liveness(&body); + let paths = entry_paths(&results, BasicBlockId::new(0)); - assert_traversal_liveness( - "traversal_assignment_skips_source", - &env, - &body, - &traversals, - ); + let entity_paths = paths.as_entity().expect("should be entity variant"); + assert!(entity_paths.contains(EntityPath::Properties)); } -/// Assigning to a non-traversal local marks the source as live. +/// Bare vertex access sets all bits in the path bitset. #[test] -fn non_traversal_assignment_gens_source() { +fn bare_vertex_access_sets_all_paths() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - // _0 = env, _1 = source, _2 = NOT in traversals, _3 = result - let body = body!(interner, env; fn@0/2 -> Int { - decl env: (), source: (Int, Int), other: Int; - @proj source_0 = source.0: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; bb0() { - other = load source_0; - return other; + val = load vertex; + return val; } }); - // source = _1, destinations = {} (empty - _2 is NOT a traversal destination) - let source = Local::new(1); - let traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); + let results = traversal_liveness(&body); + let paths = entry_paths(&results, BasicBlockId::new(0)); - assert_traversal_liveness( - "non_traversal_assignment_gens_source", - &env, - &body, - &traversals, - ); + let entity_paths = paths.as_entity().expect("should be entity variant"); + // 25 variants - 7 children = 18 top-level paths + assert_eq!(entity_paths.len(), 18); } -/// Assignment with projections on LHS (partial def) does not trigger traversal skip. +/// Non-vertex locals are tracked normally in the local bitset. #[test] -fn lhs_projection_does_not_skip() { +fn non_vertex_locals_tracked_normally() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - // _0 = env, _1 = source, _2 = traversal destination (tuple), _3 = result - let body = body!(interner, env; fn@0/2 -> (Int, Int) { - decl env: (), source: (Int, Int), dest: (Int, Int); - @proj source_0 = source.0: Int, dest_0 = dest.0: Int; + // _0 = env, _1 = vertex, _2 = val, _3 = result + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (Int), vertex: [Opaque sym::path::Entity; ?], val: Int, result: Int; + @proj env_0 = env.0: Int; bb0() { - dest_0 = load source_0; - return dest; + val = load env_0; + goto bb1(); + }, + bb1() { + result = load val; + return result; } }); - // source = _1, destinations = {_2} - // Even though _2 is in traversals, the assignment is to dest.0 (has projection), - // so it should NOT skip the source use. 
- let _env = Local::new(0); - let source = Local::new(1); - let dest = Local::new(2); - - let mut traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); - let source_0_place = Place::local(source).project( - &interner, - TypeBuilder::synthetic(&env).integer(), - ProjectionKind::Field(FieldIndex::new(0)), - ); - traversals.insert(dest, source_0_place); + let results = traversal_liveness(&body); - assert_traversal_liveness("lhs_projection_does_not_skip", &env, &body, &traversals); + // At bb1 entry, val (_2) is live (used by the load) + let bb1_locals = entry_locals(&results, BasicBlockId::new(1)); + assert!(bb1_locals.contains(Local::new(2))); + assert!(!bb1_locals.contains(Local::VERTEX)); + + // Path bitset is empty (no vertex access) + let bb1_paths = entry_paths(&results, BasicBlockId::new(1)); + assert!(bb1_paths.is_empty()); } -/// Empty traversals set produces identical results to standard liveness. +/// Paths from multiple blocks are joined at merge points. #[test] -fn empty_traversals_is_standard_liveness() { +fn paths_joined_across_branches() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/2 -> Int { - decl env: (), source: (Int, Int), dest: Int; - @proj source_0 = source.0: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + props: ?, archived: Bool, cond: Bool; + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived_proj = metadata.archived: Bool; bb0() { - dest = load source_0; - return dest; + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + props = load properties; + return props; + }, + bb2() { + archived = load archived_proj; + return archived; } }); - // source = _1, destinations = {} (empty) - let source = Local::new(1); - let traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); + let results = traversal_liveness(&body); + let paths = entry_paths(&results, BasicBlockId::new(0)); - assert_traversal_liveness( - "empty_traversals_is_standard_liveness", - &env, - &body, - &traversals, - ); + let entity_paths = paths.as_entity().expect("should be entity variant"); + // Join of {Properties} and {Archived} + assert!(entity_paths.contains(EntityPath::Properties)); + assert!(entity_paths.contains(EntityPath::Archived)); + assert_eq!(entity_paths.len(), 2); } diff --git a/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs b/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs index a6cab78d8a5..60acadb6d21 100644 --- a/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs +++ b/libs/@local/hashql/mir/src/pass/analysis/size_estimation/range.rs @@ -203,6 +203,14 @@ macro_rules! 
range { // Could become empty if min > max (no overlap) Self { min, max: max.map(<$inner>::new) } } + + pub fn midpoint(self) -> Option<$inner> { + let min = self.min.raw; + let max = self.inclusive_max()?.raw; + + let avg = min.midpoint(max); + Some(<$inner>::new(avg)) + } } impl Debug for $name { diff --git a/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs b/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs new file mode 100644 index 00000000000..65d47cb89f4 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/block_partitioned_vec.rs @@ -0,0 +1,217 @@ +//! Generic block-partitioned storage. +//! +//! Provides a flat data array with a block-offset table, so that per-element values can be +//! accessed by block ID. Each block owns a contiguous slice of the array, sized by the count +//! provided at construction time. +//! +//! Used as the backing store for both per-statement cost maps and per-edge terminator matrices. + +use core::{alloc::Allocator, iter}; + +use hashql_core::id::Id as _; + +use crate::body::basic_block::{BasicBlockId, BasicBlockSlice}; + +/// Dense block-partitioned storage. +/// +/// Stores a flat array of `T` values, partitioned into per-block slices via an offset table. +/// Each [`BasicBlockId`] maps to a contiguous range within the data array. The per-block count +/// is determined at construction time and can be rebuilt via [`remap`](Self::remap). 
+#[derive(Debug)] +pub(crate) struct BlockPartitionedVec { + offsets: Box, A>, + data: Vec, +} + +impl BlockPartitionedVec { + pub(crate) fn new_in(counts: impl ExactSizeIterator, value: T, alloc: A) -> Self + where + T: Clone, + A: Clone, + { + let (offsets, length) = Self::build_offsets(counts, alloc.clone()); + let data = alloc::vec::from_elem_in(value, length, alloc); + + Self { offsets, data } + } + + #[expect(unsafe_code)] + fn build_offsets( + mut iter: impl ExactSizeIterator, + alloc: A, + ) -> (Box, A>, usize) { + let mut offsets = Box::new_uninit_slice_in(iter.len() + 1, alloc); + + let mut offset = 0_u32; + + offsets[0].write(0); + + let (_, rest) = offsets[1..].write_iter(iter::from_fn(|| { + let next = iter.next()?; + + offset += next; + + Some(offset) + })); + + debug_assert!(rest.is_empty()); + debug_assert_eq!(iter.len(), 0); + + // SAFETY: We have initialized all elements of the slice. + let offsets = unsafe { offsets.assume_init() }; + let offsets = BasicBlockSlice::from_boxed_slice(offsets); + + (offsets, offset as usize) + } + + #[inline] + fn range(&self, block: BasicBlockId) -> core::ops::Range { + (self.offsets[block] as usize)..(self.offsets[block.plus(1)] as usize) + } + + /// Returns the slice of values for `block`. + #[inline] + pub(crate) fn of(&self, block: BasicBlockId) -> &[T] { + let range = self.range(block); + &self.data[range] + } + + /// Returns a mutable slice of values for `block`. + #[inline] + pub(crate) fn of_mut(&mut self, block: BasicBlockId) -> &mut [T] { + let range = self.range(block); + &mut self.data[range] + } + + /// Returns an iterator over all values in the flat data array. + #[cfg(test)] + pub(crate) fn iter(&self) -> impl Iterator { + self.data.iter() + } + + /// Returns the total number of elements across all blocks. + pub(crate) const fn len(&self) -> usize { + self.data.len() + } + + /// Returns the number of blocks in the partition. 
+ #[cfg(test)] + pub(crate) fn block_count(&self) -> usize { + self.offsets.len() - 1 + } + + /// Rebuilds the offset table for a new partitioning. + /// + /// Call after transforms that change element counts per block. Does not resize or clear + /// the data array; callers must ensure the total element count remains unchanged. + pub(crate) fn remap(&mut self, counts: impl ExactSizeIterator) + where + A: Clone, + { + let alloc = Box::allocator(&self.offsets).clone(); + + let (offsets, _) = Self::build_offsets(counts, alloc); + self.offsets = offsets; + } +} + +#[cfg(test)] +mod tests { + #![expect(clippy::cast_possible_truncation)] + use alloc::alloc::Global; + + use super::BlockPartitionedVec; + use crate::body::basic_block::BasicBlockId; + + /// Single block with 5 elements: all accessible via `of()`/`of_mut()`. + #[test] + fn single_block() { + let mut vec = BlockPartitionedVec::new_in([5].into_iter(), 0_u32, Global); + + assert_eq!(vec.len(), 5); + assert_eq!(vec.block_count(), 1); + + let slice = vec.of_mut(BasicBlockId::new(0)); + for (index, value) in slice.iter_mut().enumerate() { + *value = index as u32; + } + + let slice = vec.of(BasicBlockId::new(0)); + assert_eq!(slice, &[0, 1, 2, 3, 4]); + } + + /// Multiple blocks with varying sizes: elements are correctly partitioned. + #[test] + fn multiple_blocks() { + let mut vec = BlockPartitionedVec::new_in([2, 3, 1].into_iter(), 0_u32, Global); + + assert_eq!(vec.len(), 6); + assert_eq!(vec.block_count(), 3); + + vec.of_mut(BasicBlockId::new(0))[0] = 10; + vec.of_mut(BasicBlockId::new(0))[1] = 20; + vec.of_mut(BasicBlockId::new(1))[0] = 30; + vec.of_mut(BasicBlockId::new(1))[1] = 40; + vec.of_mut(BasicBlockId::new(1))[2] = 50; + vec.of_mut(BasicBlockId::new(2))[0] = 60; + + assert_eq!(vec.of(BasicBlockId::new(0)), &[10, 20]); + assert_eq!(vec.of(BasicBlockId::new(1)), &[30, 40, 50]); + assert_eq!(vec.of(BasicBlockId::new(2)), &[60]); + } + + /// Blocks with zero elements produce empty slices. 
+ #[test] + fn empty_blocks() { + let vec = BlockPartitionedVec::new_in([0, 3, 0].into_iter(), 0_u32, Global); + + assert_eq!(vec.len(), 3); + assert_eq!(vec.block_count(), 3); + assert!(vec.of(BasicBlockId::new(0)).is_empty()); + assert_eq!(vec.of(BasicBlockId::new(1)).len(), 3); + assert!(vec.of(BasicBlockId::new(2)).is_empty()); + } + + /// Zero blocks is valid. + #[test] + fn no_blocks() { + let vec = BlockPartitionedVec::new_in(core::iter::empty::(), 0_u32, Global); + + assert_eq!(vec.len(), 0); + assert_eq!(vec.block_count(), 0); + } + + /// `iter()` yields all elements in flat order. + #[test] + fn iter_all_elements() { + let mut vec = BlockPartitionedVec::new_in([2, 1].into_iter(), 0_u32, Global); + + vec.of_mut(BasicBlockId::new(0))[0] = 1; + vec.of_mut(BasicBlockId::new(0))[1] = 2; + vec.of_mut(BasicBlockId::new(1))[0] = 3; + + let collected: Vec = vec.iter().copied().collect(); + assert_eq!(collected, vec![1, 2, 3]); + } + + /// `remap()` rebuilds the offset table without changing data. 
+ #[test] + fn remap_preserves_data() { + let mut vec = BlockPartitionedVec::new_in([3, 3].into_iter(), 0_u32, Global); + + // Write sequential values + for (index, value) in vec.of_mut(BasicBlockId::new(0)).iter_mut().enumerate() { + *value = index as u32; + } + for (index, value) in vec.of_mut(BasicBlockId::new(1)).iter_mut().enumerate() { + *value = (index + 3) as u32; + } + + // Remap to a different partitioning with the same total count + vec.remap([2, 4].into_iter()); + + assert_eq!(vec.block_count(), 2); + assert_eq!(vec.of(BasicBlockId::new(0)), &[0, 1]); + assert_eq!(vec.of(BasicBlockId::new(1)), &[2, 3, 4, 5]); + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs new file mode 100644 index 00000000000..3850c247a85 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/cost/analysis.rs @@ -0,0 +1,586 @@ +use core::alloc::Allocator; + +use super::{ApproxCost, StatementCostVec}; +use crate::{ + body::basic_block::{BasicBlock, BasicBlockId, BasicBlockSlice, BasicBlockVec}, + pass::{ + analysis::size_estimation::{InformationRange, range::SaturatingMul as _}, + execution::{ + TargetId, VertexType, + target::{TargetArray, TargetBitSet}, + traversal::{ + TransferCostConfig, TraversalAnalysisVisitor, TraversalPathBitSet, TraversalResult, + }, + }, + }, + visit::Visitor as _, +}; + +/// Cost of running a single basic block on one target. +/// +/// Separates the statement cost sum (`base`) from the path transfer premium (`load`) so that +/// callers can inspect or log each component independently, even though the solver only sees +/// the combined [`total`](Self::total). +#[derive(Debug, Copy, Clone)] +struct BasicBlockTargetCost { + /// Sum of per-statement costs for this target (from [`StatementCostVec::sum_approx`]). + base: ApproxCost, + /// Transfer premium for vertex paths accessed in this block whose origin is a different + /// backend. 
Zero when the target is the natural origin for every accessed path. + load: ApproxCost, +} + +impl BasicBlockTargetCost { + const ZERO: Self = Self { + base: ApproxCost::ZERO, + load: ApproxCost::ZERO, + }; + + fn total(self) -> ApproxCost { + self.base + self.load + } +} + +/// Precomputed cost for one basic block across all candidate targets. +#[derive(Debug, Copy, Clone)] +struct BasicBlockCost { + /// Which targets can execute this block (copied from the domain after AC-3). + targets: TargetBitSet, + /// Per-target cost (only entries where `targets` is set are meaningful). + costs: TargetArray, +} + +/// Per-block cost map for the entire body. +/// +/// Indexed by [`BasicBlockId`]. Each entry stores the set of candidate targets and the +/// combined (statement + path transfer) cost for each candidate. +/// +/// Produced by [`BasicBlockCostAnalysis::analyze_in`] and consumed by the placement solver. +#[derive(Debug)] +pub(crate) struct BasicBlockCostVec { + inner: BasicBlockVec, +} + +impl BasicBlockCostVec { + /// Returns the set of candidate targets for `block`. + pub(crate) fn assignments(&self, block: BasicBlockId) -> TargetBitSet { + self.inner[block].targets + } + + /// Returns the total cost (statement base + path transfer load) of placing `block` on + /// `target`. + /// + /// # Panics + /// + /// Debug-asserts that `target` is in the block's candidate domain. + pub(crate) fn cost(&self, block: BasicBlockId, target: TargetId) -> ApproxCost { + let entry = &self.inner[block]; + + debug_assert!( + entry.targets.contains(target), + "target {target:?} is not in the domain of block {block:?}" + ); + + entry.costs[target].total() + } +} + +/// Computes per-block costs by combining statement costs with path transfer premiums. +/// +/// For each block, walks the MIR statements to discover which vertex paths are accessed, +/// then charges a transfer premium on every target that is not the natural origin for those +/// paths. 
The premium is the estimated transfer size multiplied by the target's cost +/// multiplier. +/// +/// Path premiums are charged once per block (intra-block dedup), not once per statement. +/// Composite paths are kept as-is rather than expanded to leaves, under the assumption that +/// a composite fetch is cheaper than fetching each leaf independently. +pub(crate) struct BasicBlockCostAnalysis<'ctx, A: Allocator> { + pub vertex: VertexType, + pub assignments: &'ctx BasicBlockSlice, + pub costs: &'ctx TargetArray>, +} + +impl BasicBlockCostAnalysis<'_, A> { + fn analyze_basic_block_target( + &self, + config: &TransferCostConfig, + id: BasicBlockId, + target: TargetId, + traversals: TraversalPathBitSet, + ) -> BasicBlockTargetCost { + let base = self.costs[target].sum_approx(id); + + let mut range = InformationRange::zero(); + + // For *any* target that is *not* able to be assigned in this block, add the cost to the + // total range. + for path in &traversals { + if !path.origin().contains(target) { + range += path.estimate_size(config); + } + } + + let load = range + .saturating_mul(config.target_multiplier[target].get()) + .midpoint() + .map_or(ApproxCost::INF, From::from); + + BasicBlockTargetCost { base, load } + } + + fn analyze_basic_block( + &self, + config: &TransferCostConfig, + id: BasicBlockId, + block: &BasicBlock<'_>, + ) -> BasicBlockCost { + let targets = self.assignments[id]; + let mut costs = TargetArray::from_raw([BasicBlockTargetCost::ZERO; _]); + + // We do not expand to the leaf nodes on purpose, we work under the assumption that any + // composite path that is given is more efficient than its individual components and will + // always be fetched together, therefore the cost of the parent must be used to accurately + // describe the cost. If a node can be used in multiple places at the same time, then fetch + // from the composite will always be preferred. 
+ let mut traversals = TraversalPathBitSet::empty(self.vertex); + let mut visitor = TraversalAnalysisVisitor::new(self.vertex, |_, result| match result { + TraversalResult::Path(path) => traversals.insert(path), + TraversalResult::Complete => traversals.insert_all(), + }); + Ok(()) = visitor.visit_basic_block(id, block); + + for target in &targets { + costs[target] = self.analyze_basic_block_target(config, id, target, traversals); + } + + BasicBlockCost { targets, costs } + } + + /// Computes per-block costs for every block in `blocks`. + pub(crate) fn analyze_in( + &self, + config: &TransferCostConfig, + blocks: &BasicBlockSlice>, + alloc: A, + ) -> BasicBlockCostVec { + let inner = BasicBlockVec::from_domain_derive_in( + |id, block| self.analyze_basic_block(config, id, block), + blocks, + alloc, + ); + + BasicBlockCostVec { inner } + } +} + +#[cfg(test)] +mod tests { + #![expect(clippy::min_ident_chars)] + use alloc::alloc::Global; + + use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; + + use super::*; + use crate::{ + body::basic_block::BasicBlockId, + builder::body, + intern::Interner, + pass::{ + analysis::size_estimation::{InformationRange, InformationUnit}, + execution::traversal::TransferCostConfig, + }, + }; + + fn all_targets() -> TargetBitSet { + let mut set = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32); + for target in TargetId::all() { + set.insert(target); + } + set + } + + fn default_config() -> TransferCostConfig { + TransferCostConfig::new(InformationRange::full()) + } + + fn make_targets(body: &crate::body::Body<'_>, domain: TargetBitSet) -> Vec { + body.basic_blocks.iter().map(|_| domain).collect() + } + + /// A block with no vertex accesses has zero load cost on every target. 
+ #[test] + fn no_vertex_access_zero_load() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (Int), vertex: [Opaque sym::path::Entity; ?], val: Int; + @proj env_0 = env.0: Int; + + bb0() { + val = load env_0; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + for target in TargetId::all() { + let total = result.cost(bb0, target); + let base = costs[target].sum_approx(bb0); + assert_eq!(total, base, "target {target:?} should have zero load cost"); + } + } + + /// Accessing Vectors (Embedding-origin) charges load on Interpreter and Postgres + /// but not on Embedding. + #[test] + fn vectors_path_charges_non_origin_targets() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + val = load vectors; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let config = default_config(); + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let embedding_cost = result.cost(bb0, TargetId::Embedding); + let embedding_base = costs[TargetId::Embedding].sum_approx(bb0); + assert_eq!( + embedding_cost, embedding_base, + "Embedding is origin for Vectors; no load premium" + ); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!( + interpreter_cost > interpreter_base, + "Interpreter should pay load premium for Vectors" + ); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert!( + postgres_cost > postgres_base, + "Postgres should pay load premium for Vectors" + ); + } + + /// Accessing Archived (Postgres-origin) charges load on non-Postgres targets. 
+ #[test] + fn postgres_path_charges_non_postgres_targets() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: Bool; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + val = load archived; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!( + postgres_cost, postgres_base, + "Postgres is origin for Archived; no load premium" + ); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!( + interpreter_cost > interpreter_base, + "Interpreter should pay load premium for Archived" + ); + + let embedding_cost = result.cost(bb0, TargetId::Embedding); + let embedding_base = costs[TargetId::Embedding].sum_approx(bb0); + assert!( + embedding_cost > embedding_base, + "Embedding should pay load premium for Archived" + ); + } + + /// Properties (Postgres) + Vectors (Embedding) in one block: Interpreter pays both + /// premiums, Postgres pays only Vectors, Embedding pays only Properties. + #[test] + fn multiple_paths_accumulate_load() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj properties = vertex.properties: ?, + encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + result = tuple properties, vectors; + return result; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + // Use a bounded properties size so both premiums are finite and comparable. + let config = TransferCostConfig::new(InformationRange::value(InformationUnit::new(100))); + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let embedding_cost = result.cost(bb0, TargetId::Embedding); + + // Interpreter pays both premiums, so it's the most expensive + assert!( + interpreter_cost > postgres_cost, + "Interpreter pays both premiums, Postgres only Vectors" + ); + assert!( + interpreter_cost > embedding_cost, + "Interpreter pays both premiums, Embedding only Properties" + ); + + // Both Postgres and Embedding pay above their base + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + let embedding_base = costs[TargetId::Embedding].sum_approx(bb0); + assert!(postgres_cost > postgres_base); + assert!(embedding_cost > embedding_base); + } + + /// `RecordId` (composite) is charged as a single path, not expanded to leaf descendants. + /// It is Postgres-origin, so Postgres pays no premium and Interpreter does. 
+ #[test] + fn composite_path_expands_to_leaves() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?; + + bb0() { + val = load record_id; + return val; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + // Use zero properties size so Properties path doesn't contribute noise + let config = TransferCostConfig::new(InformationRange::zero()); + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!( + postgres_cost, postgres_base, + "Postgres is origin for all RecordId leaves" + ); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!( + interpreter_cost > interpreter_base, + "Interpreter should pay load premium for RecordId leaves" + ); + } + + /// With a restricted target domain, only available targets are analyzed. 
+ #[test] + fn restricted_target_domain() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: Bool; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + val = load archived; + return val; + } + }); + + let mut restricted = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32); + restricted.insert(TargetId::Postgres); + restricted.insert(TargetId::Interpreter); + let targets = make_targets(&body, restricted); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([1].into_iter(), Global)); + + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&default_config(), &body.basic_blocks, Global); + let bb0 = BasicBlockId::new(0); + + let postgres_cost = result.cost(bb0, TargetId::Postgres); + let postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!(postgres_cost, postgres_base); + + let interpreter_cost = result.cost(bb0, TargetId::Interpreter); + let interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!(interpreter_cost > interpreter_base); + } + + /// Paths across multiple blocks are analyzed independently per block. + #[test] + fn paths_across_blocks_independent() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + props: ?, val: Bool, cond: Bool; + @proj properties = vertex.properties: ?, + encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + props = load properties; + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load vectors; + return val; + }, + bb2() { + return cond; + } + }); + + let targets = make_targets(&body, all_targets()); + let targets = BasicBlockSlice::from_raw(&targets); + + let costs: TargetArray> = + TargetArray::from_fn(|_| StatementCostVec::from_iter([2, 1, 0].into_iter(), Global)); + + let config = default_config(); + let analysis = BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments: targets, + costs: &costs, + }; + + let result = analysis.analyze_in(&config, &body.basic_blocks, Global); + + let bb0 = BasicBlockId::new(0); + let bb1 = BasicBlockId::new(1); + let bb2 = BasicBlockId::new(2); + + // bb0 accesses Properties (Postgres-origin): Postgres no premium, others pay + let bb0_postgres = result.cost(bb0, TargetId::Postgres); + let bb0_postgres_base = costs[TargetId::Postgres].sum_approx(bb0); + assert_eq!(bb0_postgres, bb0_postgres_base); + + let bb0_interpreter = result.cost(bb0, TargetId::Interpreter); + let bb0_interpreter_base = costs[TargetId::Interpreter].sum_approx(bb0); + assert!(bb0_interpreter > bb0_interpreter_base); + + // bb1 accesses Vectors (Embedding-origin): Embedding no premium, others pay + let bb1_embedding = result.cost(bb1, TargetId::Embedding); + let bb1_embedding_base = costs[TargetId::Embedding].sum_approx(bb1); + assert_eq!(bb1_embedding, bb1_embedding_base); + + let bb1_postgres = result.cost(bb1, TargetId::Postgres); + let bb1_postgres_base = costs[TargetId::Postgres].sum_approx(bb1); + assert!(bb1_postgres > bb1_postgres_base); + + // bb2 has no vertex accesses: all targets equal base + for target in TargetId::all() { + let cost = result.cost(bb2, target); + let base = 
costs[target].sum_approx(bb2); + assert_eq!(cost, base, "bb2 target {target:?} should have zero load"); + } + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/cost.rs b/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs similarity index 51% rename from libs/@local/hashql/mir/src/pass/execution/cost.rs rename to libs/@local/hashql/mir/src/pass/execution/cost/mod.rs index f587fe0b026..7c971af0fa7 100644 --- a/libs/@local/hashql/mir/src/pass/execution/cost.rs +++ b/libs/@local/hashql/mir/src/pass/execution/cost/mod.rs @@ -1,31 +1,34 @@ //! Cost tracking for execution planning. //! -//! Provides data structures for recording the execution cost of statements on different targets. -//! The execution planner uses these costs to select optimal targets for each statement. +//! Two levels of cost representation: +//! +//! - **Per-statement**: [`StatementCostVec`] records the [`Cost`] of each statement on a given +//! target. Produced by the statement placement pass and consumed by [`BasicBlockCostAnalysis`]. +//! +//! - **Per-block**: [`BasicBlockCostVec`] aggregates statement costs and adds a path transfer +//! premium for non-origin backends. This is what the placement solver operates on. 
use alloc::alloc::Global; use core::{ alloc::Allocator, fmt, - iter::{self, Sum}, + iter::Sum, ops::{Add, AddAssign, Index, IndexMut, Mul, MulAssign}, }; use std::f32; -use hashql_core::id::{Id as _, bit_vec::DenseBitSet}; - +pub(crate) use self::analysis::{BasicBlockCostAnalysis, BasicBlockCostVec}; +use super::block_partitioned_vec::BlockPartitionedVec; use crate::{ - body::{ - Body, - basic_block::{BasicBlockId, BasicBlockSlice}, - basic_blocks::BasicBlocks, - local::{Local, LocalVec}, - location::Location, - }, + body::{basic_block::BasicBlockId, basic_blocks::BasicBlocks, location::Location}, macros::{forward_ref_binop, forward_ref_op_assign}, - pass::transform::Traversals, + pass::analysis::size_estimation::InformationUnit, }; +mod analysis; +#[cfg(test)] +mod tests; + /// Execution cost for a statement on a particular target. /// /// Lower values indicate cheaper execution. When multiple targets can execute a statement, the @@ -116,6 +119,14 @@ impl Cost { Self::new_saturating(raw.saturating_add(other.0.as_inner())) } + #[inline] + #[must_use] + pub const fn saturating_mul(self, other: u32) -> Self { + let raw = self.0.as_inner(); + + Self::new_saturating(raw.saturating_mul(other)) + } + #[expect(clippy::cast_precision_loss)] #[inline] #[must_use] @@ -231,6 +242,13 @@ impl From for ApproxCost { } } +impl From for ApproxCost { + fn from(value: InformationUnit) -> Self { + #[expect(clippy::cast_precision_loss)] + Self(value.as_u32() as f32) + } +} + impl fmt::Display for ApproxCost { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.0, fmt) @@ -308,157 +326,58 @@ impl Sum for ApproxCost { } } -/// Sparse cost map for traversal locals. -/// -/// Traversals are locals that require data fetching from a backend (e.g., entity field access). -/// This map only stores costs for locals marked as traversals; insertions for non-traversal -/// locals are ignored. 
This allows the execution planner to focus on the operations that actually -/// require backend coordination. -pub struct TraversalCostVec { - traversals: DenseBitSet, - costs: LocalVec, A>, -} - -impl TraversalCostVec { - /// Creates an empty traversal cost map for the given body. - /// - /// Only locals that are enabled traversals (per [`Traversals::enabled`]) will accept cost - /// insertions; other locals are silently ignored. - pub fn new_in<'heap>(body: &Body<'heap>, traversals: &Traversals<'heap>, alloc: A) -> Self { - Self { - traversals: traversals.enabled(body), - costs: LocalVec::new_in(alloc), - } - } - - /// Returns the cost assigned to `local`, or `None` if unassigned or not a traversal. - pub fn get(&self, local: Local) -> Option { - self.costs.lookup(local).copied() - } - - /// Records a cost for a traversal local. - /// - /// If `local` is not a traversal, the insertion is silently ignored. - pub fn insert(&mut self, local: Local, cost: Cost) { - if self.traversals.contains(local) { - self.costs.insert(local, cost); - } - } - - /// Iterates over all (local, cost) pairs that have assigned costs. - pub fn iter(&self) -> impl Iterator { - self.costs - .iter_enumerated() - .filter_map(|(local, cost)| cost.map(|cost| (local, cost))) - } -} - -impl IntoIterator for &TraversalCostVec { - type Item = (Local, Cost); - - type IntoIter = impl Iterator; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - /// Dense cost map for all statements in a body. /// /// Stores the execution cost for every statement, indexed by [`Location`]. A `None` cost /// indicates the target cannot execute that statement. The execution planner compares costs /// across targets to determine the optimal execution strategy. 
#[derive(Debug)] -pub(crate) struct StatementCostVec { - offsets: Box, A>, - costs: Vec, A>, -} - -impl StatementCostVec { - #[expect(unsafe_code)] - fn offsets( - mut iter: impl ExactSizeIterator, - alloc: A, - ) -> (Box, A>, usize) { - let mut offsets = Box::new_uninit_slice_in(iter.len() + 1, alloc); - - let mut offset = 0_u32; - - offsets[0].write(0); - - let (_, rest) = offsets[1..].write_iter(iter::from_fn(|| { - let next = iter.next()?; - - offset += next; - - Some(offset) - })); - - debug_assert!(rest.is_empty()); - debug_assert_eq!(iter.len(), 0); - - // SAFETY: We have initialized all elements of the slice. - let offsets = unsafe { offsets.assume_init() }; - let offsets = BasicBlockSlice::from_boxed_slice(offsets); - - (offsets, offset as usize) - } +pub(crate) struct StatementCostVec(BlockPartitionedVec, A>); - fn from_iter(iter: impl ExactSizeIterator, alloc: A) -> Self - where - A: Clone, - { - let (offsets, length) = Self::offsets(iter, alloc.clone()); - let costs = alloc::vec::from_elem_in(None, length, alloc); - - Self { offsets, costs } +impl StatementCostVec { + #[cfg(test)] + pub(crate) fn from_iter(iter: impl ExactSizeIterator, alloc: A) -> Self { + Self(BlockPartitionedVec::new_in(iter, None, alloc)) } /// Creates a cost map with space for all statements in the given blocks. /// /// All costs are initialized to `None` (unsupported). Use indexing to assign costs. #[expect(clippy::cast_possible_truncation)] - pub(crate) fn new_in(blocks: &BasicBlocks, alloc: A) -> Self - where - A: Clone, - { - Self::from_iter( + pub(crate) fn new_in(blocks: &BasicBlocks, alloc: A) -> Self { + Self(BlockPartitionedVec::new_in( blocks.iter().map(|block| block.statements.len() as u32), + None, alloc, - ) + )) } +} +impl StatementCostVec { /// Rebuilds the offset table for a new block layout. /// /// Call after transforms that change statement counts per block. 
Does not resize or clear - /// the cost data — callers must ensure the total statement count remains unchanged. + /// the cost data; callers must ensure the total statement count remains unchanged. #[expect(clippy::cast_possible_truncation)] pub(crate) fn remap(&mut self, blocks: &BasicBlocks) where A: Clone, { - let alloc = Box::allocator(&self.offsets).clone(); - - let (offsets, _) = Self::offsets( - blocks.iter().map(|block| block.statements.len() as u32), - alloc, - ); - self.offsets = offsets; + self.0 + .remap(blocks.iter().map(|block| block.statements.len() as u32)); } /// Returns `true` if no statements have assigned costs. #[cfg(test)] pub(crate) fn all_unassigned(&self) -> bool { - self.costs.iter().all(Option::is_none) + self.0.iter().all(Option::is_none) } /// Returns the cost slice for all statements in `block`. /// /// The returned slice is indexed by statement position (0-based within the block). pub(crate) fn of(&self, block: BasicBlockId) -> &[Option] { - let range = (self.offsets[block] as usize)..(self.offsets[block.plus(1)] as usize); - - &self.costs[range] + self.0.of(block) } pub(crate) fn sum_approx(&self, block: BasicBlockId) -> ApproxCost { @@ -468,11 +387,8 @@ impl StatementCostVec { /// Returns the cost at `location`, or `None` if out of bounds or unassigned. 
#[cfg(test)] pub(crate) fn get(&self, location: Location) -> Option { - let range = (self.offsets[location.block] as usize) - ..(self.offsets[location.block.plus(1)] as usize); - - // statement_index is 1-based - self.costs[range] + self.0 + .of(location.block) .get(location.statement_index - 1) .copied() .flatten() @@ -483,181 +399,14 @@ impl Index for StatementCostVec { type Output = Option; fn index(&self, index: Location) -> &Self::Output { - let range = - (self.offsets[index.block] as usize)..(self.offsets[index.block.plus(1)] as usize); - // statement_index is 1-based - &self.costs[range][index.statement_index - 1] + &self.0.of(index.block)[index.statement_index - 1] } } impl IndexMut for StatementCostVec { fn index_mut(&mut self, index: Location) -> &mut Self::Output { - let range = - (self.offsets[index.block] as usize)..(self.offsets[index.block.plus(1)] as usize); - // statement_index is 1-based - &mut self.costs[range][index.statement_index - 1] - } -} - -#[cfg(test)] -mod tests { - use alloc::alloc::Global; - - use super::{Cost, StatementCostVec}; - use crate::body::{basic_block::BasicBlockId, location::Location}; - - /// `Cost::new` succeeds for valid values (0 and 100). - #[test] - fn cost_new_valid_values() { - let zero = Cost::new(0); - assert!(zero.is_some()); - - let hundred = Cost::new(100); - assert!(hundred.is_some()); - } - - /// `Cost::new(u32::MAX)` returns `None` (reserved as niche for `Option`). - #[test] - fn cost_new_max_returns_none() { - let max = Cost::new(u32::MAX); - assert!(max.is_none()); - } - - /// `Cost::new(u32::MAX - 1)` succeeds (largest valid cost value). - #[test] - fn cost_new_max_minus_one_valid() { - let max_valid = Cost::new(u32::MAX - 1); - assert!(max_valid.is_some()); - } - - /// `Cost::new_unchecked` with valid values works correctly. - /// - /// This test exercises unsafe code and should be run under Miri. 
- #[test] - #[expect(unsafe_code)] - fn cost_new_unchecked_valid() { - // SAFETY: 0 is not u32::MAX - let zero = unsafe { Cost::new_unchecked(0) }; - assert_eq!(Cost::new(0), Some(zero)); - - // SAFETY: 100 is not u32::MAX - let hundred = unsafe { Cost::new_unchecked(100) }; - assert_eq!(Cost::new(100), Some(hundred)); - } - - /// `StatementCostVec` correctly indexes by `Location` across multiple blocks. - #[test] - fn statement_cost_vec_indexing() { - // bb0: 2 statements, bb1: 3 statements, bb2: 1 statement - let mut costs = StatementCostVec::from_iter([2, 3, 1].into_iter(), Global); - - // Assign costs at various locations - let loc_0_1 = Location { - block: BasicBlockId::new(0), - statement_index: 1, - }; - let loc_0_2 = Location { - block: BasicBlockId::new(0), - statement_index: 2, - }; - let loc_1_2 = Location { - block: BasicBlockId::new(1), - statement_index: 2, - }; - let loc_2_1 = Location { - block: BasicBlockId::new(2), - statement_index: 1, - }; - - costs[loc_0_1] = Some(cost!(10)); - costs[loc_0_2] = Some(cost!(20)); - costs[loc_1_2] = Some(cost!(30)); - costs[loc_2_1] = Some(cost!(40)); - - // Verify retrieval - assert_eq!(costs.get(loc_0_1), Some(cost!(10))); - assert_eq!(costs.get(loc_0_2), Some(cost!(20))); - assert_eq!(costs.get(loc_1_2), Some(cost!(30))); - assert_eq!(costs.get(loc_2_1), Some(cost!(40))); - - // Unassigned locations return None - let loc_1_1 = Location { - block: BasicBlockId::new(1), - statement_index: 1, - }; - assert_eq!(costs.get(loc_1_1), None); - } - - /// `StatementCostVec` initialization with a single block. - /// - /// This test exercises unsafe code and should be run under Miri. 
- #[test] - fn statement_cost_vec_init_single_block() { - // Single block with 5 statements - let mut costs = StatementCostVec::from_iter([5].into_iter(), Global); - - // All 5 statements should be accessible - for index in 1..=5_u32 { - let location = Location { - block: BasicBlockId::new(0), - statement_index: index as usize, - }; - - costs[location] = Some(Cost::new(index).expect("should be non-zero")); - } - - for index in 1..=5 { - let location = Location { - block: BasicBlockId::new(0), - statement_index: index as usize, - }; - - assert_eq!(costs.get(location), Cost::new(index)); - } - } - - /// `StatementCostVec` initialization with multiple blocks of varying sizes. - /// - /// This test exercises unsafe code and should be run under Miri. - #[test] - fn statement_cost_vec_init_multiple_blocks() { - // 0 statements, 1 statement, 5 statements - let mut costs = StatementCostVec::from_iter([0, 1, 5].into_iter(), Global); - - // bb1 has 1 statement - let loc_1_1 = Location { - block: BasicBlockId::new(1), - statement_index: 1, - }; - costs[loc_1_1] = Some(cost!(100)); - assert_eq!(costs.get(loc_1_1), Some(cost!(100))); - - // bb2 has 5 statements - for index in 1..=5 { - let location = Location { - block: BasicBlockId::new(2), - statement_index: index as usize, - }; - - costs[location] = Some(Cost::new(index).expect("non-zero")); - } - for index in 1..=5 { - let location = Location { - block: BasicBlockId::new(2), - statement_index: index as usize, - }; - assert_eq!(costs.get(location), Cost::new(index)); - } - } - - /// `StatementCostVec` initialization with zero blocks. - /// - /// This test exercises unsafe code and should be run under Miri. 
- #[test] - fn statement_cost_vec_init_empty() { - // Should not panic - let _costs = StatementCostVec::from_iter(core::iter::empty::(), Global); + &mut self.0.of_mut(index.block)[index.statement_index - 1] } } diff --git a/libs/@local/hashql/mir/src/pass/execution/cost/tests.rs b/libs/@local/hashql/mir/src/pass/execution/cost/tests.rs new file mode 100644 index 00000000000..6f9588f2f00 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/cost/tests.rs @@ -0,0 +1,78 @@ +use alloc::alloc::Global; + +use super::{Cost, StatementCostVec}; +use crate::body::{basic_block::BasicBlockId, location::Location}; + +/// `Cost::new` succeeds for valid values (0 and 100). +#[test] +fn cost_new_valid_values() { + let zero = Cost::new(0); + assert!(zero.is_some()); + + let hundred = Cost::new(100); + assert!(hundred.is_some()); +} + +/// `Cost::new(u32::MAX)` returns `None` (reserved as niche for `Option`). +#[test] +fn cost_new_max_returns_none() { + let max = Cost::new(u32::MAX); + assert!(max.is_none()); +} + +/// `Cost::new(u32::MAX - 1)` succeeds (largest valid cost value). +#[test] +fn cost_new_max_minus_one_valid() { + let max_valid = Cost::new(u32::MAX - 1); + assert!(max_valid.is_some()); +} + +/// `Cost::new_unchecked` with valid values works correctly. +/// +/// This test exercises unsafe code and should be run under Miri. +#[test] +#[expect(unsafe_code)] +fn cost_new_unchecked_valid() { + // SAFETY: 0 is not u32::MAX + let zero = unsafe { Cost::new_unchecked(0) }; + assert_eq!(Cost::new(0), Some(zero)); + + // SAFETY: 100 is not u32::MAX + let hundred = unsafe { Cost::new_unchecked(100) }; + assert_eq!(Cost::new(100), Some(hundred)); +} + +/// `StatementCostVec` uses 1-based `Location` indexing to address the underlying +/// 0-based `BlockPartitionedVec`. 
+#[test] +fn statement_cost_vec_location_indexing() { + let mut costs = StatementCostVec::from_iter([2, 3].into_iter(), Global); + + let loc_0_1 = Location { + block: BasicBlockId::new(0), + statement_index: 1, + }; + let loc_0_2 = Location { + block: BasicBlockId::new(0), + statement_index: 2, + }; + let loc_1_2 = Location { + block: BasicBlockId::new(1), + statement_index: 2, + }; + + costs[loc_0_1] = Some(cost!(10)); + costs[loc_0_2] = Some(cost!(20)); + costs[loc_1_2] = Some(cost!(30)); + + assert_eq!(costs.get(loc_0_1), Some(cost!(10))); + assert_eq!(costs.get(loc_0_2), Some(cost!(20))); + assert_eq!(costs.get(loc_1_2), Some(cost!(30))); + + // Unassigned location returns None + let loc_1_1 = Location { + block: BasicBlockId::new(1), + statement_index: 1, + }; + assert_eq!(costs.get(loc_1_1), None); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs b/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs index d10c00d0969..563a57c0fb1 100644 --- a/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/fusion/mod.rs @@ -21,7 +21,7 @@ use alloc::alloc::Global; use core::{alloc::Allocator, convert::Infallible, mem}; -use hashql_core::{graph::Predecessors as _, heap::Heap, id::Id as _}; +use hashql_core::{graph::Predecessors as _, id::Id as _}; use super::target::TargetId; use crate::{ @@ -226,47 +226,40 @@ fn fuse_blocks( /// /// [`BasicBlock`]: crate::body::basic_block::BasicBlock /// [`BasicBlockSplitting`]: super::splitting::BasicBlockSplitting -pub(crate) struct BasicBlockFusion { - alloc: A, +pub(crate) struct BasicBlockFusion { + scratch: S, } impl BasicBlockFusion { /// Creates a new pass using the global allocator. #[must_use] + #[cfg(test)] pub(crate) const fn new() -> Self { Self::new_in(Global) } } -impl BasicBlockFusion { +impl BasicBlockFusion { /// Creates a new pass using the provided allocator. 
- pub(crate) const fn new_in(alloc: A) -> Self { - Self { alloc } + pub(crate) const fn new_in(scratch: S) -> Self { + Self { scratch } } /// Fuses blocks in `body` that share the same target assignment. /// /// Modifies both `body` and `targets` in place. The `targets` vec is compacted to match /// the new block layout. - pub(crate) fn fuse<'heap>( + pub(crate) fn fuse( &self, - body: &mut Body<'heap>, - targets: &mut BasicBlockVec, - ) where - A: Clone, - { + body: &mut Body<'_>, + targets: &mut BasicBlockVec, + ) { debug_assert_eq!( body.basic_blocks.len(), targets.len(), "target vec length must match basic block count" ); - fuse_blocks(self.alloc.clone(), body, targets); - } -} - -impl Default for BasicBlockFusion { - fn default() -> Self { - Self::new() + fuse_blocks(&self.scratch, body, targets); } } diff --git a/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs b/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs index b3b5919de9f..1fad31b2a09 100644 --- a/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs @@ -1,6 +1,7 @@ //! Tests for basic block fusion. 
#![expect(clippy::min_ident_chars)] +use alloc::alloc::Global; use core::assert_matches; use std::{io::Write as _, path::PathBuf}; @@ -26,11 +27,8 @@ use crate::{ pretty::TextFormatOptions, }; -fn make_targets<'heap>( - heap: &'heap Heap, - assignments: &[TargetId], -) -> BasicBlockVec { - let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), heap); +fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { + let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), Global); for &target in assignments { targets.push(target); } @@ -42,7 +40,7 @@ fn assert_fusion<'heap>( name: &'static str, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - targets: &BasicBlockVec, + targets: &BasicBlockVec, ) { let formatter = Formatter::new(context.heap); let type_formatter = TypeFormatter::new(&formatter, context.env, TypeFormatterOptions::terse()); @@ -94,7 +92,7 @@ fn fusable_into_same_target_goto() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); let result = fusable_into(&body, &targets, BasicBlockId::new(1)); assert_eq!(result, Some(BasicBlockId::new(0))); @@ -119,7 +117,7 @@ fn fusable_into_different_targets() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Postgres]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Postgres]); let result = fusable_into(&body, &targets, BasicBlockId::new(1)); assert_eq!(result, None); @@ -151,15 +149,12 @@ fn fusable_into_multiple_predecessors() { } }); - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); // bb3 has two predecessors — not fusable let result = fusable_into(&body, &targets, 
BasicBlockId::new(3)); @@ -184,7 +179,7 @@ fn fusable_into_goto_with_args() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); // The Goto carries an argument — not fusable even though targets match. let result = fusable_into(&body, &targets, BasicBlockId::new(1)); @@ -212,7 +207,7 @@ fn fusable_into_target_has_params() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); let result = fusable_into(&body, &targets, BasicBlockId::new(1)); assert_eq!(result, None); @@ -241,7 +236,7 @@ fn fuse_no_changes_needed() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets(&heap, &[TargetId::Interpreter]); + let mut targets = make_targets(&[TargetId::Interpreter]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -276,7 +271,7 @@ fn fuse_two_same_target_blocks() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -320,10 +315,7 @@ fn fuse_chain_of_three() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets( - &heap, - &[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres], - ); + let mut targets = make_targets(&[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -365,14 +357,11 @@ fn fuse_preserves_different_targets() { }; // bb0 and bb1 are Interpreter, bb2 is Postgres — bb2 cannot fuse into bb1 - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Postgres, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + 
TargetId::Interpreter, + TargetId::Postgres, + ]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -423,15 +412,12 @@ fn fuse_partial_chain() { }; // bb0-bb1 are Interpreter, bb2-bb3 are Postgres - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Postgres, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Postgres, + ]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -479,15 +465,12 @@ fn fuse_updates_branch_references() { }; // bb0 and bb1 same target — fusable. bb2 and bb3 are leaves. - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -535,15 +518,12 @@ fn fuse_does_not_fuse_join_points() { }; // All same target, but bb3 has 2 predecessors — not fusable - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -577,7 +557,7 @@ fn fuse_goto_with_args_not_fused() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Interpreter]); + let mut targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -637,17 +617,14 @@ fn fuse_diamond_non_monotonic_rpo() { // bb2 and bb3 same target (bb3 fuses into bb2), bb1 and bb4 same target (bb4 fuses // into bb1). 
bb5 has two predecessors — not fusable. - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Interpreter, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + ]); BasicBlockFusion::new().fuse(&mut body, &mut targets); @@ -704,15 +681,12 @@ fn fuse_backward_chain() { diagnostics: DiagnosticIssues::new(), }; - let mut targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); + let mut targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); BasicBlockFusion::new().fuse(&mut body, &mut targets); diff --git a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs index 52d95518f2f..be767150b88 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs @@ -15,10 +15,18 @@ use hashql_core::{ id::{self, bit_vec::DenseBitSet}, }; -use super::target::TargetId; -use crate::body::{ - Body, - basic_block::{BasicBlockId, BasicBlockSlice, BasicBlockUnionFind, BasicBlockVec}, +use super::{ + VertexType, + target::TargetId, + traversal::{TraversalAnalysisVisitor, TraversalLattice, TraversalPathBitSet, TraversalResult}, +}; +use crate::{ + body::{ + Body, + basic_block::{BasicBlockId, BasicBlockSlice, BasicBlockUnionFind, BasicBlockVec}, + }, + pass::analysis::dataflow::lattice::HasBottom as _, + visit::Visitor as _, }; #[cfg(test)] @@ -43,6 +51,7 @@ id::newtype_collections!(pub type Island* from IslandId); pub struct Island { target: TargetId, members: DenseBitSet, + traversals: TraversalPathBitSet, } impl Island { @@ -74,6 +83,11 
@@ impl Island { self.members.is_empty() } + #[must_use] + pub const fn traversals(&self) -> TraversalPathBitSet { + self.traversals + } + /// Iterates over the [`BasicBlockId`]s in this island in ascending order. #[inline] pub fn iter(&self) -> impl Iterator + '_ { @@ -117,12 +131,16 @@ impl IslandPlacement { pub(crate) fn run( &self, body: &Body<'_>, + vertex: VertexType, + targets: &BasicBlockSlice, + alloc: A, ) -> IslandVec where A: Allocator, { + let lattice = TraversalLattice::new(vertex); let mut union = BasicBlockUnionFind::new_in(body.basic_blocks.len(), self.scratch.clone()); for bb in body.basic_blocks.ids() { @@ -144,12 +162,24 @@ impl IslandPlacement { islands.push(Island { target: targets[root], members: DenseBitSet::new_empty(body.basic_blocks.len()), + traversals: lattice.bottom(), }) }); islands[index].members.insert(bb); } + for island in &mut islands { + let mut visitor = TraversalAnalysisVisitor::new(vertex, |_, result| match result { + TraversalResult::Path(path) => island.traversals.insert(path), + TraversalResult::Complete => island.traversals.insert_all(), + }); + + for id in &island.members { + Ok(()) = visitor.visit_basic_block(id, &body.basic_blocks[id]); + } + } + islands } } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs index d5846243f0c..d24ef1b7f8e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs @@ -3,23 +3,22 @@ use alloc::alloc::Global; -use hashql_core::{heap::Heap, r#type::environment::Environment}; +use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; use crate::{ body::basic_block::{BasicBlockId, BasicBlockVec}, builder::body, intern::Interner, pass::execution::{ + VertexType, island::{IslandId, IslandPlacement}, target::TargetId, + traversal::EntityPath, }, }; -fn make_targets<'heap>( - heap: &'heap Heap, - assignments: &[TargetId], 
-) -> BasicBlockVec { - let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), heap); +fn make_targets(assignments: &[TargetId]) -> BasicBlockVec { + let mut targets = BasicBlockVec::with_capacity_in(assignments.len(), Global); for &target in assignments { targets.push(target); } @@ -42,8 +41,8 @@ fn single_block() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Interpreter]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -71,8 +70,8 @@ fn same_target_chain() { } }); - let targets = make_targets(&heap, &[TargetId::Postgres, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Postgres); @@ -101,8 +100,8 @@ fn different_targets() { } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Postgres]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 2); @@ -148,16 +147,13 @@ fn diamond_same_target() { } }); - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - TargetId::Interpreter, - ], - ); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + TargetId::Interpreter, + ]); + let 
islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -196,16 +192,13 @@ fn diamond_mixed_targets() { // bb1 nor bb2 has the same target as bb3, so bb0 and bb3 are only connected // transitively through different-target blocks. No direct same-target edge between // bb0 and bb3, so they must be separate islands. - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Embedding, - TargetId::Interpreter, - ], - ); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Embedding, + TargetId::Interpreter, + ]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); // bb0 alone, bb1 alone, bb2 alone, bb3 alone — 4 islands, since no same-target // edges exist between any pair of connected blocks. @@ -221,6 +214,9 @@ fn diamond_mixed_targets() { } /// Linear chain with alternating targets — each block is its own island. +/// +/// Also verifies that same-target blocks separated by a different-target block (bb0 and bb2 +/// are both Interpreter but bb1 is Postgres between them) end up in separate islands. 
#[test] fn alternating_targets() { let heap = Heap::new(); @@ -248,79 +244,105 @@ fn alternating_targets() { } }); - let targets = make_targets( - &heap, - &[ - TargetId::Interpreter, - TargetId::Postgres, - TargetId::Interpreter, - TargetId::Postgres, - ], - ); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[ + TargetId::Interpreter, + TargetId::Postgres, + TargetId::Interpreter, + TargetId::Postgres, + ]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 4); for island_id in islands.ids() { let island = &islands[island_id]; + assert_eq!(island.count(), 1); + let block = island.iter().next().expect("island is non-empty"); assert_eq!(island.target(), targets[block]); + assert!(island.contains(block)); } + + // bb0 and bb2 share a target (Interpreter) but must be in different islands + // because no direct same-target edge connects them. + let bb0_island = islands + .ids() + .find(|&id| islands[id].contains(BasicBlockId::new(0))) + .expect("bb0 is present"); + let bb2_island = islands + .ids() + .find(|&id| islands[id].contains(BasicBlockId::new(2))) + .expect("bb2 is present"); + assert_ne!(bb0_island, bb2_island); } -/// `Island::is_empty` is false for any island produced by the pass. +/// Three same-target blocks in a chain — union-find transitively merges into one island. 
#[test] -fn island_is_never_empty() { +fn transitive_same_target_chain() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + decl a: Int, b: Int, c: Int; bb0() { - x = load 1; - return x; + a = load 1; + goto bb1(); + }, + bb1() { + b = load 2; + goto bb2(); + }, + bb2() { + c = load 3; + return c; } }); - let targets = make_targets(&heap, &[TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, &targets, Global); + let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); - for island_id in islands.ids() { - assert!(!islands[island_id].is_empty()); - } + assert_eq!(islands.len(), 1); + assert_eq!(islands[IslandId::new(0)].count(), 3); + assert!(islands[IslandId::new(0)].contains(BasicBlockId::new(0))); + assert!(islands[IslandId::new(0)].contains(BasicBlockId::new(1))); + assert!(islands[IslandId::new(0)].contains(BasicBlockId::new(2))); } -/// `Island::iter` yields exactly the blocks reported by `contains`. +/// Island traversals are the join of per-block paths for all blocks in the island. +/// +/// Two same-target blocks access different vertex paths (.properties and +/// .metadata.provenance.edition). The island's traversals must contain both. #[test] -fn iter_matches_contains() { +fn island_joins_traversal_paths() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, y: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val1: ?, val2: ?; + @proj props = vertex.properties: ?, + metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?; bb0() { - x = load 1; + val1 = load props; goto bb1(); }, bb1() { - y = load 2; - return y; + val2 = load edition; + return val2; } }); - let targets = make_targets(&heap, &[TargetId::Interpreter, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, &targets, Global); - - for island_id in islands.ids() { - let island = &islands[island_id]; - let members: Vec<_> = island.iter().collect(); - assert_eq!(members.len(), island.count()); + let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); + let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); - for &block in &members { - assert!(island.contains(block)); - } - } + assert_eq!(islands.len(), 1); + let island = &islands[IslandId::new(0)]; + let traversal_paths = island.traversals(); + let joined = traversal_paths.as_entity().expect("entity vertex"); + assert!(joined.contains(EntityPath::Properties)); + assert!(joined.contains(EntityPath::ProvenanceEdition)); } diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 7d0472a7dbd..f1920d860c8 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -7,29 +7,24 @@ macro_rules! 
cost { #[cfg(test)] mod tests; +mod block_partitioned_vec; mod cost; mod fusion; mod island; mod placement; mod splitting; mod statement_placement; -pub mod storage; mod target; mod terminator_placement; +pub mod traversal; mod vertex; use core::{alloc::Allocator, assert_matches}; use hashql_core::heap::{BumpAllocator, Heap}; -pub use self::{ - cost::{ApproxCost, Cost}, - island::{Island, IslandId, IslandVec}, - placement::error::PlacementDiagnosticCategory, - target::TargetId, - vertex::VertexType, -}; use self::{ + cost::BasicBlockCostAnalysis, fusion::BasicBlockFusion, island::IslandPlacement, placement::{ArcConsistency, PlacementSolverContext}, @@ -37,17 +32,24 @@ use self::{ statement_placement::{StatementPlacement as _, TargetPlacementStatement}, target::TargetArray, terminator_placement::TerminatorPlacement, + traversal::TransferCostConfig, }; -use super::{analysis::size_estimation::BodyFootprint, transform::Traversals}; +pub use self::{ + cost::{ApproxCost, Cost}, + island::{Island, IslandId, IslandVec}, + placement::error::PlacementDiagnosticCategory, + target::TargetId, + vertex::VertexType, +}; +use super::analysis::size_estimation::BodyFootprint; use crate::{ - body::{Body, Source, basic_block::BasicBlockVec}, + body::{Body, Source, basic_block::BasicBlockVec, local::Local}, context::MirContext, def::DefIdSlice, pass::analysis::size_estimation::InformationRange, }; pub struct ExecutionAnalysis<'ctx, 'heap, S: Allocator> { - pub traversals: &'ctx DefIdSlice>>, pub footprints: &'ctx DefIdSlice>, pub scratch: S, } @@ -63,55 +65,62 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { ) { assert_matches!(body.source, Source::GraphReadFilter(_)); - let traversals = self - .traversals - .lookup(body.id) - .unwrap_or_else(|| unreachable!()); + let Some(vertex) = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) + else { + unreachable!("unsupported graph read target") + }; - let mut traversal_costs: TargetArray<_> = 
TargetArray::from_fn(|_| None); let mut statement_costs: TargetArray<_> = TargetArray::from_fn(|_| None); - let mut targets = TargetId::all(); - targets.reverse(); // We reverse the order, so that earlier targets (aka the interpreter) can have access to traversal costs + for target in TargetId::all() { + let mut statement = TargetPlacementStatement::new_in(target, &self.scratch); + let statement_cost = + statement.statement_placement_in(context, body, vertex, &self.scratch); - for target in targets { - let mut statement = - TargetPlacementStatement::new_in(target, &traversal_costs, &self.scratch); - let (traversal_cost, statement_cost) = - statement.statement_placement_in(context, body, traversals, &self.scratch); - - traversal_costs[target] = Some(traversal_cost); statement_costs[target] = Some(statement_cost); } let mut statement_costs = statement_costs.map(|cost| cost.unwrap_or_else(|| unreachable!())); - let mut possibilities = BasicBlockSplitting::new_in(&self.scratch).split_in( + let mut assignments = BasicBlockSplitting::new_in(&self.scratch).split_in( context, body, &mut statement_costs, &self.scratch, ); - let terminators = TerminatorPlacement::new_in(InformationRange::full(), &self.scratch); + let terminators = TerminatorPlacement::new_in( + TransferCostConfig::new(InformationRange::full()), + &self.scratch, + ); let mut terminator_costs = terminators.terminator_placement_in( body, + vertex, &self.footprints[body.id], - traversals, - &possibilities, + &assignments, &self.scratch, ); ArcConsistency { - blocks: &mut possibilities, + blocks: &mut assignments, terminators: &mut terminator_costs, } .run_in(body, &self.scratch); + let block_costs = BasicBlockCostAnalysis { + vertex, + assignments: &assignments, + costs: &statement_costs, + } + .analyze_in( + &TransferCostConfig::new(InformationRange::full()), + &body.basic_blocks, + &self.scratch, + ); + let mut solver = PlacementSolverContext { - assignment: &possibilities, - statements: &statement_costs, + 
blocks: &block_costs, terminators: &terminator_costs, } .build_in(body, &self.scratch); @@ -121,7 +130,8 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { let fusion = BasicBlockFusion::new_in(&self.scratch); fusion.fuse(body, &mut assignment); - let islands = IslandPlacement::new_in(&self.scratch).run(body, &assignment, context.heap); + let islands = + IslandPlacement::new_in(&self.scratch).run(body, vertex, &assignment, context.heap); (assignment, islands) } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs index 6e84a40bfbd..ec9ea086bcf 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs @@ -156,7 +156,7 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> id: member, heap: TargetHeap::new(), target: HeapElement::EMPTY, - possible: self.solver.data.assignment[member], + possible: self.solver.data.blocks.assignments(member), } } } @@ -283,7 +283,7 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> fn replay_narrowing(&mut self, body: &Body<'_>) { // Reset unfixed domains to their original AC-3 state for block in &mut self.region.blocks[self.depth..] { - block.possible = self.solver.data.assignment[block.id]; + block.possible = self.solver.data.blocks.assignments(block.id); } self.region.fixed.clear(); @@ -395,17 +395,17 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> /// Computes a lower bound on the cost of completing the current partial assignment. /// - /// Sums `min(statement_cost)` and `min(transition_cost)` independently over unfixed blocks. + /// Sums `min(block_cost)` and `min(transition_cost)` independently over unfixed blocks. /// Used for `BnB` pruning: a branch is skipped when `cost_so_far + lower_bound ≥ /// worst_retained`. 
/// /// This is *not* redundant with [`CostEstimation`] despite operating on the same data. /// [`CostEstimation::estimate`] computes a per-block heuristic that jointly optimizes - /// `statement + transition` costs and double-counts edges (both predecessor and successor + /// `block + transition` costs and double-counts edges (both predecessor and successor /// sides) for join-point influence. This method instead: /// - /// - **Independently minimizes** statement and transition costs (`min(stmt) + min(trans) ≤ - /// min(stmt + trans)`), producing a weaker but valid lower bound. + /// - **Independently minimizes** block and transition costs (`min(block) + min(trans) ≤ + /// min(block + trans)`), producing a weaker but valid lower bound. /// - **Single-counts edges** — only outgoing edges from each unfixed block — to avoid inflating /// the bound when both endpoints are unfixed. /// - **Omits boundary dampening** — the bound should be optimistic, not weighted. @@ -413,19 +413,16 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> let unfixed = &self.region.blocks[self.depth..]; let mut bound = ApproxCost::ZERO; - // Per-unassigned-block: minimum statement cost over remaining domain + // Per-unassigned-block: minimum block cost over remaining domain for block in unfixed { - let mut min_stmt = ApproxCost::INF; + let mut min_block = ApproxCost::INF; for target in &block.possible { - min_stmt = cmp::min( - min_stmt, - self.solver.data.statements[target].sum_approx(block.id), - ); + min_block = cmp::min(min_block, self.solver.data.blocks.cost(block.id, target)); } - if min_stmt < ApproxCost::INF { - bound += min_stmt; + if min_block < ApproxCost::INF { + bound += min_block; } } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs index f77da3e3e38..57b38f9ae90 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs +++ 
b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/tests.rs @@ -2,11 +2,11 @@ use core::mem; -use hashql_core::{heap::Heap, id::IdArray, r#type::environment::Environment}; +use hashql_core::{heap::Heap, id::IdArray, symbol::sym, r#type::environment::Environment}; use super::{super::PlacementSolver, CyclicPlacementRegion}; use crate::{ - body::{basic_block::BasicBlockSlice, location::Location}, + body::location::Location, builder::body, intern::Interner, pass::execution::{ @@ -16,7 +16,9 @@ use crate::{ PlacementRegionId, PlacementSolverContext, condensation::PlacementRegionKind, csp::ConstraintSatisfaction, - tests::{all_targets, bb, fix_block, stmt_costs, target_set, terminators}, + tests::{ + all_targets, bb, fix_block, make_block_costs, stmt_costs, target_set, terminators, + }, }, target::{TargetArray, TargetId}, terminator_placement::{TerminatorCostVec, TransMatrix}, @@ -57,8 +59,8 @@ fn narrow_restricts_successor_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -78,10 +80,9 @@ fn narrow_restricts_successor_domain() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -108,8 +109,8 @@ fn narrow_restricts_predecessor_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; 
[graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -129,10 +130,9 @@ fn narrow_restricts_predecessor_domain() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -161,8 +161,8 @@ fn narrow_to_empty_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -180,10 +180,9 @@ fn narrow_to_empty_domain() { bb(1): [complete(1)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -210,8 +209,8 @@ fn narrow_multiple_edges_intersect() { let env = Environment::new(&heap); // bb0→bb1, bb0→bb2, bb1→bb2, bb2→bb0, bb2→bb3 - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -234,10 
+233,9 @@ fn narrow_multiple_edges_intersect() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -273,8 +271,8 @@ fn replay_narrowing_resets_then_repropagates() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -294,10 +292,9 @@ fn replay_narrowing_resets_then_repropagates() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -330,18 +327,18 @@ fn replay_narrowing_resets_then_repropagates() { // --- Group 4: Lower Bound --- -/// Lower bound sums the minimum statement cost across each unfixed block's domain. +/// Lower bound sums the minimum block cost across each unfixed block's domain. /// /// With zero transition costs, the bound reduces to the sum of per-block minimum -/// statement costs: min(10, 20) + min(5, 15) = 15. +/// block costs: min(10, 20) + min(5, 15) = 15. 
#[test] -fn lower_bound_min_statement_cost_per_block() { +fn lower_bound_min_block_cost_per_block() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -373,10 +370,9 @@ fn lower_bound_min_statement_cost_per_block() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -394,7 +390,7 @@ fn lower_bound_min_statement_cost_per_block() { /// Lower bound includes the minimum valid transition cost for each inter-block edge. /// -/// With zero statement costs, the bound is determined by the cheapest compatible +/// With zero block costs, the bound is determined by the cheapest compatible /// transition across each edge between unfixed blocks. 
#[test] fn lower_bound_min_transition_cost_per_edge() { @@ -402,8 +398,8 @@ fn lower_bound_min_transition_cost_per_edge() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -429,10 +425,9 @@ fn lower_bound_min_transition_cost_per_edge() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -461,8 +456,8 @@ fn lower_bound_skips_self_loop_edges() { let env = Environment::new(&heap); // bb0→bb0 (self-loop), bb0→bb1, bb1→bb0, bb1→bb2 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb0() else bb1(); }, bb1() { cond = load true; if cond then bb0() else bb2(); }, bb2() { x = load 0; return x; } @@ -484,10 +479,9 @@ fn lower_bound_skips_self_loop_edges() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -511,8 +505,8 @@ fn lower_bound_fixed_successor_uses_concrete_target() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = 
body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -538,10 +532,9 @@ fn lower_bound_fixed_successor_uses_concrete_target() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -571,8 +564,8 @@ fn lower_bound_all_fixed_returns_zero() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -595,10 +588,9 @@ fn lower_bound_all_fixed_returns_zero() { bb(1): [complete(1)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -625,8 +617,8 @@ fn mrv_selects_smallest_domain() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = 
load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -651,10 +643,9 @@ fn mrv_selects_smallest_domain() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -679,8 +670,8 @@ fn mrv_tiebreak_by_constraint_degree() { let env = Environment::new(&heap); // bb0→bb1, bb0→bb2, bb1→bb0, bb2→bb0, bb0→bb3 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; switch cond [0 => bb1(), 1 => bb2(), _ => bb3()]; }, bb1() { x = load 0; goto bb0(); }, bb2() { x = load 0; goto bb0(); }, @@ -702,10 +693,9 @@ fn mrv_tiebreak_by_constraint_degree() { bb(2): [complete(1)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -730,8 +720,8 @@ fn mrv_skips_fixed_blocks() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -756,10 +746,9 @@ fn mrv_skips_fixed_blocks() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, 
&domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -779,7 +768,7 @@ fn mrv_skips_fixed_blocks() { /// Greedy solver assigns both blocks in a 2-block SCC to the cheapest same-target. /// -/// Both blocks prefer P (statement cost 3 vs 8). Same-target transitions cost 0, +/// Both blocks prefer P (block cost 3 vs 8). Same-target transitions cost 0, /// so greedy converges on all-P without rollback. #[test] fn greedy_solves_two_block_loop() { @@ -787,8 +776,8 @@ fn greedy_solves_two_block_loop() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { cond = load true; if cond then bb0() else bb2(); }, bb2() { return x; } @@ -814,10 +803,9 @@ fn greedy_solves_two_block_loop() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -842,8 +830,8 @@ fn greedy_rollback_finds_alternative() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { cond = load true; if cond then bb0() else bb3(); }, @@ -869,10 +857,9 @@ fn greedy_rollback_finds_alternative() { ] } - let 
assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -907,8 +894,8 @@ fn greedy_fails_when_infeasible() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -927,10 +914,9 @@ fn greedy_fails_when_infeasible() { bb(1): [P->P = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -954,8 +940,8 @@ fn bnb_finds_optimal() { let env = Environment::new(&heap); // bb0→bb1, bb0→bb2, bb1→bb0, bb2→bb0, bb0→bb3 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; switch cond [0 => bb1(), 1 => bb2(), _ => bb3()]; }, bb1() { x = load 0; goto bb0(); }, bb2() { x = load 0; goto bb0(); }, @@ -985,10 +971,9 @@ fn bnb_finds_optimal() { bb(2): [diagonal(0), I->P = 20, P->I = 20] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, 
terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1015,8 +1000,8 @@ fn bnb_retains_ranked_solutions() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1041,10 +1026,9 @@ fn bnb_retains_ranked_solutions() { bb(1): [diagonal(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1085,8 +1069,8 @@ fn bnb_pruning_preserves_optimal() { let env = Environment::new(&heap); // 4-block SCC: bb0→bb1→bb2→bb3→bb0, plus bb4 exit - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; goto bb1(); }, bb1() { x = load 0; goto bb2(); }, bb2() { x = load 0; goto bb3(); }, @@ -1118,10 +1102,9 @@ fn bnb_pruning_preserves_optimal() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1148,8 +1131,8 @@ fn retry_returns_ranked_solutions_in_order() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: 
Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1175,10 +1158,9 @@ fn retry_returns_ranked_solutions_in_order() { bb(1): [diagonal(0), I->P = 5, P->I = 5] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1220,8 +1202,8 @@ fn retry_exhausts_then_perturbs() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1246,10 +1228,9 @@ fn retry_exhausts_then_perturbs() { bb(1): [diagonal(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1285,8 +1266,8 @@ fn greedy_rollback_on_empty_heap() { // 2-block SCC: bb0↔bb1, bb2 exit // bb0: `if cond then bb1 else bb2` → [bb2(arm0), bb1(arm1)] - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb1() else bb2(); 
}, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1314,10 +1295,9 @@ fn greedy_rollback_on_empty_heap() { bb(1): [I->P = 0, I->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1358,8 +1338,8 @@ fn retry_perturbation_after_ranked_exhaustion() { let env = Environment::new(&heap); // 2-block SCC: bb0↔bb1, bb2 exit - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; if cond then bb1() else bb2(); }, bb1() { x = load 0; goto bb0(); }, bb2() { return x; } @@ -1385,10 +1365,9 @@ fn retry_perturbation_after_ranked_exhaustion() { bb(1): [complete(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs index 5c69ddd2268..5805ddf67ae 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs @@ -1,8 +1,8 @@ //! Cost estimation for placement target selection. //! //! The estimator computes an approximate cost for assigning a basic block to a given execution -//! target. Cost includes statement execution cost plus transition costs to and from predecessor and -//! successor blocks. +//! 
target. Cost includes the block's own cost (statement base + path transfer premium) plus +//! transition costs to and from predecessor and successor blocks. //! //! Cross-region transitions are weighted by a configurable [`CostEstimationConfig`] to //! de-emphasize boundary costs relative to intra-region costs. Self-loop edges are skipped because @@ -12,10 +12,10 @@ //! optimal option. Transition costs are counted from both predecessor and successor sides — //! intentional double-counting that gives each edge proportional influence at join points. //! -//! The double-counting inflates transition costs relative to statement costs. This is acceptable -//! (and possibly desirable) as long as transitions dominate. If statement costs ever become +//! The double-counting inflates transition costs relative to block costs. This is acceptable +//! (and possibly desirable) as long as transitions dominate. If block costs ever become //! comparable and the greedy value ordering consistently disagrees with BnB-optimal solutions, -//! consider halving the transition weight here rather than single-counting — single-counting +//! consider halving the transition weight here rather than single-counting; single-counting //! would make source-side blocks blind to downstream target demand. 
use core::{alloc::Allocator, cmp}; @@ -186,17 +186,16 @@ where ) -> Option { match (source, target) { (Some(source), None) => { - // Minimize over the target block's domain, weighted by statement + transition cost + // Minimize over the target block's domain, weighted by block + transition cost let mut current_minimum = ApproxCost::INF; let mut minimum_transition_cost = None; - for target in &self.solver.data.assignment[edge.target.block] { + for target in &self.solver.data.blocks.assignments(edge.target.block) { let Some(cost) = edge.matrix.get(source, target) else { continue; }; - let mut block_cost = - self.solver.data.statements[target].sum_approx(edge.target.block); + let mut block_cost = self.solver.data.blocks.cost(edge.target.block, target); block_cost += cost; if block_cost < current_minimum { @@ -208,17 +207,16 @@ where minimum_transition_cost } (None, Some(target)) => { - // Minimize over the source block's domain, weighted by statement + transition cost + // Minimize over the source block's domain, weighted by block + transition cost let mut current_minimum = ApproxCost::INF; let mut minimum_transition_cost = None; - for source in &self.solver.data.assignment[edge.source.block] { + for source in &self.solver.data.blocks.assignments(edge.source.block) { let Some(cost) = edge.matrix.get(source, target) else { continue; }; - let mut block_cost = - self.solver.data.statements[source].sum_approx(edge.source.block); + let mut block_cost = self.solver.data.blocks.cost(edge.source.block, source); block_cost += cost; if block_cost < current_minimum { @@ -247,12 +245,13 @@ where block: BasicBlockId, target: TargetId, ) -> Option { - // Start with the block's own statement cost, then add transition costs from each - // predecessor and to each successor. Transitions are counted on both sides (double-counted) - // so that join edges get proportional influence without frequency data. 
- // If a neighbor has no assignment yet, we optimistically assume its best local option. - // Returns `None` if any assigned neighbor lacks a valid transition to this target. - let mut cost = self.solver.data.statements[target].sum_approx(block); + // Start with the block's own cost (statement base + path transfer premium), then add + // transition costs from each predecessor and to each successor. Transitions are counted on + // both sides (double-counted) so that join edges get proportional influence without + // frequency data. If a neighbor has no assignment yet, we optimistically assume its best + // local option. Returns `None` if any assigned neighbor lacks a valid transition to this + // target. + let mut cost = self.solver.data.blocks.cost(block, target); for pred in body.basic_blocks.predecessors(block) { if pred == block { @@ -328,7 +327,7 @@ where ) -> TargetHeap { let mut heap = TargetHeap::new(); - for target in &self.solver.data.assignment[block] { + for target in &self.solver.data.blocks.assignments(block) { if let Some(cost) = self.estimate_target(body, region, block, target) { heap.insert(target, cost); } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs index 43d7b64ea49..f773a30a004 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/tests.rs @@ -1,16 +1,16 @@ #![expect(clippy::min_ident_chars)] -use hashql_core::{heap::Heap, id::IdArray, r#type::environment::Environment}; +use hashql_core::{heap::Heap, id::IdArray, symbol::sym, r#type::environment::Environment}; use super::{ super::{ PlacementSolverContext, - tests::{bb, find_region_of, stmt_costs, target_set, terminators}, + tests::{bb, find_region_of, make_block_costs, stmt_costs, target_set, terminators}, }, *, }; use crate::{ - body::{basic_block::BasicBlockSlice, 
location::Location}, + body::location::Location, builder::body, intern::Interner, pass::execution::{ @@ -135,8 +135,8 @@ fn self_loop_edges_excluded_from_cost() { let env = Environment::new(&heap); // bb0: self-loop via `if cond then bb0() else bb1()`, bb1: return - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -163,10 +163,9 @@ fn self_loop_edges_excluded_from_cost() { ] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let solver = data.build_in(&body, &heap); @@ -200,8 +199,8 @@ fn boundary_multiplier_applied_to_cross_region_edges() { let env = Environment::new(&heap); // bb0 → bb1 → bb2, three trivial SCCs - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int; bb0() { x = load 0; @@ -228,10 +227,9 @@ fn boundary_multiplier_applied_to_cross_region_edges() { bb(1): [diagonal(0), I->P = 0, P->I = 20] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let solver = data.build_in(&body, &heap); @@ -279,8 +277,8 @@ fn infeasible_transition_returns_none() { let env = Environment::new(&heap); // bb0 → bb1, two trivial SCCs - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: 
Int; bb0() { x = load 0; @@ -301,10 +299,9 @@ fn infeasible_transition_returns_none() { bb(0): [I->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let solver = data.build_in(&body, &heap); @@ -329,7 +326,7 @@ fn infeasible_transition_returns_none() { /// Verifies that unassigned neighbors use the heuristic minimum over their domain. /// /// When a neighbor has no committed target, the estimator picks the cheapest -/// `(statement_cost + transition_cost)` combination across the neighbor's +/// `(block_cost + transition_cost)` combination across the neighbor's /// domain to produce an optimistic lower bound. #[test] fn unassigned_neighbor_uses_heuristic_minimum() { @@ -338,8 +335,8 @@ fn unassigned_neighbor_uses_heuristic_minimum() { let env = Environment::new(&heap); // bb0 → bb1, two trivial SCCs - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int; bb0() { x = load 0; @@ -367,10 +364,9 @@ fn unassigned_neighbor_uses_heuristic_minimum() { bb(0): [diagonal(0), I->P = 10, P->I = 5] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; // bb0 is NOT assigned — determine_target returns None diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs index cb567828f97..d5355ea276d 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs +++ 
b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs @@ -33,9 +33,7 @@ use crate::{ }, context::MirContext, pass::execution::{ - ApproxCost, - cost::StatementCostVec, - target::{TargetArray, TargetBitSet, TargetId}, + ApproxCost, cost::BasicBlockCostVec, target::TargetId, terminator_placement::TerminatorCostVec, }, }; @@ -84,12 +82,12 @@ fn back_edge_span(body: &Body<'_>, members: &[BasicBlockId]) -> SpanId { /// Input data for placement solving. /// -/// Bundles the per-block target domains (`assignment`), per-target statement costs -/// (`statements`), and terminator transition costs (`terminators`). +/// Bundles the precomputed per-block costs (`blocks`) and terminator transition costs +/// (`terminators`). #[derive(Debug, Copy, Clone)] pub(crate) struct PlacementSolverContext<'ctx, A: Allocator> { - pub assignment: &'ctx BasicBlockSlice, - pub statements: &'ctx TargetArray>, + pub blocks: &'ctx BasicBlockCostVec, + pub terminators: &'ctx TerminatorCostVec, } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs index d075c605474..527c4c6dd9f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs @@ -5,6 +5,7 @@ use core::alloc::Allocator; use hashql_core::{ heap::{BumpAllocator, Heap}, id::{IdArray, bit_vec::FiniteBitSet}, + symbol::sym, r#type::environment::Environment, }; use hashql_diagnostics::severity::Severity; @@ -23,12 +24,16 @@ use crate::{ context::MirContext, error::MirDiagnosticCategory, intern::Interner, - pass::execution::{ - ApproxCost, Cost, - cost::StatementCostVec, - placement::error::PlacementDiagnosticCategory, - target::{TargetArray, TargetBitSet, TargetId}, - terminator_placement::{TerminatorCostVec, TransMatrix}, + pass::{ + analysis::size_estimation::{InformationRange, InformationUnit}, + execution::{ + ApproxCost, Cost, VertexType, + 
cost::{BasicBlockCostAnalysis, BasicBlockCostVec, StatementCostVec}, + placement::error::PlacementDiagnosticCategory, + target::{TargetArray, TargetBitSet, TargetId}, + terminator_placement::{TerminatorCostVec, TransMatrix}, + traversal::TransferCostConfig, + }, }, }; @@ -113,8 +118,40 @@ pub(crate) fn bb(index: u32) -> BasicBlockId { BasicBlockId::new(index) } +pub(crate) fn make_block_costs<'heap>( + body: &Body<'_>, + domains: &[TargetBitSet], + statements: &TargetArray>, + alloc: &'heap Heap, +) -> BasicBlockCostVec<&'heap Heap> { + make_block_costs_with_config( + body, + domains, + statements, + &TransferCostConfig::new(InformationRange::full()), + alloc, + ) +} + +pub(crate) fn make_block_costs_with_config<'heap>( + body: &Body<'_>, + domains: &[TargetBitSet], + statements: &TargetArray>, + config: &TransferCostConfig, + alloc: &'heap Heap, +) -> BasicBlockCostVec<&'heap Heap> { + let assignments = BasicBlockSlice::from_raw(domains); + BasicBlockCostAnalysis { + vertex: VertexType::Entity, + assignments, + costs: statements, + } + .analyze_in(config, &body.basic_blocks, alloc) +} + const I: TargetId = TargetId::Interpreter; const P: TargetId = TargetId::Postgres; +const E: TargetId = TargetId::Embedding; pub(crate) fn run_solver<'heap>( body: &Body<'heap>, @@ -125,10 +162,9 @@ pub(crate) fn run_solver<'heap>( terminators: &TerminatorCostVec<&'heap Heap>, ) -> BasicBlockVec { let mut context = MirContext::new(env, interner); - let assignment = BasicBlockSlice::from_raw(domains); + let block_costs = make_block_costs(body, domains, statements, env.heap); let data = PlacementSolverContext { - assignment, - statements, + blocks: &block_costs, terminators, }; let mut solver = data.build_in(body, env.heap); @@ -185,8 +221,8 @@ fn forward_pass_assigns_all_blocks() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; 
[graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -249,8 +285,8 @@ fn backward_pass_improves_suboptimal_forward() { // bb1=P look cheap. But bb3 ultimately gets I (because bb2=I with diagonal- // only forces bb3=I after backward). Backward then re-evaluates bb1 with // bb3=I known and sees P→I=50, correcting bb1 to I. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -327,8 +363,8 @@ fn rewind_triggers_on_join_with_conflicting_predecessors() { // bb3=I: bb1→bb3 I→I ok, bb2→bb3 I→I missing → infeasible // bb3=P: bb1→bb3 I→P missing → infeasible // bb3 heap empty → rewind flips bb2 (or bb1) to resolve the conflict. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -412,8 +448,8 @@ fn rewind_skips_exhausted_region() { // Rewind: bb2 has no alternatives (domain {I}) → skip. bb1 has alternative P. // bb1=P, resume. bb2=I (re-estimated). bb3: bb1→bb3 P→I ok, bb2→bb3 I→I ok. // bb3=I succeeds. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { x = load 0; @@ -465,18 +501,18 @@ fn rewind_skips_exhausted_region() { assert_eq!(result[bb(3)], I); } -/// Verifies the trivial region fast path picks the cheapest target by statement cost. +/// Verifies the trivial region fast path picks the cheapest target by block cost. /// /// Single block with a return terminator and no edges. 
The solver should select -/// the target with the lowest per-statement cost without consulting any neighbors. +/// the target with the lowest block cost without consulting any neighbors. #[test] fn single_block_trivial_region() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int; bb0() { x = load 0; @@ -511,8 +547,8 @@ fn cyclic_region_in_forward_backward() { let env = Environment::new(&heap); // bb0 → bb1, bb1 → bb2, bb2 → bb1 (loop), bb2 → bb3 (exit) - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -570,7 +606,7 @@ fn cyclic_region_in_forward_backward() { /// Verifies that rewind walks back into a cyclic region and uses `retry()` to find an alternative. /// /// The SCC exit edge is diagonal, so the SCC solver sees both all-I and all-P -/// as feasible (each can reach some target in bb3's domain). Statement costs +/// as feasible (each can reach some target in bb3's domain). Block costs /// bias the SCC toward all-I. With SCC=all-I, the diagonal exit forces bb3 /// to match bb2=I, but bb3→bb4 only allows P→I, making bb3 infeasible for /// both I (outgoing fails) and P (incoming fails). Rewind reaches the SCC, @@ -590,8 +626,8 @@ fn rewind_retries_cyclic_region() { // bb3=P: diagonal I→P missing → infeasible. // Rewind reaches the SCC; retry() picks all-P. With SCC=all-P: // bb3=P: diagonal P→P ok, bb3→bb4 P→I ok → feasible. 
- let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -690,8 +726,8 @@ fn rewind_skips_exhausted_cyclic_region() { // With bb0=P: // SCC: all-P (forced). bb3 predecessors: bb0=P, bb2=P. // bb3=I: bb0→bb3 P→I ok. bb2→bb3 P→I ok. bb3→bb4 I→I ok. Feasible! - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { cond = load true; @@ -770,8 +806,8 @@ fn rewind_exhausts_all_regions() { let env = Environment::new(&heap); // Diamond: bb0→bb1(then), bb0→bb2(else), bb1→bb3, bb2→bb3. All trivial SCCs. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -803,10 +839,9 @@ fn rewind_exhausts_all_regions() { bb(2): [I->P = 0, P->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -834,8 +869,8 @@ fn forward_pass_rewinds_on_cyclic_failure() { let env = Environment::new(&heap); // bb0→bb1→bb2→bb1(loop)/bb2→bb3. 
- let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -903,8 +938,8 @@ fn backward_pass_keeps_assignment_when_csp_fails() { let env = Environment::new(&heap); // bb0→bb1→bb2→bb1(loop)/bb2→bb3→bb4. - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -955,10 +990,9 @@ fn backward_pass_keeps_assignment_when_csp_fails() { bb(3): [complete(0)] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1010,8 +1044,8 @@ fn backward_pass_adopts_better_cyclic_solution() { let env = Environment::new(&heap); // bb0→bb1→bb2→bb1(loop)/bb2→bb3→bb4. - let body = body!(interner, env; fn@0/0 -> Int { - decl x: Int, cond: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, cond: Bool; bb0() { x = load 0; @@ -1086,8 +1120,8 @@ fn trivial_failure_emits_diagnostic() { // bb1→bb3: diagonal only. bb2→bb3: swap only (I→P, P→I). // No assignment for bb3 satisfies both predecessors simultaneously, and // rewind exhausts all alternatives. 
- let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -1119,10 +1153,9 @@ fn trivial_failure_emits_diagnostic() { bb(2): [I->P = 0, P->I = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1154,8 +1187,8 @@ fn cyclic_failure_emits_diagnostic() { // bb0 branches to bb1(then) and bb2(else). bb1→bb0 closes the cycle. // bb2 is the exit. SCC = {bb0, bb1}, processed first. - let body = body!(interner, env; fn@0/0 -> Int { - decl cond: Bool, x: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], cond: Bool, x: Int; bb0() { cond = load true; @@ -1189,10 +1222,9 @@ fn cyclic_failure_emits_diagnostic() { bb(1): [I->P = 0] } - let assignment = BasicBlockSlice::from_raw(&domains); + let block_costs = make_block_costs(&body, &domains, &statements, &heap); let data = PlacementSolverContext { - assignment, - statements: &statements, + blocks: &block_costs, terminators: &terminators, }; let mut solver = data.build_in(&body, &heap); @@ -1208,3 +1240,150 @@ fn cyclic_failure_emits_diagnostic() { MirDiagnosticCategory::Placement(PlacementDiagnosticCategory::UnsatisfiablePlacement), ); } + +/// Path premiums steer the solver toward origin backends. +/// +/// bb0 accesses `vertex.encodings.vectors` (Embedding-origin) and `vertex.properties` +/// (Postgres-origin). With equal base block costs and permissive transitions, the solver +/// picks the backend that minimizes the combined path premium. 
Embedding avoids the Vectors +/// premium (3072) but pays the Properties premium. Postgres avoids the Properties premium +/// but pays the Vectors premium. Interpreter pays both. +/// +/// The solver should not pick Interpreter for bb0 since both specialized backends have lower +/// total cost. +#[test] +fn path_premiums_influence_placement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj properties = vertex.properties: ?, + encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + result = tuple properties, vectors; + return result; + } + }); + + let all = target_set(&[I, P, E]); + let domains = [all]; + + let mut statements: TargetArray> = + IdArray::from_fn(|_: TargetId| StatementCostVec::new_in(&body.basic_blocks, &heap)); + + // Equal base costs so the path premium is the deciding factor. + stmt_costs! { statements; bb(0): I = 1, P = 1, E = 1 } + + let terminators = TerminatorCostVec::new(&body.basic_blocks, &heap); + + let config = TransferCostConfig::new(InformationRange::value(InformationUnit::new(100))); + let block_costs = make_block_costs_with_config(&body, &domains, &statements, &config, &heap); + + // Verify the premiums are as expected: Interpreter pays both, others pay one each. + let interp_cost = block_costs.cost(bb(0), I); + let pg_cost = block_costs.cost(bb(0), P); + let emb_cost = block_costs.cost(bb(0), E); + + assert!( + interp_cost > pg_cost, + "Interpreter ({interp_cost}) should be more expensive than Postgres ({pg_cost})" + ); + assert!( + interp_cost > emb_cost, + "Interpreter ({interp_cost}) should be more expensive than Embedding ({emb_cost})" + ); + + // Run the solver end-to-end. 
+ let data = PlacementSolverContext { + blocks: &block_costs, + terminators: &terminators, + }; + let mut context = MirContext::new(&env, &interner); + let mut solver = data.build_in(&body, &heap); + let result = solver.run(&mut context, &body); + + assert_ne!( + result[bb(0)], + I, + "solver should prefer a specialized backend over Interpreter when path premiums dominate" + ); +} + +/// Provenance variants produce different path premiums due to different size estimates. +/// +/// `ProvenanceEdition` has size `3..=20` (midpoint 11) while `ProvenanceInferred` has size +/// `3..=5` (midpoint 4). A block accessing edition provenance should have a higher load cost +/// than one accessing inferred provenance, and this difference should be visible in the +/// solver's block cost inputs. +#[test] +fn provenance_variants_produce_different_premiums() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body_edition = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?; + + bb0() { + val = load edition; + return val; + } + }); + + let body_inferred = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + inferred = prov.inferred: ?; + + bb0() { + val = load inferred; + return val; + } + }); + + let ip = target_set(&[I, P]); + let domains = [ip]; + + let config = TransferCostConfig::new(InformationRange::zero()); + + let statements_edition: TargetArray> = + IdArray::from_fn(|_: TargetId| StatementCostVec::new_in(&body_edition.basic_blocks, &heap)); + let edition_costs = + make_block_costs_with_config(&body_edition, &domains, &statements_edition, &config, &heap); + + let statements_inferred: TargetArray> = + IdArray::from_fn(|_: TargetId| { + StatementCostVec::new_in(&body_inferred.basic_blocks, &heap) + }); + let inferred_costs = make_block_costs_with_config( + &body_inferred, + &domains, + &statements_inferred, + &config, + &heap, + ); + + // Both are Postgres-origin, so Interpreter pays the premium, Postgres doesn't. + let edition_interp = edition_costs.cost(bb(0), I); + let edition_pg = edition_costs.cost(bb(0), P); + let inferred_interp = inferred_costs.cost(bb(0), I); + let inferred_pg = inferred_costs.cost(bb(0), P); + + // Postgres pays no premium for either (it's the origin). + assert_eq!(edition_pg, inferred_pg, "Postgres is origin for both"); + + // Edition premium (midpoint 11) > Inferred premium (midpoint 4) on Interpreter. + assert!( + edition_interp > inferred_interp, + "Edition ({edition_interp}) should cost more than Inferred ({inferred_interp}) on \ + Interpreter" + ); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs index ca3f35b5ccc..7dacfa90feb 100644 --- a/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/splitting/mod.rs @@ -185,7 +185,8 @@ fn offset_basic_blocks<'heap, A: Allocator, S: Allocator + Clone>( // Unlike other regions, these may be empty. 
Mark empty blocks as supported everywhere. if costs[TargetId::Interpreter].is_empty() { - targets[start_id].insert_range(TargetId::MIN..=TargetId::MAX); + targets[start_id] + .insert_range(TargetId::MIN..=TargetId::MAX, TargetId::VARIANT_COUNT); } else { targets[start_id] = supported(&costs, 0); } @@ -305,9 +306,12 @@ impl BasicBlockSplitting { self.split_in(context, body, statement_costs, Global) } - /// Splits [`Body`] blocks and returns per-block [`TargetBitSet`] affinities. + /// Splits [`Body`] blocks and returns per-block [`TargetBitSet`] affinities along with + /// the per-block region counts used during splitting. /// - /// The returned vector is indexed by the new [`BasicBlockId`]s. + /// The first element is indexed by the new [`BasicBlockId`]s. The second element maps + /// each original block to the number of blocks it was split into, which callers can use + /// to redistribute parallel data structures. pub(crate) fn split_in<'heap, A: Allocator>( &self, context: &MirContext<'_, 'heap>, diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs index 06fb609d65e..aaf098cf66b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/common.rs @@ -29,8 +29,8 @@ use crate::{ }, execution::{ Cost, - cost::{StatementCostVec, TraversalCostVec}, - storage::{Access, EntityPath}, + cost::StatementCostVec, + traversal::{Access, EntityPath}, }, }, visit::Visitor, @@ -276,7 +276,6 @@ pub(crate) struct CostVisitor<'ctx, 'env, 'heap, S, A: Allocator> { pub cost: Cost, pub statement_costs: StatementCostVec, - pub traversal_costs: TraversalCostVec, pub supported: S, } @@ -305,12 +304,6 @@ where )) .then_some(self.cost); - if let Some(cost) = cost - && lhs.projections.is_empty() - { - self.traversal_costs.insert(lhs.local, cost); - } - self.statement_costs[location] = cost; } 
StatementKind::StorageDead(_) | StatementKind::StorageLive(_) | StatementKind::Nop => { diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs index 2a5e91af656..90a4da7c7bf 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/mod.rs @@ -9,14 +9,9 @@ use super::{ use crate::{ body::{Body, Source, local::Local, operand::Operand, place::Place, rvalue::RValue}, context::MirContext, - pass::{ - execution::{ - Cost, VertexType, - cost::{StatementCostVec, TraversalCostVec}, - statement_placement::common::entity_projection_access, - storage::Access, - }, - transform::Traversals, + pass::execution::{ + Cost, VertexType, cost::StatementCostVec, + statement_placement::common::entity_projection_access, traversal::Access, }, visit::Visitor as _, }; @@ -24,35 +19,34 @@ use crate::{ #[cfg(test)] mod tests; -fn is_supported_place<'heap>( - context: &MirContext<'_, 'heap>, - body: &Body<'heap>, - domain: &DenseBitSet, - place: &Place<'heap>, -) -> bool { - // For GraphReadFilter bodies, local 1 is the filter argument (vertex). Check if the - // projection path maps to an Embedding-accessible field. 
- if matches!(body.source, Source::GraphReadFilter(_)) && place.local.as_usize() == 1 { - let decl = &body.local_decls[place.local]; - let Some(vertex_type) = VertexType::from_local(context.env, decl) else { - unimplemented!("lookup for declared type") - }; +struct EmbeddingSupported { + vertex: VertexType, +} - match vertex_type { - VertexType::Entity => { - return matches!( - entity_projection_access(&place.projections), - Some(Access::Embedding(_)) - ); +impl EmbeddingSupported { + fn is_supported_place<'heap>( + &self, + body: &Body<'heap>, + domain: &DenseBitSet, + place: &Place<'heap>, + ) -> bool { + // For GraphReadFilter bodies, local 1 is the filter argument (vertex). Check if the + // projection path maps to an Embedding-accessible field. + if matches!(body.source, Source::GraphReadFilter(_)) && place.local == Local::VERTEX { + match self.vertex { + VertexType::Entity => { + return matches!( + entity_projection_access(&place.projections), + Some(Access::Embedding(_)) + ); + } } } - } - domain.contains(place.local) + domain.contains(place.local) + } } -struct EmbeddingSupported; - impl<'heap> Supported<'heap> for EmbeddingSupported { fn is_supported_rvalue( &self, @@ -73,13 +67,13 @@ impl<'heap> Supported<'heap> for EmbeddingSupported { fn is_supported_operand( &self, - context: &MirContext<'_, 'heap>, + _: &MirContext<'_, 'heap>, body: &Body<'heap>, domain: &DenseBitSet, operand: &Operand<'heap>, ) -> bool { match operand { - Operand::Place(place) => is_supported_place(context, body, domain, place), + Operand::Place(place) => self.is_supported_place(body, domain, place), Operand::Constant(_) => false, } } @@ -110,23 +104,22 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + vertex: VertexType, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { - let statement_costs = StatementCostVec::new_in(&body.basic_blocks, 
alloc.clone()); - let traversal_costs = TraversalCostVec::new_in(body, traversals, alloc); + ) -> StatementCostVec { + let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); match body.source { Source::GraphReadFilter(_) => {} Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) | Source::Intrinsic(_) => { - return (traversal_costs, statement_costs); + return statement_costs; } } let dispatchable = SupportedAnalysis { body, context, - supported: &EmbeddingSupported, + supported: &EmbeddingSupported { vertex }, initialize_boundary: OnceValue::new( |body: &Body<'heap>, domain: &mut DenseBitSet| { match body.source { @@ -155,12 +148,11 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> cost: self.statement_cost, statement_costs, - traversal_costs, - supported: &EmbeddingSupported, + supported: &EmbeddingSupported { vertex }, }; visitor.visit_body(body); - (visitor.traversal_costs, visitor.statement_costs) + visitor.statement_costs } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs index df2105b7b25..3e72f682221 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/embedding/tests.rs @@ -37,7 +37,7 @@ fn only_vectors_projection_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -45,8 +45,7 @@ fn only_vectors_projection_supported() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "only_vectors_projection_supported", @@ -54,7 +53,6 @@ fn only_vectors_projection_supported() { &body, &context, 
&statement_costs, - &traversal_costs, ); } @@ -80,7 +78,7 @@ fn all_args_excluded() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -88,8 +86,7 @@ fn all_args_excluded() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "all_args_excluded", @@ -97,7 +94,6 @@ fn all_args_excluded() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -121,7 +117,7 @@ fn non_vectors_entity_projection_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -129,8 +125,7 @@ fn non_vectors_entity_projection_rejected() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "non_vectors_entity_projection_rejected", @@ -138,7 +133,6 @@ fn non_vectors_entity_projection_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -164,7 +158,7 @@ fn storage_statements_zero_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -172,8 +166,7 @@ fn storage_statements_zero_cost() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "storage_statements_zero_cost", @@ -181,7 +174,6 @@ fn storage_statements_zero_cost() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -219,7 +211,7 @@ fn 
other_operations_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -227,8 +219,7 @@ fn other_operations_rejected() { }; let mut placement = EmbeddingStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "other_operations_rejected", @@ -236,6 +227,5 @@ fn other_operations_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs index bd2acf00b89..419cccded41 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/mod.rs @@ -5,15 +5,12 @@ use crate::{ body::{ Body, Source, location::Location, - statement::{Assign, Statement, StatementKind}, + statement::{Statement, StatementKind}, }, context::MirContext, - pass::{ - execution::{ - cost::{Cost, StatementCostVec, TraversalCostVec}, - target::TargetArray, - }, - transform::Traversals, + pass::execution::{ + VertexType, + cost::{Cost, StatementCostVec}, }, visit::Visitor, }; @@ -21,14 +18,13 @@ use crate::{ #[cfg(test)] mod tests; -struct CostVisitor<'ctx, A: Allocator, B: Allocator> { +struct CostVisitor { cost: Cost, statement_costs: StatementCostVec, - traversal_costs: &'ctx TargetArray>>, } -impl<'heap, A: Allocator, B: Allocator> Visitor<'heap> for CostVisitor<'_, A, B> { +impl<'heap, A: Allocator> Visitor<'heap> for CostVisitor { type Result = Result<(), !>; fn visit_statement( @@ -36,25 +32,9 @@ impl<'heap, A: Allocator, B: Allocator> Visitor<'heap> for CostVisitor<'_, A, B> location: Location, statement: &Statement<'heap>, ) -> Self::Result { - // All 
statements are supported; TraversalExtraction provides backend data access match &statement.kind { - StatementKind::Assign(Assign { lhs, rhs: _ }) => { - // If it's a traversal load (aka we add the interpreter cost, as well as the cost to - // load the statement). We assume worst case for the traversal. - let cost = if lhs.projections.is_empty() - && let Some(cost) = self - .traversal_costs - .iter() - .filter_map(|costs| costs.as_ref()) - .filter_map(|costs| costs.get(lhs.local)) - .max() - { - self.cost.saturating_add(cost) - } else { - self.cost - }; - - self.statement_costs[location] = Some(cost); + StatementKind::Assign(_) => { + self.statement_costs[location] = Some(self.cost); } StatementKind::StorageDead(_) | StatementKind::StorageLive(_) | StatementKind::Nop => { self.statement_costs[location] = Some(cost!(0)); @@ -69,49 +49,41 @@ impl<'heap, A: Allocator, B: Allocator> Visitor<'heap> for CostVisitor<'_, A, B> /// target. /// /// Supports all statements unconditionally, serving as the universal fallback. 
-pub(crate) struct InterpreterStatementPlacement<'ctx, A: Allocator> { - traversal_costs: &'ctx TargetArray>>, +pub(crate) struct InterpreterStatementPlacement { statement_cost: Cost, } -impl<'ctx, A: Allocator> InterpreterStatementPlacement<'ctx, A> { - pub(crate) const fn new( - traversal_costs: &'ctx TargetArray>>, - ) -> Self { +impl InterpreterStatementPlacement { + pub(crate) const fn new() -> Self { Self { - traversal_costs, statement_cost: cost!(8), } } } -impl<'heap, A: Allocator + Clone, B: Allocator> StatementPlacement<'heap, A> - for InterpreterStatementPlacement<'_, B> -{ +impl<'heap, A: Allocator + Clone> StatementPlacement<'heap, A> for InterpreterStatementPlacement { fn statement_placement_in( &mut self, _: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + _: VertexType, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { - let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc.clone()); - let traversal_costs = TraversalCostVec::new_in(body, traversals, alloc); + ) -> StatementCostVec { + let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); match body.source { Source::GraphReadFilter(_) => {} Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) | Source::Intrinsic(_) => { - return (traversal_costs, statement_costs); + return statement_costs; } } let mut visitor = CostVisitor { cost: self.statement_cost, statement_costs, - traversal_costs: self.traversal_costs, }; visitor.visit_body(body); - (traversal_costs, visitor.statement_costs) + visitor.statement_costs } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index 43fc247081b..4ac8917a460 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -1,8 +1,6 @@ //! 
Tests for [`InterpreterStatementPlacement`]. #![expect(clippy::min_ident_chars)] -use alloc::alloc::Global; - use hashql_core::{heap::Heap, symbol::sym, r#type::environment::Environment}; use hashql_diagnostics::DiagnosticIssues; @@ -11,17 +9,9 @@ use crate::{ context::MirContext, def::DefId, intern::Interner, - pass::{ - Changed, TransformPass as _, - execution::{ - cost::TraversalCostVec, - statement_placement::{ - InterpreterStatementPlacement, StatementPlacement as _, - tests::{assert_placement, run_placement}, - }, - target::{TargetArray, TargetId}, - }, - transform::TraversalExtraction, + pass::execution::statement_placement::{ + InterpreterStatementPlacement, + tests::{assert_placement, run_placement}, }, }; @@ -61,18 +51,15 @@ fn all_statements_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let traversal_costs = TargetArray::from_fn(|_| None); - let mut placement: InterpreterStatementPlacement<'_, Global> = - InterpreterStatementPlacement::new(&traversal_costs); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "all_statements_supported", @@ -80,243 +67,212 @@ fn all_statements_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } -/// `StorageLive`/`StorageDead`/`Nop` get `cost!(0)`, assignments get `cost!(8)`. +/// All assignments get uniform cost 8 regardless of vertex projections. /// -/// Tests the cost differentiation: storage management statements have zero cost -/// because they don't perform computation, while assignments have cost 8. +/// Path costs are charged at the block level via `BasicBlockCostVec`, not per statement. 
#[test] -fn storage_statements_zero_cost() { +fn traversal_single_path_cost() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { - decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, y: Int, result: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], archived: Bool, result: Bool; + @proj metadata = vertex.metadata: ?, archived_proj = metadata.archived: Bool; bb0() { - let (x.local); - x = load 10; - let (y.local); - y = load 20; - result = bin.+ x y; - drop (x.local); - drop (y.local); + archived = load archived_proj; + result = un.! archived; return result; } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let traversal_costs = TargetArray::from_fn(|_| None); - let mut placement: InterpreterStatementPlacement<'_, Global> = - InterpreterStatementPlacement::new(&traversal_costs); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( - "storage_statements_zero_cost", + "traversal_single_path_cost", "interpret", &body, &context, &statement_costs, - &traversal_costs, ); } -/// Traversal locals receive the backend cost added to the base interpreter cost. +/// Multiple vertex projections in a statement still get uniform cost 8. /// -/// When Postgres assigns a traversal cost of 4, the interpreter adds it to the base cost (8) -/// via `saturating_add`, yielding 12 for the traversal assignment. Non-traversal assignments -/// remain at the base cost. +/// Path costs from `_1.properties` and `_1.metadata.archived` are charged at the block level. 
#[test] -fn traversal_single_backend_cost() { +fn traversal_multiple_paths_cost() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let mut body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: [Opaque sym::path::Entity; ?], result: Bool; - @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, Bool) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + single: ?, both: (?, Bool); + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived = metadata.archived: Bool; bb0() { - result = un.! archived; - return result; + single = load properties; + both = tuple properties, archived; + return both; } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(&mut context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - let mut postgres_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - for local in body.local_decls.ids() { - if traversals.contains(local) { - postgres_costs.insert(local, cost!(4)); - } - } - - let mut traversal_costs: TargetArray>> = - TargetArray::from_fn(|_| None); - traversal_costs[TargetId::Postgres] = Some(postgres_costs); - - let mut interpreter = InterpreterStatementPlacement::new(&traversal_costs); - let (traversal_cost_out, statement_costs) = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( - "traversal_single_backend_cost", + "traversal_multiple_paths_cost", "interpret", &body, &context, &statement_costs, - 
&traversal_cost_out, ); } -/// The interpreter picks the maximum traversal cost across all backends. +/// Composite swallowing is handled at the block level by `BasicBlockCostVec`. /// -/// With Postgres assigning cost 4 and Embedding assigning cost 6 to different traversal -/// locals, the interpreter adds the per-local maximum to its base cost. Each traversal -/// assignment reflects the worst-case backend cost for that specific local. +/// The interpreter assigns uniform cost 8 to all assignments. #[test] -fn traversal_worst_case_multiple_backends() { +fn traversal_swallowing_reduces_cost() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let mut body = body!(interner, env; [graph::read::filter]@0/2 -> ? { - decl env: (), vertex: [Opaque sym::path::Entity; ?], archived: Bool, vectors: ?; - @proj metadata = vertex.metadata: ?, archived_proj = metadata.archived: Bool, - encodings = vertex.encodings: ?, vectors_proj = encodings.vectors: ?; + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?, + entity_id = record_id.entity_id: ?, + web_id = entity_id.web_id: ?; bb0() { - archived = load archived_proj; - vectors = load vectors_proj; - return vectors; + result = tuple web_id, record_id; + return result; } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(&mut context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - // Assign different costs per backend per local so the interpreter picks the max for each. - // First traversal local gets Postgres cost 4, second gets Embedding cost 6. 
- let mut postgres_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - let mut embedding_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - let traversal_locals: Vec<_> = body - .local_decls - .ids() - .filter(|local| traversals.contains(*local)) - .collect(); - postgres_costs.insert(traversal_locals[0], cost!(4)); - embedding_costs.insert(traversal_locals[1], cost!(6)); - - let mut traversal_costs: TargetArray>> = - TargetArray::from_fn(|_| None); - traversal_costs[TargetId::Postgres] = Some(postgres_costs); - traversal_costs[TargetId::Embedding] = Some(embedding_costs); - - let mut interpreter = InterpreterStatementPlacement::new(&traversal_costs); - let (traversal_cost_out, statement_costs) = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( - "traversal_worst_case_multiple_backends", + "traversal_swallowing_reduces_cost", "interpret", &body, &context, &statement_costs, - &traversal_cost_out, ); } -/// Non-traversal assignments are unaffected by traversal costs. +/// All assignments get uniform cost 8 whether or not they access vertex projections. /// -/// Even when traversal costs are present for entity projection locals, assignments to -/// non-traversal locals (like arithmetic results) retain the base interpreter cost of 8. +/// Path-based cost differentiation is handled at the block level. 
#[test] fn non_traversal_unaffected_by_costs() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let mut body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { decl env: (), vertex: [Opaque sym::path::Entity; ?], - archived: Bool, x: Int, y: Int, sum: Int, result: Bool; - @proj metadata = vertex.metadata: ?, archived_proj = metadata.archived: Bool; + props: ?, x: Int, result: Bool; + @proj properties = vertex.properties: ?; bb0() { - archived = load archived_proj; - x = load 10; - y = load 20; - sum = bin.+ x y; - result = bin.> sum 15; + props = load properties; + x = load 42; + result = bin.> x 10; return result; } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, diagnostics: DiagnosticIssues::new(), }; - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(&mut context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); - let mut postgres_costs = TraversalCostVec::new_in(&body, &traversals, &heap); - for local in body.local_decls.ids() { - if traversals.contains(local) { - postgres_costs.insert(local, cost!(4)); + assert_placement( + "non_traversal_unaffected_by_costs", + "interpret", + &body, + &context, + &statement_costs, + ); +} + +/// `StorageLive`/`StorageDead`/`Nop` get `cost!(0)`, assignments get `cost!(8)`. +/// +/// Tests the cost differentiation: storage management statements have zero cost +/// because they don't perform computation, while assignments have cost 8. 
+#[test] +fn storage_statements_zero_cost() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], x: Int, y: Int, result: Int; + + bb0() { + let (x.local); + x = load 10; + let (y.local); + y = load 20; + result = bin.+ x y; + drop (x.local); + drop (y.local); + return result; } - } + }); - let mut traversal_costs: TargetArray>> = - TargetArray::from_fn(|_| None); - traversal_costs[TargetId::Postgres] = Some(postgres_costs); + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; - let mut interpreter = InterpreterStatementPlacement::new(&traversal_costs); - let (traversal_cost_out, statement_costs) = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( - "non_traversal_unaffected_by_costs", + "storage_statements_zero_cost", "interpret", &body, &context, &statement_costs, - &traversal_cost_out, ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs index d39c0c77200..3b9ca7852be 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/mod.rs @@ -23,15 +23,8 @@ pub(crate) use self::{ embedding::EmbeddingStatementPlacement, interpret::InterpreterStatementPlacement, postgres::PostgresStatementPlacement, }; -use super::target::{TargetArray, TargetId}; -use crate::{ - body::Body, - context::MirContext, - pass::{ - execution::cost::{StatementCostVec, TraversalCostVec}, - transform::Traversals, - }, -}; +use super::{VertexType, target::TargetId}; +use 
crate::{body::Body, context::MirContext, pass::execution::cost::StatementCostVec}; /// Computes statement placement costs for a specific execution target. /// @@ -55,28 +48,22 @@ pub(crate) trait StatementPlacement<'heap, A: Allocator> { &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + vertex: VertexType, alloc: A, - ) -> (TraversalCostVec, StatementCostVec); + ) -> StatementCostVec; } -pub(crate) enum TargetPlacementStatement<'ctx, 'heap, S: Allocator> { - Interpreter(InterpreterStatementPlacement<'ctx, S>), +pub(crate) enum TargetPlacementStatement<'heap, S: Allocator> { + Interpreter(InterpreterStatementPlacement), Postgres(PostgresStatementPlacement<'heap, S>), Embedding(EmbeddingStatementPlacement), } -impl<'ctx, S: Allocator + Clone> TargetPlacementStatement<'ctx, '_, S> { +impl TargetPlacementStatement<'_, S> { #[must_use] - pub(crate) fn new_in( - target: TargetId, - traversals: &'ctx TargetArray>>, - scratch: S, - ) -> Self { + pub(crate) fn new_in(target: TargetId, scratch: S) -> Self { match target { - TargetId::Interpreter => { - Self::Interpreter(InterpreterStatementPlacement::new(traversals)) - } + TargetId::Interpreter => Self::Interpreter(InterpreterStatementPlacement::new()), TargetId::Postgres => Self::Postgres(PostgresStatementPlacement::new_in(scratch)), TargetId::Embedding => Self::Embedding(EmbeddingStatementPlacement::new_in(scratch)), } @@ -84,25 +71,25 @@ impl<'ctx, S: Allocator + Clone> TargetPlacementStatement<'ctx, '_, S> { } impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> - for TargetPlacementStatement<'_, 'heap, S> + for TargetPlacementStatement<'heap, S> { #[inline] fn statement_placement_in( &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + vertex: VertexType, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { + ) -> StatementCostVec { match self { TargetPlacementStatement::Interpreter(placement) => { 
- placement.statement_placement_in(context, body, traversals, alloc) + placement.statement_placement_in(context, body, vertex, alloc) } TargetPlacementStatement::Postgres(placement) => { - placement.statement_placement_in(context, body, traversals, alloc) + placement.statement_placement_in(context, body, vertex, alloc) } TargetPlacementStatement::Embedding(placement) => { - placement.statement_placement_in(context, body, traversals, alloc) + placement.statement_placement_in(context, body, vertex, alloc) } } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs index 2a34f13341e..10be791ff9c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/mod.rs @@ -27,14 +27,11 @@ use crate::{ rvalue::{Aggregate, AggregateKind, BinOp, Binary, RValue, Unary}, }, context::MirContext, - pass::{ - execution::{ - VertexType, - cost::{Cost, StatementCostVec, TraversalCostVec}, - statement_placement::common::entity_projection_access, - storage::Access, - }, - transform::Traversals, + pass::execution::{ + VertexType, + cost::{Cost, StatementCostVec}, + statement_placement::common::entity_projection_access, + traversal::Access, }, visit::Visitor as _, }; @@ -334,6 +331,7 @@ struct PostgresSupported<'ctx, 'heap, A: Allocator> { /// /// Fields containing closures or dicts with non-string keys are excluded. env_domain: &'ctx DenseBitSet, + vertex: VertexType, guard: LocalLock<&'ctx mut RecursiveVisitorGuard<'heap, A>>, } @@ -345,13 +343,7 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 'heap, A> { /// any other local (falls through to the regular domain check). 
/// /// [`GraphReadFilter`]: Source::GraphReadFilter - fn is_supported_place_graph_read_filter( - &self, - context: &MirContext<'_, 'heap>, - body: &Body<'heap>, - - place: &Place<'heap>, - ) -> Option { + fn is_supported_place_graph_read_filter(&self, place: &Place<'heap>) -> Option { match place.local { Local::ENV => { // The environment projections depend on the first projection, because that @@ -372,26 +364,19 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 'heap, A> { Some(self.env_domain.contains(field)) } - Local::VERTEX => { - let decl = &body.local_decls[place.local]; - let Some(vertex_type) = VertexType::from_local(context.env, decl) else { - unimplemented!("lookup for declared type") - }; - - match vertex_type { - VertexType::Entity => Some(matches!( - entity_projection_access(&place.projections), - Some(Access::Postgres(_)) - )), - } - } + Local::VERTEX => match self.vertex { + VertexType::Entity => Some(matches!( + entity_projection_access(&place.projections), + Some(Access::Postgres(_)) + )), + }, _ => None, } } fn is_supported_place( &self, - context: &MirContext<'_, 'heap>, + _: &MirContext<'_, 'heap>, body: &Body<'heap>, domain: &DenseBitSet, place: &Place<'heap>, @@ -400,7 +385,7 @@ impl<'heap, A: Allocator> PostgresSupported<'_, 'heap, A> { // env fields are checked against env_domain, vertex projections against entity // field access. Other locals fall through to the regular domain check. 
if matches!(body.source, Source::GraphReadFilter(_)) - && let Some(result) = self.is_supported_place_graph_read_filter(context, body, place) + && let Some(result) = self.is_supported_place_graph_read_filter(place) { return result; } @@ -711,16 +696,15 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> &mut self, context: &MirContext<'_, 'heap>, body: &Body<'heap>, - traversals: &Traversals<'heap>, + vertex: VertexType, alloc: A, - ) -> (TraversalCostVec, StatementCostVec) { - let traversal_costs = TraversalCostVec::new_in(body, traversals, alloc.clone()); + ) -> StatementCostVec { let statement_costs = StatementCostVec::new_in(&body.basic_blocks, alloc); match body.source { Source::GraphReadFilter(_) => {} Source::Ctor(_) | Source::Closure(..) | Source::Thunk(..) | Source::Intrinsic(_) => { - return (traversal_costs, statement_costs); + return statement_costs; } } @@ -728,6 +712,7 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> let supported = PostgresSupported { env_domain: &env_domain, + vertex, guard: LocalLock::new(&mut self.type_visitor_guard), }; @@ -755,12 +740,11 @@ impl<'heap, A: Allocator + Clone, S: Allocator> StatementPlacement<'heap, A> cost: self.statement_cost, statement_costs, - traversal_costs, supported: &supported, }; visitor.visit_body(body); - (visitor.traversal_costs, visitor.statement_costs) + visitor.statement_costs } } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs index b0a982458eb..7770c6d45b5 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs @@ -26,12 +26,12 @@ use crate::{ def::DefId, intern::Interner, op, - pass::{ - execution::statement_placement::{ + pass::execution::{ + VertexType, + statement_placement::{ 
PostgresStatementPlacement, StatementPlacement as _, tests::{assert_placement, run_placement}, }, - transform::Traversals, }, }; @@ -58,7 +58,7 @@ fn binary_unary_ops_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -66,8 +66,7 @@ fn binary_unary_ops_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "binary_unary_ops_supported", @@ -75,7 +74,6 @@ fn binary_unary_ops_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -100,7 +98,7 @@ fn aggregate_tuple_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -108,8 +106,7 @@ fn aggregate_tuple_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "aggregate_tuple_supported", @@ -117,7 +114,6 @@ fn aggregate_tuple_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -145,7 +141,7 @@ fn aggregate_closure_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -153,8 +149,7 @@ fn aggregate_closure_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "aggregate_closure_rejected", @@ -162,7 +157,6 @@ fn aggregate_closure_rejected() { &body, &context, &statement_costs, - 
&traversal_costs, ); } @@ -193,7 +187,7 @@ fn apply_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -201,8 +195,7 @@ fn apply_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "apply_rejected", @@ -210,7 +203,6 @@ fn apply_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -234,7 +226,7 @@ fn input_supported() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -242,8 +234,7 @@ fn input_supported() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "input_supported", @@ -251,7 +242,6 @@ fn input_supported() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -277,7 +267,7 @@ fn env_with_closure_type_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -285,8 +275,7 @@ fn env_with_closure_type_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_with_closure_type_rejected", @@ -294,7 +283,6 @@ fn env_with_closure_type_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -319,7 +307,7 @@ fn env_without_closure_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: 
&interner, @@ -327,8 +315,7 @@ fn env_without_closure_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_without_closure_accepted", @@ -336,7 +323,6 @@ fn env_without_closure_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -360,7 +346,7 @@ fn entity_projection_column() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -368,8 +354,7 @@ fn entity_projection_column() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "entity_projection_column", @@ -377,7 +362,6 @@ fn entity_projection_column() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -402,7 +386,7 @@ fn entity_projection_jsonb() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -410,8 +394,7 @@ fn entity_projection_jsonb() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "entity_projection_jsonb", @@ -419,7 +402,6 @@ fn entity_projection_jsonb() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -448,7 +430,7 @@ fn storage_statements_zero_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -456,8 +438,7 @@ fn storage_statements_zero_cost() { }; let mut placement = 
PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "storage_statements_zero_cost", @@ -465,7 +446,6 @@ fn storage_statements_zero_cost() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -512,7 +492,7 @@ fn diamond_must_analysis() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -520,8 +500,7 @@ fn diamond_must_analysis() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "diamond_must_analysis", @@ -529,7 +508,6 @@ fn diamond_must_analysis() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -554,7 +532,7 @@ fn graph_read_edge_unsupported() { let mut builder = BodyBuilder::new(&interner); let _env_local = builder.local("env", unit_ty); - let vertex = builder.local("vertex", entity_ty); + let _vertex = builder.local("vertex", entity_ty); let axis = builder.local("axis", int_ty); let graph_result = builder.local("graph_result", int_ty); let local_val = builder.local("local_val", int_ty); @@ -596,11 +574,9 @@ fn graph_read_edge_unsupported() { diagnostics: DiagnosticIssues::new(), }; - let traversals = Traversals::with_capacity_in(vertex.local, body.local_decls.len(), &heap); - let mut placement = PostgresStatementPlacement::new_in(Global); - let (traversal_costs, statement_costs) = - placement.statement_placement_in(&context, &body, &traversals, &heap); + let statement_costs = + placement.statement_placement_in(&context, &body, VertexType::Entity, &heap); assert_placement( "graph_read_edge_unsupported", @@ -608,7 +584,6 @@ fn graph_read_edge_unsupported() { &body, 
&context, &statement_costs, - &traversal_costs, ); } @@ -634,7 +609,7 @@ fn env_closure_field_rejected_other_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -642,8 +617,7 @@ fn env_closure_field_rejected_other_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_closure_field_rejected_other_accepted", @@ -651,7 +625,6 @@ fn env_closure_field_rejected_other_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -676,7 +649,7 @@ fn env_dict_non_string_key_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -684,8 +657,7 @@ fn env_dict_non_string_key_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_dict_non_string_key_rejected", @@ -693,7 +665,6 @@ fn env_dict_non_string_key_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -717,7 +688,7 @@ fn env_dict_string_key_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -725,8 +696,7 @@ fn env_dict_string_key_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_dict_string_key_accepted", @@ -734,7 +704,6 @@ fn env_dict_string_key_accepted() { &body, &context, 
&statement_costs, - &traversal_costs, ); } @@ -762,7 +731,7 @@ fn env_dict_opaque_string_key_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -770,8 +739,7 @@ fn env_dict_opaque_string_key_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "env_dict_opaque_string_key_accepted", @@ -779,7 +747,6 @@ fn env_dict_opaque_string_key_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -833,7 +800,7 @@ fn fnptr_constant_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -841,8 +808,7 @@ fn fnptr_constant_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "fnptr_constant_rejected", @@ -850,7 +816,6 @@ fn fnptr_constant_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -880,7 +845,7 @@ fn eq_dict_vs_struct_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -888,8 +853,7 @@ fn eq_dict_vs_struct_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_dict_vs_struct_rejected", @@ -897,7 +861,6 @@ fn eq_dict_vs_struct_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -927,7 +890,7 @@ fn 
eq_list_vs_tuple_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -935,8 +898,7 @@ fn eq_list_vs_tuple_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_list_vs_tuple_rejected", @@ -944,7 +906,6 @@ fn eq_list_vs_tuple_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -974,7 +935,7 @@ fn eq_unknown_type_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -982,8 +943,7 @@ fn eq_unknown_type_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_unknown_type_rejected", @@ -991,7 +951,6 @@ fn eq_unknown_type_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -1021,7 +980,7 @@ fn eq_same_type_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -1029,8 +988,7 @@ fn eq_same_type_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_same_type_accepted", @@ -1038,7 +996,6 @@ fn eq_same_type_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -1068,7 +1025,7 @@ fn ne_dict_vs_struct_rejected() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: 
&interner, @@ -1076,8 +1033,7 @@ fn ne_dict_vs_struct_rejected() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "ne_dict_vs_struct_rejected", @@ -1085,7 +1041,6 @@ fn ne_dict_vs_struct_rejected() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -1644,7 +1599,7 @@ fn eq_place_vs_constant_accepted() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -1652,8 +1607,7 @@ fn eq_place_vs_constant_accepted() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "eq_place_vs_constant_accepted", @@ -1661,7 +1615,6 @@ fn eq_place_vs_constant_accepted() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -2181,7 +2134,7 @@ fn serialization_unsafe_statement_no_cost() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ -2189,8 +2142,7 @@ fn serialization_unsafe_statement_no_cost() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "serialization_unsafe_statement_no_cost", @@ -2198,7 +2150,6 @@ fn serialization_unsafe_statement_no_cost() { &body, &context, &statement_costs, - &traversal_costs, ); } @@ -2237,7 +2188,7 @@ fn serialization_unsafe_edge_propagates() { } }); - let mut context = MirContext { + let context = MirContext { heap: &heap, env: &env, interner: &interner, @@ 
-2245,8 +2196,7 @@ fn serialization_unsafe_edge_propagates() { }; let mut placement = PostgresStatementPlacement::new_in(Global); - let (body, statement_costs, traversal_costs) = - run_placement(&mut context, &mut placement, body); + let (body, statement_costs) = run_placement(&context, &mut placement, body); assert_placement( "serialization_unsafe_edge_propagates", @@ -2254,6 +2204,5 @@ fn serialization_unsafe_edge_propagates() { &body, &context, &statement_costs, - &traversal_costs, ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs index 542d7bf494b..cf254b59d4b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs @@ -3,7 +3,7 @@ use alloc::alloc::Global; use core::{alloc::Allocator, fmt::Display}; -use std::{io::Write as _, path::PathBuf}; +use std::path::PathBuf; use hashql_core::{ heap::Heap, @@ -19,17 +19,12 @@ use crate::{ builder::body, context::MirContext, intern::Interner, - pass::{ - Changed, TransformPass as _, - execution::{ - cost::{StatementCostVec, TraversalCostVec}, - statement_placement::{ - EmbeddingStatementPlacement, InterpreterStatementPlacement, - PostgresStatementPlacement, - }, - target::TargetArray, + pass::execution::{ + VertexType, + cost::StatementCostVec, + statement_placement::{ + EmbeddingStatementPlacement, InterpreterStatementPlacement, PostgresStatementPlacement, }, - transform::{TraversalExtraction, Traversals}, }, pretty::{TextFormatAnnotations, TextFormatOptions}, }; @@ -56,17 +51,6 @@ impl TextFormatAnnotations for CostAnnotations<'_, A> { } } -/// Formats traversal costs as a summary section. 
-fn format_traversals(traversal_costs: &TraversalCostVec) -> impl Display { - core::fmt::from_fn(move |f| { - writeln!(f, "Traversals:")?; - for (local, cost) in traversal_costs { - writeln!(f, " {local}: {cost}")?; - } - Ok(()) - }) -} - /// Runs statement placement analysis and asserts the result matches a snapshot. #[track_caller] pub(crate) fn assert_placement<'heap, A: Allocator>( @@ -75,7 +59,6 @@ pub(crate) fn assert_placement<'heap, A: Allocator>( body: &Body<'heap>, context: &MirContext<'_, 'heap>, statement_costs: &StatementCostVec, - traversal_costs: &TraversalCostVec, ) { let formatter = Formatter::new(context.heap); let type_formatter = TypeFormatter::new(&formatter, context.env, TypeFormatterOptions::terse()); @@ -95,16 +78,6 @@ pub(crate) fn assert_placement<'heap, A: Allocator>( text_format.format_body(body).expect("formatting failed"); - write!( - text_format.writer, - "\n\n{:=^50}\n\n", - format!(" Traversals ") - ) - .expect("infallible"); - - write!(text_format.writer, "{}", format_traversals(traversal_costs)) - .expect("formatting failed"); - // Snapshot configuration let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let mut settings = Settings::clone_current(); @@ -121,29 +94,19 @@ pub(crate) fn assert_placement<'heap, A: Allocator>( /// Helper to set up a test context and run placement analysis. /// -/// Returns the body, context components, and cost vectors for assertion. +/// Returns the body and statement cost vector for assertion. 
#[track_caller] pub(crate) fn run_placement<'heap>( - context: &mut MirContext<'_, 'heap>, + context: &MirContext<'_, 'heap>, placement: &mut impl StatementPlacement<'heap, &'heap Heap>, - mut body: Body<'heap>, -) -> ( - Body<'heap>, - StatementCostVec<&'heap Heap>, - TraversalCostVec<&'heap Heap>, -) { - // Run TraversalExtraction to produce Traversals - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(context, &mut body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - // Run placement analysis - let (traversal_costs, statement_costs) = - placement.statement_placement_in(context, &body, &traversals, context.heap); - - (body, statement_costs, traversal_costs) + body: Body<'heap>, +) -> (Body<'heap>, StatementCostVec<&'heap Heap>) { + let vertex = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) + .unwrap_or_else(|| unimplemented!("lookup for declared type")); + + let statement_costs = placement.statement_placement_in(context, &body, vertex, context.heap); + + (body, statement_costs) } // ============================================================================= @@ -180,27 +143,16 @@ fn non_graph_read_filter_returns_empty() { diagnostics: DiagnosticIssues::new(), }; - let traversals = Traversals::with_capacity_in(Local::new(1), body.local_decls.len(), &heap); - - let traversal_costs = TargetArray::from_fn(|_| None); - let mut postgres = PostgresStatementPlacement::new_in(Global); - let mut interpreter = InterpreterStatementPlacement::::new(&traversal_costs); + let mut interpreter = InterpreterStatementPlacement::new(); let mut embedding = EmbeddingStatementPlacement::new_in(Global); - let (postgres_traversal, postgres_statement) = - postgres.statement_placement_in(&context, &body, &traversals, &heap); - let (interpreter_traversal, interpreter_statement) = - interpreter.statement_placement_in(&context, &body, &traversals, &heap); - let 
(embedding_traversal, embedding_statement) = - embedding.statement_placement_in(&context, &body, &traversals, &heap); + let vertex = VertexType::Entity; + let postgres_statement = postgres.statement_placement_in(&context, &body, vertex, &heap); + let interpreter_statement = interpreter.statement_placement_in(&context, &body, vertex, &heap); + let embedding_statement = embedding.statement_placement_in(&context, &body, vertex, &heap); - assert_eq!(postgres_traversal.iter().count(), 0); assert!(postgres_statement.all_unassigned()); - - assert_eq!(interpreter_traversal.iter().count(), 0); assert!(interpreter_statement.all_unassigned()); - - assert_eq!(embedding_traversal.iter().count(), 0); assert!(embedding_statement.all_unassigned()); } diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs b/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs deleted file mode 100644 index c227f170a40..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/storage/entity.rs +++ /dev/null @@ -1,264 +0,0 @@ -use hashql_core::{ - id::{Id, bit_vec::FiniteBitSet}, - symbol::{ConstantSymbol, sym}, -}; - -use super::access::{Access, AccessMode}; -use crate::body::place::{Projection, ProjectionKind}; - -macro_rules! sym { - ($($sym:tt)::*) => { - sym::$($sym)::*::CONST - }; -} - -/// Resolved entity field path. -/// -/// Each variant identifies a specific storage location in the entity schema. Consumers can -/// exhaustively match on this to generate backend-specific access (SQL expressions, placement -/// decisions, etc.) without duplicating path resolution logic. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Id)] -pub enum EntityPath { - /// `properties.*` — JSONB column in `entity_editions`. - Properties, - /// `encodings.vectors` — embedding backend. - Vectors, - - /// `metadata.record_id` — composite of [`EntityId`] + [`EditionId`]. 
- /// - /// [`EntityId`]: Self::EntityId - /// [`EditionId`]: Self::EditionId - RecordId, - /// `metadata.record_id.entity_id` — composite of `web_id` + `entity_uuid` + `draft_id`. - EntityId, - /// `metadata.record_id.entity_id.web_id` — `entity_temporal_metadata.web_id`. - WebId, - /// `metadata.record_id.entity_id.entity_uuid` — `entity_temporal_metadata.entity_uuid`. - EntityUuid, - /// `metadata.record_id.entity_id.draft_id` — `entity_temporal_metadata.draft_id`. - DraftId, - /// `metadata.record_id.edition_id` — `entity_temporal_metadata.entity_edition_id`. - EditionId, - - /// `metadata.temporal_versioning` — composite of [`DecisionTime`] + [`TransactionTime`]. - /// - /// [`DecisionTime`]: Self::DecisionTime - /// [`TransactionTime`]: Self::TransactionTime - TemporalVersioning, - /// `metadata.temporal_versioning.decision_time` — `entity_temporal_metadata.decision_time`. - DecisionTime, - /// `metadata.temporal_versioning.transaction_time` — - /// `entity_temporal_metadata.transaction_time`. - TransactionTime, - - /// `metadata.entity_type_ids` — `entity_is_of_type` table (via JOIN). - EntityTypeIds, - /// `metadata.archived` — `entity_editions.archived`. - Archived, - /// `metadata.confidence` — `entity_editions.confidence`. - Confidence, - - /// `metadata.provenance.inferred` — JSONB in `entity_ids.provenance`. - ProvenanceInferred, - /// `metadata.provenance.edition` — JSONB in `entity_editions.provenance`. - ProvenanceEdition, - /// `metadata.properties.*` — JSONB (`property_metadata`) in `entity_editions`. - PropertyMetadata, - - /// `link_data.left_entity_id.web_id` — `entity_edge.target_web_id` (via - /// `entity_has_left_entity`). - LeftEntityWebId, - /// `link_data.left_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via - /// `entity_has_left_entity`). - LeftEntityUuid, - /// `link_data.right_entity_id.web_id` — `entity_edge.target_web_id` (via - /// `entity_has_right_entity`). 
- RightEntityWebId, - /// `link_data.right_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via - /// `entity_has_right_entity`). - RightEntityUuid, - /// `link_data.left_entity_confidence` — `entity_edge.confidence` (via - /// `entity_has_left_entity`). - LeftEntityConfidence, - /// `link_data.right_entity_confidence` — `entity_edge.confidence` (via - /// `entity_has_right_entity`). - RightEntityConfidence, - /// `link_data.left_entity_provenance` — JSONB in `entity_edge.provenance` (via - /// `entity_has_left_entity`). - LeftEntityProvenance, - /// `link_data.right_entity_provenance` — JSONB in `entity_edge.provenance` (via - /// `entity_has_right_entity`). - RightEntityProvenance, -} - -type FiniteBitSetWidth = u32; -const _: () = { - assert!( - (FiniteBitSetWidth::BITS as usize) >= core::mem::variant_count::(), - "entity path count exceeds finite bitset width" - ); -}; - -pub type EntityPathBitSet = FiniteBitSet; - -impl EntityPath { - #[must_use] - pub fn resolve(projections: &[Projection<'_>]) -> Option<(Self, usize)> { - resolve(projections) - } - - /// Returns the backend access mode for this path. 
- pub(crate) const fn access(self) -> Access { - match self { - Self::Vectors => Access::Embedding(AccessMode::Direct), - - Self::RecordId | Self::EntityId | Self::TemporalVersioning => { - Access::Postgres(AccessMode::Composite) - } - - Self::Properties - | Self::WebId - | Self::EntityUuid - | Self::DraftId - | Self::EditionId - | Self::DecisionTime - | Self::TransactionTime - | Self::EntityTypeIds - | Self::Archived - | Self::Confidence - | Self::ProvenanceInferred - | Self::ProvenanceEdition - | Self::PropertyMetadata - | Self::LeftEntityWebId - | Self::LeftEntityUuid - | Self::RightEntityWebId - | Self::RightEntityUuid - | Self::LeftEntityConfidence - | Self::RightEntityConfidence - | Self::LeftEntityProvenance - | Self::RightEntityProvenance => Access::Postgres(AccessMode::Direct), - } - } - - const fn is_jsonb(self) -> bool { - matches!( - self, - Self::Properties - | Self::ProvenanceInferred - | Self::ProvenanceEdition - | Self::PropertyMetadata - | Self::LeftEntityProvenance - | Self::RightEntityProvenance - ) - } -} - -#[inline] -fn project(projections: &[Projection<'_>], index: &mut usize) -> Option { - let projection = projections.get(*index).and_then(|projection| { - if let ProjectionKind::FieldByName(name) = projection.kind { - name.as_constant() - } else { - None - } - }); - - if projection.is_some() { - *index += 1; - } - - projection -} - -/// Resolves an entity field path to an [`EntityPath`]. -/// -/// Walks a sequence of field name projections through the entity schema and returns the resolved -/// path, or `None` if the path doesn't map to any known storage location (including synthesized -/// fields like `link_data.*.draft_id`). -#[expect(clippy::match_same_arms, clippy::allow_attributes)] -fn resolve(projections: &[Projection<'_>]) -> Option<(EntityPath, usize)> { - #[allow(clippy::enum_glob_use, reason = "clarity")] - use EntityPath::*; - - let mut index = 0; - - macro_rules! 
next { - () => { - project(projections, &mut index) - }; - - (else $cond:expr) => {{ - if index >= projections.len() { - return Some(($cond, index)); - } - - next!()? - }}; - } - - let path = match next!()? { - // entity_editions.properties (JSONB) - sym!(properties) => Properties, - sym!(encodings) => match next!()? { - sym!(vectors) => Vectors, - _ => return None, - }, - sym!(metadata) => match next!()? { - sym!(record_id) => match next!(else RecordId) { - sym!(entity_id) => match next!(else EntityId) { - sym!(web_id) => WebId, - sym!(entity_uuid) => EntityUuid, - sym!(draft_id) => DraftId, - _ => return None, - }, - sym!(edition_id) => EditionId, - _ => return None, - }, - sym!(temporal_versioning) => match next!(else TemporalVersioning) { - sym!(decision_time) => DecisionTime, - sym!(transaction_time) => TransactionTime, - _ => return None, - }, - sym!(entity_type_ids) => EntityTypeIds, - sym!(archived) => Archived, - sym!(confidence) => Confidence, - sym!(provenance) => match next!()? { - sym!(inferred) => ProvenanceInferred, - sym!(edition) => ProvenanceEdition, - _ => return None, - }, - sym!(properties) => PropertyMetadata, - _ => return None, - }, - sym!(link_data) => match next!()? { - sym!(left_entity_id) => match next!()? { - sym!(web_id) => LeftEntityWebId, - sym!(entity_uuid) => LeftEntityUuid, - // draft_id is synthesized (always None), not stored - sym!(draft_id) => return None, - _ => return None, - }, - sym!(right_entity_id) => match next!()? 
{ - sym!(web_id) => RightEntityWebId, - sym!(entity_uuid) => RightEntityUuid, - // draft_id is synthesized (always None), not stored - sym!(draft_id) => return None, - _ => return None, - }, - sym!(left_entity_confidence) => LeftEntityConfidence, - sym!(right_entity_confidence) => RightEntityConfidence, - sym!(left_entity_provenance) => LeftEntityProvenance, - sym!(right_entity_provenance) => RightEntityProvenance, - _ => return None, - }, - - _ => return None, - }; - - // JSONB paths allow arbitrary sub-paths; all others must be fully resolved - if !path.is_jsonb() && projections.get(index).is_some() { - return None; - } - - Some((path, index)) -} diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs b/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs deleted file mode 100644 index b7caca00e8a..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/storage/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod access; -mod entity; - -#[cfg(test)] -mod tests; - -pub use entity::{EntityPath, EntityPathBitSet}; - -pub(crate) use self::access::Access; diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs b/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs deleted file mode 100644 index 5dda38abefd..00000000000 --- a/libs/@local/hashql/mir/src/pass/execution/storage/tests.rs +++ /dev/null @@ -1,238 +0,0 @@ -//! Unit tests for entity projection path lookup. - -use hashql_core::{symbol::sym, r#type::TypeId}; - -use super::access::{Access, AccessMode}; -use crate::{ - body::{ - local::Local, - place::{Projection, ProjectionKind}, - }, - pass::execution::storage::EntityPath, -}; - -/// Helper to create a `FieldByName` projection. -fn proj(name: impl Into>) -> Projection<'static> { - Projection { - kind: ProjectionKind::FieldByName(name.into()), - r#type: TypeId::PLACEHOLDER, - } -} - -/// `[.properties]` → `Access::Postgres(Direct)` (JSONB column). 
-#[test] -fn properties_is_postgres() { - let projections = &[proj(sym::properties)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.properties.foo.bar]` → Postgres (JSONB otherwise). -/// -/// JSONB nodes have `otherwise` set, so any sub-path is also Postgres-accessible. -#[test] -fn properties_subpath_is_postgres() { - let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, Some(Access::Postgres(AccessMode::Direct))); -} - -/// `[.encodings.vectors]` → `Access::Embedding(Direct)`. -#[test] -fn vectors_is_embedding() { - let projections = &[proj(sym::encodings), proj(sym::vectors)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, Some(Access::Embedding(AccessMode::Direct))); -} - -/// Various metadata paths map to Postgres columns. 
-#[test] -fn metadata_columns_are_postgres() { - // metadata.archived -> Direct - let projections = &[proj(sym::metadata), proj(sym::archived)]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.record_id -> Composite - let projections = &[proj(sym::metadata), proj(sym::record_id)]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Composite)) - ); - - // metadata.record_id.entity_id.web_id -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::record_id), - proj(sym::entity_id), - proj(sym::web_id), - ]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Direct)) - ); - - // metadata.temporal_versioning.decision_time -> Direct - let projections = &[ - proj(sym::metadata), - proj(sym::temporal_versioning), - proj(sym::decision_time), - ]; - assert_eq!( - EntityPath::resolve(projections).map(|(path, _)| path.access()), - Some(Access::Postgres(AccessMode::Direct)) - ); -} - -/// `link_data.left_entity_id.draft_id` → `None` (synthesized, not stored). -#[test] -fn link_data_synthesized_is_none() { - let projections = &[ - proj(sym::link_data), - proj(sym::left_entity_id), - proj(sym::draft_id), - ]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, None); -} - -/// Invalid path like `[.unknown]` → `None`. -#[test] -fn unknown_path_returns_none() { - let projections = &[proj(sym::unknown)]; - let access = EntityPath::resolve(projections).map(|(path, _)| path.access()); - - assert_eq!(access, None); -} - -/// The returned index reflects how many projections were consumed during resolution. 
-#[test] -fn index_counts_consumed_projections() { - // Single-segment: `.properties` consumes 1 - let projections = &[proj(sym::properties)]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::Properties, 1)) - ); - - // Two segments: `.encodings.vectors` consumes 2 - let projections = &[proj(sym::encodings), proj(sym::vectors)]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::Vectors, 2)) - ); - - // Three segments: `.metadata.provenance.inferred` consumes 3 - let projections = &[ - proj(sym::metadata), - proj(sym::provenance), - proj(sym::inferred), - ]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::ProvenanceInferred, 3)) - ); - - // Four segments: `.metadata.record_id.entity_id.web_id` consumes 4 - let projections = &[ - proj(sym::metadata), - proj(sym::record_id), - proj(sym::entity_id), - proj(sym::web_id), - ]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::WebId, 4)) - ); -} - -/// Composite paths that stop early via `next!(else ...)` return the correct index. -#[test] -fn index_for_composite_early_exit() { - // `.metadata.record_id` with no further projections → RecordId at index 2 - let projections = &[proj(sym::metadata), proj(sym::record_id)]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::RecordId, 2)) - ); - - // `.metadata.record_id.entity_id` without a leaf → EntityId at index 3 - let projections = &[ - proj(sym::metadata), - proj(sym::record_id), - proj(sym::entity_id), - ]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::EntityId, 3)) - ); - - // `.metadata.temporal_versioning` without a leaf → TemporalVersioning at index 2 - let projections = &[proj(sym::metadata), proj(sym::temporal_versioning)]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::TemporalVersioning, 2)) - ); -} - -/// A non-FieldByName projection (e.g. 
`Index`) after a composite node must return `None`, not -/// the composite path. Previously the `next!(else ...)` macro conflated "no more projections" with -/// "non-FieldByName projection", bypassing the exhaustion guard. -#[test] -fn non_field_projection_after_composite_returns_none() { - let index_projection = Projection { - kind: ProjectionKind::Index(Local::new(0)), - r#type: TypeId::PLACEHOLDER, - }; - - // `.metadata.record_id` followed by an index projection: not a valid entity path - let projections = &[proj(sym::metadata), proj(sym::record_id), index_projection]; - assert_eq!(EntityPath::resolve(projections), None); - - // `.metadata.record_id.entity_id` followed by an index projection - let projections = &[ - proj(sym::metadata), - proj(sym::record_id), - proj(sym::entity_id), - index_projection, - ]; - assert_eq!(EntityPath::resolve(projections), None); - - // `.metadata.temporal_versioning` followed by an index projection - let projections = &[ - proj(sym::metadata), - proj(sym::temporal_versioning), - index_projection, - ]; - assert_eq!(EntityPath::resolve(projections), None); -} - -/// JSONB paths stop consuming at the storage boundary; sub-path projections are excess. 
-#[test] -fn jsonb_index_excludes_subpath() { - // `.properties.foo.bar` → Properties at index 1, leaving 2 excess projections - let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::Properties, 1)) - ); - - // `.metadata.provenance.inferred.foo.bar` → ProvenanceInferred at index 3 - let projections = &[ - proj(sym::metadata), - proj(sym::provenance), - proj(sym::inferred), - proj(sym::foo), - proj(sym::bar), - ]; - assert_eq!( - EntityPath::resolve(projections), - Some((EntityPath::ProvenanceInferred, 3)) - ); -} diff --git a/libs/@local/hashql/mir/src/pass/execution/target.rs b/libs/@local/hashql/mir/src/pass/execution/target.rs index c7c9a50ab06..304e7a39762 100644 --- a/libs/@local/hashql/mir/src/pass/execution/target.rs +++ b/libs/@local/hashql/mir/src/pass/execution/target.rs @@ -10,6 +10,7 @@ use hashql_core::id::{Id, IdArray, bit_vec::FiniteBitSet}; /// estimation during placement. The interpreter is evaluated last so it can incorporate traversal /// costs computed by the other backends. #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Id)] +#[id(const)] pub enum TargetId { /// In-process evaluator that supports all MIR operations. 
/// diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index 8937633aff8..f9903d47b10 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -34,7 +34,6 @@ use alloc::alloc::Global; use core::{ alloc::Allocator, - iter, ops::{Index, IndexMut}, }; @@ -51,8 +50,10 @@ use hashql_core::{ }; use super::{ - Cost, + Cost, VertexType, + block_partitioned_vec::BlockPartitionedVec, target::{TargetBitSet, TargetId}, + traversal::{TransferCostConfig, TraversalPathBitSet}, }; use crate::{ body::{ @@ -62,15 +63,12 @@ use crate::{ local::Local, terminator::TerminatorKind, }, - pass::{ - analysis::{ - dataflow::{ - TraversalLivenessAnalysis, - framework::{DataflowAnalysis as _, DataflowResults}, - }, - size_estimation::{BodyFootprint, Cardinality, InformationRange}, + pass::analysis::{ + dataflow::{ + TraversalLivenessAnalysis, + framework::{DataflowAnalysis as _, DataflowResults}, }, - transform::Traversals, + size_estimation::{BodyFootprint, Cardinality, InformationRange}, }, }; @@ -100,13 +98,13 @@ impl TransMatrix { } #[inline] - fn offset(from: TargetId, to: TargetId) -> usize { + const fn offset(from: TargetId, to: TargetId) -> usize { from.as_usize() * TargetId::VARIANT_COUNT + to.as_usize() } #[inline] #[expect(clippy::integer_division, clippy::integer_division_remainder_used)] - fn from_offset(offset: usize) -> (TargetId, TargetId) { + const fn from_offset(offset: usize) -> (TargetId, TargetId) { let from = TargetId::from_usize(offset / TargetId::VARIANT_COUNT); let to = TargetId::from_usize(offset % TargetId::VARIANT_COUNT); (from, to) @@ -115,13 +113,13 @@ impl TransMatrix { /// Returns the cost for transitioning from `from` to `to`, or `None` if disallowed. 
#[inline] #[must_use] - pub(crate) fn get(&self, from: TargetId, to: TargetId) -> Option { + pub(crate) const fn get(&self, from: TargetId, to: TargetId) -> Option { self.matrix[Self::offset(from, to)] } #[inline] #[must_use] - pub(crate) fn contains(&self, from: TargetId, to: TargetId) -> bool { + pub(crate) const fn contains(&self, from: TargetId, to: TargetId) -> bool { self.matrix[Self::offset(from, to)].is_some() } @@ -238,54 +236,16 @@ impl IndexMut<(TargetId, TargetId)> for TransMatrix { /// [`Return`]: TerminatorKind::Return /// [`Unreachable`]: TerminatorKind::Unreachable #[derive(Debug)] -pub(crate) struct TerminatorCostVec { - offsets: Box, A>, - matrices: Vec, -} - -impl TerminatorCostVec { - #[expect(unsafe_code)] - fn compute_offsets( - mut iter: impl ExactSizeIterator, - alloc: A, - ) -> (Box, A>, usize) { - let mut offsets = Box::new_uninit_slice_in(iter.len() + 1, alloc); - let mut running_offset = 0_u32; - - offsets[0].write(0); - - let (_, rest) = offsets[1..].write_iter(iter::from_fn(|| { - let successor_count = iter.next()?; - running_offset += successor_count; - Some(running_offset) - })); - - debug_assert!(rest.is_empty()); - debug_assert_eq!(iter.len(), 0); - - // SAFETY: All elements initialized by write_iter loop. - let offsets = unsafe { offsets.assume_init() }; - let offsets = BasicBlockSlice::from_boxed_slice(offsets); - - (offsets, running_offset as usize) - } - - fn from_successor_counts(iter: impl ExactSizeIterator, alloc: A) -> Self - where - A: Clone, - { - let (offsets, total_edges) = Self::compute_offsets(iter, alloc.clone()); - let matrices = alloc::vec::from_elem_in(TransMatrix::new(), total_edges, alloc); - - Self { offsets, matrices } - } +pub(crate) struct TerminatorCostVec(BlockPartitionedVec); +impl TerminatorCostVec { /// Creates a cost vector sized for `blocks`, with all transitions initially disallowed. 
- pub(crate) fn new(blocks: &BasicBlocks, alloc: A) -> Self - where - A: Clone, - { - Self::from_successor_counts(blocks.iter().map(Self::successor_count), alloc) + pub(crate) fn new(blocks: &BasicBlocks, alloc: A) -> Self { + Self(BlockPartitionedVec::new_in( + blocks.iter().map(|block| Self::successor_count(block)), + TransMatrix::new(), + alloc, + )) } #[expect(clippy::cast_possible_truncation)] @@ -296,25 +256,27 @@ impl TerminatorCostVec { TerminatorKind::Return(_) | TerminatorKind::Unreachable => 0, } } +} +impl TerminatorCostVec { pub(crate) const fn len(&self) -> usize { - self.matrices.len() + self.0.len() + } + + /// Returns the number of blocks in the partition. + #[cfg(test)] + pub(crate) fn block_count(&self) -> usize { + self.0.block_count() } /// Returns the transition matrices for all successor edges of `block`. pub(crate) fn of(&self, block: BasicBlockId) -> &[TransMatrix] { - let start = self.offsets[block] as usize; - let end = self.offsets[block.plus(1)] as usize; - - &self.matrices[start..end] + self.0.of(block) } /// Returns mutable transition matrices for all successor edges of `block`. pub(crate) fn of_mut(&mut self, block: BasicBlockId) -> &mut [TransMatrix] { - let start = self.offsets[block] as usize; - let end = self.offsets[block.plus(1)] as usize; - - &mut self.matrices[start..end] + self.0.of_mut(block) } } @@ -458,33 +420,34 @@ impl PopulateEdgeMatrix { /// ``` pub(crate) struct TerminatorPlacement { scratch: S, - entity_size: InformationRange, + transfer_config: TransferCostConfig, } impl TerminatorPlacement { /// Creates a new placement analyzer. /// - /// The `entity_size` estimate is used when computing transfer costs — it represents the - /// expected size of entity data that may need to cross backend boundaries. + /// The [`TransferCostConfig`] provides size estimates for the variable-cost entity fields + /// (properties, embeddings, provenance). 
Fixed-size fields (UUIDs, timestamps, scalars) + /// use constants derived from the entity schema. #[inline] #[must_use] - pub(crate) const fn new_in(entity_size: InformationRange, scratch: S) -> Self { + pub(crate) const fn new_in(transfer_config: TransferCostConfig, scratch: S) -> Self { Self { scratch, - entity_size, + transfer_config, } } - fn compute_liveness<'heap>( + fn compute_liveness( &self, - body: &Body<'heap>, - traversals: &Traversals<'heap>, - ) -> BasicBlockVec, &S> { + body: &Body<'_>, + vertex: VertexType, + ) -> BasicBlockVec<(DenseBitSet, TraversalPathBitSet), &S> { let DataflowResults { analysis: _, entry_states: live_in, exit_states: _, - } = TraversalLivenessAnalysis { traversals }.iterate_to_fixpoint_in(body, &self.scratch); + } = TraversalLivenessAnalysis { vertex }.iterate_to_fixpoint_in(body, &self.scratch); live_in } @@ -500,11 +463,11 @@ impl TerminatorPlacement { pub(crate) fn terminator_placement<'heap>( &self, body: &Body<'heap>, + vertex: VertexType, footprint: &BodyFootprint<&'heap Heap>, - traversals: &Traversals<'heap>, targets: &BasicBlockSlice, ) -> TerminatorCostVec { - self.terminator_placement_in(body, footprint, traversals, targets, Global) + self.terminator_placement_in(body, vertex, footprint, targets, Global) } /// Computes transition costs for all terminator edges in `body`. @@ -519,12 +482,12 @@ impl TerminatorPlacement { pub(crate) fn terminator_placement_in<'heap, A: Allocator + Clone>( &self, body: &Body<'heap>, + vertex: VertexType, footprint: &BodyFootprint<&'heap Heap>, - traversals: &Traversals<'heap>, targets: &BasicBlockSlice, alloc: A, ) -> TerminatorCostVec { - let live_in = self.compute_liveness(body, traversals); + let live_in = self.compute_liveness(body, vertex); let scc = self.compute_scc(body); let mut output = TerminatorCostVec::new(&body.basic_blocks, alloc); @@ -562,18 +525,21 @@ impl TerminatorPlacement { /// Computes the cost of transferring live data across an edge to `successor`. 
/// - /// The cost is the sum of estimated sizes for all locals that are: - /// - Live at the successor's entry - /// - Passed as parameters to the successor block + /// The cost has two components: + /// - **Local cost**: estimated sizes of all non-vertex locals that are live at the successor's + /// entry or passed as block parameters. + /// - **Path cost**: estimated sizes of all live entity field paths, computed from per-path + /// transfer sizes rather than the monolithic entity size. fn compute_transfer_cost( &self, required_locals: &mut DenseBitSet, body: &Body, footprint: &BodyFootprint<&Heap>, - live_in: &BasicBlockSlice>, + live_in: &BasicBlockSlice<(DenseBitSet, TraversalPathBitSet)>, successor: BasicBlockId, ) -> Cost { - required_locals.clone_from(&live_in[successor]); + let (locals, _) = &live_in[successor]; + required_locals.clone_from(locals); for ¶m in body.basic_blocks[successor].params { required_locals.insert(param); @@ -595,7 +561,10 @@ impl TerminatorPlacement { for local in locals { let Some(size_estimate) = footprint.locals[local].average( - &[InformationRange::zero(), self.entity_size], + &[ + InformationRange::zero(), + self.transfer_config.properties_size, + ], &[Cardinality::one(), Cardinality::one()], ) else { return Cost::MAX; diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs index 28b8cd8d898..0b18f2d16d7 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs @@ -12,6 +12,7 @@ use hashql_core::{ heap::Heap, id::{Id as _, bit_vec::FiniteBitSet}, pretty::Formatter, + symbol::sym, r#type::{TypeFormatter, TypeFormatterOptions, builder::TypeBuilder, environment::Environment}, }; use hashql_diagnostics::DiagnosticIssues; @@ -22,18 +23,20 @@ use crate::{ body::{ Body, basic_block::{BasicBlockId, BasicBlockSlice}, - 
local::{Local, LocalVec}, + local::LocalVec, operand::Operand, - place::{FieldIndex, Place, ProjectionKind}, terminator::{GraphRead, GraphReadHead, GraphReadTail, TerminatorKind}, }, builder::{BodyBuilder, body}, context::MirContext, intern::Interner, pass::{ - analysis::size_estimation::{BodyFootprint, Footprint, InformationRange}, - execution::target::{TargetBitSet, TargetId}, - transform::Traversals, + analysis::size_estimation::{BodyFootprint, Footprint, InformationRange, InformationUnit}, + execution::{ + VertexType, + target::{TargetBitSet, TargetId}, + traversal::TransferCostConfig, + }, }, pretty::TextFormatOptions, }; @@ -50,7 +53,7 @@ fn target_set(targets: &[TargetId]) -> TargetBitSet { #[expect(clippy::cast_possible_truncation)] fn all_targets() -> TargetBitSet { let mut set = FiniteBitSet::new_empty(TargetId::VARIANT_COUNT as u32); - set.insert_range(TargetId::MIN..=TargetId::MAX); + set.insert_range(TargetId::MIN..=TargetId::MAX, TargetId::VARIANT_COUNT); set } @@ -84,10 +87,6 @@ fn make_full_footprint<'heap>(body: &Body<'heap>, heap: &'heap Heap) -> BodyFoot } } -fn empty_traversals<'heap>(body: &Body<'heap>, heap: &'heap Heap) -> Traversals<'heap> { - Traversals::with_capacity_in(Local::new(0), body.local_decls.len(), heap) -} - fn assert_snapshot<'heap>( name: &'static str, context: &MirContext<'_, 'heap>, @@ -127,7 +126,7 @@ fn format_edge_summary( edges: &TerminatorCostVec, ) -> impl Display + '_ { fmt::from_fn(move |fmt| { - for block in 0..(edges.offsets.len() - 1) { + for block in 0..edges.block_count() { let block_id = BasicBlockId::from_usize(block); let matrices = edges.of(block_id); writeln!(fmt, "{block_id}:")?; @@ -191,8 +190,8 @@ fn goto_allows_cross_backend_non_postgres() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl param: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], 
param: Int; bb0() { goto bb1(1); @@ -208,12 +207,12 @@ fn goto_allows_cross_backend_non_postgres() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -230,8 +229,8 @@ fn switchint_blocks_cross_backend() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl selector: Int, param: Int, result: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], selector: Int, param: Int, result: Int; bb0() { selector = load 1; @@ -253,12 +252,12 @@ fn switchint_blocks_cross_backend() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -276,8 +275,8 @@ fn switchint_edge_targets_are_branch_specific() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl selector: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], selector: Int; bb0() { selector = load 1; @@ -298,12 +297,12 @@ fn switchint_edge_targets_are_branch_specific() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = 
TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -382,12 +381,12 @@ fn graphread_interpreter_only() { let targets = [all_targets(), all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -407,8 +406,8 @@ fn postgres_incoming_removed() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl value: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], value: Int; bb0() { goto bb1(); @@ -422,12 +421,12 @@ fn postgres_incoming_removed() { let targets = [all_targets(), all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -446,8 +445,8 @@ fn postgres_removed_in_loops() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl value: Int; + let body = body!(interner, env; 
[graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], value: Int; bb0() { value = load 0; @@ -461,12 +460,12 @@ fn postgres_removed_in_loops() { let targets = [all_targets(), all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -485,8 +484,8 @@ fn postgres_removed_in_self_loops() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl value: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], value: Int; bb0() { value = load 0; @@ -497,12 +496,12 @@ fn postgres_removed_in_self_loops() { let targets = [all_targets()]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -521,8 +520,8 @@ fn transfer_cost_counts_live_and_params() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl live: Int, param: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], live: Int, param: Int; bb0() { live = load 10; @@ -543,12 +542,12 @@ fn transfer_cost_counts_live_and_params() { ]; let 
footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); @@ -560,111 +559,99 @@ fn transfer_cost_counts_live_and_params() { } #[test] -fn traversal_assignment_skips_source_transfer_cost() { +fn transfer_cost_is_max_for_unbounded() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl source: (Int, Int), dest: Int; - @proj source_0 = source.0: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], arg: [List Int], param: [List Int]; bb0() { - goto bb1(); + arg = list 1, 2; + if true then bb1(arg) else bb2(); }, - bb1() { - dest = load source_0; + bb1(param) { + return 0; + }, + bb2() { return 0; } }); - // _0 = source, _1 = dest - let source = Local::new(0); - let dest = Local::new(1); - - let mut traversals = Traversals::with_capacity_in(source, body.local_decls.len(), &heap); - traversals.insert( - dest, - Place::local(source).project( - &interner, - TypeBuilder::synthetic(&env).integer(), - ProjectionKind::Field(FieldIndex::new(0)), - ), - ); - let targets = [ target_set(&[TargetId::Interpreter, TargetId::Postgres]), target_set(&[TargetId::Interpreter, TargetId::Postgres]), + target_set(&[TargetId::Interpreter, TargetId::Postgres]), ]; - let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversal_costs = placement.terminator_placement( - &body, - &footprint, - &traversals, - build_targets(&body, &targets), - ); - let standard_costs = 
placement.terminator_placement( + let footprint = make_full_footprint(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); + let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &empty_traversals(&body, &heap), build_targets(&body, &targets), ); - let traversal_matrix = traversal_costs.of(BasicBlockId::new(0))[0]; - let standard_matrix = standard_costs.of(BasicBlockId::new(0))[0]; - assert_eq!( - traversal_matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(0)) - ); + let matrix = costs.of(BasicBlockId::new(0))[1]; assert_eq!( - standard_matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(1)) + matrix.get(TargetId::Postgres, TargetId::Interpreter), + Some(Cost::MAX) ); } +/// Edge transfer cost only accounts for live locals; path costs are charged at block level. +/// +/// A scalar local (`live`) costs 1. Entity paths (`ProvenanceEdition`, `Properties`) are live +/// in bb1 but do not contribute to edge transfer cost (path costs moved to `BasicBlockCostVec`). #[test] -fn transfer_cost_is_max_for_unbounded() { +fn transfer_cost_from_live_locals() { let heap = Heap::new(); let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl arg: [List Int], param: [List Int]; + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], live: Int, val1: ?, val2: ?; + @proj metadata = vertex.metadata: ?, + prov = metadata.provenance: ?, + edition = prov.edition: ?, + props = vertex.properties: ?; bb0() { - arg = list 1, 2; - if true then bb1(arg) else bb2(); - }, - bb1(param) { - return 0; + live = load 42; + goto bb1(); }, - bb2() { - return 0; + bb1() { + val1 = load edition; + val2 = load props; + return live; } }); let targets = [ target_set(&[TargetId::Interpreter, TargetId::Postgres]), target_set(&[TargetId::Interpreter, TargetId::Postgres]), - target_set(&[TargetId::Interpreter, TargetId::Postgres]), ]; - let footprint = make_full_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), Global); - let traversals = empty_traversals(&body, &heap); + let footprint = make_scalar_footprint(&body, &heap); + let placement = TerminatorPlacement::new_in( + TransferCostConfig::new(InformationRange::value(InformationUnit::new(10))), + Global, + ); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); - let matrix = costs.of(BasicBlockId::new(0))[1]; + // local_cost: `live` scalar = 1 + let matrix = costs.of(BasicBlockId::new(0))[0]; assert_eq!( matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(Cost::MAX) + Some(cost!(1)) ); } @@ -674,8 +661,8 @@ fn terminator_placement_snapshot() { let interner = Interner::new(&heap); let env = Environment::new(&heap); - let body = body!(interner, env; fn@0/0 -> Int { - decl selector: Int, live: Int, param: Int; + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (), vertex: [Opaque sym::path::Entity; ?], selector: Int, live: Int, param: Int; bb0() { live = load 10; @@ -697,12 +684,12 @@ fn terminator_placement_snapshot() { ]; let footprint = make_scalar_footprint(&body, &heap); - let placement = TerminatorPlacement::new_in(InformationRange::zero(), 
Global); - let traversals = empty_traversals(&body, &heap); + let placement = + TerminatorPlacement::new_in(TransferCostConfig::new(InformationRange::zero()), Global); let costs = placement.terminator_placement( &body, + VertexType::Entity, &footprint, - &traversals, build_targets(&body, &targets), ); diff --git a/libs/@local/hashql/mir/src/pass/execution/tests.rs b/libs/@local/hashql/mir/src/pass/execution/tests.rs index 36300a48500..998891df660 100644 --- a/libs/@local/hashql/mir/src/pass/execution/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/tests.rs @@ -21,10 +21,9 @@ use crate::{ def::{DefId, DefIdSlice}, intern::Interner, pass::{ - Changed, GlobalAnalysisPass as _, TransformPass as _, + GlobalAnalysisPass as _, analysis::size_estimation::SizeEstimationAnalysis, execution::{ExecutionAnalysis, island::IslandVec, target::TargetId}, - transform::TraversalExtraction, }, }; @@ -66,7 +65,7 @@ fn assert_execution<'heap>( assert_snapshot!(name, output); } -/// Runs `TraversalExtraction` and `SizeEstimationAnalysis`, then `ExecutionAnalysis`. +/// Runs `SizeEstimationAnalysis`, then `ExecutionAnalysis`. 
#[track_caller] fn run_execution<'heap>( context: &mut MirContext<'_, 'heap>, @@ -75,22 +74,12 @@ fn run_execution<'heap>( BasicBlockVec, IslandVec, ) { - let mut extraction = TraversalExtraction::new_in(Global); - let _: Changed = extraction.run(context, body); - let traversals = extraction - .take_traversals() - .expect("expected GraphReadFilter body"); - - let traversals = [Some(traversals)]; - let traversals_slice = DefIdSlice::from_raw(&traversals); - let mut size_analysis = SizeEstimationAnalysis::new_in(Global); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(body))); let footprints = size_analysis.finish(); let mut scratch = Scratch::new(); let analysis = ExecutionAnalysis { - traversals: traversals_slice, footprints: &footprints, scratch: &mut scratch, }; diff --git a/libs/@local/hashql/mir/src/pass/execution/storage/access.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/access.rs similarity index 100% rename from libs/@local/hashql/mir/src/pass/execution/storage/access.rs rename to libs/@local/hashql/mir/src/pass/execution/traversal/access.rs diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs new file mode 100644 index 00000000000..fbfb0a1b0b6 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/mod.rs @@ -0,0 +1,83 @@ +#[cfg(test)] +mod tests; + +use super::TraversalPath; +use crate::{ + body::{ + local::Local, + location::Location, + place::{DefUse, Place, PlaceContext}, + }, + pass::execution::{VertexType, traversal::EntityPath}, + visit::{self, Visitor}, +}; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub(crate) enum TraversalResult { + Path(TraversalPath), + Complete, +} + +// TODO: Each consumer (statement placement per target, island placement) resolves traversal paths +// independently. Consider caching resolved paths per body to avoid redundant work. 
+// See: https://linear.app/hash/issue/BE-435 +pub(crate) struct TraversalAnalysisVisitor { + vertex: VertexType, + on_traversal: F, +} + +impl TraversalAnalysisVisitor { + pub(crate) const fn new(vertex: VertexType, on_traversal: F) -> Self + where + F: FnMut(Location, TraversalResult), + { + Self { + vertex, + on_traversal, + } + } +} + +impl<'heap, F> Visitor<'heap> for TraversalAnalysisVisitor +where + F: FnMut(Location, TraversalResult), +{ + type Result = Result<(), !>; + + fn visit_place( + &mut self, + location: Location, + context: PlaceContext, + place: &Place<'heap>, + ) -> Self::Result { + if place.local != Local::VERTEX { + // We do not target the vertex itself, so no traversals need to be recorded. + return Ok(()); + } + + if context.into_def_use() != Some(DefUse::Use) { + // We're only interested in `DefUse::Use` + return Ok(()); + } + + match self.vertex { + VertexType::Entity => { + let path = EntityPath::resolve(&place.projections); + + if let Some((path, _)) = path { + (self.on_traversal)( + location, + TraversalResult::Path(TraversalPath::Entity(path)), + ); + } else { + // The path doesn't map to any known storage location (e.g. + // `link_data.*.draft_id` is synthesized, not stored). To use the value at + // runtime we must fully hydrate the entity so the runtime can construct it. 
+ (self.on_traversal)(location, TraversalResult::Complete); + } + } + } + + visit::r#ref::walk_place(self, location, context, place) + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs new file mode 100644 index 00000000000..7bcb3f53e08 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/analysis/tests.rs @@ -0,0 +1,503 @@ +#![expect(clippy::min_ident_chars)] +use hashql_core::{heap::Heap, id::Id as _, symbol::sym, r#type::environment::Environment}; + +use super::{TraversalAnalysisVisitor, TraversalResult}; +use crate::{ + body::{Body, basic_block::BasicBlockId, location::Location}, + builder::body, + intern::Interner, + pass::execution::{ + VertexType, + traversal::{EntityPath, TraversalPathBitSet}, + }, + visit::Visitor as _, +}; + +struct TestTraversals(Vec>); + +impl core::ops::Index for TestTraversals { + type Output = TraversalPathBitSet; + + fn index(&self, index: Location) -> &TraversalPathBitSet { + &self.0[index.block.as_usize()][index.statement_index - 1] + } +} + +fn analyze(body: &Body<'_>) -> TestTraversals { + let vertex = VertexType::Entity; + let mut result: Vec> = body + .basic_blocks + .iter() + .map(|block| vec![TraversalPathBitSet::empty(vertex); block.statements.len() + 1]) + .collect(); + + let mut visitor = TraversalAnalysisVisitor::new(vertex, |location: Location, trav_result| { + let entry = &mut result[location.block.as_usize()][location.statement_index - 1]; + match trav_result { + TraversalResult::Path(path) => entry.insert(path), + TraversalResult::Complete => entry.insert_all(), + } + }); + let Ok(()) = visitor.visit_body(body); + + TestTraversals(result) +} + +fn location(block: usize, statement_index: usize) -> Location { + Location { + block: BasicBlockId::from_usize(block), + statement_index, + } +} + +/// Accessing `_1.properties` records `{Properties}` at the statement. 
+#[test] +fn single_leaf_path() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], props: ?; + @proj properties = vertex.properties: ?; + + bb0() { + props = load properties; + return props; + } + }); + + let traversals = analyze(&body); + + // statement 0: props = load _1.properties + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Properties)); + assert_eq!(stmt.len(), 1); + + // terminator: return props (not a vertex access) + let term = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(term.is_empty()); +} + +/// Chained projections `_1.metadata.archived` resolve to `{Archived}`. +#[test] +fn multi_segment_path() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: Bool; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + val = load archived; + return val; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Archived)); + assert_eq!(stmt.len(), 1); +} + +/// Bare vertex access (`load _1`) sets all bits via `insert_all`. +#[test] +fn bare_vertex_sets_all_bits() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + + bb0() { + val = load vertex; + return val; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + // Composites present, their children subsumed + assert!(stmt.contains(EntityPath::Properties)); + assert!(stmt.contains(EntityPath::Vectors)); + assert!(stmt.contains(EntityPath::RecordId)); + assert!(stmt.contains(EntityPath::TemporalVersioning)); + assert!(!stmt.contains(EntityPath::EntityId)); + assert!(!stmt.contains(EntityPath::WebId)); + assert!(!stmt.contains(EntityPath::DecisionTime)); + // 25 variants - 7 children = 18 top-level paths + assert_eq!(stmt.len(), 18); +} + +/// A tuple referencing two vertex projections records both paths at one location. +#[test] +fn multiple_paths_same_statement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, Bool) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, Bool); + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived = metadata.archived: Bool; + + bb0() { + result = tuple properties, archived; + return result; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Properties)); + assert!(stmt.contains(EntityPath::Archived)); + assert_eq!(stmt.len(), 2); +} + +/// Returning a vertex projection place records the path at the terminator position. 
+#[test] +fn terminator_vertex_access() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), vertex: [Opaque sym::path::Entity; ?]; + @proj metadata = vertex.metadata: ?, archived = metadata.archived: Bool; + + bb0() { + return archived; + } + }); + + let traversals = analyze(&body); + + // 0 statements, terminator at index 1 + let term = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(term.contains(EntityPath::Archived)); + assert_eq!(term.len(), 1); +} + +/// Accessing env fields (non-vertex local) produces no traversal entries. +#[test] +fn non_vertex_access_ignored() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { + decl env: (Int), vertex: [Opaque sym::path::Entity; ?], val: Int; + @proj env_0 = env.0: Int; + + bb0() { + val = load env_0; + return val; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.is_empty()); + + let term = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(term.is_empty()); +} + +/// Composite path `_1.metadata.record_id` records `{RecordId}`, not individual children. +#[test] +fn composite_path_recorded() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?; + + bb0() { + val = load record_id; + return val; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::RecordId)); + assert_eq!(stmt.len(), 1); +} + +/// Embedding path `_1.encodings.vectors` records `{Vectors}`. +#[test] +fn embedding_path_recorded() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj encodings = vertex.encodings: ?, + vectors = encodings.vectors: ?; + + bb0() { + val = load vectors; + return val; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::Vectors)); + assert_eq!(stmt.len(), 1); +} + +/// Vertex accesses in different blocks are recorded at the correct locations. +#[test] +fn paths_across_blocks() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + props: ?, val: Bool, cond: Bool; + @proj properties = vertex.properties: ?, + metadata = vertex.metadata: ?, + archived = metadata.archived: Bool; + + bb0() { + props = load properties; + cond = load true; + if cond then bb1() else bb2(); + }, + bb1() { + val = load archived; + return val; + }, + bb2() { + return cond; + } + }); + + let traversals = analyze(&body); + + // bb0[0]: props = load _1.properties + let bb0_s0 = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb0_s0.contains(EntityPath::Properties)); + assert_eq!(bb0_s0.len(), 1); + + // bb0[1]: cond = load true (no vertex access) + let bb0_s1 = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb0_s1.is_empty()); + + // bb1[0]: val = load _1.metadata.archived + let bb1_s0 = traversals[location(1, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb1_s0.contains(EntityPath::Archived)); + assert_eq!(bb1_s0.len(), 1); + + // bb2 terminator: return cond (no vertex access) + let bb2_term = traversals[location(2, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(bb2_term.is_empty()); +} + +/// Each statement records paths independently; no cross-statement interaction. +/// +/// A statement loading `_1.metadata.record_id` followed by one loading +/// `_1.metadata.record_id.entity_id.web_id`: the first records `{RecordId}`, +/// the second records `{WebId}`. Swallowing only applies within a single statement. +#[test] +fn paths_recorded_independently_per_statement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], + rid: ?, wid: ?; + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?, + entity_id = record_id.entity_id: ?, + web_id = entity_id.web_id: ?; + + bb0() { + rid = load record_id; + wid = load web_id; + return rid; + } + }); + + let traversals = analyze(&body); + + // Each statement records independently + let stmt0 = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt0.contains(EntityPath::RecordId)); + assert_eq!(stmt0.len(), 1); + + let stmt1 = traversals[location(0, 2)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt1.contains(EntityPath::WebId)); + assert_eq!(stmt1.len(), 1); +} + +/// An unresolvable vertex projection (e.g., `_1.unknown`) triggers `insert_all`. +/// +/// When `EntityPath::resolve` returns `None`, the analysis conservatively assumes the +/// entire entity is needed and sets all bits. +#[test] +fn unresolvable_projection_sets_all_bits() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj unknown = vertex.unknown: ?; + + bb0() { + val = load unknown; + return val; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + // Unresolvable path → insert_all → 25 variants - 7 children = 18 + assert_eq!(stmt.len(), 18); + assert!(stmt.contains(EntityPath::Properties)); + assert!(stmt.contains(EntityPath::RecordId)); +} + +/// `link_data.left_entity_id.web_id` resolves to `{LeftEntityWebId}`. +#[test] +fn link_data_path_recorded() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], val: ?; + @proj link_data = vertex.link_data: ?, + left_entity_id = link_data.left_entity_id: ?, + web_id = left_entity_id.web_id: ?; + + bb0() { + val = load web_id; + return val; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::LeftEntityWebId)); + assert_eq!(stmt.len(), 1); +} + +/// `TemporalVersioning` composite swallowing works end-to-end through analysis. +/// +/// A tuple referencing both `_1.metadata.temporal_versioning.decision_time` and +/// `_1.metadata.temporal_versioning`: `TemporalVersioning` swallows `DecisionTime`. +#[test] +fn temporal_versioning_swallowing_through_analysis() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) { + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj metadata = vertex.metadata: ?, + temporal_versioning = metadata.temporal_versioning: ?, + decision_time = temporal_versioning.decision_time: ?; + + bb0() { + result = tuple decision_time, temporal_versioning; + return result; + } + }); + + let traversals = analyze(&body); + + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::TemporalVersioning)); + assert!(!stmt.contains(EntityPath::DecisionTime)); + assert_eq!(stmt.len(), 1); +} + +/// Within a single statement, inserting a composite after its child swallows the child. +#[test] +fn swallowing_within_statement() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> (?, ?) 
{ + decl env: (), vertex: [Opaque sym::path::Entity; ?], result: (?, ?); + @proj metadata = vertex.metadata: ?, + record_id = metadata.record_id: ?, + entity_id = record_id.entity_id: ?, + web_id = entity_id.web_id: ?; + + bb0() { + result = tuple web_id, record_id; + return result; + } + }); + + let traversals = analyze(&body); + + // Both operands reference _1. WebId is inserted first, then RecordId swallows it. + let stmt = traversals[location(0, 1)] + .as_entity() + .expect("should be an entity path bitset"); + assert!(stmt.contains(EntityPath::RecordId)); + assert!(!stmt.contains(EntityPath::WebId)); + assert_eq!(stmt.len(), 1); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs new file mode 100644 index 00000000000..f184abf0f0b --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -0,0 +1,651 @@ +use core::{debug_assert_matches, num::NonZero, ops::Bound}; + +use hashql_core::{ + id::{ + Id, + bit_vec::{BitRelations as _, FiniteBitSet}, + }, + symbol::{ConstantSymbol, sym}, +}; + +use super::{ + TraversalLattice, VertexType, + access::{Access, AccessMode}, +}; +use crate::{ + body::place::{Projection, ProjectionKind}, + pass::{ + analysis::{ + dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, + size_estimation::{InformationRange, InformationUnit}, + }, + execution::target::{TargetArray, TargetBitSet, TargetId}, + }, +}; + +macro_rules! sym { + ($($sym:tt)::*) => { + sym::$($sym)::*::CONST + }; +} + +/// Resolved entity field path. +/// +/// Each variant identifies a specific storage location in the entity schema. Consumers can +/// exhaustively match on this to generate backend-specific access (SQL expressions, placement +/// decisions, etc.) without duplicating path resolution logic. 
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Id)] +#[id(const)] +pub enum EntityPath { + /// `properties.*` — JSONB column in `entity_editions`. + Properties, + /// `encodings.vectors` — embedding backend. + Vectors, + + /// `metadata.record_id` — composite of [`EntityId`] + [`EditionId`]. + /// + /// [`EntityId`]: Self::EntityId + /// [`EditionId`]: Self::EditionId + RecordId, + /// `metadata.record_id.entity_id` — composite of `web_id` + `entity_uuid` + `draft_id`. + EntityId, + /// `metadata.record_id.entity_id.web_id` — `entity_temporal_metadata.web_id`. + WebId, + /// `metadata.record_id.entity_id.entity_uuid` — `entity_temporal_metadata.entity_uuid`. + EntityUuid, + /// `metadata.record_id.entity_id.draft_id` — `entity_temporal_metadata.draft_id`. + DraftId, + /// `metadata.record_id.edition_id` — `entity_temporal_metadata.entity_edition_id`. + EditionId, + + /// `metadata.temporal_versioning` — composite of [`DecisionTime`] + [`TransactionTime`]. + /// + /// [`DecisionTime`]: Self::DecisionTime + /// [`TransactionTime`]: Self::TransactionTime + TemporalVersioning, + /// `metadata.temporal_versioning.decision_time` — `entity_temporal_metadata.decision_time`. + DecisionTime, + /// `metadata.temporal_versioning.transaction_time` — + /// `entity_temporal_metadata.transaction_time`. + TransactionTime, + + /// `metadata.entity_type_ids` — `entity_is_of_type` table (via JOIN). + EntityTypeIds, + /// `metadata.archived` — `entity_editions.archived`. + Archived, + /// `metadata.confidence` — `entity_editions.confidence`. + Confidence, + + /// `metadata.provenance.inferred` — JSONB in `entity_ids.provenance`. + ProvenanceInferred, + /// `metadata.provenance.edition` — JSONB in `entity_editions.provenance`. + ProvenanceEdition, + /// `metadata.properties.*` — JSONB (`property_metadata`) in `entity_editions`. + PropertyMetadata, + + /// `link_data.left_entity_id.web_id` — `entity_edge.target_web_id` (via + /// `entity_has_left_entity`). 
+ LeftEntityWebId, + /// `link_data.left_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via + /// `entity_has_left_entity`). + LeftEntityUuid, + /// `link_data.right_entity_id.web_id` — `entity_edge.target_web_id` (via + /// `entity_has_right_entity`). + RightEntityWebId, + /// `link_data.right_entity_id.entity_uuid` — `entity_edge.target_entity_uuid` (via + /// `entity_has_right_entity`). + RightEntityUuid, + /// `link_data.left_entity_confidence` — `entity_edge.confidence` (via + /// `entity_has_left_entity`). + LeftEntityConfidence, + /// `link_data.right_entity_confidence` — `entity_edge.confidence` (via + /// `entity_has_right_entity`). + RightEntityConfidence, + /// `link_data.left_entity_provenance` — JSONB in `entity_edge.provenance` (via + /// `entity_has_left_entity`). + LeftEntityProvenance, + /// `link_data.right_entity_provenance` — JSONB in `entity_edge.provenance` (via + /// `entity_has_right_entity`). + RightEntityProvenance, +} + +/// Configuration for entity field transfer cost estimation. +/// +/// Separates the variable-size components (properties, embeddings, provenance) from the +/// fixed-size schema fields. The fixed costs (UUIDs, timestamps, scalars) are constants on +/// [`EntityPath::estimate_size`]; this config provides the values that vary per entity type +/// or deployment. +#[derive(Debug, Copy, Clone)] +pub(crate) struct TransferCostConfig { + /// Size of the entity's properties (the `T` parameter in `Entity`). + pub properties_size: InformationRange, + /// Size of a single embedding vector. + pub embedding_size: InformationRange, + /// Size of `EntityEditionProvenance` JSONB (`entity_editions.provenance`). + /// + /// Variable structure: `created_by_id` + optional `archived_by_id` + `actor_type` + + /// `OriginProvenance` (tag + optional strings) + `Vec` (typically 0-2 + /// items, each with optional entity ID, authors, location, and timestamps). 
+ pub edition_provenance_size: InformationRange, + /// Size of `PropertyProvenance` JSONB on entity edges (`entity_edge.provenance`). + /// + /// Just `Vec`. Incoming edges are always empty; outgoing edges + /// carry the caller-provided provenance, typically 0-1 sources. + pub edge_provenance_size: InformationRange, + /// Divisor for estimating property metadata size from properties size. + /// + /// Property metadata stores per-key metadata (confidence, provenance) rather than values, + /// so it is lighter than properties. The estimate is `properties_size / divisor`. + /// + /// This is a placeholder until the confirmed entity type set is available, at which point + /// the metadata size can be computed directly from the property key count. + pub property_metadata_divisor: NonZero, + /// Multiplier for the cost of transferring an entity to a target. + /// + /// For example, if the multiplier for Postgres is 2, then transferring an entity to Postgres + /// costs twice as much as transferring it to the interpreter. + pub target_multiplier: TargetArray>, +} + +impl TransferCostConfig { + /// Creates a config with the current HASH schema defaults. + /// + /// Uses the known embedding dimension (`vector(3072)`) and a metadata-to-properties ratio + /// of 1:4. Provenance sizes are derived from the actual JSONB structures stored by the + /// graph service. Only `properties_size` varies per entity type. 
#[must_use]
pub(crate) const fn new(properties_size: InformationRange) -> Self {
    // Known embedding dimension of the current schema (`vector(3072)`).
    let embedding_size = InformationRange::value(InformationUnit::new(3072));
    let edition_provenance_size = InformationRange::new(
        InformationUnit::new(3),
        Bound::Included(InformationUnit::new(20)),
    );
    let edge_provenance_size = InformationRange::new(
        InformationUnit::new(0),
        Bound::Included(InformationUnit::new(10)),
    );

    Self {
        properties_size,
        embedding_size,
        edition_provenance_size,
        edge_provenance_size,
        // Metadata-to-properties ratio of 1:4.
        property_metadata_divisor: NonZero::new(4).expect("infallible"),
        // Every target starts out with a multiplier of one.
        target_multiplier: TargetArray::from_raw([NonZero::new(1).expect("infallible"); _]),
    }
}
}

/// Integral type that must provide at least one bit per [`EntityPath`] variant.
type FiniteBitSetWidth = u32;

// Compile-time guard: adding variants beyond the bit width fails the build.
const _: () = {
    assert!(
        (FiniteBitSetWidth::BITS as usize) >= core::mem::variant_count::<EntityPath>(),
        "entity path count exceeds finite bitset width"
    );
};

impl EntityPath {
    /// Resolves a sequence of projections to an entity path.
    ///
    /// Returns the resolved path together with the number of projections that were consumed,
    /// or `None` if the projections do not map to a known storage location.
    #[must_use]
    pub fn resolve(projections: &[Projection<'_>]) -> Option<(Self, usize)> {
        resolve(projections)
    }

    /// Returns the set of execution targets that natively serve this path.
    pub(crate) const fn origin(self) -> TargetBitSet {
        // The backend that serves the path is its only native origin.
        let target = match self.access() {
            Access::Postgres(_) => TargetId::Postgres,
            Access::Embedding(_) => TargetId::Embedding,
        };

        let mut origin = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32);
        origin.insert(target);
        origin
    }

    /// Returns the backend access mode for this path.
+ pub(crate) const fn access(self) -> Access { + match self { + Self::Vectors => Access::Embedding(AccessMode::Direct), + + Self::RecordId | Self::EntityId | Self::TemporalVersioning => { + Access::Postgres(AccessMode::Composite) + } + + Self::Properties + | Self::WebId + | Self::EntityUuid + | Self::DraftId + | Self::EditionId + | Self::DecisionTime + | Self::TransactionTime + | Self::EntityTypeIds + | Self::Archived + | Self::Confidence + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid + | Self::LeftEntityConfidence + | Self::RightEntityConfidence + | Self::LeftEntityProvenance + | Self::RightEntityProvenance => Access::Postgres(AccessMode::Direct), + } + } + + /// Returns the transitive children of this path in the composite hierarchy. + /// + /// Composites cover their children: [`RecordId`](Self::RecordId) covers + /// [`EntityId`](Self::EntityId) and all of its children, plus [`EditionId`](Self::EditionId). + /// Leaf paths return an empty slice. 
+ const fn children(self) -> &'static [Self] { + match self { + Self::RecordId => &[ + Self::EntityId, + Self::WebId, + Self::EntityUuid, + Self::DraftId, + Self::EditionId, + ], + Self::EntityId => &[Self::WebId, Self::EntityUuid, Self::DraftId], + Self::TemporalVersioning => &[Self::DecisionTime, Self::TransactionTime], + Self::Properties + | Self::Vectors + | Self::WebId + | Self::EntityUuid + | Self::DraftId + | Self::EditionId + | Self::DecisionTime + | Self::TransactionTime + | Self::EntityTypeIds + | Self::Archived + | Self::Confidence + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid + | Self::LeftEntityConfidence + | Self::RightEntityConfidence + | Self::LeftEntityProvenance + | Self::RightEntityProvenance => &[], + } + } + + /// Returns the ancestor composites of this path, nearest first. + /// + /// For example, [`WebId`](Self::WebId) has ancestors + /// [`EntityId`](Self::EntityId) and [`RecordId`](Self::RecordId). + /// Top-level paths return an empty slice. + pub(crate) const fn ancestors(self) -> &'static [Self] { + match self { + Self::WebId | Self::EntityUuid | Self::DraftId => &[Self::EntityId, Self::RecordId], + Self::EntityId | Self::EditionId => &[Self::RecordId], + Self::DecisionTime | Self::TransactionTime => &[Self::TemporalVersioning], + Self::Properties + | Self::Vectors + | Self::RecordId + | Self::TemporalVersioning + | Self::EntityTypeIds + | Self::Archived + | Self::Confidence + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid + | Self::LeftEntityConfidence + | Self::RightEntityConfidence + | Self::LeftEntityProvenance + | Self::RightEntityProvenance => &[], + } + } + + /// Returns the estimated transfer size for this path in information units. 
+ /// + /// Fixed-size fields (UUIDs, timestamps, scalars) return known constants derived from the + /// entity schema. [`Properties`](Self::Properties) depends on the entity's type parameter. + /// [`PropertyMetadata`](Self::PropertyMetadata) is estimated at 1/4 of properties size, + /// since it stores lightweight per-property-key metadata rather than values. + pub(crate) fn estimate_size(self, config: &TransferCostConfig) -> InformationRange { + #[expect(clippy::match_same_arms, reason = "readability")] + #[expect(clippy::integer_division)] + match self { + Self::Properties => config.properties_size, + Self::PropertyMetadata => { + let divisor = config.property_metadata_divisor; + let min = InformationUnit::new(config.properties_size.min().as_u32() / divisor); + config.properties_size.inclusive_max().map_or_else( + || InformationRange::new(min, Bound::Unbounded), + |max| { + InformationRange::new( + min, + Bound::Included(InformationUnit::new(max.as_u32() / divisor)), + ) + }, + ) + } + + Self::Vectors => config.embedding_size, + + // Composites: sum of leaf children + Self::RecordId => InformationRange::value(InformationUnit::new(4)), + Self::EntityId => InformationRange::value(InformationUnit::new(3)), + Self::TemporalVersioning => InformationRange::value(InformationUnit::new(4)), + + // UUID fields + Self::WebId + | Self::EntityUuid + | Self::DraftId + | Self::EditionId + | Self::LeftEntityWebId + | Self::LeftEntityUuid + | Self::RightEntityWebId + | Self::RightEntityUuid => InformationRange::one(), + + // Temporal intervals (start + end timestamps) + Self::DecisionTime | Self::TransactionTime => { + InformationRange::value(InformationUnit::new(2)) + } + + // Type ID list (variable length, at least one type) + Self::EntityTypeIds => InformationRange::new(InformationUnit::new(1), Bound::Unbounded), + + // Scalar metadata + Self::Archived + | Self::Confidence + | Self::LeftEntityConfidence + | Self::RightEntityConfidence => InformationRange::one(), + + // 
Provenance: inferred is a fixed structure (3 required + 2 optional scalars) + Self::ProvenanceInferred => InformationRange::new( + InformationUnit::new(3), + Bound::Included(InformationUnit::new(5)), + ), + // Provenance: edition and edge have Vec, sized from config + Self::ProvenanceEdition => config.edition_provenance_size, + Self::LeftEntityProvenance | Self::RightEntityProvenance => config.edge_provenance_size, + } + } + + const fn is_jsonb(self) -> bool { + matches!( + self, + Self::Properties + | Self::ProvenanceInferred + | Self::ProvenanceEdition + | Self::PropertyMetadata + | Self::LeftEntityProvenance + | Self::RightEntityProvenance + ) + } +} + +const HAS_ANCESTORS: [EntityPath; HAS_ANCESTOR_COUNT] = { + let mut out = [EntityPath::Archived; HAS_ANCESTOR_COUNT]; + + let mut index = 0; + let mut ptr = 0; + let paths = EntityPath::all(); + + while ptr < paths.len() { + if !paths[ptr].ancestors().is_empty() { + out[index] = paths[ptr]; + index += 1; + } + + ptr += 1; + } + + out +}; +const HAS_ANCESTOR_COUNT: usize = { + let mut count = 0; + let mut index = 0; + let paths = EntityPath::all(); + + while index < paths.len() { + if !paths[index].ancestors().is_empty() { + count += 1; + } + + index += 1; + } + + count +}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct EntityPathBitSet(FiniteBitSet); + +impl EntityPathBitSet { + const BOTTOM: Self = Self::new_empty(); + #[expect(clippy::cast_possible_truncation)] + const TOP: Self = { + let mut set = FiniteBitSet::new_empty(core::mem::variant_count::() as u32); + + set.insert_range(.., core::mem::variant_count::()); + + let mut index = 0; + while index < HAS_ANCESTOR_COUNT { + set.remove(HAS_ANCESTORS[index]); + index += 1; + } + + Self(set) + }; + + #[expect(clippy::cast_possible_truncation)] + #[must_use] + pub const fn new_empty() -> Self { + Self(FiniteBitSet::new_empty( + core::mem::variant_count::() as u32, + )) + } + + /// Inserts this path into `bitset` with composite swallowing. 
+ /// + /// If an ancestor composite is already present, the insertion is a no-op (the ancestor + /// already implies this path). If this path is a composite, any children already in the + /// set are removed (the composite subsumes them). + pub(crate) fn insert(&mut self, path: EntityPath) { + for &ancestor in path.ancestors() { + if self.0.contains(ancestor) { + return; + } + } + + self.0.insert(path); + + for &child in path.children() { + self.0.remove(child); + } + } + + fn normalize(&mut self) { + for path in &self.0 { + for &ancestor in path.ancestors() { + if self.0.contains(ancestor) { + self.0.remove(path); + } + } + } + } + + #[inline] + pub(crate) const fn insert_all(&mut self) { + *self = Self::TOP; + } +} + +impl HasTop for TraversalLattice { + fn top(&self) -> EntityPathBitSet { + debug_assert_matches!(self.vertex(), VertexType::Entity); + EntityPathBitSet::TOP + } + + fn is_top(&self, value: &EntityPathBitSet) -> bool { + debug_assert_matches!(self.vertex(), VertexType::Entity); + *value == EntityPathBitSet::TOP + } +} + +impl HasBottom for TraversalLattice { + fn bottom(&self) -> EntityPathBitSet { + debug_assert_matches!(self.vertex(), VertexType::Entity); + EntityPathBitSet::BOTTOM + } + + fn is_bottom(&self, value: &EntityPathBitSet) -> bool { + debug_assert_matches!(self.vertex(), VertexType::Entity); + *value == EntityPathBitSet::BOTTOM + } +} + +impl JoinSemiLattice for TraversalLattice { + fn join(&self, lhs: &mut EntityPathBitSet, rhs: &EntityPathBitSet) -> bool { + debug_assert_matches!(self.vertex(), VertexType::Entity); + + let mut new = *lhs; + + new.0.union(&rhs.0); + new.normalize(); + + let has_changed = new != *lhs; + *lhs = new; + has_changed + } + + fn join_owned(&self, mut lhs: EntityPathBitSet, rhs: &EntityPathBitSet) -> EntityPathBitSet + where + EntityPathBitSet: Sized, + { + debug_assert_matches!(self.vertex(), VertexType::Entity); + + lhs.0.union(&rhs.0); + lhs.normalize(); + + lhs + } +} + +impl const core::ops::Deref for 
EntityPathBitSet { + type Target = FiniteBitSet; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[inline] +fn project(projections: &[Projection<'_>], index: &mut usize) -> Option { + let projection = projections.get(*index).and_then(|projection| { + if let ProjectionKind::FieldByName(name) = projection.kind { + name.as_constant() + } else { + None + } + }); + + if projection.is_some() { + *index += 1; + } + + projection +} + +/// Resolves an entity field path to an [`EntityPath`]. +/// +/// Walks a sequence of field name projections through the entity schema and returns the resolved +/// path, or `None` if the path doesn't map to any known storage location (including synthesized +/// fields like `link_data.*.draft_id`). +#[expect(clippy::match_same_arms, clippy::allow_attributes)] +fn resolve(projections: &[Projection<'_>]) -> Option<(EntityPath, usize)> { + #[allow(clippy::enum_glob_use, reason = "clarity")] + use EntityPath::*; + + let mut index = 0; + + macro_rules! next { + () => { + project(projections, &mut index) + }; + + (else $cond:expr) => {{ + if index >= projections.len() { + return Some(($cond, index)); + } + + next!()? + }}; + } + + let path = match next!()? { + // entity_editions.properties (JSONB) + sym!(properties) => Properties, + sym!(encodings) => match next!()? { + sym!(vectors) => Vectors, + _ => return None, + }, + sym!(metadata) => match next!()? 
{ + sym!(record_id) => match next!(else RecordId) { + sym!(entity_id) => match next!(else EntityId) { + sym!(web_id) => WebId, + sym!(entity_uuid) => EntityUuid, + sym!(draft_id) => DraftId, + _ => return None, + }, + sym!(edition_id) => EditionId, + _ => return None, + }, + sym!(temporal_versioning) => match next!(else TemporalVersioning) { + sym!(decision_time) => DecisionTime, + sym!(transaction_time) => TransactionTime, + _ => return None, + }, + sym!(entity_type_ids) => EntityTypeIds, + sym!(archived) => Archived, + sym!(confidence) => Confidence, + sym!(provenance) => match next!()? { + sym!(inferred) => ProvenanceInferred, + sym!(edition) => ProvenanceEdition, + _ => return None, + }, + sym!(properties) => PropertyMetadata, + _ => return None, + }, + sym!(link_data) => match next!()? { + sym!(left_entity_id) => match next!()? { + sym!(web_id) => LeftEntityWebId, + sym!(entity_uuid) => LeftEntityUuid, + // draft_id is synthesized (always None), not stored + sym!(draft_id) => return None, + _ => return None, + }, + sym!(right_entity_id) => match next!()? 
{ + sym!(web_id) => RightEntityWebId, + sym!(entity_uuid) => RightEntityUuid, + // draft_id is synthesized (always None), not stored + sym!(draft_id) => return None, + _ => return None, + }, + sym!(left_entity_confidence) => LeftEntityConfidence, + sym!(right_entity_confidence) => RightEntityConfidence, + sym!(left_entity_provenance) => LeftEntityProvenance, + sym!(right_entity_provenance) => RightEntityProvenance, + _ => return None, + }, + + _ => return None, + }; + + // JSONB paths allow arbitrary sub-paths; all others must be fully resolved + if !path.is_jsonb() && projections.get(index).is_some() { + return None; + } + + Some((path, index)) +} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs new file mode 100644 index 00000000000..809f1b57d53 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -0,0 +1,228 @@ +//! Traversal path resolution, storage mapping, and transfer cost estimation. +//! +//! Maps property access projections on graph vertices to their backend storage locations. +//! Each vertex type has its own path enum ([`EntityPath`] for entities) that resolves +//! dot-notation field accesses to specific columns, JSONB paths, or embedding stores. +//! +//! Each path carries its origin backend (which execution targets serve it natively) and an +//! estimated transfer size used by the cost analysis to charge a transfer premium on targets +//! that are not the natural origin for a path. +//! +//! [`TraversalPathBitSet`] and [`TraversalPath`] wrap the per-vertex-type path types so that +//! the execution pipeline can handle different vertex types uniformly. 
+ +mod access; +mod entity; + +mod analysis; +#[cfg(test)] +mod tests; + +pub(crate) use analysis::{TraversalAnalysisVisitor, TraversalResult}; + +pub use self::entity::{EntityPath, EntityPathBitSet}; +pub(crate) use self::{access::Access, entity::TransferCostConfig}; +use super::{VertexType, target::TargetBitSet}; +use crate::pass::analysis::{ + dataflow::lattice::{HasBottom, HasTop, JoinSemiLattice}, + size_estimation::InformationRange, +}; + +/// Lattice structure for traversal path bitsets. +/// +/// Carries the [`VertexType`] so that [`bottom`](HasBottom::bottom) and [`top`](HasTop::top) +/// construct the correct variant of [`TraversalPathBitSet`]. +#[derive(Debug, Copy, Clone)] +pub struct TraversalLattice { + vertex: VertexType, +} + +impl TraversalLattice { + #[must_use] + pub const fn new(vertex: VertexType) -> Self { + Self { vertex } + } + + #[must_use] + pub const fn vertex(self) -> VertexType { + self.vertex + } +} + +/// Set of resolved traversal paths for a single vertex type. +/// +/// Each variant wraps the bitset for a specific vertex type. A [`GraphReadFilter`] body operates +/// over exactly one vertex type, so all traversal locals within a body share the same variant. +/// +/// An all-bits-set bitset indicates full vertex access is required (the path could not be +/// resolved to a specific field). +/// +/// [`GraphReadFilter`]: crate::body::Source::GraphReadFilter +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum TraversalPathBitSet { + /// Paths into the entity schema. + Entity(EntityPathBitSet), +} + +#[expect( + clippy::unnecessary_wraps, + reason = "currently only entities are supported, this will change in the future" +)] +impl TraversalPathBitSet { + /// Creates an empty bitset for the given vertex type. 
+ #[must_use] + pub const fn empty(vertex: VertexType) -> Self { + match vertex { + VertexType::Entity => Self::Entity(EntityPathBitSet::new_empty()), + } + } + + /// Returns the inner [`EntityPathBitSet`] if this is the [`Entity`](Self::Entity) variant. + #[inline] + #[must_use] + pub const fn as_entity(&self) -> Option<&EntityPathBitSet> { + match self { + Self::Entity(bitset) => Some(bitset), + } + } + + /// Returns a mutable reference to the inner [`EntityPathBitSet`] if this is the + /// [`Entity`](Self::Entity) variant. + #[inline] + #[must_use] + pub const fn as_entity_mut(&mut self) -> Option<&mut EntityPathBitSet> { + match self { + Self::Entity(bitset) => Some(bitset), + } + } + + /// Returns `true` if no paths are set. + #[inline] + #[must_use] + pub const fn is_empty(self) -> bool { + match self { + Self::Entity(bitset) => bitset.is_empty(), + } + } + + /// Returns the number of paths set. + #[inline] + #[must_use] + pub fn len(self) -> usize { + match self { + Self::Entity(bitset) => bitset.len(), + } + } + + /// Inserts a resolved path with composite swallowing. + /// + /// If an ancestor composite is already present in the set, the insertion is a no-op. + /// If the path is a composite, any children already in the set are removed. + pub fn insert(&mut self, path: TraversalPath) { + match (self, path) { + (Self::Entity(bitset), TraversalPath::Entity(path)) => bitset.insert(path), + } + } + + /// Inserts all possible paths into the set. 
#[inline]
pub const fn insert_all(&mut self) {
    match self {
        Self::Entity(bitset) => bitset.insert_all(),
    }
}

/// Iterates over the paths in the set, wrapped as [`TraversalPath`]s.
#[must_use]
#[inline]
pub fn iter(&self) -> impl ExactSizeIterator<Item = TraversalPath> {
    self.into_iter()
}
}

impl IntoIterator for &TraversalPathBitSet {
    type Item = TraversalPath;

    type IntoIter = impl ExactSizeIterator<Item = TraversalPath>;

    fn into_iter(self) -> Self::IntoIter {
        match self {
            TraversalPathBitSet::Entity(bitset) => bitset.into_iter().map(TraversalPath::Entity),
        }
    }
}

// The impls below dispatch on the variant and delegate to the per-vertex lattice impls.

impl HasBottom<TraversalPathBitSet> for TraversalLattice {
    fn bottom(&self) -> TraversalPathBitSet {
        match self.vertex {
            VertexType::Entity => TraversalPathBitSet::Entity(self.bottom()),
        }
    }

    fn is_bottom(&self, value: &TraversalPathBitSet) -> bool {
        match value {
            TraversalPathBitSet::Entity(bitset) => self.is_bottom(bitset),
        }
    }
}

impl HasTop<TraversalPathBitSet> for TraversalLattice {
    fn top(&self) -> TraversalPathBitSet {
        match self.vertex {
            VertexType::Entity => TraversalPathBitSet::Entity(self.top()),
        }
    }

    fn is_top(&self, value: &TraversalPathBitSet) -> bool {
        match value {
            TraversalPathBitSet::Entity(bitset) => self.is_top(bitset),
        }
    }
}

impl JoinSemiLattice<TraversalPathBitSet> for TraversalLattice {
    fn join(&self, lhs: &mut TraversalPathBitSet, rhs: &TraversalPathBitSet) -> bool {
        match (lhs, rhs) {
            (TraversalPathBitSet::Entity(lhs), TraversalPathBitSet::Entity(rhs)) => {
                self.join(lhs, rhs)
            }
        }
    }

    fn join_owned(
        &self,
        lhs: TraversalPathBitSet,
        rhs: &TraversalPathBitSet,
    ) -> TraversalPathBitSet {
        match (lhs, rhs) {
            (TraversalPathBitSet::Entity(lhs), TraversalPathBitSet::Entity(rhs)) => {
                TraversalPathBitSet::Entity(self.join_owned(lhs, rhs))
            }
        }
    }
}

/// A single resolved traversal path for a specific vertex type.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TraversalPath {
    /// A path into the entity schema.
+ Entity(EntityPath), +} + +impl TraversalPath { + /// Returns the set of execution targets that natively serve this path. + #[inline] + #[must_use] + pub const fn origin(self) -> TargetBitSet { + match self { + Self::Entity(path) => path.origin(), + } + } + + /// Returns the estimated transfer size for this path. + #[inline] + pub(crate) fn estimate_size(self, config: &TransferCostConfig) -> InformationRange { + match self { + Self::Entity(path) => path.estimate_size(config), + } + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs new file mode 100644 index 00000000000..07217d8f6d2 --- /dev/null +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/tests.rs @@ -0,0 +1,534 @@ +//! Unit tests for entity projection path lookup, composite swallowing, transfer sizing, +//! and traversal analysis. + +use core::ops::Bound; + +use hashql_core::{symbol::sym, r#type::TypeId}; + +use crate::{ + body::{ + local::Local, + place::{Projection, ProjectionKind}, + }, + pass::{ + analysis::{ + dataflow::lattice::{ + HasTop as _, JoinSemiLattice as _, + laws::{assert_bounded_join_semilattice, assert_is_top_consistent}, + }, + size_estimation::{InformationRange, InformationUnit}, + }, + execution::{ + VertexType, + traversal::{ + EntityPath, EntityPathBitSet, TransferCostConfig, TraversalLattice, + TraversalPathBitSet, + }, + }, + }, +}; + +/// Helper to create a `FieldByName` projection. +fn proj(name: impl Into>) -> Projection<'static> { + Projection { + kind: ProjectionKind::FieldByName(name.into()), + r#type: TypeId::PLACEHOLDER, + } +} + +/// `link_data.left_entity_id.draft_id` → `None` (synthesized, not stored). +#[test] +fn link_data_synthesized_is_none() { + let projections = &[ + proj(sym::link_data), + proj(sym::left_entity_id), + proj(sym::draft_id), + ]; + assert_eq!(EntityPath::resolve(projections), None); +} + +/// Invalid path like `[.unknown]` → `None`. 
+#[test] +fn unknown_path_returns_none() { + let projections = &[proj(sym::unknown)]; + assert_eq!(EntityPath::resolve(projections), None); +} + +/// The returned index reflects how many projections were consumed during resolution. +#[test] +fn index_counts_consumed_projections() { + // Single-segment: `.properties` consumes 1 + let projections = &[proj(sym::properties)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::Properties, 1)) + ); + + // Two segments: `.encodings.vectors` consumes 2 + let projections = &[proj(sym::encodings), proj(sym::vectors)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::Vectors, 2)) + ); + + // Three segments: `.metadata.provenance.inferred` consumes 3 + let projections = &[ + proj(sym::metadata), + proj(sym::provenance), + proj(sym::inferred), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::ProvenanceInferred, 3)) + ); + + // Four segments: `.metadata.record_id.entity_id.web_id` consumes 4 + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + proj(sym::web_id), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::WebId, 4)) + ); +} + +/// Composite paths that stop early via `next!(else ...)` return the correct index. 
+#[test] +fn index_for_composite_early_exit() { + // `.metadata.record_id` with no further projections → RecordId at index 2 + let projections = &[proj(sym::metadata), proj(sym::record_id)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::RecordId, 2)) + ); + + // `.metadata.record_id.entity_id` without a leaf → EntityId at index 3 + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::EntityId, 3)) + ); + + // `.metadata.temporal_versioning` without a leaf → TemporalVersioning at index 2 + let projections = &[proj(sym::metadata), proj(sym::temporal_versioning)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::TemporalVersioning, 2)) + ); +} + +/// A non-FieldByName projection (e.g. `Index`) after a composite node must return `None`, not +/// the composite path. Previously the `next!(else ...)` macro conflated "no more projections" with +/// "non-FieldByName projection", bypassing the exhaustion guard. 
+#[test] +fn non_field_projection_after_composite_returns_none() { + let index_projection = Projection { + kind: ProjectionKind::Index(Local::new(0)), + r#type: TypeId::PLACEHOLDER, + }; + + // `.metadata.record_id` followed by an index projection: not a valid entity path + let projections = &[proj(sym::metadata), proj(sym::record_id), index_projection]; + assert_eq!(EntityPath::resolve(projections), None); + + // `.metadata.record_id.entity_id` followed by an index projection + let projections = &[ + proj(sym::metadata), + proj(sym::record_id), + proj(sym::entity_id), + index_projection, + ]; + assert_eq!(EntityPath::resolve(projections), None); + + // `.metadata.temporal_versioning` followed by an index projection + let projections = &[ + proj(sym::metadata), + proj(sym::temporal_versioning), + index_projection, + ]; + assert_eq!(EntityPath::resolve(projections), None); +} + +/// JSONB paths stop consuming at the storage boundary; sub-path projections are excess. +#[test] +fn jsonb_index_excludes_subpath() { + // `.properties.foo.bar` → Properties at index 1, leaving 2 excess projections + let projections = &[proj(sym::properties), proj(sym::foo), proj(sym::bar)]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::Properties, 1)) + ); + + // `.metadata.provenance.inferred.foo.bar` → ProvenanceInferred at index 3 + let projections = &[ + proj(sym::metadata), + proj(sym::provenance), + proj(sym::inferred), + proj(sym::foo), + proj(sym::bar), + ]; + assert_eq!( + EntityPath::resolve(projections), + Some((EntityPath::ProvenanceInferred, 3)) + ); +} + +// --- Composite swallowing tests --- + +fn empty_bitset() -> EntityPathBitSet { + EntityPathBitSet::new_empty() +} + +/// Inserting a leaf path into an empty set adds that path. 
+#[test] +fn insert_leaf_into_empty() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + + assert!(bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::RecordId)); +} + +/// Inserting a composite removes any children already in the set. +#[test] +fn composite_swallows_children() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityUuid); + bitset.insert(EntityPath::DraftId); + + assert!(bitset.contains(EntityPath::WebId)); + assert!(bitset.contains(EntityPath::EntityUuid)); + assert!(bitset.contains(EntityPath::DraftId)); + + bitset.insert(EntityPath::EntityId); + + assert!(bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityUuid)); + assert!(!bitset.contains(EntityPath::DraftId)); +} + +/// Inserting a child when its ancestor composite is already present is a no-op. +#[test] +fn child_suppressed_by_ancestor() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::RecordId); + + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityId); + bitset.insert(EntityPath::EditionId); + + assert!(bitset.contains(EntityPath::RecordId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::EditionId)); +} + +/// Inserting a top-level composite swallows the entire subtree. 
+#[test] +fn record_id_swallows_entire_subtree() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityUuid); + bitset.insert(EntityPath::EditionId); + + bitset.insert(EntityPath::RecordId); + + assert!(bitset.contains(EntityPath::RecordId)); + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityUuid)); + assert!(!bitset.contains(EntityPath::DraftId)); + assert!(!bitset.contains(EntityPath::EditionId)); +} + +/// `TemporalVersioning` swallows `DecisionTime` and `TransactionTime`. +#[test] +fn temporal_versioning_swallows_children() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::DecisionTime); + bitset.insert(EntityPath::TransactionTime); + + bitset.insert(EntityPath::TemporalVersioning); + + assert!(bitset.contains(EntityPath::TemporalVersioning)); + assert!(!bitset.contains(EntityPath::DecisionTime)); + assert!(!bitset.contains(EntityPath::TransactionTime)); +} + +/// Non-composite paths are unaffected by each other. +#[test] +fn independent_leaves_coexist() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::Properties); + bitset.insert(EntityPath::Archived); + bitset.insert(EntityPath::Vectors); + + assert!(bitset.contains(EntityPath::Properties)); + assert!(bitset.contains(EntityPath::Archived)); + assert!(bitset.contains(EntityPath::Vectors)); +} + +/// Inserting `EntityId` into a set with `WebId` swallows `WebId`, but unrelated paths remain. 
+#[test] +fn swallow_selective() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::Properties); + bitset.insert(EntityPath::DecisionTime); + + bitset.insert(EntityPath::EntityId); + + assert!(bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + // Unrelated paths untouched + assert!(bitset.contains(EntityPath::Properties)); + assert!(bitset.contains(EntityPath::DecisionTime)); +} + +// --- insert_all tests --- + +/// `insert_all` sets exactly the top-level paths (composites replace their children). +#[test] +fn insert_all_sets_top_level_paths() { + let mut bitset = empty_bitset(); + bitset.insert_all(); + + // Top-level and childless paths are present + assert!(bitset.contains(EntityPath::Properties)); + assert!(bitset.contains(EntityPath::Vectors)); + assert!(bitset.contains(EntityPath::RecordId)); + assert!(bitset.contains(EntityPath::TemporalVersioning)); + assert!(bitset.contains(EntityPath::EntityTypeIds)); + assert!(bitset.contains(EntityPath::Archived)); + assert!(bitset.contains(EntityPath::Confidence)); + assert!(bitset.contains(EntityPath::ProvenanceInferred)); + assert!(bitset.contains(EntityPath::ProvenanceEdition)); + assert!(bitset.contains(EntityPath::PropertyMetadata)); + assert!(bitset.contains(EntityPath::LeftEntityWebId)); + assert!(bitset.contains(EntityPath::RightEntityWebId)); + + // Children subsumed by composites are absent + assert!(!bitset.contains(EntityPath::EntityId)); + assert!(!bitset.contains(EntityPath::WebId)); + assert!(!bitset.contains(EntityPath::EntityUuid)); + assert!(!bitset.contains(EntityPath::DraftId)); + assert!(!bitset.contains(EntityPath::EditionId)); + assert!(!bitset.contains(EntityPath::DecisionTime)); + assert!(!bitset.contains(EntityPath::TransactionTime)); +} + +/// `insert_all` produces the correct count: total variants minus children with ancestors. 
+#[test] +fn insert_all_len() { + let mut bitset = empty_bitset(); + bitset.insert_all(); + + // 25 variants - 7 children (EntityId, WebId, EntityUuid, DraftId, EditionId, + // DecisionTime, TransactionTime) = 18 + assert_eq!(bitset.len(), 18); +} + +/// An empty bitset has len 0. +#[test] +fn empty_bitset_len() { + let bitset = empty_bitset(); + assert_eq!(bitset.len(), 0); + assert!(bitset.is_empty()); +} + +/// `len` tracks individual inserts correctly. +#[test] +fn len_after_inserts() { + let mut bitset = empty_bitset(); + assert_eq!(bitset.len(), 0); + + bitset.insert(EntityPath::Properties); + assert_eq!(bitset.len(), 1); + + bitset.insert(EntityPath::Archived); + assert_eq!(bitset.len(), 2); + + // Duplicate insert doesn't change count + bitset.insert(EntityPath::Properties); + assert_eq!(bitset.len(), 2); +} + +/// Composite swallowing decreases `len` when children are removed. +#[test] +fn len_decreases_on_swallow() { + let mut bitset = empty_bitset(); + bitset.insert(EntityPath::WebId); + bitset.insert(EntityPath::EntityUuid); + bitset.insert(EntityPath::DraftId); + assert_eq!(bitset.len(), 3); + + // EntityId swallows all three children + bitset.insert(EntityPath::EntityId); + assert_eq!(bitset.len(), 1); +} + +// --- Lattice law tests --- + +/// Builds an `EntityPathBitSet` from a list of paths using `insert` (swallowing). +fn bitset_of(paths: &[EntityPath]) -> EntityPathBitSet { + let mut bitset = empty_bitset(); + for &path in paths { + bitset.insert(path); + } + bitset +} + +/// `EntityPathBitSet` satisfies `BoundedJoinSemiLattice` laws. +/// +/// Uses values that cross the composite hierarchy: leaves from different subtrees, +/// a mid-level composite, and a top-level composite with a sibling leaf. 
+#[test] +fn entity_path_bitset_bounded_join_semilattice() { + let lattice = TraversalLattice::new(VertexType::Entity); + + let set_a = bitset_of(&[EntityPath::WebId, EntityPath::DecisionTime]); + let set_b = bitset_of(&[EntityPath::EntityId, EntityPath::Properties]); + let set_c = bitset_of(&[EntityPath::RecordId, EntityPath::TransactionTime]); + + assert_bounded_join_semilattice(&lattice, set_a, set_b, set_c); +} + +/// `is_top(top())` is consistent for `EntityPathBitSet`. +#[test] +fn entity_path_bitset_top_consistent() { + let lattice = TraversalLattice::new(VertexType::Entity); + assert_is_top_consistent::<_, EntityPathBitSet>(&lattice); +} + +/// `join(top, a) = top` for `EntityPathBitSet`. +#[test] +fn entity_path_bitset_top_absorbs_join() { + let lattice = TraversalLattice::new(VertexType::Entity); + let top: EntityPathBitSet = lattice.top(); + + for path in EntityPath::all() { + let singleton = bitset_of(&[path]); + let result = lattice.join_owned(top, &singleton); + assert_eq!(result, top); + } +} + +/// `TraversalPathBitSet` satisfies `BoundedJoinSemiLattice` laws. +#[test] +fn traversal_path_bitset_bounded_join_semilattice() { + let lattice = TraversalLattice::new(VertexType::Entity); + + let set_a = + TraversalPathBitSet::Entity(bitset_of(&[EntityPath::WebId, EntityPath::DecisionTime])); + let set_b = + TraversalPathBitSet::Entity(bitset_of(&[EntityPath::EntityId, EntityPath::Properties])); + let set_c = TraversalPathBitSet::Entity(bitset_of(&[ + EntityPath::RecordId, + EntityPath::TransactionTime, + ])); + + assert_bounded_join_semilattice(&lattice, set_a, set_b, set_c); +} + +/// `is_top(top())` is consistent for `TraversalPathBitSet`. +#[test] +fn traversal_path_bitset_top_consistent() { + let lattice = TraversalLattice::new(VertexType::Entity); + assert_is_top_consistent::<_, TraversalPathBitSet>(&lattice); +} + +/// `join(top, a) = top` for `TraversalPathBitSet`. 
+#[test] +fn traversal_path_bitset_top_absorbs_join() { + let lattice = TraversalLattice::new(VertexType::Entity); + let top: TraversalPathBitSet = lattice.top(); + + for path in EntityPath::all() { + let singleton = TraversalPathBitSet::Entity(bitset_of(&[path])); + let result = lattice.join_owned(top, &singleton); + assert_eq!(result, top); + } +} + +/// `join` normalizes ancestor+descendant pairs produced by raw union. +/// +/// When one side has a leaf and the other has its ancestor composite, the union +/// contains both. `normalize` must remove the descendant since the ancestor covers it. +#[test] +fn join_normalizes_ancestor_descendant_pairs() { + let lattice = TraversalLattice::new(VertexType::Entity); + + let mut lhs = bitset_of(&[EntityPath::WebId, EntityPath::Properties]); + let rhs = bitset_of(&[EntityPath::RecordId]); + + lattice.join(&mut lhs, &rhs); + + assert!(lhs.contains(EntityPath::RecordId)); + assert!(lhs.contains(EntityPath::Properties)); + assert!(!lhs.contains(EntityPath::WebId)); + assert_eq!(lhs.len(), 2); +} + +// --- Transfer size tests --- + +/// Each composite's `transfer_size` equals the sum of its immediate children's `transfer_sizes`. +/// +/// Immediate children are identified automatically via `ancestors()`: a path is an immediate +/// child of composite C if C is its nearest ancestor (`ancestors()[0] == C`). This catches +/// drift if a new child is added to the hierarchy without updating the composite constant. 
+#[test] +fn composite_transfer_size_matches_children() { + let config = TransferCostConfig::new(InformationRange::zero()); + + for composite in EntityPath::all() { + let mut expected = InformationRange::zero(); + let mut has_children = false; + + for path in EntityPath::all() { + if path.ancestors().first() == Some(&composite) { + expected += path.estimate_size(&config); + has_children = true; + } + } + + if has_children { + assert_eq!( + composite.estimate_size(&config), + expected, + "{composite:?} transfer_size doesn't match sum of immediate children" + ); + } + } +} + +/// `ProvenanceInferred` has a static `transfer_size` independent of config. +/// +/// The type is a fixed structure (3 required scalars + 2 optional timestamps), so its +/// size is a constant `3..=5` regardless of `TransferCostConfig` values. +#[test] +fn inferred_provenance_transfer_size_is_static() { + let small_config = TransferCostConfig::new(InformationRange::zero()); + let large_config = TransferCostConfig::new(InformationRange::value(InformationUnit::new(1000))); + + let small = EntityPath::ProvenanceInferred.estimate_size(&small_config); + let large = EntityPath::ProvenanceInferred.estimate_size(&large_config); + + assert_eq!(small, large); + assert_eq!( + small, + InformationRange::new( + InformationUnit::new(3), + Bound::Included(InformationUnit::new(5)) + ) + ); +} diff --git a/libs/@local/hashql/mir/src/pass/transform/mod.rs b/libs/@local/hashql/mir/src/pass/transform/mod.rs index cff6fb9c773..c6819a45d9f 100644 --- a/libs/@local/hashql/mir/src/pass/transform/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/mod.rs @@ -12,7 +12,6 @@ mod inst_simplify; mod post_inline; mod pre_inline; mod ssa_repair; -mod traversal_extraction; pub use self::{ administrative_reduction::AdministrativeReduction, @@ -25,8 +24,7 @@ pub use self::{ forward_substitution::ForwardSubstitution, inline::{Inline, InlineConfig, InlineCostEstimationConfig, InlineHeuristicsConfig}, 
inst_simplify::InstSimplify, - post_inline::{PostInline, PostInlineResidual}, + post_inline::PostInline, pre_inline::PreInline, ssa_repair::SsaRepair, - traversal_extraction::{TraversalExtraction, Traversals}, }; diff --git a/libs/@local/hashql/mir/src/pass/transform/post_inline.rs b/libs/@local/hashql/mir/src/pass/transform/post_inline.rs index bcdad82c183..5eb36ebb3c2 100644 --- a/libs/@local/hashql/mir/src/pass/transform/post_inline.rs +++ b/libs/@local/hashql/mir/src/pass/transform/post_inline.rs @@ -1,26 +1,19 @@ //! Post-inlining optimization pass. //! -//! Runs [`Canonicalization`] to clean up redundancy from inlining, then [`TraversalExtraction`] -//! to materialize vertex projections in graph read filter bodies. -//! -//! After running, call [`PostInline::finish`] to retrieve the [`Traversals`] maps. +//! Runs [`Canonicalization`] to clean up redundancy from inlining. use core::alloc::Allocator; -use hashql_core::heap::{BumpAllocator, Heap}; +use hashql_core::heap::BumpAllocator; -use super::{Canonicalization, CanonicalizationConfig, TraversalExtraction, Traversals}; +use super::{Canonicalization, CanonicalizationConfig}; use crate::{ body::Body, context::MirContext, - def::{DefIdSlice, DefIdVec}, - pass::{Changed, GlobalTransformPass, GlobalTransformState, TransformPass as _}, + def::DefIdSlice, + pass::{Changed, GlobalTransformPass, GlobalTransformState}, }; -pub struct PostInlineResidual<'heap> { - pub traversals: DefIdVec>, &'heap Heap>, -} - /// Post-inlining optimization driver. /// /// A thin wrapper around [`Canonicalization`] configured for post-inlining optimization. By running @@ -34,63 +27,32 @@ pub struct PostInlineResidual<'heap> { /// more optimization opportunities that may require additional passes to fully resolve. /// /// See [`Canonicalization`] for details on the pass ordering and implementation. 
-pub struct PostInline<'heap, A: Allocator> { +pub struct PostInline { canonicalization: Canonicalization, - - traversals: DefIdVec>, &'heap Heap>, } -impl<'heap, A: BumpAllocator> PostInline<'heap, A> { +impl PostInline { /// Creates a new post-inlining pass with the given allocator. /// /// The allocator is used for temporary data structures within sub-passes and is reset /// between pass invocations. - pub const fn new_in(heap: &'heap Heap, alloc: A) -> Self { + pub const fn new_in(alloc: A) -> Self { Self { canonicalization: Canonicalization::new_in( CanonicalizationConfig { max_iterations: 16 }, alloc, ), - traversals: DefIdVec::new_in(heap), - } - } - - /// Consumes the pass and returns accumulated results. - /// - /// The returned [`PostInlineResidual`] contains traversal maps for each graph read filter - /// body processed during the pass run. - pub fn finish(self) -> PostInlineResidual<'heap> { - PostInlineResidual { - traversals: self.traversals, } } } -impl<'env, 'heap, A: BumpAllocator> GlobalTransformPass<'env, 'heap> for PostInline<'heap, A> { +impl<'env, 'heap, A: BumpAllocator> GlobalTransformPass<'env, 'heap> for PostInline { fn run( &mut self, context: &mut MirContext<'env, 'heap>, state: &mut GlobalTransformState<'_>, bodies: &mut DefIdSlice>, ) -> Changed { - let mut changed = Changed::No; - changed |= self.canonicalization.run(context, state, bodies); - - self.canonicalization.allocator_mut().scoped(|alloc| { - let mut extraction = TraversalExtraction::new_in(alloc); - - for (id, body) in bodies.iter_enumerated_mut() { - let changed_body = extraction.run(context, body); - - if let Some(traversal) = extraction.take_traversals() { - self.traversals.insert(id, traversal); - } - - state.mark(id, changed_body); - changed |= changed_body; - } - }); - - changed + self.canonicalization.run(context, state, bodies) } } diff --git a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/mod.rs 
b/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/mod.rs deleted file mode 100644 index ef6020770a4..00000000000 --- a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/mod.rs +++ /dev/null @@ -1,401 +0,0 @@ -//! Traversal extraction transformation pass. -//! -//! This pass extracts projections from a target local into separate bindings, creating explicit -//! intermediate assignments. It is the inverse of projection forwarding — rather than inlining -//! projections, it materializes them as distinct locals. -//! -//! # Pipeline Integration -//! -//! Traversal extraction runs as the final phase of [`super::PostInline`], after -//! [`super::Canonicalization`] has cleaned up redundancy from inlining: -//! -//! ```text -//! Post-Inline -//! ├── Canonicalization (fixpoint loop) -//! └── TraversalExtraction (single pass) -//! ``` -//! -//! The pass only operates on [`Source::GraphReadFilter`] bodies; other body types are skipped -//! with [`Changed::No`]. This placement ensures canonicalization has already simplified the MIR -//! before extraction, minimizing the number of projections that need materialization. -//! -//! # Purpose -//! -//! The primary use case is preparing graph read filters for entity traversal. When reading from -//! the graph, the filter body receives a vertex as its second argument (`Local::new(1)`). -//! Projections like `vertex.2.1` (accessing nested properties) need to be extracted so the graph -//! executor can track which paths through the vertex are actually accessed. -//! -//! # Algorithm -//! -//! The pass operates by: -//! -//! 1. Walking all operands in the MIR body -//! 2. For each place operand projecting from the target local, creating a new local and load -//! 3. Replacing the original operand with a reference to the new local -//! 4. Recording the projection path in a [`Traversals`] map for later consumption -//! -//! 
Deduplication is scoped to the current basic block — if the same projection appears multiple -//! times within a block, it reuses the existing extracted local rather than creating duplicates. -//! -//! Pre-existing loads (e.g., `b = a.2.1`) are detected via [`VisitorMut::visit_statement_assign`] -//! and recorded in the traversal map without generating new statements. -//! -//! # Example -//! -//! Before: -//! ```text -//! bb0: -//! _2 = input() -//! _3 = eq(_1.0.1, _2) -//! _4 = eq(_1.0.1, _1.2) -//! return and(_3, _4) -//! ``` -//! -//! After: -//! ```text -//! bb0: -//! _2 = input() -//! _5 = _1.0.1 -//! _3 = eq(_5, _2) -//! _6 = _1.2 -//! _4 = eq(_5, _6) -//! return and(_3, _4) -//! ``` -//! -//! The [`Traversals`] map records `_5 → _1.0.1` and `_6 → _1.2` for the graph executor to use. -#[cfg(test)] -mod tests; - -use core::{alloc::Allocator, convert::Infallible}; - -use hashql_core::{ - heap::Heap, - id::{Id as _, bit_vec::DenseBitSet}, - span::SpanId, -}; - -use crate::{ - body::{ - Body, Source, - basic_block::{BasicBlock, BasicBlockId}, - local::{Local, LocalDecl, LocalVec}, - location::Location, - operand::Operand, - place::Place, - rvalue::RValue, - statement::{Assign, Statement, StatementKind}, - terminator::Terminator, - }, - context::MirContext, - intern::Interner, - pass::{Changed, TransformPass}, - visit::{self, VisitorMut, r#mut::filter}, -}; - -/// Maps extracted locals back to their original projection paths. -/// -/// Produced by [`TraversalExtraction`] and consumed by the graph executor to determine which -/// property paths were accessed on the vertex local. -pub struct Traversals<'heap> { - /// The source local from which projections were extracted (typically the vertex, `_1`). - source: Local, - /// Sparse map from extracted local to its original projection path. 
- derivations: LocalVec>, &'heap Heap>, -} - -impl<'heap> Traversals<'heap> { - pub(crate) fn with_capacity_in(source: Local, capacity: usize, heap: &'heap Heap) -> Self { - Self { - source, - derivations: LocalVec::with_capacity_in(capacity, heap), - } - } - - pub(crate) fn insert(&mut self, local: Local, place: Place<'heap>) { - debug_assert_eq!(place.local, self.source); - - self.derivations.insert(local, place); - } - - /// Returns the original projection path for `local`, if it was extracted from the source. - #[must_use] - #[inline] - pub fn lookup(&self, local: Local) -> Option<&Place<'heap>> { - self.derivations.lookup(local) - } - - /// Returns `true` if `local` is a registered traversal destination. - #[must_use] - pub fn contains(&self, local: Local) -> bool { - self.derivations.contains(local) - } - - /// Returns the source local from which all projections were extracted. - #[must_use] - pub const fn source(&self) -> Local { - self.source - } - - /// Returns a bitset of all locals that are traversal destinations. - #[must_use] - pub fn enabled(&self, body: &Body<'heap>) -> DenseBitSet { - let mut set = DenseBitSet::new_empty(body.local_decls.len()); - - for (local, place) in self.derivations.iter_enumerated() { - if place.is_some() { - set.insert(local); - } - } - - set - } -} - -/// Visitor that extracts projections from a target local into separate bindings. -struct TraversalExtractionVisitor<'env, 'heap, A: Allocator> { - /// The local we're extracting projections from (the vertex). - target: Local, - /// Declaration of the target local, used to derive types for extracted locals. - target_decl: LocalDecl<'heap>, - - /// Span of the current statement/terminator being visited. - current_span: SpanId, - - /// Bound of existing locals before extraction (new locals start from here). - total_locals: Local, - - /// New local declarations to append to the body after visiting. 
- pending_locals: Vec, A>, - /// Index into `pending_locals` marking the start of the current basic block's locals. - /// Used to scope deduplication to the current block. - pending_locals_offset: usize, - /// New load statements to insert before the current statement. - pending_statements: Vec, A>, - - /// Accumulated traversal mappings. - traversals: Traversals<'heap>, - changed: Changed, - interner: &'env Interner<'heap>, -} - -impl<'heap, A: Allocator> VisitorMut<'heap> for TraversalExtractionVisitor<'_, 'heap, A> { - type Filter = filter::Deep; - type Residual = Result; - type Result - = Result - where - T: 'heap; - - fn interner(&self) -> &Interner<'heap> { - self.interner - } - - fn visit_operand(&mut self, _: Location, operand: &mut Operand<'heap>) -> Self::Result<()> { - let Some(place) = operand.as_place() else { - return Ok(()); - }; - - if place.local != self.target { - return Ok(()); - } - - let r#type = place.type_id_unchecked(&self.target_decl); - - // Check if we already extracted this projection in the current basic block. 
- let new_local = if let Some(offset) = - (self.pending_locals_offset..self.pending_locals.len()).find(|&index| { - self.traversals - .lookup(self.total_locals.plus(index)) - .is_some_and(|pending| pending.projections == place.projections) - }) { - self.total_locals.plus(offset) - } else { - let new_local = self.total_locals.plus(self.pending_locals.len()); - self.traversals.insert(new_local, *place); - - self.pending_locals.push(LocalDecl { - span: self.target_decl.span, - r#type, - name: None, - }); - self.pending_statements.push(Statement { - span: self.current_span, - kind: StatementKind::Assign(Assign { - lhs: Place::local(new_local), - rhs: RValue::Load(Operand::Place(*place)), - }), - }); - - new_local - }; - - *operand = Operand::Place(Place::local(new_local)); - - Ok(()) - } - - fn visit_rvalue(&mut self, location: Location, rvalue: &mut RValue<'heap>) -> Self::Result<()> { - // Skip loads — they're recorded by `visit_statement_assign` to avoid double-processing. - if matches!(rvalue, RValue::Load(_)) { - return Ok(()); - } - - visit::r#mut::walk_rvalue(self, location, rvalue) - } - - fn visit_statement_assign( - &mut self, - location: Location, - assign: &mut Assign<'heap>, - ) -> Self::Result<()> { - Ok(()) = visit::r#mut::walk_statement_assign(self, location, assign); - - let Assign { lhs, rhs } = assign; - - if !lhs.projections.is_empty() { - return Ok(()); - } - - let RValue::Load(Operand::Place(rhs)) = rhs else { - return Ok(()); - }; - - if rhs.local != self.target { - return Ok(()); - } - - // Record pre-existing load as a traversal (e.g., `_2 = _1.0.1` already in the MIR). 
- self.traversals.insert(lhs.local, *rhs); - - Ok(()) - } - - fn visit_statement( - &mut self, - location: Location, - statement: &mut Statement<'heap>, - ) -> Self::Result<()> { - self.current_span = statement.span; - - visit::r#mut::walk_statement(self, location, statement) - } - - fn visit_terminator( - &mut self, - location: Location, - terminator: &mut Terminator<'heap>, - ) -> Self::Result<()> { - self.current_span = terminator.span; - visit::r#mut::walk_terminator(self, location, terminator) - } - - fn visit_basic_block( - &mut self, - id: BasicBlockId, - BasicBlock { - params, - statements, - terminator, - }: &mut BasicBlock<'heap>, - ) -> Self::Result<()> { - let mut location = Location { - block: id, - statement_index: 0, - }; - - self.pending_locals_offset = self.pending_locals.len(); - - self.visit_basic_block_params(location, params)?; - - location.statement_index += 1; - - // statement_index is 1-indexed (0 is block params). - while location.statement_index <= statements.len() { - let index = location.statement_index - 1; - - let statement = &mut statements[index]; - Ok(()) = self.visit_statement(location, statement); - - location.statement_index += 1; - if self.pending_statements.is_empty() { - continue; - } - - // Skip over the statements we're about to insert — they're already recorded. - location.statement_index += self.pending_statements.len(); - - statements.splice(index..index, self.pending_statements.drain(..)); - self.changed = Changed::Yes; - } - - self.visit_terminator(location, terminator)?; - - // Insert any remaining statements from terminator operands at the block end. - #[expect(clippy::extend_with_drain, reason = "differing allocator")] - if !self.pending_statements.is_empty() { - statements.extend(self.pending_statements.drain(..)); - self.changed = Changed::Yes; - } - - Ok(()) - } -} - -/// Extracts projections from the vertex local in graph read filter bodies. -/// -/// This pass only runs on [`Source::GraphReadFilter`] bodies. 
After running, call -/// [`take_traversals`](Self::take_traversals) to retrieve the mapping of extracted locals to -/// their original projection paths. -pub struct TraversalExtraction<'heap, A: Allocator> { - alloc: A, - traversals: Option>, -} - -impl<'heap, A: Allocator> TraversalExtraction<'heap, A> { - /// Creates a new pass using `alloc` for temporary allocations. - pub const fn new_in(alloc: A) -> Self { - Self { - alloc, - traversals: None, - } - } - - /// Takes the traversal map from the last pass run. - /// - /// Returns [`None`] if the pass hasn't run or if the body wasn't a graph read filter. - pub const fn take_traversals(&mut self) -> Option> { - self.traversals.take() - } -} - -impl<'env, 'heap, A: Allocator> TransformPass<'env, 'heap> for TraversalExtraction<'heap, A> { - fn run(&mut self, context: &mut MirContext<'env, 'heap>, body: &mut Body<'heap>) -> Changed { - if !matches!(body.source, Source::GraphReadFilter(_)) { - self.traversals = None; - return Changed::No; - } - - debug_assert_eq!(body.args, 2); - let vertex = Local::new(1); - - let mut visitor = TraversalExtractionVisitor { - target: vertex, - target_decl: body.local_decls[vertex], - current_span: SpanId::SYNTHETIC, - total_locals: body.local_decls.bound(), - pending_locals_offset: 0, - pending_locals: Vec::new_in(&self.alloc), - pending_statements: Vec::new_in(&self.alloc), - traversals: Traversals::with_capacity_in(vertex, body.local_decls.len(), context.heap), - changed: Changed::No, - interner: context.interner, - }; - Ok(()) = visitor.visit_body_preserving_cfg(body); - - body.local_decls.extend(visitor.pending_locals); - - self.traversals = Some(visitor.traversals); - visitor.changed - } -} diff --git a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs b/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs deleted file mode 100644 index 0b227dbb1ed..00000000000 --- a/libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs +++ 
/dev/null @@ -1,457 +0,0 @@ -#![expect(clippy::min_ident_chars, reason = "tests")] - -use alloc::alloc::Global; -use std::{io::Write as _, path::PathBuf}; - -use bstr::ByteVec as _; -use hashql_core::{ - heap::Heap, - pretty::Formatter, - r#type::{TypeFormatter, TypeFormatterOptions, environment::Environment}, -}; -use hashql_diagnostics::DiagnosticIssues; -use insta::{Settings, assert_snapshot}; - -use crate::{ - body::Body, - builder::body, - context::MirContext, - def::DefIdSlice, - intern::Interner, - pass::{TransformPass as _, transform::traversal_extraction::TraversalExtraction}, - pretty::TextFormatOptions, -}; - -#[track_caller] -fn assert_traversal_pass<'heap>( - name: &'static str, - body: Body<'heap>, - mut context: MirContext<'_, 'heap>, -) { - let formatter = Formatter::new(context.heap); - let mut formatter = TypeFormatter::new( - &formatter, - context.env, - TypeFormatterOptions::terse().with_qualified_opaque_names(true), - ); - let mut text_format = TextFormatOptions { - writer: Vec::new(), - indent: 4, - sources: (), - types: &mut formatter, - annotations: (), - } - .build(); - - let mut bodies = [body]; - - text_format - .format(DefIdSlice::from_raw(&bodies), &[]) - .expect("should be able to write bodies"); - - let mut pass = TraversalExtraction::new_in(Global); - let changed = pass.run(&mut context, &mut bodies[0]); - - write!( - text_format.writer, - "\n\n{:=^50}\n\n", - format!(" Changed: {changed:?} ") - ) - .expect("infallible"); - - text_format - .format(DefIdSlice::from_raw(&bodies), &[]) - .expect("should be able to write bodies"); - - // Include traversals info if available - if let Some(traversals) = pass.take_traversals() { - write!(text_format.writer, "\n\n{:=^50}\n\n", " Traversals ").expect("infallible"); - - for local in bodies[0].local_decls.ids() { - if let Some(place) = traversals.lookup(local) { - writeln!(text_format.writer, "{local} → {place}").expect("infallible"); - } - } - } - - let dir = 
PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut settings = Settings::clone_current(); - settings.set_snapshot_path(dir.join("tests/ui/pass/traversal_extraction")); - settings.set_prepend_module_to_snapshot(false); - - let _drop = settings.bind_to_scope(); - - let value = text_format.writer.into_string_lossy(); - assert_snapshot!(name, value); -} - -#[test] -fn non_graph_filter_unchanged() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Regular fn body, not GraphReadFilter - should return Changed::No - let body = body!(interner, env; fn@0/2 -> Bool { - decl env: (), vertex: (Int, Int), result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - result = bin.== vertex_0 42; - return result; - } - }); - - assert_traversal_pass( - "non_graph_filter_unchanged", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn no_projections_from_target() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // GraphReadFilter but no projections from vertex (_1) - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), result: Bool; - - bb0() { - result = load true; - return result; - } - }); - - assert_traversal_pass( - "no_projections_from_target", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn single_projection_extracted() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Single projection from vertex.0 should be extracted - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - result = bin.== vertex_0 42; - return result; - } - }); - - assert_traversal_pass( - 
"single_projection_extracted", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn nested_projection_extracted() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Nested projection vertex.0.1 should be extracted - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: ((Int, Int), Int), result: Bool; - @proj vertex_0 = vertex.0: (Int, Int), vertex_0_1 = vertex_0.1: Int; - - bb0() { - result = bin.== vertex_0_1 42; - return result; - } - }); - - assert_traversal_pass( - "nested_projection_extracted", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn duplicate_same_block_deduped() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Same projection used twice in one block - should reuse extracted local - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), r1: Bool, r2: Bool, result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - r1 = bin.== vertex_0 42; - r2 = bin.== vertex_0 100; - result = bin.& r1 r2; - return result; - } - }); - - assert_traversal_pass( - "duplicate_same_block_deduped", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn duplicate_different_blocks() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Same projection in different blocks - should create separate locals (no cross-block dedup) - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), r1: Bool, r2: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - if true then bb1() else bb2(); - }, - bb1() { - r1 
= bin.== vertex_0 42; - goto bb3(r1); - }, - bb2() { - r2 = bin.== vertex_0 100; - goto bb3(r2); - }, - bb3(r1) { - return r1; - } - }); - - assert_traversal_pass( - "duplicate_different_blocks", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn multiple_distinct_projections() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Multiple different projections - each gets its own extracted local - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int, Int), r1: Bool, r2: Bool, r3: Bool, result: Bool; - @proj vertex_0 = vertex.0: Int, vertex_1 = vertex.1: Int, vertex_2 = vertex.2: Int; - - bb0() { - r1 = bin.== vertex_0 1; - r2 = bin.== vertex_1 2; - r3 = bin.== vertex_2 3; - result = bin.& r1 r2; - result = bin.& result r3; - return result; - } - }); - - assert_traversal_pass( - "multiple_distinct_projections", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn pre_existing_load_recorded() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Pre-existing load statement should be recorded without generating new statements - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int), extracted: Int, result: Bool; - @proj vertex_0 = vertex.0: Int; - - bb0() { - extracted = load vertex_0; - result = bin.== extracted 42; - return result; - } - }); - - assert_traversal_pass( - "pre_existing_load_recorded", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn terminator_operand_extraction() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // 
Projection used in terminator should be extracted at block end - let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { - decl env: (), vertex: (Int, Int); - @proj vertex_0 = vertex.0: Int; - - bb0() { - return vertex_0; - } - }); - - assert_traversal_pass( - "terminator_operand_extraction", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn mixed_statement_and_terminator() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Projections in both statements and terminator - let body = body!(interner, env; [graph::read::filter]@0/2 -> Int { - decl env: (), vertex: (Int, Int), cond: Bool; - @proj vertex_0 = vertex.0: Int, vertex_1 = vertex.1: Int; - - bb0() { - cond = bin.== vertex_0 42; - if cond then bb1() else bb2(); - }, - bb1() { - return vertex_0; - }, - bb2() { - return vertex_1; - } - }); - - assert_traversal_pass( - "mixed_statement_and_terminator", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn projection_from_non_target_unchanged() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Projection from env (_0) should not be extracted - only vertex (_1) is target - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (Int, Int), vertex: (Int, Int), result: Bool; - @proj env_0 = env.0: Int; - - bb0() { - result = bin.== env_0 42; - return result; - } - }); - - assert_traversal_pass( - "projection_from_non_target_unchanged", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} - -#[test] -fn traversals_lookup_correct() { - let heap = Heap::new(); - let interner = Interner::new(&heap); - let env = Environment::new(&heap); - - // Verify traversals.lookup() 
returns correct projection paths - let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { - decl env: (), vertex: (Int, Int, Int), r1: Bool, r2: Bool, result: Bool; - @proj vertex_0 = vertex.0: Int, vertex_2 = vertex.2: Int; - - bb0() { - r1 = bin.== vertex_0 1; - r2 = bin.== vertex_2 3; - result = bin.& r1 r2; - return result; - } - }); - - assert_traversal_pass( - "traversals_lookup_correct", - body, - MirContext { - heap: &heap, - env: &env, - interner: &interner, - diagnostics: DiagnosticIssues::new(), - }, - ); -} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_joins_traversal_paths.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_joins_traversal_paths.snap new file mode 100644 index 00000000000..339822d2ecb --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_joins_traversal_paths.snap @@ -0,0 +1,19 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/fusion/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { + let %2: ? + let %3: ? 
+ + bb0(): { + %2 = %1.properties + %3 = %1.metadata.provenance.edition + + return %3 + } +} + +================= Block Targets ================== + +bb0: interpreter diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap index 283ab6f3e9f..7ce1d6b9bbe 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/all_args_excluded.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap index 981a34bc5e7..cf47eefe2f7 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/non_vectors_entity_projection_rejected.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -11,7 +11,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: 
Entity) -> Boolean { return %2 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap index b9e86c3d371..d27a7c6e3c9 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/only_vectors_projection_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { @@ -11,8 +11,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { return %2 } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap index 14d5b00838e..bcfb665f330 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { @@ -29,7 +29,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %11 } } - -=================== 
Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap index eef17c67fe6..44a6d7cc273 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/storage_statements_zero_cost.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { @@ -13,8 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { return %2 } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap index 07b1dd10829..49428694348 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { @@ -31,7 +31,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %12 } } - -=================== Traversals =================== - -Traversals: diff --git 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap index 1568758b168..7fd988b687d 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/non_traversal_unaffected_by_costs.snap @@ -3,23 +3,15 @@ source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { - let %2: Boolean + let %2: ? let %3: Integer - let %4: Integer - let %5: Integer - let %6: Boolean + let %4: Boolean bb0(): { - %2 = %1.metadata.archived // cost: 12 - %3 = 10 // cost: 8 - %4 = 20 // cost: 8 - %5 = %3 + %4 // cost: 8 - %6 = %5 > 15 // cost: 8 + %2 = %1.properties // cost: 8 + %3 = 42 // cost: 8 + %4 = %3 > 10 // cost: 8 - return %6 + return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap index 826771d4f33..0086a9abef8 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/storage_statements_zero_cost.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { @@ -19,7 +19,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: 
Entity) -> Integer { return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap new file mode 100644 index 00000000000..664620f8ba2 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_multiple_paths_cost.snap @@ -0,0 +1,15 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, Boolean) { + let %2: ? + let %3: (?, Boolean) + + bb0(): { + %2 = %1.properties // cost: 8 + %3 = (%1.properties, %1.metadata.archived) // cost: 8 + + return %3 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_backend_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap similarity index 52% rename from libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_backend_cost.snap rename to libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap index 40c059720ef..d51e6ceae90 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_backend_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_single_path_cost.snap @@ -7,13 +7,9 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { let %3: Boolean bb0(): { - %3 = %1.metadata.archived // cost: 12 - %2 = !%3 // cost: 8 + %2 = %1.metadata.archived // cost: 8 + %3 = !%2 // cost: 8 - return %2 + return %3 } } - -=================== Traversals =================== - -Traversals: diff --git 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap new file mode 100644 index 00000000000..c43e55a04bd --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_swallowing_reduces_cost.snap @@ -0,0 +1,13 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> (?, ?) { + let %2: (?, ?) + + bb0(): { + %2 = (%1.metadata.record_id.entity_id.web_id, %1.metadata.record_id) // cost: 8 + + return %2 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_worst_case_multiple_backends.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_worst_case_multiple_backends.snap deleted file mode 100644 index a98062f1ab1..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/traversal_worst_case_multiple_backends.snap +++ /dev/null @@ -1,19 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs -expression: output ---- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { - let %2: Boolean - let %3: ? 
- - bb0(): { - %2 = %1.metadata.archived // cost: 12 - %3 = %1.encodings.vectors // cost: 14 - - return %3 - } -} - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap index e243762048b..2f9c56244ef 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap index 16347008ee5..5fbf3d9d5c2 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap 
b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap index b87811583fe..558972bc77f 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/apply_rejected.snap @@ -17,7 +17,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %5 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap index b0ad1ca6f4f..3bff03d7cf2 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/binary_unary_ops_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -19,7 +19,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %6 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap index c935df92984..4d5a3e85593 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap @@ -37,7 +37,3 @@ fn 
{graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { return %7 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap index 1a61f1151e5..a18c0b1b663 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_column.snap @@ -1,18 +1,9 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { - let %2: Boolean - bb0(): { - %2 = %1.metadata.archived // cost: 4 - - return %2 + return %1.metadata.archived } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap index 84f716750c5..d49c49e03cd 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/entity_projection_jsonb.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? { @@ -11,8 +11,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> ? 
{ return %2 } } - -=================== Traversals =================== - -Traversals: - %2: 4 diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap index 2da09ce226a..c60090e59c5 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_closure_field_rejected_other_accepted.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, (Integer) -> Integer), %1: Ent return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap index ca890d60d98..7d7b5573690 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) - return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap index e3e55a90c61..e43510fd97a 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap +++ 
b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap index 79bea1693f6..21b8df95642 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap index 733eef254e8..b8a99b82d85 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_with_closure_type_rejected.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, (Integer) -> Integer), %1: Ent return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap index c21e2e65bbd..250a2822c48 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_without_closure_accepted.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (Integer, Boolean), %1: Entity) -> Boolean { @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, Boolean), %1: Entity) -> Boole return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap index bff13820ad3..eafdd800313 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_dict_vs_struct_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict, (a: Integer)), % return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap index 0a227af5e40..e2e5d75207f 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_list_vs_tuple_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (List, (Integer, Integer)), %1: return %4 } } - -=================== Traversals 
=================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap index 79277ce302a..d2479908f24 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_place_vs_constant_accepted.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap index f60814f8d25..1cf9b7ba0b5 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_same_type_accepted.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, Integer), %1: Entity) -> Boole return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap index bbe64b63f3b..0e1ed8a9f05 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_unknown_type_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Integer, ?), %1: Entity) -> Boolean { return %4 } } - -=================== 
Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap index b7e3d9457b7..ca5104a18ab 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap index f54367c9c82..ea8c6d79a91 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/graph_read_edge_unsupported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -24,7 +24,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %6 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap index afdd06acdc4..6494ccad9ef 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/input_supported.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { @@ -13,7 +13,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { return %3 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap index 3c69071337c..be924a3f22a 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/ne_dict_vs_struct_rejected.snap @@ -15,7 +15,3 @@ fn {graph::read::filter@4294967040}(%0: (Dict, (a: Integer)), % return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap index aaa1cac6f56..1c479bb79cc 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_edge_propagates.snap @@ -22,7 +22,3 @@ fn {graph::read::filter@4294967040}(%0: (Uuid | String, Integer), %1: Entity) -> return %5 } } - -=================== Traversals 
=================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap index 2ddf52236fa..faab5b071f2 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/serialization_unsafe_statement_no_cost.snap @@ -17,7 +17,3 @@ fn {graph::read::filter@4294967040}(%0: (Uuid | String, Integer), %1: Entity) -> return %5 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap index 1eb59985945..4773d151019 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/storage_statements_zero_cost.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/statement_placement/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { @@ -19,7 +19,3 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { return %4 } } - -=================== Traversals =================== - -Traversals: diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap index 4996a0af2f9..023c596cbeb 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap @@ -2,20 +2,20 @@ source: libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs expression: output --- -fn {closure@4294967040}() -> Integer { - let %0: Integer - let %1: Integer +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { let %2: Integer + let %3: Integer + let %4: Integer bb0(): { - %1 = 10 - %0 = 1 + %3 = 10 + %2 = 1 - switchInt(%0) -> [0: bb1(%2), otherwise: bb2()] + switchInt(%2) -> [0: bb1(%4), otherwise: bb2()] } - bb1(%2): { - return %1 + bb1(%4): { + return %3 } bb2(): { diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_different_blocks.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_different_blocks.snap deleted file mode 100644 index 8913c267a70..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_different_blocks.snap +++ /dev/null @@ -1,64 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - - bb0(): { - switchInt(1) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - %2 = %1.0 == 42 - - goto -> bb3(%2) - } - - bb2(): { - %3 = %1.0 == 100 - - goto -> bb3(%3) - } - - bb3(%2): { - return %2 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Integer - let %5: Integer - - bb0(): { - switchInt(1) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - %4 = %1.0 - %2 = %4 == 42 - - goto -> bb3(%2) - } - - bb2(): { - %5 = %1.0 - %3 = %5 == 100 - - goto -> bb3(%3) - } - - bb3(%2): { - return %2 - } -} - 
-=================== Traversals =================== - -%4 → %1.0 -%5 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_same_block_deduped.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_same_block_deduped.snap deleted file mode 100644 index 58ba673793b..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/duplicate_same_block_deduped.snap +++ /dev/null @@ -1,39 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - - bb0(): { - %2 = %1.0 == 42 - %3 = %1.0 == 100 - %4 = %2 & %3 - - return %4 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Integer - - bb0(): { - %5 = %1.0 - %2 = %5 == 42 - %3 = %5 == 100 - %4 = %2 & %3 - - return %4 - } -} - -=================== Traversals =================== - -%5 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/mixed_statement_and_terminator.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/mixed_statement_and_terminator.snap deleted file mode 100644 index 9e1f0561d97..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/mixed_statement_and_terminator.snap +++ /dev/null @@ -1,55 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - switchInt(%2) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - return %1.0 - } - - bb2(): { - return %1.1 - } -} - -================== Changed: Yes ================== - -fn 
{graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - let %2: Boolean - let %3: Integer - let %4: Integer - let %5: Integer - - bb0(): { - %3 = %1.0 - %2 = %3 == 42 - - switchInt(%2) -> [0: bb2(), 1: bb1()] - } - - bb1(): { - %4 = %1.0 - - return %4 - } - - bb2(): { - %5 = %1.1 - - return %5 - } -} - -=================== Traversals =================== - -%3 → %1.0 -%4 → %1.0 -%5 → %1.1 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/multiple_distinct_projections.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/multiple_distinct_projections.snap deleted file mode 100644 index 13467e78d4e..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/multiple_distinct_projections.snap +++ /dev/null @@ -1,51 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Boolean - - bb0(): { - %2 = %1.0 == 1 - %3 = %1.1 == 2 - %4 = %1.2 == 3 - %5 = %2 & %3 - %5 = %5 & %4 - - return %5 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Boolean - let %6: Integer - let %7: Integer - let %8: Integer - - bb0(): { - %6 = %1.0 - %2 = %6 == 1 - %7 = %1.1 - %3 = %7 == 2 - %8 = %1.2 - %4 = %8 == 3 - %5 = %2 & %3 - %5 = %5 & %4 - - return %5 - } -} - -=================== Traversals =================== - -%6 → %1.0 -%7 → %1.1 -%8 → %1.2 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/nested_projection_extracted.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/nested_projection_extracted.snap deleted file mode 100644 index 06da1f6fbf3..00000000000 --- 
a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/nested_projection_extracted.snap +++ /dev/null @@ -1,31 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: ((Integer, Integer), Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0.1 == 42 - - return %2 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: ((Integer, Integer), Integer)) -> Boolean { - let %2: Boolean - let %3: Integer - - bb0(): { - %3 = %1.0.1 - %2 = %3 == 42 - - return %2 - } -} - -=================== Traversals =================== - -%3 → %1.0.1 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/no_projections_from_target.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/no_projections_from_target.snap deleted file mode 100644 index 526b9dbeffb..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/no_projections_from_target.snap +++ /dev/null @@ -1,27 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = 1 - - return %2 - } -} - -================== Changed: No =================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = 1 - - return %2 - } -} - -=================== Traversals =================== diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/non_graph_filter_unchanged.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/non_graph_filter_unchanged.snap deleted file mode 100644 index 87955e126ad..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/non_graph_filter_unchanged.snap +++ /dev/null @@ -1,25 +0,0 @@ ---- -source: 
libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {closure@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - return %2 - } -} - -================== Changed: No =================== - -fn {closure@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - return %2 - } -} diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/pre_existing_load_recorded.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/pre_existing_load_recorded.snap deleted file mode 100644 index 99dfe813624..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/pre_existing_load_recorded.snap +++ /dev/null @@ -1,33 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Integer - let %3: Boolean - - bb0(): { - %2 = %1.0 - %3 = %2 == 42 - - return %3 - } -} - -================== Changed: No =================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Integer - let %3: Boolean - - bb0(): { - %2 = %1.0 - %3 = %2 == 42 - - return %3 - } -} - -=================== Traversals =================== - -%2 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/projection_from_non_target_unchanged.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/projection_from_non_target_unchanged.snap deleted file mode 100644 index 10ca4a6879b..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/projection_from_non_target_unchanged.snap +++ /dev/null @@ -1,27 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (Integer, Integer), %1: (Integer, 
Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %0.0 == 42 - - return %2 - } -} - -================== Changed: No =================== - -fn {graph::read::filter@4294967040}(%0: (Integer, Integer), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %0.0 == 42 - - return %2 - } -} - -=================== Traversals =================== diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/single_projection_extracted.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/single_projection_extracted.snap deleted file mode 100644 index 0b8e097b1cc..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/single_projection_extracted.snap +++ /dev/null @@ -1,31 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - - bb0(): { - %2 = %1.0 == 42 - - return %2 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Integer - - bb0(): { - %3 = %1.0 - %2 = %3 == 42 - - return %2 - } -} - -=================== Traversals =================== - -%3 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/terminator_operand_extraction.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/terminator_operand_extraction.snap deleted file mode 100644 index f4a95e38a5c..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/terminator_operand_extraction.snap +++ /dev/null @@ -1,25 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - bb0(): { - return %1.0 - } -} - -================== Changed: Yes ================== - -fn 
{graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer)) -> Integer { - let %2: Integer - - bb0(): { - %2 = %1.0 - - return %2 - } -} - -=================== Traversals =================== - -%2 → %1.0 diff --git a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/traversals_lookup_correct.snap b/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/traversals_lookup_correct.snap deleted file mode 100644 index d701ba43613..00000000000 --- a/libs/@local/hashql/mir/tests/ui/pass/traversal_extraction/traversals_lookup_correct.snap +++ /dev/null @@ -1,42 +0,0 @@ ---- -source: libs/@local/hashql/mir/src/pass/transform/traversal_extraction/tests.rs -expression: value ---- -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - - bb0(): { - %2 = %1.0 == 1 - %3 = %1.2 == 3 - %4 = %2 & %3 - - return %4 - } -} - -================== Changed: Yes ================== - -fn {graph::read::filter@4294967040}(%0: (), %1: (Integer, Integer, Integer)) -> Boolean { - let %2: Boolean - let %3: Boolean - let %4: Boolean - let %5: Integer - let %6: Integer - - bb0(): { - %5 = %1.0 - %2 = %5 == 1 - %6 = %1.2 - %3 = %6 == 3 - %4 = %2 & %3 - - return %4 - } -} - -=================== Traversals =================== - -%5 → %1.0 -%6 → %1.2