diff --git a/changelog.d/7049-clarity-type-size-approximation.added b/changelog.d/7049-clarity-type-size-approximation.added new file mode 100644 index 00000000000..b79a3077027 --- /dev/null +++ b/changelog.d/7049-clarity-type-size-approximation.added @@ -0,0 +1 @@ +New `ResidentBytes` trait for types which can approximate their resident memory size (stack+heap) \ No newline at end of file diff --git a/changelog.d/README.md b/changelog.d/README.md index d29d8d5c7ad..3e4f4bd8632 100644 --- a/changelog.d/README.md +++ b/changelog.d/README.md @@ -24,7 +24,7 @@ CHANGELOG.md. 2. Write the changelog entry text in the file (one or more lines of markdown): - ``` + ```text Added `marf_compress` as a node configuration parameter to enable MARF compression feature ([#6811](https://github.com/stacks-network/stacks-core/pull/6811)) ``` diff --git a/clarity-types/src/lib.rs b/clarity-types/src/lib.rs index 1b1bef2c501..ca9cb1f286c 100644 --- a/clarity-types/src/lib.rs +++ b/clarity-types/src/lib.rs @@ -26,6 +26,7 @@ pub use stacks_common::{ pub mod errors; pub mod representations; +pub mod resident_bytes; pub mod types; pub use errors::{ClarityTypeError, IncomparableError}; diff --git a/clarity-types/src/resident_bytes.rs b/clarity-types/src/resident_bytes.rs new file mode 100644 index 00000000000..8935bf5459d --- /dev/null +++ b/clarity-types/src/resident_bytes.rs @@ -0,0 +1,1201 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::mem::size_of; +use std::sync::Arc; + +#[cfg(feature = "developer-mode")] +use crate::representations::Span; +use crate::representations::{ + ClarityName, ContractName, SymbolicExpression, SymbolicExpressionType, TraitDefinition, +}; +use crate::types::signatures::{ + BufferLength, CallableSubtype, ListTypeData, SequenceSubtype, StringSubtype, StringUTF8Length, + TupleTypeSignature, TypeSignature, +}; +use crate::types::{ + ASCIIData, BuffData, CallableData, CharType, FunctionIdentifier, ListData, OptionalData, + PrincipalData, QualifiedContractIdentifier, ResponseData, SequenceData, StandardPrincipalData, + TraitIdentifier, TupleData, UTF8Data, Value, +}; + +/// Estimated overhead for `Arc`: `strong + weak counts + allocation header`. +const ARC_OVERHEAD: usize = 16; + +// The `btree` and `hashmap` modules below contain heuristic constants derived from std's internal +// implementations (as of Rust 1.94 / hashbrown 0.15). They provide reasonable estimates of +// structural overhead, not exact byte counts. + +/// Layout constants for `std::collections::BTreeMap` / `BTreeSet`. +/// +/// BTreeMap uses `B=6`, so nodes hold up to `2*B-1 = 11` entries. Leaf nodes store keys+values; +/// internal nodes add 12 edge pointers. ~32 bytes overhead per node (metadata + allocator header), +/// ~2/3 average fill (~7 entries/node, ~8 children for internal nodes). +mod btree { + use std::mem::size_of; + + /// Maximum entries per node (`B=6` → `2*B-1 = 11`). + pub const NODE_CAPACITY: usize = 11; + /// Estimated average entries per node in a steady-state B-tree (~2/3 fill). + pub const AVERAGE_FILL: usize = 7; + /// Average children per internal node at ~2/3 fill. + pub const AVG_FANOUT: usize = AVERAGE_FILL + 1; + /// Per-node overhead: `(parent ptr + idx + len + padding) + allocator header`. 
+ pub const NODE_OVERHEAD: usize = 32; + /// Additional per-node size for internal nodes: `[MaybeUninit>; CAPACITY + 1]`. + pub const EDGE_ARRAY_SIZE: usize = (NODE_CAPACITY + 1) * size_of::(); + + /// Estimate total BTree node count (leaves + internal) and how many are internal. + pub fn node_counts(len: usize) -> (usize, usize) { + let leaves = len.div_ceil(AVERAGE_FILL); + let mut internal = 0; + let mut children_at_level = leaves; + while children_at_level > 1 { + let parents = children_at_level.div_ceil(AVG_FANOUT); + internal += parents; + children_at_level = parents; + } + (leaves + internal, internal) + } + + #[cfg(test)] + mod tests { + use super::*; + + #[test] + fn btree_node_counts() { + // 7 entries: 1 leaf, 0 internal + assert_eq!(node_counts(7), (1, 0)); + // 12 entries: 2 leaves + 1 internal root + let (total, internal) = node_counts(12); + assert_eq!(total, 3); + assert_eq!(internal, 1); + // 0 entries edge case + assert_eq!(node_counts(0), (0, 0)); + } + } +} + +/// Layout constants for [`HashMap`] / [`HashSet`] (hashbrown-backed since Rust 1.36). +/// +/// `hashbrown` uses a 7/8 max load factor and 1-byte control tags per bucket. +/// +/// The control array is padded by `Group::WIDTH` (4/8/16 depending on SIMD support); we use 16 as +/// an upper bound. +mod hashmap { + /// Inverse of `hashbrown`'s max load factor (7/8), as a fraction: `buckets ~= (capacity * 8/7)`. + pub const LOAD_FACTOR_INV_NUM: usize = 8; + pub const LOAD_FACTOR_INV_DEN: usize = 7; + /// Upper bound for SIMD group-width padding. In hashbrown 0.15, Group::WIDTH varies by target + /// and implementation (4/8/16 bytes), so we use 16 as a conservative upper bound for + /// control-byte padding overhead. + pub const CONTROL_GROUP_PADDING: usize = 16; + + /// Calculate the number of buckets for a given `HashMap` capacity, based on hashbrown's growth + /// strategy and load factor. 
+ pub fn buckets_for_capacity(cap: usize) -> usize { + (cap * LOAD_FACTOR_INV_NUM).div_ceil(LOAD_FACTOR_INV_DEN) + } +} + +/// Approximate in-memory footprint, in bytes. +/// +/// Split into [`heap_bytes()`](Self::heap_bytes) (children only) and +/// [`resident_bytes()`](Self::resident_bytes) (inline + heap) to avoid double-counting in nested +/// types — containers call `heap_bytes()` on children, only the outermost caller should use +/// `resident_bytes()`. +pub trait ResidentBytes: Sized { + /// Total approximate memory footprint of this instance. + /// + /// Default implementation: [`size_of::()`](size_of) (inline size) + + /// [`heap_bytes()`](Self::heap_bytes) (additional heap allocations). + fn resident_bytes(&self) -> usize { + // Note: if we ever need to support unsized types, we should switch to size_of_val(self) + // here instead of size_of::() and remove the Sized trait bound. + std::mem::size_of::() + self.heap_bytes() + } + + /// Heap allocations only, beyond the inline size reported by [`size_of::()`](size_of). + fn heap_bytes(&self) -> usize; +} + +impl ResidentBytes for String { + fn heap_bytes(&self) -> usize { + self.capacity() + } +} + +impl ResidentBytes for Vec { + fn heap_bytes(&self) -> usize { + // Backing array: capacity slots (inline size per slot) + let backing = self.capacity() * size_of::(); + + // Children's heap allocations + let children: usize = self.iter().map(|v| v.heap_bytes()).sum(); + + // Total heap + backing + children + } +} + +impl ResidentBytes for Box { + fn heap_bytes(&self) -> usize { + // Box heap-allocates the pointee: its inline size + its own heap + size_of::() + (**self).heap_bytes() + } +} + +impl ResidentBytes for Option { + fn heap_bytes(&self) -> usize { + match self { + // For Some, the T is inline in the Option; only count T's heap + Some(v) => v.heap_bytes(), + None => 0, + } + } +} + +impl ResidentBytes for Arc { + fn heap_bytes(&self) -> usize { + // Counts the Arc allocation (header + pointee). 
Shared backing may be overcounted if + // multiple Arc handles to the same allocation are reachable in one measured graph. + ARC_OVERHEAD + size_of::() + (**self).heap_bytes() + } +} + +impl ResidentBytes for HashMap { + fn heap_bytes(&self) -> usize { + let cap = self.capacity(); + if cap == 0 { + // HashMap::new() does not allocate until first insert. + return 0; + } + + let buckets = hashmap::buckets_for_capacity(cap); + let backing = buckets * size_of::<(K, V)>() + buckets + hashmap::CONTROL_GROUP_PADDING; + + // Children's heap allocations (only for occupied entries) + let children: usize = self + .iter() + .map(|(k, v)| k.heap_bytes() + v.heap_bytes()) + .sum(); + + backing + children + } +} + +impl ResidentBytes for BTreeMap { + fn heap_bytes(&self) -> usize { + if self.is_empty() { + return 0; // Empty BTreeMaps do not allocate on the heap. + } + + let (total_nodes, internal_nodes) = btree::node_counts(self.len()); + + // Base node size (shared by leaf and internal): overhead + key/value arrays + let leaf_size = btree::NODE_OVERHEAD + + (btree::NODE_CAPACITY * size_of::()) + + (btree::NODE_CAPACITY * size_of::()); + // Internal nodes additionally carry an edge pointer array + let structural = total_nodes * leaf_size + internal_nodes * btree::EDGE_ARRAY_SIZE; + + // Children's heap allocations (only for occupied entries) + let children: usize = self + .iter() + .map(|(k, v)| k.heap_bytes() + v.heap_bytes()) + .sum(); + + structural + children + } +} + +impl ResidentBytes for BTreeSet { + fn heap_bytes(&self) -> usize { + if self.is_empty() { + return 0; + } + + let (total_nodes, internal_nodes) = btree::node_counts(self.len()); + + // BTreeSet is backed by BTreeMap — vals array is zero-size + let leaf_size = btree::NODE_OVERHEAD + (btree::NODE_CAPACITY * size_of::()); + let structural = total_nodes * leaf_size + internal_nodes * btree::EDGE_ARRAY_SIZE; + let children: usize = self.iter().map(|v| v.heap_bytes()).sum(); + structural + children + } +} + +impl 
ResidentBytes for HashSet { + fn heap_bytes(&self) -> usize { + let cap = self.capacity(); + if cap == 0 { + return 0; + } + + let buckets = hashmap::buckets_for_capacity(cap); + let backing = buckets * size_of::() + buckets + hashmap::CONTROL_GROUP_PADDING; + let children: usize = self.iter().map(|v| v.heap_bytes()).sum(); + backing + children + } +} + +impl ResidentBytes for (A, B) { + fn heap_bytes(&self) -> usize { + self.0.heap_bytes() + self.1.heap_bytes() + } +} + +// Primitive types: no heap allocation (stack-only) + +impl ResidentBytes for bool { + fn heap_bytes(&self) -> usize { + 0 + } +} +impl ResidentBytes for u8 { + fn heap_bytes(&self) -> usize { + 0 + } +} +impl ResidentBytes for u32 { + fn heap_bytes(&self) -> usize { + 0 + } +} +impl ResidentBytes for u64 { + fn heap_bytes(&self) -> usize { + 0 + } +} +impl ResidentBytes for u128 { + fn heap_bytes(&self) -> usize { + 0 + } +} +impl ResidentBytes for i128 { + fn heap_bytes(&self) -> usize { + 0 + } +} + +impl ResidentBytes for Value { + fn heap_bytes(&self) -> usize { + match self { + Value::Int(_) | Value::UInt(_) | Value::Bool(_) => 0, + Value::Sequence(data) => data.heap_bytes(), + Value::Principal(data) => data.heap_bytes(), + Value::Tuple(data) => data.heap_bytes(), + Value::Optional(data) => data.heap_bytes(), + Value::Response(data) => data.heap_bytes(), + Value::CallableContract(data) => data.heap_bytes(), + } + } +} + +impl ResidentBytes for SequenceData { + fn heap_bytes(&self) -> usize { + match self { + SequenceData::Buffer(buf) => buf.heap_bytes(), + SequenceData::List(list) => list.heap_bytes(), + SequenceData::String(char_type) => char_type.heap_bytes(), + } + } +} + +impl ResidentBytes for BuffData { + fn heap_bytes(&self) -> usize { + self.data.heap_bytes() + } +} + +impl ResidentBytes for ListData { + fn heap_bytes(&self) -> usize { + self.data.heap_bytes() + self.type_signature.heap_bytes() + } +} + +impl ResidentBytes for CharType { + fn heap_bytes(&self) -> usize { + match self 
{ + CharType::ASCII(data) => data.heap_bytes(), + CharType::UTF8(data) => data.heap_bytes(), + } + } +} + +impl ResidentBytes for ASCIIData { + fn heap_bytes(&self) -> usize { + self.data.heap_bytes() + } +} + +impl ResidentBytes for UTF8Data { + fn heap_bytes(&self) -> usize { + self.data.heap_bytes() + } +} + +impl ResidentBytes for TupleData { + fn heap_bytes(&self) -> usize { + self.type_signature.heap_bytes() + self.data_map.heap_bytes() + } +} + +impl ResidentBytes for OptionalData { + fn heap_bytes(&self) -> usize { + self.data.heap_bytes() + } +} + +impl ResidentBytes for ResponseData { + fn heap_bytes(&self) -> usize { + self.data.heap_bytes() + } +} + +impl ResidentBytes for CallableData { + fn heap_bytes(&self) -> usize { + self.contract_identifier.heap_bytes() + self.trait_identifier.heap_bytes() + } +} + +impl ResidentBytes for PrincipalData { + fn heap_bytes(&self) -> usize { + match self { + PrincipalData::Standard(data) => data.heap_bytes(), + PrincipalData::Contract(data) => data.heap_bytes(), + } + } +} + +impl ResidentBytes for StandardPrincipalData { + fn heap_bytes(&self) -> usize { + 0 // Fixed-size: u8 + [u8; 20], no heap allocation + } +} + +impl ResidentBytes for QualifiedContractIdentifier { + fn heap_bytes(&self) -> usize { + self.issuer.heap_bytes() + self.name.heap_bytes() + } +} + +impl ResidentBytes for ClarityName { + fn heap_bytes(&self) -> usize { + self.heap_capacity() + } +} + +impl ResidentBytes for ContractName { + fn heap_bytes(&self) -> usize { + self.heap_capacity() + } +} + +impl ResidentBytes for TraitIdentifier { + fn heap_bytes(&self) -> usize { + self.name.heap_bytes() + self.contract_identifier.heap_bytes() + } +} + +impl ResidentBytes for FunctionIdentifier { + fn heap_bytes(&self) -> usize { + self.heap_capacity() + } +} + +impl ResidentBytes for TypeSignature { + fn heap_bytes(&self) -> usize { + match self { + TypeSignature::NoType + | TypeSignature::IntType + | TypeSignature::UIntType + | TypeSignature::BoolType + 
| TypeSignature::PrincipalType => 0, + TypeSignature::SequenceType(subtype) => subtype.heap_bytes(), + TypeSignature::TupleType(tuple_sig) => tuple_sig.heap_bytes(), + TypeSignature::OptionalType(inner) => inner.heap_bytes(), + TypeSignature::ResponseType(inner) => inner.heap_bytes(), + TypeSignature::CallableType(subtype) => subtype.heap_bytes(), + TypeSignature::ListUnionType(set) => set.heap_bytes(), + TypeSignature::TraitReferenceType(trait_id) => trait_id.heap_bytes(), + } + } +} + +impl ResidentBytes for TupleTypeSignature { + fn heap_bytes(&self) -> usize { + // TupleTypeSignature wraps Arc>. get_type_map() + // returns &BTreeMap — count Arc overhead + map header + contents. + let map_header = size_of::>(); + ARC_OVERHEAD + map_header + self.get_type_map().heap_bytes() + } +} + +impl ResidentBytes for SequenceSubtype { + fn heap_bytes(&self) -> usize { + match self { + SequenceSubtype::BufferType(len) => len.heap_bytes(), + SequenceSubtype::ListType(list) => list.heap_bytes(), + SequenceSubtype::StringType(string) => string.heap_bytes(), + } + } +} + +impl ResidentBytes for ListTypeData { + fn heap_bytes(&self) -> usize { + // max_len: u32 (no heap), entry_type: Box + size_of::() + self.get_list_item_type().heap_bytes() + } +} + +impl ResidentBytes for StringSubtype { + fn heap_bytes(&self) -> usize { + 0 // Both variants (ASCII, UTF8) contain only u32 newtypes + } +} + +impl ResidentBytes for BufferLength { + fn heap_bytes(&self) -> usize { + 0 // u32 newtype + } +} + +impl ResidentBytes for StringUTF8Length { + fn heap_bytes(&self) -> usize { + 0 // u32 newtype + } +} + +impl ResidentBytes for CallableSubtype { + fn heap_bytes(&self) -> usize { + match self { + CallableSubtype::Principal(id) => id.heap_bytes(), + CallableSubtype::Trait(trait_id) => trait_id.heap_bytes(), + } + } +} + +#[cfg(feature = "developer-mode")] +impl ResidentBytes for Span { + fn heap_bytes(&self) -> usize { + 0 // 4 × u32, all inline + } +} + +impl ResidentBytes for 
SymbolicExpression { + fn heap_bytes(&self) -> usize { + #[allow(unused_mut)] + let mut total = self.expr.heap_bytes(); + // id is u64 — no heap allocation + + #[cfg(feature = "developer-mode")] + { + // span is inline (no heap), but pre_comments, end_line_comment, and + // post_comments have heap allocations via Vec/String. + total += self.pre_comments.heap_bytes(); + total += self.end_line_comment.heap_bytes(); + total += self.post_comments.heap_bytes(); + } + + total + } +} + +impl ResidentBytes for SymbolicExpressionType { + fn heap_bytes(&self) -> usize { + match self { + SymbolicExpressionType::AtomValue(value) + | SymbolicExpressionType::LiteralValue(value) => value.heap_bytes(), + SymbolicExpressionType::Atom(name) => name.heap_bytes(), + SymbolicExpressionType::List(exprs) => exprs.heap_bytes(), + SymbolicExpressionType::Field(trait_id) => trait_id.heap_bytes(), + SymbolicExpressionType::TraitReference(name, defn) => { + name.heap_bytes() + defn.heap_bytes() + } + } + } +} + +impl ResidentBytes for TraitDefinition { + fn heap_bytes(&self) -> usize { + match self { + TraitDefinition::Defined(id) | TraitDefinition::Imported(id) => id.heap_bytes(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + mod primitives { + use super::*; + + #[test] + fn primitive_heap_bytes_zero() { + assert_eq!(true.heap_bytes(), 0); + assert_eq!(0u8.heap_bytes(), 0); + assert_eq!(0u32.heap_bytes(), 0); + assert_eq!(0u64.heap_bytes(), 0); + assert_eq!(0u128.heap_bytes(), 0); + assert_eq!(0i128.heap_bytes(), 0); + } + + #[test] + fn u64_resident_bytes() { + let v: u64 = 42; + assert_eq!(v.resident_bytes(), 8); + assert_eq!(v.heap_bytes(), 0); + } + } + + mod std_containers { + use super::*; + + const HASHMAP_CAPACITY_TRANSITIONS: &[(usize, usize, usize)] = &[ + (0, 0, 0), + (1, 3, 4), + (4, 7, 8), + (8, 14, 16), + (15, 28, 32), + (29, 56, 64), + (57, 112, 128), + (113, 224, 256), + (225, 448, 512), + ]; + + #[test] + fn string() { + let s = String::from("hello world"); + 
assert!(s.resident_bytes() >= size_of::() + 11); + assert!(s.heap_bytes() >= 11); + } + + #[test] + fn vec() { + let v: Vec = vec![1, 2, 3, 4, 5]; + assert!(v.heap_bytes() >= 40); + assert!(v.resident_bytes() >= size_of::>() + 40); + } + + #[test] + fn boxed() { + let b = Box::new(String::from("boxed")); + assert!(b.heap_bytes() >= size_of::() + 5); + } + + #[test] + fn option_none() { + let opt: Option> = None; + assert_eq!(opt.heap_bytes(), 0); + assert!(opt.resident_bytes() >= size_of::>>()); + } + + #[test] + fn option_some() { + let opt: Option> = Some(Box::new(Value::Int(42))); + assert!(opt.heap_bytes() >= size_of::()); + } + + #[test] + fn arc() { + let a = Arc::new(String::from("hello")); + assert!(a.heap_bytes() >= ARC_OVERHEAD + size_of::() + 5); + } + + #[test] + fn tuple_pair() { + let t = ("hello".to_string(), 42u64); + assert!(t.heap_bytes() >= 5); + assert_eq!(42u64.heap_bytes(), 0); + } + + #[test] + fn hashmap() { + let mut m: HashMap = HashMap::new(); + m.insert("key1".into(), 1); + m.insert("key2".into(), 2); + + let cap = m.capacity(); + let buckets = hashmap::buckets_for_capacity(cap); + // Structural lower bound: buckets * entry_size + control bytes + let min_structural = buckets * size_of::<(String, u64)>() + buckets; + // Child heap: each key String has at least 4 bytes of heap + let min_child_heap = 2 * 4; + assert!( + m.heap_bytes() >= min_structural + min_child_heap, + "heap_bytes {} < expected minimum {}", + m.heap_bytes(), + min_structural + min_child_heap, + ); + } + + #[test] + fn hashmap_empty() { + let m: HashMap = HashMap::new(); + assert_eq!(m.heap_bytes(), 0); + } + + #[test] + fn hashmap_with_capacity_progression_matches_expected_boundaries() { + let mut observed = Vec::new(); + let mut previous = None; + + for requested in 0usize..=256 { + let map = HashMap::::with_capacity(requested); + let cap = map.capacity(); + let buckets = if cap == 0 { + 0 + } else { + hashmap::buckets_for_capacity(cap) + }; + + if previous != 
Some((cap, buckets)) { + observed.push((requested, cap, buckets)); + previous = Some((cap, buckets)); + } + } + + assert_eq!(observed.as_slice(), HASHMAP_CAPACITY_TRANSITIONS); + } + + #[test] + fn hashmap_capacity_boundaries_match_bucket_accounting() { + for (_, expected_cap, _) in HASHMAP_CAPACITY_TRANSITIONS.iter().copied().skip(1) { + let mut map = HashMap::with_capacity(expected_cap); + + assert_eq!( + map.capacity(), + expected_cap, + "HashMap::with_capacity({expected_cap}) returned capacity {cap}", + cap = map.capacity(), + ); + + for entry in 0..expected_cap { + map.insert(entry as u64, entry as u64); + assert_eq!( + map.capacity(), + expected_cap, + "HashMap grew before reaching capacity {expected_cap}; capacity is {cap} after {inserts} inserts", + cap = map.capacity(), + inserts = entry + 1, + ); + } + + map.insert(expected_cap as u64, expected_cap as u64); + assert!( + map.capacity() > expected_cap, + "HashMap did not grow after exceeding capacity {expected_cap}; capacity remained {cap}", + cap = map.capacity(), + ); + } + } + + #[test] + fn hashset() { + let mut s: HashSet = HashSet::new(); + for i in 0..10 { + s.insert(i); + } + + let cap = s.capacity(); + let buckets = hashmap::buckets_for_capacity(cap); + let min_structural = buckets * size_of::() + buckets; + assert!( + s.heap_bytes() >= min_structural, + "heap_bytes {} < expected minimum {}", + s.heap_bytes(), + min_structural, + ); + } + + #[test] + fn hashset_empty() { + let s: HashSet = HashSet::new(); + assert_eq!(s.heap_bytes(), 0); + } + + #[test] + fn btreemap() { + let mut m = BTreeMap::new(); + for i in 0..20u64 { + m.insert(i, i); + } + + let (total_nodes, internal_nodes) = btree::node_counts(20); + let leaf_size = btree::NODE_OVERHEAD + + btree::NODE_CAPACITY * size_of::() + + btree::NODE_CAPACITY * size_of::(); + let min_structural = total_nodes * leaf_size + internal_nodes * btree::EDGE_ARRAY_SIZE; + assert!( + m.heap_bytes() >= min_structural, + "heap_bytes {} < expected minimum {}", 
+ m.heap_bytes(), + min_structural, + ); + // Must account for internal nodes (20 entries > single-leaf capacity of 11) + assert!(internal_nodes >= 1); + } + + #[test] + fn btreemap_empty() { + let m: BTreeMap = BTreeMap::new(); + assert_eq!(m.heap_bytes(), 0); + } + + #[test] + fn btreeset() { + let s: BTreeSet = (0..15).collect(); + + let (total_nodes, _) = btree::node_counts(15); + let leaf_size = btree::NODE_OVERHEAD + btree::NODE_CAPACITY * size_of::(); + assert!( + s.heap_bytes() >= total_nodes * leaf_size, + "heap_bytes {} < expected minimum {}", + s.heap_bytes(), + total_nodes * leaf_size, + ); + } + + #[test] + fn btreeset_empty() { + let s: BTreeSet = BTreeSet::new(); + assert_eq!(s.heap_bytes(), 0); + } + } + + mod clarity_values { + use super::*; + + #[test] + fn int_uint_bool_no_heap() { + assert_eq!(Value::Int(42).heap_bytes(), 0); + assert_eq!(Value::UInt(42).heap_bytes(), 0); + assert_eq!(Value::Bool(true).heap_bytes(), 0); + let int_size = Value::Int(42).resident_bytes(); + assert!( + int_size >= size_of::(), + "Int resident_bytes ({int_size}) should be >= size_of::()" + ); + } + + #[test] + fn sequence_buffer() { + let buf = BuffData { + data: vec![0u8; 100], + }; + assert!(buf.heap_bytes() >= 100); + } + + #[test] + fn sequence_data_buffer_variant() { + let seq = SequenceData::Buffer(BuffData { + data: vec![0u8; 64], + }); + assert!(seq.heap_bytes() >= 64); + } + + #[test] + fn sequence_ascii() { + let v = Value::Sequence(SequenceData::String(CharType::ASCII(ASCIIData { + data: vec![b'a', b'b', b'c'], + }))); + assert!(v.heap_bytes() >= 3); + } + + #[test] + fn sequence_utf8() { + let v = Value::Sequence(SequenceData::String(CharType::UTF8(UTF8Data { + data: vec![vec![0xC3, 0xA9], vec![0xC3, 0xB1]], + }))); + assert!(v.heap_bytes() > 0); + } + + #[test] + fn list_data() { + let list = ListData { + data: vec![Value::Int(1), Value::Int(2), Value::Int(3)], + type_signature: ListTypeData::new_list(TypeSignature::IntType, 10).unwrap(), + }; + 
assert!(list.heap_bytes() > 0); + } + + #[test] + fn sequence_data_list_variant() { + let seq = SequenceData::List(ListData { + data: vec![Value::Int(1), Value::Int(2)], + type_signature: ListTypeData::new_list(TypeSignature::IntType, 10).unwrap(), + }); + assert!(seq.heap_bytes() > 0); + } + + #[test] + fn principal_standard() { + let v = Value::Principal(PrincipalData::Standard(StandardPrincipalData::transient())); + assert_eq!(v.heap_bytes(), 0); + } + + #[test] + fn principal_contract() { + let v = Value::Principal(PrincipalData::Contract( + QualifiedContractIdentifier::transient(), + )); + assert!(v.heap_bytes() > 0); + } + + #[test] + fn tuple() { + let tuple = TupleData::from_data(vec![ + ( + ClarityName::try_from("a".to_string()).unwrap(), + Value::Int(1), + ), + ( + ClarityName::try_from("b".to_string()).unwrap(), + Value::Bool(true), + ), + ]) + .unwrap(); + assert!(Value::Tuple(tuple).heap_bytes() > 0); + } + + #[test] + fn optional() { + let opt = OptionalData { + data: Some(Box::new(Value::Int(42))), + }; + assert!(opt.heap_bytes() > 0); + } + + #[test] + fn value_optional_variant() { + let value = Value::Optional(OptionalData { + data: Some(Box::new(Value::Int(42))), + }); + assert!(value.heap_bytes() >= size_of::()); + } + + #[test] + fn response() { + let ok = Value::Response(ResponseData { + committed: true, + data: Box::new(Value::Int(42)), + }); + let err = Value::Response(ResponseData { + committed: false, + data: Box::new(Value::Bool(false)), + }); + assert!(ok.heap_bytes() >= size_of::()); + assert!(err.heap_bytes() >= size_of::()); + } + + #[test] + fn callable_contract() { + let v = Value::CallableContract(CallableData { + contract_identifier: QualifiedContractIdentifier::transient(), + trait_identifier: None, + }); + assert!(v.heap_bytes() > 0); + } + } + + mod clarity_identifiers { + use super::*; + + #[test] + fn clarity_name() { + let name = ClarityName::try_from("my-variable".to_string()).unwrap(); + assert!(name.heap_bytes() >= 11); + 
assert!(name.resident_bytes() > name.heap_bytes()); + } + + #[test] + fn contract_name() { + let name = ContractName::try_from("my-contract".to_string()).unwrap(); + assert!(name.heap_bytes() >= 11); + } + + #[test] + fn standard_principal_data() { + let p = StandardPrincipalData::transient(); + assert_eq!(p.heap_bytes(), 0); + } + + #[test] + fn qualified_contract_identifier() { + let id = QualifiedContractIdentifier::transient(); + assert!(id.heap_bytes() > 0); + } + + #[test] + fn trait_identifier() { + let id = TraitIdentifier::new( + StandardPrincipalData::transient(), + ContractName::try_from("contract".to_string()).unwrap(), + ClarityName::try_from("my-trait".to_string()).unwrap(), + ); + assert!(id.heap_bytes() > 0); + } + + #[test] + fn function_identifier() { + let fid = FunctionIdentifier::new_native_function("map"); + assert!(fid.heap_bytes() > 0); + } + } + + mod type_signatures { + use super::*; + + #[test] + fn scalar_no_heap() { + assert_eq!(TypeSignature::IntType.heap_bytes(), 0); + assert_eq!(TypeSignature::UIntType.heap_bytes(), 0); + assert_eq!(TypeSignature::BoolType.heap_bytes(), 0); + assert_eq!(TypeSignature::PrincipalType.heap_bytes(), 0); + assert_eq!(TypeSignature::NoType.heap_bytes(), 0); + assert!(TypeSignature::IntType.resident_bytes() > 0); + } + + #[test] + fn optional() { + let sig = TypeSignature::OptionalType(Box::new(TypeSignature::IntType)); + assert!(sig.heap_bytes() > 0); + assert!(sig.resident_bytes() > sig.heap_bytes()); + } + + #[test] + fn response() { + let sig = TypeSignature::ResponseType(Box::new(( + TypeSignature::IntType, + TypeSignature::BoolType, + ))); + assert!(sig.heap_bytes() > 0); + } + + #[test] + fn sequence() { + let sig = TypeSignature::SequenceType(SequenceSubtype::BufferType( + BufferLength::try_from(64u32).unwrap(), + )); + assert_eq!(sig.heap_bytes(), 0); + } + + #[test] + fn tuple() { + let sig = TypeSignature::TupleType( + TupleTypeSignature::try_from(vec![( + 
ClarityName::try_from("f".to_string()).unwrap(), + TypeSignature::IntType, + )]) + .unwrap(), + ); + assert!(sig.heap_bytes() > 0); + } + + #[test] + fn callable() { + let sig = TypeSignature::CallableType(CallableSubtype::Principal( + QualifiedContractIdentifier::transient(), + )); + assert!(sig.heap_bytes() > 0); + } + + #[test] + fn list_union() { + let mut set = BTreeSet::new(); + set.insert(CallableSubtype::Principal( + QualifiedContractIdentifier::transient(), + )); + let sig = TypeSignature::ListUnionType(set); + assert!(sig.heap_bytes() > 0); + } + + #[test] + fn trait_reference() { + let id = TraitIdentifier::new( + StandardPrincipalData::transient(), + ContractName::try_from("c".to_string()).unwrap(), + ClarityName::try_from("t".to_string()).unwrap(), + ); + let sig = TypeSignature::TraitReferenceType(id); + assert!(sig.heap_bytes() > 0); + } + + #[test] + fn tuple_type_signature() { + let sig = TupleTypeSignature::try_from(vec![ + ( + ClarityName::try_from("x".to_string()).unwrap(), + TypeSignature::IntType, + ), + ( + ClarityName::try_from("y".to_string()).unwrap(), + TypeSignature::BoolType, + ), + ]) + .unwrap(); + assert!(sig.heap_bytes() > ARC_OVERHEAD); + } + + #[test] + fn sequence_subtype() { + assert_eq!( + SequenceSubtype::BufferType(BufferLength::try_from(32u32).unwrap()).heap_bytes(), + 0, + ); + let list = SequenceSubtype::ListType( + ListTypeData::new_list(TypeSignature::IntType, 5).unwrap(), + ); + assert!(list.heap_bytes() > 0); + assert_eq!( + SequenceSubtype::StringType(StringSubtype::ASCII( + BufferLength::try_from(10u32).unwrap() + )) + .heap_bytes(), + 0, + ); + } + + #[test] + fn string_subtype_no_heap() { + assert_eq!( + StringSubtype::ASCII(BufferLength::try_from(10u32).unwrap()).heap_bytes(), + 0 + ); + assert_eq!( + StringSubtype::UTF8(StringUTF8Length::try_from(10u32).unwrap()).heap_bytes(), + 0 + ); + } + + #[test] + fn buffer_length_no_heap() { + assert_eq!(BufferLength::try_from(100u32).unwrap().heap_bytes(), 0); + } + + 
#[test] + fn string_utf8_length_no_heap() { + assert_eq!(StringUTF8Length::try_from(100u32).unwrap().heap_bytes(), 0); + } + + #[test] + fn callable_subtype_principal() { + let sub = CallableSubtype::Principal(QualifiedContractIdentifier::transient()); + assert!(sub.heap_bytes() > 0); + } + + #[test] + fn callable_subtype_trait() { + let id = TraitIdentifier::new( + StandardPrincipalData::transient(), + ContractName::try_from("c".to_string()).unwrap(), + ClarityName::try_from("t".to_string()).unwrap(), + ); + assert!(CallableSubtype::Trait(id).heap_bytes() > 0); + } + } + + mod symbolic_expressions { + use super::*; + + #[test] + fn atom() { + let inner = SymbolicExpression::atom(ClarityName::try_from("x".to_string()).unwrap()); + let list = SymbolicExpression::list(vec![inner.clone(), inner.clone(), inner]); + assert!(list.heap_bytes() > 0); + assert!(list.resident_bytes() > list.heap_bytes()); + } + + #[test] + fn atom_value() { + let expr = SymbolicExpression::atom_value(Value::Int(1)); + assert_eq!(expr.heap_bytes(), 0); + } + + #[test] + fn literal_value() { + let expr = SymbolicExpression::literal_value(Value::Bool(true)); + assert_eq!(expr.heap_bytes(), 0); + } + + #[test] + fn field() { + let id = TraitIdentifier::new( + StandardPrincipalData::transient(), + ContractName::try_from("c".to_string()).unwrap(), + ClarityName::try_from("f".to_string()).unwrap(), + ); + let expr = SymbolicExpression::field(id); + assert!(expr.heap_bytes() > 0); + } + + #[test] + fn trait_reference() { + let id = TraitIdentifier::new( + StandardPrincipalData::transient(), + ContractName::try_from("c".to_string()).unwrap(), + ClarityName::try_from("t".to_string()).unwrap(), + ); + let expr = SymbolicExpression::trait_reference( + ClarityName::try_from("name".to_string()).unwrap(), + TraitDefinition::Defined(id), + ); + assert!(expr.heap_bytes() > 0); + } + + #[test] + fn trait_definition() { + let id = TraitIdentifier::new( + StandardPrincipalData::transient(), + 
ContractName::try_from("c".to_string()).unwrap(), + ClarityName::try_from("t".to_string()).unwrap(), + ); + let defined = TraitDefinition::Defined(id.clone()); + let imported = TraitDefinition::Imported(id); + assert!(defined.heap_bytes() > 0); + assert_eq!(defined.heap_bytes(), imported.heap_bytes()); + } + } + + #[cfg(feature = "developer-mode")] + mod developer_mode { + use super::*; + + #[test] + fn symbolic_expression_comment_fields() { + let mut expr = + SymbolicExpression::atom(ClarityName::try_from("x".to_string()).unwrap()); + expr.pre_comments = vec![ + ("comment1".to_string(), Span::zero()), + ("comment2".to_string(), Span::zero()), + ]; + expr.end_line_comment = Some("end comment".to_string()); + expr.post_comments = vec![("post".to_string(), Span::zero())]; + + let with_comments = expr.heap_bytes(); + let plain = SymbolicExpression::atom(ClarityName::try_from("x".to_string()).unwrap()); + assert!(with_comments > plain.heap_bytes()); + } + + #[test] + fn span_no_heap() { + let s = Span::zero(); + assert_eq!(s.heap_bytes(), 0); + } + } +} diff --git a/clarity-types/src/types/mod.rs b/clarity-types/src/types/mod.rs index af31bc679d4..a028bc4322a 100644 --- a/clarity-types/src/types/mod.rs +++ b/clarity-types/src/types/mod.rs @@ -1837,6 +1837,11 @@ impl fmt::Display for FunctionIdentifier { } impl FunctionIdentifier { + /// Returns the heap capacity of the backing `String` buffer. 
+ pub fn heap_capacity(&self) -> usize { + self.identifier.capacity() + } + pub fn new_native_function(name: &str) -> FunctionIdentifier { let identifier = format!("_native_:{name}"); FunctionIdentifier { identifier } diff --git a/clarity/src/vm/callables.rs b/clarity/src/vm/callables.rs index 5e8c156ac1f..043cc7c3fe8 100644 --- a/clarity/src/vm/callables.rs +++ b/clarity/src/vm/callables.rs @@ -17,6 +17,7 @@ use std::collections::BTreeMap; use clarity_types::representations::ClarityName; +use clarity_types::resident_bytes::ResidentBytes; pub use clarity_types::types::FunctionIdentifier; use stacks_common::types::StacksEpochId; @@ -76,6 +77,17 @@ pub struct DefinedFunction { body: SymbolicExpression, } +impl ResidentBytes for DefinedFunction { + fn heap_bytes(&self) -> usize { + self.identifier.heap_bytes() + + self.name.heap_bytes() + + self.arg_types.heap_bytes() + + self.arguments.heap_bytes() + + self.body.heap_bytes() + // define_type is a fieldless enum — no heap allocation + } +} + /// This enum handles the actual invocation of the method /// implementing a native function. Each variant handles /// different expected number of arguments. 
@@ -773,4 +785,27 @@ mod test { TypeSignature::CallableType(CallableSubtype::Trait(trait_id)) ); } + + #[test] + fn resident_bytes_defined_function_counts_all_heap_fields() { + let function = DefinedFunction { + identifier: FunctionIdentifier::new_native_function("map"), + name: ClarityName::try_from("resident-bytes-fn".to_string()).unwrap(), + arg_types: vec![TypeSignature::OptionalType(Box::new( + TypeSignature::UIntType, + ))], + define_type: DefineType::Private, + arguments: vec![ClarityName::try_from("arg".to_string()).unwrap()], + body: SymbolicExpression::atom_value(Value::Bool(true)), + }; + + let expected = function.identifier.heap_bytes() + + function.name.heap_bytes() + + function.arg_types.heap_bytes() + + function.arguments.heap_bytes() + + function.body.heap_bytes(); + + assert_eq!(function.heap_bytes(), expected); + assert!(function.heap_bytes() > 0); + } } diff --git a/clarity/src/vm/contexts.rs b/clarity/src/vm/contexts.rs index 784edf564dc..d4a3990eda6 100644 --- a/clarity/src/vm/contexts.rs +++ b/clarity/src/vm/contexts.rs @@ -20,6 +20,7 @@ use std::mem::replace; use std::time::{Duration, Instant}; use clarity_types::representations::ClarityName; +use clarity_types::resident_bytes::ResidentBytes; use serde::Serialize; use serde_json::json; use stacks_common::alloc_tracker::{AllocationCounter, thread_allocated}; @@ -388,6 +389,40 @@ pub struct ContractContext { pub is_deploying: bool, } +impl ResidentBytes for ContractContext { + fn heap_bytes(&self) -> usize { + // Destructure to get a compile error when a field is added without accounting for it. 
+ let ContractContext { + // Heap-allocated fields: accounted for by heap_bytes() calls below + contract_identifier, + variables, + functions, + defined_traits, + implemented_traits, + persisted_names, + meta_data_map, + meta_data_var, + meta_nft, + meta_ft, + // Inline-only fields: covered by size_of::<Self>() + data_size: _, + clarity_version: _, + is_deploying: _, + } = self; + + contract_identifier.heap_bytes() + + variables.heap_bytes() + + functions.heap_bytes() + + defined_traits.heap_bytes() + + implemented_traits.heap_bytes() + + persisted_names.heap_bytes() + + meta_data_map.heap_bytes() + + meta_data_var.heap_bytes() + + meta_nft.heap_bytes() + + meta_ft.heap_bytes() + } +} + pub struct LocalContext<'a> { pub function_context: Option<&'a LocalContext<'a>>, pub parent: Option<&'a LocalContext<'a>>, diff --git a/clarity/src/vm/contracts.rs b/clarity/src/vm/contracts.rs index df4cee68dcb..ae753877382 100644 --- a/clarity/src/vm/contracts.rs +++ b/clarity/src/vm/contracts.rs @@ -14,6 +14,7 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. +use clarity_types::resident_bytes::ResidentBytes; use stacks_common::types::StacksEpochId; use crate::vm::ast::ContractAST; @@ -28,8 +29,12 @@ pub struct Contract { pub contract_context: ContractContext, } -// AARON: this is an increasingly useless wrapper around a ContractContext struct. -// will probably be removed soon. 
+impl ResidentBytes for Contract { + fn heap_bytes(&self) -> usize { + self.contract_context.heap_bytes() + } +} + impl Contract { pub fn initialize_from_ast( contract_identifier: QualifiedContractIdentifier, @@ -56,3 +61,254 @@ impl Contract { self.contract_context.canonicalize_types(epoch) } } + +#[cfg(test)] +mod tests { + use std::mem::size_of; + + use clarity_types::resident_bytes::ResidentBytes; + use stacks_common::consts::CHAIN_ID_TESTNET; + use stacks_common::types::StacksEpochId; + + use crate::vm::GlobalContext; + use crate::vm::ast::build_ast; + use crate::vm::contexts::ContractContext; + use crate::vm::contracts::Contract; + use crate::vm::costs::LimitedCostTracker; + use crate::vm::database::MemoryBackingStore; + use crate::vm::types::QualifiedContractIdentifier; + use crate::vm::version::ClarityVersion; + + /// Verify that `Contract::heap_bytes` delegates cleanly to `ContractContext::heap_bytes`, and + /// that `resident_bytes` adds exactly `size_of::<Contract>()`. + /// + /// Field-coverage (compile-time guard against forgotten fields) is enforced by the + /// destructuring inside `ContractContext::heap_bytes()` itself. 
+ #[track_caller] + fn assert_contract_bytes_consistent(contract: &Contract) { + let ctx_heap = contract.contract_context.heap_bytes(); + assert_eq!(contract.heap_bytes(), ctx_heap); + assert_eq!(contract.resident_bytes(), size_of::<Contract>() + ctx_heap); + } + + #[track_caller] + fn initialize_contract_with_store( + marf: &mut MemoryBackingStore, + source: &str, + contract_name: &str, + ) -> Contract { + let version = ClarityVersion::Clarity2; + let epoch = StacksEpochId::Epoch21; + let contract_identifier = QualifiedContractIdentifier::local(contract_name).unwrap(); + let conn = marf.as_clarity_db(); + let mut global_context = GlobalContext::new( + false, + CHAIN_ID_TESTNET, + conn, + LimitedCostTracker::new_free(), + epoch, + ); + let contract_ast = build_ast(&contract_identifier, source, &mut (), version, epoch) + .expect("contract source should parse"); + + global_context + .execute(|g| { + Contract::initialize_from_ast(contract_identifier, &contract_ast, None, g, version) + }) + .expect("contract source should initialize") + } + + fn initialize_contract(source: &str, contract_name: &str) -> Contract { + let mut marf = MemoryBackingStore::new(); + initialize_contract_with_store(&mut marf, source, contract_name) + } + + #[test] + fn resident_bytes_matches_empty_contract_context() { + let contract_identifier = + QualifiedContractIdentifier::local("resident-bytes-empty").unwrap(); + let contract = Contract { + contract_context: ContractContext::new(contract_identifier, ClarityVersion::Clarity2), + }; + + assert!(contract.contract_context.variables.is_empty()); + assert!(contract.contract_context.functions.is_empty()); + assert!(contract.contract_context.defined_traits.is_empty()); + assert!(contract.contract_context.implemented_traits.is_empty()); + assert!(contract.contract_context.persisted_names.is_empty()); + assert!(contract.contract_context.meta_data_map.is_empty()); + assert!(contract.contract_context.meta_data_var.is_empty()); + 
assert!(contract.contract_context.meta_nft.is_empty()); + assert!(contract.contract_context.meta_ft.is_empty()); + + assert_contract_bytes_consistent(&contract); + assert_eq!( + contract.heap_bytes(), + contract.contract_context.contract_identifier.heap_bytes() + ); + } + + #[test] + fn resident_bytes_covers_all_fields_in_rich_contract() { + let contract = initialize_contract( + r#" + (define-data-var counter uint u0) + (define-map balances { owner: principal } { amount: uint, memo: (string-ascii 32) }) + (define-constant label "resident-bytes") + + (define-private (helper (amount uint)) + (begin + (var-set counter (+ (var-get counter) amount)) + (ok (var-get counter)))) + + (define-read-only (lookup (owner principal)) + (default-to { amount: u0, memo: "none" } + (map-get? balances { owner: owner }))) + + (define-public (store (owner principal) (amount uint)) + (begin + (map-set balances { owner: owner } { amount: amount, memo: "cache-entry" }) + (try! (helper amount)) + (ok true))) + "#, + "resident-bytes-rich", + ); + + assert_eq!(contract.contract_context.variables.len(), 1); + assert_eq!(contract.contract_context.functions.len(), 3); + assert_eq!(contract.contract_context.meta_data_map.len(), 1); + assert_eq!(contract.contract_context.meta_data_var.len(), 1); + assert!(contract.contract_context.persisted_names.len() >= 2); + + assert_contract_bytes_consistent(&contract); + + // Magnitude check: a contract with 3 functions, a map, a var, and a constant must have + // substantial heap allocation beyond the bare struct size. 
+ assert!( + contract.resident_bytes() > size_of::<Contract>() + 1000, + "rich contract resident_bytes ({}) should exceed struct size + 1000", + contract.resident_bytes() + ); + } + + #[test] + fn resident_bytes_counts_ft_nft_and_traits() { + let contract = initialize_contract( + r#" + (define-fungible-token gold) + (define-fungible-token silver u1000000) + (define-non-fungible-token deed uint) + (define-non-fungible-token badge { class: uint, level: uint }) + (define-trait transferable ( + (transfer (uint principal principal) (response bool uint)) + (get-balance (principal) (response uint uint)))) + "#, + "resident-bytes-ft-nft-trait", + ); + + assert_eq!(contract.contract_context.meta_ft.len(), 2); + assert_eq!(contract.contract_context.meta_nft.len(), 2); + assert_eq!(contract.contract_context.defined_traits.len(), 1); + + assert_contract_bytes_consistent(&contract); + + // meta_nft contains a tuple key type (badge) — verify it contributes heap bytes + let nft_heap: usize = contract + .contract_context + .meta_nft + .values() + .map(|m| m.heap_bytes()) + .sum(); + assert!( + nft_heap > 0, + "NFT metadata with tuple key type should have non-zero heap bytes" + ); + + // defined_traits contains function signatures — verify they contribute heap bytes + let trait_heap: usize = contract + .contract_context + .defined_traits + .values() + .map(|m| m.heap_bytes()) + .sum(); + assert!( + trait_heap > 0, + "defined traits with function signatures should have non-zero heap bytes" + ); + } + + #[test] + fn resident_bytes_counts_implemented_traits() { + let mut marf = MemoryBackingStore::new(); + + // First contract defines the trait + let _trait_contract = initialize_contract_with_store( + &mut marf, + r#" + (define-trait transferable ( + (transfer (uint principal principal) (response bool uint)))) + "#, + "trait-definer", + ); + + // Second contract implements the trait (requires the first to be deployed) + let impl_contract = initialize_contract_with_store( + &mut marf, + r#" + 
(impl-trait .trait-definer.transferable) + (define-public (transfer (id uint) (from principal) (to principal)) + (ok true)) + "#, + "trait-impl", + ); + + assert_eq!(impl_contract.contract_context.implemented_traits.len(), 1); + + assert_contract_bytes_consistent(&impl_contract); + + // implemented_traits contains a TraitIdentifier; verify non-zero heap + let impl_heap = impl_contract + .contract_context + .implemented_traits + .heap_bytes(); + assert!( + impl_heap > 0, + "implemented_traits with a TraitIdentifier should have non-zero heap bytes" + ); + } + + #[test] + fn resident_bytes_grows_with_additional_initialized_content() { + let single_function = initialize_contract( + r#" + (define-public (echo (value uint)) + (ok value)) + "#, + "resident-bytes-single-fn", + ); + let many_functions = initialize_contract( + r#" + (define-private (double (value uint)) (+ value value)) + (define-private (triple (value uint)) (+ value (+ value value))) + (define-read-only (project (value uint)) + { original: value, doubled: (double value), tripled: (triple value) }) + (define-public (accumulate (a uint) (b uint) (c uint)) + (let ( + (first (double a)) + (second (triple b)) + (third (+ c u7))) + (ok (+ first (+ second third))))) + "#, + "resident-bytes-many-fns", + ); + + assert_contract_bytes_consistent(&single_function); + assert_contract_bytes_consistent(&many_functions); + assert!( + many_functions.contract_context.functions.len() + > single_function.contract_context.functions.len() + ); + assert!(many_functions.heap_bytes() > single_function.heap_bytes()); + assert!(many_functions.resident_bytes() > single_function.resident_bytes()); + } +} diff --git a/clarity/src/vm/database/structures.rs b/clarity/src/vm/database/structures.rs index 9741f3fb4ff..787a5a58486 100644 --- a/clarity/src/vm/database/structures.rs +++ b/clarity/src/vm/database/structures.rs @@ -16,6 +16,7 @@ use std::io::Write; +use clarity_types::resident_bytes::ResidentBytes; use serde::Deserialize; use 
stacks_common::util::hash::{hex_bytes, to_hex}; @@ -105,6 +106,30 @@ pub struct DataVariableMetadata { clarity_serializable!(DataVariableMetadata); +impl ResidentBytes for FungibleTokenMetadata { + fn heap_bytes(&self) -> usize { + 0 // Option — no heap allocation + } +} + +impl ResidentBytes for NonFungibleTokenMetadata { + fn heap_bytes(&self) -> usize { + self.key_type.heap_bytes() + } +} + +impl ResidentBytes for DataMapMetadata { + fn heap_bytes(&self) -> usize { + self.key_type.heap_bytes() + self.value_type.heap_bytes() + } +} + +impl ResidentBytes for DataVariableMetadata { + fn heap_bytes(&self) -> usize { + self.value_type.heap_bytes() + } +} + #[derive(Serialize, Deserialize)] pub struct ContractMetadata { pub contract: Contract, @@ -1475,3 +1500,60 @@ impl STXBalance { )? >= amount) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn heap_allocating_type() -> TypeSignature { + TypeSignature::OptionalType(Box::new(TypeSignature::UIntType)) + } + + #[test] + fn resident_bytes_fungible_token_metadata_has_no_heap() { + let none = FungibleTokenMetadata { total_supply: None }; + let some = FungibleTokenMetadata { + total_supply: Some(1), + }; + + assert_eq!(none.heap_bytes(), 0); + assert_eq!(some.heap_bytes(), 0); + } + + #[test] + fn resident_bytes_non_fungible_token_metadata_counts_key_type() { + let metadata = NonFungibleTokenMetadata { + key_type: heap_allocating_type(), + }; + + assert_eq!(metadata.heap_bytes(), metadata.key_type.heap_bytes()); + assert!(metadata.heap_bytes() > 0); + } + + #[test] + fn resident_bytes_data_map_metadata_counts_key_and_value_types() { + let metadata = DataMapMetadata { + key_type: heap_allocating_type(), + value_type: TypeSignature::ResponseType(Box::new(( + TypeSignature::BoolType, + TypeSignature::UIntType, + ))), + }; + + assert_eq!( + metadata.heap_bytes(), + metadata.key_type.heap_bytes() + metadata.value_type.heap_bytes() + ); + assert!(metadata.heap_bytes() > 0); + } + + #[test] + fn 
resident_bytes_data_variable_metadata_counts_value_type() { + let metadata = DataVariableMetadata { + value_type: heap_allocating_type(), + }; + + assert_eq!(metadata.heap_bytes(), metadata.value_type.heap_bytes()); + assert!(metadata.heap_bytes() > 0); + } +} diff --git a/clarity/src/vm/types/signatures.rs b/clarity/src/vm/types/signatures.rs index 62659be5daa..b2ef4a5cb1f 100644 --- a/clarity/src/vm/types/signatures.rs +++ b/clarity/src/vm/types/signatures.rs @@ -18,6 +18,7 @@ use std::collections::BTreeMap; use std::fmt; use clarity_types::ClarityTypeError; +use clarity_types::resident_bytes::ResidentBytes; pub use clarity_types::types::Value; pub use clarity_types::types::signatures::{ AssetIdentifier, BufferLength, CallableSubtype, ListTypeData, SequenceSubtype, StringSubtype, @@ -40,6 +41,12 @@ pub struct FunctionSignature { pub returns: TypeSignature, } +impl ResidentBytes for FunctionSignature { + fn heap_bytes(&self) -> usize { + self.args.heap_bytes() + self.returns.heap_bytes() + } +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FixedFunction { pub args: Vec, @@ -811,3 +818,25 @@ mod test { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn resident_bytes_function_signature_counts_args_and_return() { + let signature = FunctionSignature { + args: vec![ + TypeSignature::PrincipalType, + TypeSignature::OptionalType(Box::new(TypeSignature::UIntType)), + ], + returns: TypeSignature::OptionalType(Box::new(TypeSignature::BoolType)), + }; + + assert_eq!( + signature.heap_bytes(), + signature.args.heap_bytes() + signature.returns.heap_bytes() + ); + assert!(signature.heap_bytes() > 0); + } +} diff --git a/stacks-common/src/util/macros.rs b/stacks-common/src/util/macros.rs index 3b272d17638..2dbdb74d277 100644 --- a/stacks-common/src/util/macros.rs +++ b/stacks-common/src/util/macros.rs @@ -234,6 +234,11 @@ macro_rules! guarded_string { self.len() == 0 } + /// Returns the heap capacity of the backing `String` buffer. 
+ pub fn heap_capacity(&self) -> usize { + self.0.capacity() + } + /// The caller must guarantee that the conversion will succeed, because the method /// will panic otherwise. This is made for converting `&str` into things /// like `ClarityName`s, where the source value is hardcoded and thus it's visible