diff --git a/changelog.d/7049-clarity-type-size-approximation.added b/changelog.d/7049-clarity-type-size-approximation.added
new file mode 100644
index 00000000000..b79a3077027
--- /dev/null
+++ b/changelog.d/7049-clarity-type-size-approximation.added
@@ -0,0 +1 @@
+New `ResidentBytes` trait for types which can approximate their resident memory size (stack+heap)
\ No newline at end of file
diff --git a/changelog.d/README.md b/changelog.d/README.md
index d29d8d5c7ad..3e4f4bd8632 100644
--- a/changelog.d/README.md
+++ b/changelog.d/README.md
@@ -24,7 +24,7 @@ CHANGELOG.md.
2. Write the changelog entry text in the file (one or more lines of markdown):
- ```
+ ```text
Added `marf_compress` as a node configuration parameter to enable MARF compression feature ([#6811](https://github.com/stacks-network/stacks-core/pull/6811))
```
diff --git a/clarity-types/src/lib.rs b/clarity-types/src/lib.rs
index 1b1bef2c501..ca9cb1f286c 100644
--- a/clarity-types/src/lib.rs
+++ b/clarity-types/src/lib.rs
@@ -26,6 +26,7 @@ pub use stacks_common::{
pub mod errors;
pub mod representations;
+pub mod resident_bytes;
pub mod types;
pub use errors::{ClarityTypeError, IncomparableError};
diff --git a/clarity-types/src/resident_bytes.rs b/clarity-types/src/resident_bytes.rs
new file mode 100644
index 00000000000..8935bf5459d
--- /dev/null
+++ b/clarity-types/src/resident_bytes.rs
@@ -0,0 +1,1201 @@
+// Copyright (C) 2026 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::mem::size_of;
+use std::sync::Arc;
+
+#[cfg(feature = "developer-mode")]
+use crate::representations::Span;
+use crate::representations::{
+ ClarityName, ContractName, SymbolicExpression, SymbolicExpressionType, TraitDefinition,
+};
+use crate::types::signatures::{
+ BufferLength, CallableSubtype, ListTypeData, SequenceSubtype, StringSubtype, StringUTF8Length,
+ TupleTypeSignature, TypeSignature,
+};
+use crate::types::{
+ ASCIIData, BuffData, CallableData, CharType, FunctionIdentifier, ListData, OptionalData,
+ PrincipalData, QualifiedContractIdentifier, ResponseData, SequenceData, StandardPrincipalData,
+ TraitIdentifier, TupleData, UTF8Data, Value,
+};
+
+/// Estimated overhead for `Arc`: the strong + weak refcounts (2 × `usize` = 16 bytes on 64-bit
+/// targets); any allocator header beyond that is not separately counted.
+const ARC_OVERHEAD: usize = 16;
+
+// The `btree` and `hashmap` modules below contain heuristic constants derived from std's internal
+// implementations (as of Rust 1.94 / hashbrown 0.15). They provide reasonable estimates of
+// structural overhead, not exact byte counts.
+
+/// Layout constants for `std::collections::BTreeMap` / `BTreeSet`.
+///
+/// BTreeMap uses `B=6`, so nodes hold up to `2*B-1 = 11` entries. Leaf nodes store keys+values;
+/// internal nodes add 12 edge pointers. ~32 bytes overhead per node (metadata + allocator header),
+/// ~2/3 average fill (~7 entries/node, ~8 children for internal nodes).
+mod btree {
+    use std::mem::size_of;
+
+    /// Maximum entries per node (`B=6` → `2*B-1 = 11`).
+    pub const NODE_CAPACITY: usize = 11;
+    /// Estimated average entries per node in a steady-state B-tree (~2/3 fill).
+    pub const AVERAGE_FILL: usize = 7;
+    /// Average children per internal node at ~2/3 fill.
+    pub const AVG_FANOUT: usize = AVERAGE_FILL + 1;
+    /// Per-node overhead: `(parent ptr + idx + len + padding) + allocator header`.
+    pub const NODE_OVERHEAD: usize = 32;
+    /// Additional per-node size for internal nodes: `[MaybeUninit<NonNull<Node>>; CAPACITY + 1]`.
+    pub const EDGE_ARRAY_SIZE: usize = (NODE_CAPACITY + 1) * size_of::<usize>();
+
+    /// Estimate total BTree node count (leaves + internal) and how many are internal.
+    pub fn node_counts(len: usize) -> (usize, usize) {
+        let leaves = len.div_ceil(AVERAGE_FILL);
+        let mut internal = 0;
+        let mut children_at_level = leaves;
+        while children_at_level > 1 {
+            let parents = children_at_level.div_ceil(AVG_FANOUT);
+            internal += parents;
+            children_at_level = parents;
+        }
+        (leaves + internal, internal)
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::*;
+
+        #[test]
+        fn btree_node_counts() {
+            // 7 entries: 1 leaf, 0 internal
+            assert_eq!(node_counts(7), (1, 0));
+            // 12 entries: 2 leaves + 1 internal root
+            let (total, internal) = node_counts(12);
+            assert_eq!(total, 3);
+            assert_eq!(internal, 1);
+            // 0 entries edge case
+            assert_eq!(node_counts(0), (0, 0));
+        }
+    }
+}
+
+/// Layout constants for [`HashMap`] / [`HashSet`] (hashbrown-backed since Rust 1.36).
+///
+/// `hashbrown` uses a 7/8 max load factor and 1-byte control tags per bucket.
+///
+/// The control array is padded by `Group::WIDTH` (4/8/16 depending on SIMD support); we use 16 as
+/// an upper bound.
+mod hashmap {
+    /// Inverse of `hashbrown`'s max load factor (7/8), as a fraction: `buckets ~= (capacity * 8/7)`.
+    pub const LOAD_FACTOR_INV_NUM: usize = 8;
+    pub const LOAD_FACTOR_INV_DEN: usize = 7;
+    /// Upper bound for SIMD group-width padding. In hashbrown 0.15, Group::WIDTH varies by target
+    /// and implementation (4/8/16 bytes), so we use 16 as a conservative upper bound for
+    /// control-byte padding overhead.
+    pub const CONTROL_GROUP_PADDING: usize = 16;
+
+    /// Calculate the number of buckets for a given `HashMap` capacity, based on hashbrown's growth
+    /// strategy and load factor. NOTE(review): exact only when `cap` is a reported capacity
+    /// (7/8 of a power-of-two bucket count); arbitrary `cap` values yield a non-power-of-two estimate.
+    pub fn buckets_for_capacity(cap: usize) -> usize {
+        (cap * LOAD_FACTOR_INV_NUM).div_ceil(LOAD_FACTOR_INV_DEN)
+    }
+}
+
+/// Approximate in-memory footprint, in bytes.
+///
+/// Split into [`heap_bytes()`](Self::heap_bytes) (children only) and
+/// [`resident_bytes()`](Self::resident_bytes) (inline + heap) to avoid double-counting in nested
+/// types — containers call `heap_bytes()` on children, only the outermost caller should use
+/// `resident_bytes()`.
+pub trait ResidentBytes: Sized {
+    /// Total approximate memory footprint of this instance.
+    ///
+    /// Default implementation: [`size_of::<Self>()`](size_of) (inline size) +
+    /// [`heap_bytes()`](Self::heap_bytes) (additional heap allocations).
+    fn resident_bytes(&self) -> usize {
+        // Note: if we ever need to support unsized types, we should switch to size_of_val(self)
+        // here instead of size_of::<Self>() and remove the Sized trait bound.
+        std::mem::size_of::<Self>() + self.heap_bytes()
+    }
+
+    /// Heap allocations only, beyond the inline size reported by [`size_of::<Self>()`](size_of).
+    fn heap_bytes(&self) -> usize;
+}
+
+impl ResidentBytes for String {
+    fn heap_bytes(&self) -> usize {
+        self.capacity() // the full backing allocation, not just len()
+    }
+}
+
+impl<T: ResidentBytes> ResidentBytes for Vec<T> {
+    fn heap_bytes(&self) -> usize {
+        // Backing array: capacity slots (inline size per slot)
+        let backing = self.capacity() * size_of::<T>();
+
+        // Children's heap allocations
+        let children: usize = self.iter().map(|v| v.heap_bytes()).sum();
+
+        // Total heap
+        backing + children
+    }
+}
+
+impl<T: ResidentBytes> ResidentBytes for Box<T> {
+    fn heap_bytes(&self) -> usize {
+        // Box heap-allocates the pointee: its inline size + its own heap
+        size_of::<T>() + (**self).heap_bytes()
+    }
+}
+
+impl<T: ResidentBytes> ResidentBytes for Option<T> {
+    fn heap_bytes(&self) -> usize {
+        match self {
+            // For Some, the T is inline in the Option; only count T's heap
+            Some(v) => v.heap_bytes(),
+            None => 0,
+        }
+    }
+}
+
+impl<T: ResidentBytes> ResidentBytes for Arc<T> {
+    fn heap_bytes(&self) -> usize {
+        // Counts the Arc allocation (header + pointee). Shared backing may be overcounted if
+        // multiple Arc handles to the same allocation are reachable in one measured graph.
+        ARC_OVERHEAD + size_of::<T>() + (**self).heap_bytes()
+    }
+}
+
+impl<K: ResidentBytes, V: ResidentBytes> ResidentBytes for HashMap<K, V> {
+    fn heap_bytes(&self) -> usize {
+        let cap = self.capacity();
+        if cap == 0 {
+            // HashMap::new() does not allocate until first insert.
+            return 0;
+        }
+
+        let buckets = hashmap::buckets_for_capacity(cap);
+        let backing = buckets * size_of::<(K, V)>() + buckets + hashmap::CONTROL_GROUP_PADDING;
+
+        // Children's heap allocations (only for occupied entries)
+        let children: usize = self
+            .iter()
+            .map(|(k, v)| k.heap_bytes() + v.heap_bytes())
+            .sum();
+
+        backing + children
+    }
+}
+
+impl<K: ResidentBytes, V: ResidentBytes> ResidentBytes for BTreeMap<K, V> {
+    fn heap_bytes(&self) -> usize {
+        if self.is_empty() {
+            return 0; // Empty BTreeMaps do not allocate on the heap.
+        }
+
+        let (total_nodes, internal_nodes) = btree::node_counts(self.len());
+
+        // Base node size (shared by leaf and internal): overhead + key/value arrays
+        let leaf_size = btree::NODE_OVERHEAD
+            + (btree::NODE_CAPACITY * size_of::<K>())
+            + (btree::NODE_CAPACITY * size_of::<V>());
+        // Internal nodes additionally carry an edge pointer array
+        let structural = total_nodes * leaf_size + internal_nodes * btree::EDGE_ARRAY_SIZE;
+
+        // Children's heap allocations (only for occupied entries)
+        let children: usize = self
+            .iter()
+            .map(|(k, v)| k.heap_bytes() + v.heap_bytes())
+            .sum();
+
+        structural + children
+    }
+}
+
+impl<T: ResidentBytes> ResidentBytes for BTreeSet<T> {
+    fn heap_bytes(&self) -> usize {
+        if self.is_empty() {
+            return 0;
+        }
+
+        let (total_nodes, internal_nodes) = btree::node_counts(self.len());
+
+        // BTreeSet is backed by BTreeMap — vals array is zero-size
+        let leaf_size = btree::NODE_OVERHEAD + (btree::NODE_CAPACITY * size_of::<T>());
+        let structural = total_nodes * leaf_size + internal_nodes * btree::EDGE_ARRAY_SIZE;
+        let children: usize = self.iter().map(|v| v.heap_bytes()).sum();
+        structural + children
+    }
+}
+
+impl<T: ResidentBytes> ResidentBytes for HashSet<T> {
+    fn heap_bytes(&self) -> usize {
+        let cap = self.capacity();
+        if cap == 0 {
+            return 0;
+        }
+
+        let buckets = hashmap::buckets_for_capacity(cap);
+        let backing = buckets * size_of::<T>() + buckets + hashmap::CONTROL_GROUP_PADDING;
+        let children: usize = self.iter().map(|v| v.heap_bytes()).sum();
+        backing + children
+    }
+}
+
+impl<A: ResidentBytes, B: ResidentBytes> ResidentBytes for (A, B) {
+    fn heap_bytes(&self) -> usize {
+        self.0.heap_bytes() + self.1.heap_bytes()
+    }
+}
+
+// Primitive types: `Copy` scalars live entirely inline (stack / registers), so heap_bytes is 0.
+
+impl ResidentBytes for bool {
+    fn heap_bytes(&self) -> usize {
+        0
+    }
+}
+impl ResidentBytes for u8 {
+    fn heap_bytes(&self) -> usize {
+        0
+    }
+}
+impl ResidentBytes for u32 {
+    fn heap_bytes(&self) -> usize {
+        0
+    }
+}
+impl ResidentBytes for u64 {
+    fn heap_bytes(&self) -> usize {
+        0
+    }
+}
+impl ResidentBytes for u128 {
+    fn heap_bytes(&self) -> usize {
+        0
+    }
+}
+impl ResidentBytes for i128 {
+    fn heap_bytes(&self) -> usize {
+        0
+    }
+}
+
+impl ResidentBytes for Value {
+    fn heap_bytes(&self) -> usize {
+        match self {
+            Value::Int(_) | Value::UInt(_) | Value::Bool(_) => 0, // payload inline in the enum
+            Value::Sequence(data) => data.heap_bytes(),
+            Value::Principal(data) => data.heap_bytes(),
+            Value::Tuple(data) => data.heap_bytes(),
+            Value::Optional(data) => data.heap_bytes(),
+            Value::Response(data) => data.heap_bytes(),
+            Value::CallableContract(data) => data.heap_bytes(),
+        }
+    }
+}
+
+impl ResidentBytes for SequenceData {
+    fn heap_bytes(&self) -> usize {
+        // Pure dispatch: each variant's payload owns any heap allocation.
+        match self {
+            SequenceData::Buffer(buf) => buf.heap_bytes(),
+            SequenceData::List(list) => list.heap_bytes(),
+            SequenceData::String(char_type) => char_type.heap_bytes(),
+        }
+    }
+}
+
+impl ResidentBytes for BuffData {
+    fn heap_bytes(&self) -> usize {
+        self.data.heap_bytes() // delegate to the backing byte vector
+    }
+}
+
+impl ResidentBytes for ListData {
+    fn heap_bytes(&self) -> usize {
+        // Element storage + the list's element-type signature (itself possibly heap-backed).
+        self.data.heap_bytes() + self.type_signature.heap_bytes()
+    }
+}
+
+impl ResidentBytes for CharType {
+    fn heap_bytes(&self) -> usize {
+        match self {
+            CharType::ASCII(data) => data.heap_bytes(),
+            CharType::UTF8(data) => data.heap_bytes(),
+        }
+    }
+}
+
+impl ResidentBytes for ASCIIData {
+    fn heap_bytes(&self) -> usize {
+        self.data.heap_bytes()
+    }
+}
+
+impl ResidentBytes for UTF8Data {
+    fn heap_bytes(&self) -> usize {
+        self.data.heap_bytes()
+    }
+}
+
+impl ResidentBytes for TupleData {
+    fn heap_bytes(&self) -> usize {
+        // Type signature + the name→value map contents.
+        self.type_signature.heap_bytes() + self.data_map.heap_bytes()
+    }
+}
+
+impl ResidentBytes for OptionalData {
+    fn heap_bytes(&self) -> usize {
+        self.data.heap_bytes()
+    }
+}
+
+impl ResidentBytes for ResponseData {
+    fn heap_bytes(&self) -> usize {
+        self.data.heap_bytes()
+    }
+}
+
+impl ResidentBytes for CallableData {
+    fn heap_bytes(&self) -> usize {
+        self.contract_identifier.heap_bytes() + self.trait_identifier.heap_bytes()
+    }
+}
+
+impl ResidentBytes for PrincipalData {
+    fn heap_bytes(&self) -> usize {
+        match self {
+            PrincipalData::Standard(data) => data.heap_bytes(),
+            PrincipalData::Contract(data) => data.heap_bytes(),
+        }
+    }
+}
+
+impl ResidentBytes for StandardPrincipalData {
+    fn heap_bytes(&self) -> usize {
+        0 // Fixed-size: u8 + [u8; 20], no heap allocation
+    }
+}
+
+impl ResidentBytes for QualifiedContractIdentifier {
+    fn heap_bytes(&self) -> usize {
+        self.issuer.heap_bytes() + self.name.heap_bytes()
+    }
+}
+
+impl ResidentBytes for ClarityName {
+    fn heap_bytes(&self) -> usize {
+        // heap_capacity() is provided by the name type; presumably the backing
+        // string's allocated capacity — confirm against representations.rs.
+        self.heap_capacity()
+    }
+}
+
+impl ResidentBytes for ContractName {
+    fn heap_bytes(&self) -> usize {
+        self.heap_capacity()
+    }
+}
+
+impl ResidentBytes for TraitIdentifier {
+    fn heap_bytes(&self) -> usize {
+        self.name.heap_bytes() + self.contract_identifier.heap_bytes()
+    }
+}
+
+impl ResidentBytes for FunctionIdentifier {
+    fn heap_bytes(&self) -> usize {
+        self.heap_capacity()
+    }
+}
+
+impl ResidentBytes for TypeSignature {
+    fn heap_bytes(&self) -> usize {
+        match self {
+            TypeSignature::NoType
+            | TypeSignature::IntType
+            | TypeSignature::UIntType
+            | TypeSignature::BoolType
+            | TypeSignature::PrincipalType => 0, // atomic variants carry no payload
+            TypeSignature::SequenceType(subtype) => subtype.heap_bytes(),
+            TypeSignature::TupleType(tuple_sig) => tuple_sig.heap_bytes(),
+            TypeSignature::OptionalType(inner) => inner.heap_bytes(),
+            TypeSignature::ResponseType(inner) => inner.heap_bytes(),
+            TypeSignature::CallableType(subtype) => subtype.heap_bytes(),
+            TypeSignature::ListUnionType(set) => set.heap_bytes(),
+            TypeSignature::TraitReferenceType(trait_id) => trait_id.heap_bytes(),
+        }
+    }
+}
+
+impl ResidentBytes for TupleTypeSignature {
+    fn heap_bytes(&self) -> usize {
+        // TupleTypeSignature wraps Arc<BTreeMap<ClarityName, TypeSignature>>. get_type_map()
+        // returns &BTreeMap — count Arc overhead + inline map header + map contents.
+        let map_header = size_of::<BTreeMap<ClarityName, TypeSignature>>();
+        ARC_OVERHEAD + map_header + self.get_type_map().heap_bytes()
+    }
+}
+
+impl ResidentBytes for SequenceSubtype {
+    fn heap_bytes(&self) -> usize {
+        // Pure dispatch to the variant payload.
+        match self {
+            SequenceSubtype::BufferType(len) => len.heap_bytes(),
+            SequenceSubtype::ListType(list) => list.heap_bytes(),
+            SequenceSubtype::StringType(string) => string.heap_bytes(),
+        }
+    }
+}
+
+impl ResidentBytes for ListTypeData {
+    fn heap_bytes(&self) -> usize {
+        // max_len: u32 (no heap), entry_type: Box<TypeSignature> — count the boxed
+        // pointee's inline size plus the signature's own heap allocations.
+        size_of::<TypeSignature>() + self.get_list_item_type().heap_bytes()
+    }
+}
+
+impl ResidentBytes for StringSubtype {
+    fn heap_bytes(&self) -> usize {
+        0 // Both variants (ASCII, UTF8) contain only u32 newtypes
+    }
+}
+
+impl ResidentBytes for BufferLength {
+    fn heap_bytes(&self) -> usize {
+        0 // u32 newtype
+    }
+}
+
+impl ResidentBytes for StringUTF8Length {
+    fn heap_bytes(&self) -> usize {
+        0 // u32 newtype
+    }
+}
+
+impl ResidentBytes for CallableSubtype {
+    fn heap_bytes(&self) -> usize {
+        // Pure dispatch to the identifier held by each variant.
+        match self {
+            CallableSubtype::Principal(id) => id.heap_bytes(),
+            CallableSubtype::Trait(trait_id) => trait_id.heap_bytes(),
+        }
+    }
+}
+
+#[cfg(feature = "developer-mode")]
+impl ResidentBytes for Span {
+    fn heap_bytes(&self) -> usize {
+        0 // 4 × u32 fields, fully inline — no heap allocation
+    }
+}
+
+impl ResidentBytes for SymbolicExpression {
+    fn heap_bytes(&self) -> usize {
+        #[allow(unused_mut)] // mut is only needed under developer-mode below
+        let mut total = self.expr.heap_bytes();
+        // id is u64 — no heap allocation
+
+        #[cfg(feature = "developer-mode")]
+        {
+            // span is inline (no heap), but pre_comments, end_line_comment, and
+            // post_comments have heap allocations via Vec/String.
+            total += self.pre_comments.heap_bytes();
+            total += self.end_line_comment.heap_bytes();
+            total += self.post_comments.heap_bytes();
+        }
+
+        total
+    }
+}
+
+impl ResidentBytes for SymbolicExpressionType {
+    fn heap_bytes(&self) -> usize {
+        // Pure dispatch: each variant's payload owns its heap allocations.
+        match self {
+            SymbolicExpressionType::AtomValue(value)
+            | SymbolicExpressionType::LiteralValue(value) => value.heap_bytes(),
+            SymbolicExpressionType::Atom(name) => name.heap_bytes(),
+            SymbolicExpressionType::List(exprs) => exprs.heap_bytes(),
+            SymbolicExpressionType::Field(trait_id) => trait_id.heap_bytes(),
+            SymbolicExpressionType::TraitReference(name, defn) => {
+                name.heap_bytes() + defn.heap_bytes()
+            }
+        }
+    }
+}
+
+impl ResidentBytes for TraitDefinition {
+    fn heap_bytes(&self) -> usize {
+        // Both variants wrap a TraitIdentifier.
+        match self {
+            TraitDefinition::Defined(id) | TraitDefinition::Imported(id) => id.heap_bytes(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+    mod primitives {
+        use super::*;
+
+        #[test]
+        fn primitive_heap_bytes_zero() {
+            assert_eq!(true.heap_bytes(), 0);
+            assert_eq!(0u8.heap_bytes(), 0);
+            assert_eq!(0u32.heap_bytes(), 0);
+            assert_eq!(0u64.heap_bytes(), 0);
+            assert_eq!(0u128.heap_bytes(), 0);
+            assert_eq!(0i128.heap_bytes(), 0);
+        }
+
+        #[test]
+        fn u64_resident_bytes() {
+            let v: u64 = 42;
+            assert_eq!(v.resident_bytes(), 8); // size_of::<u64>() with no heap component
+            assert_eq!(v.heap_bytes(), 0);
+        }
+    }
+
+ mod std_containers {
+ use super::*;
+
+        const HASHMAP_CAPACITY_TRANSITIONS: &[(usize, usize, usize)] = &[
+            // rows: (element count, reported capacity, bucket count) —
+            // NOTE(review): presumably mirrors hashbrown's power-of-two growth; confirm
+            (0, 0, 0),
+            (1, 3, 4),
+            (4, 7, 8),
+            (8, 14, 16),
+            (15, 28, 32),
+            (29, 56, 64),
+            (57, 112, 128),
+            (113, 224, 256),
+            (225, 448, 512),
+        ];
+
+        #[test]
+        fn string() {
+            let s = String::from("hello world");
+            // inline String header + at least the 11 bytes of content on the heap
+            assert!(s.resident_bytes() >= size_of::<String>() + 11);
+            assert!(s.heap_bytes() >= 11);
+        }
+
+        #[test]
+        fn vec() {
+            let v: Vec<u64> = vec![1, 2, 3, 4, 5];
+            // 5 elements × 8 bytes = 40 bytes minimum backing storage
+            assert!(v.heap_bytes() >= 40);
+            assert!(v.resident_bytes() >= size_of::<Vec<u64>>() + 40);
+        }
+
+        #[test]
+        fn boxed() {
+            let b = Box::new(String::from("boxed"));
+            // Box pointee (String header) + the 5-byte string content
+            assert!(b.heap_bytes() >= size_of::<String>() + 5);
+        }
+
+ #[test]
+ fn option_none() {
+ let opt: Option> = None;
+ assert_eq!(opt.heap_bytes(), 0);
+ assert!(opt.resident_bytes() >= size_of::