Skip to content
Open
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7f94df2
add squash logic and squash-aware trie lookups
francesco-stacks Mar 30, 2026
04f3aad
Merge branch 'feat/marf-squash-foundation' into feat/marf-squash-engine
francesco-stacks Mar 31, 2026
d142bf5
Merge branch 'feat/marf-squash-foundation' into feat/marf-squash-engine
francesco-stacks Apr 2, 2026
60e19dc
Merge branch 'feat/marf-squash-foundation' into feat/marf-squash-engine
francesco-stacks Apr 2, 2026
3ac2225
add changelog fragment
francesco-stacks Apr 2, 2026
a4007b4
Merge branch 'feat/marf-squash-foundation' into feat/marf-squash-engine
francesco-stacks Apr 27, 2026
eb62b95
Merge branch 'upstream-develop' into feat/marf-squash-engine
francesco-stacks May 8, 2026
8eb6275
refactor: stream blobs child-before-parent like dump_consume
francesco-stacks May 8, 2026
5db9691
refactor: remove unused code
francesco-stacks May 8, 2026
8b8832d
use is_squashed where possible
francesco-stacks May 11, 2026
70eafc0
use is_inline_child_ptr where possible
francesco-stacks May 11, 2026
cb393af
merge update_inline_child_ptrs and remap_ptrs_to_blob_offsets
francesco-stacks May 11, 2026
d38f8bd
feat: disable proofs for squashed marfs
francesco-stacks May 11, 2026
50d601b
feat: disable blocks lookups before squash height
francesco-stacks May 11, 2026
56cba88
error if output marf db or blobs already exist
francesco-stacks May 11, 2026
23ba282
various nits and improved tests
francesco-stacks May 11, 2026
ce22a2a
rename InlineOnlyBlockMap -> BackptrFreeBlockMap
francesco-stacks May 11, 2026
0d24e87
fix: atomically commit squash finalization in a single sql transaction
francesco-stacks May 11, 2026
4e6dcad
remove compute_blob_offsets; stream_squash_blob computes its own offsets
francesco-stacks May 11, 2026
c207c61
reject leaf iteration below squash height
francesco-stacks May 11, 2026
67a8ffa
build NodeStore temp paths with PathBuf
francesco-stacks May 11, 2026
6eb0265
track writer offsets without per-node stream_position
francesco-stacks May 11, 2026
88d8a06
split MARF squash internals
francesco-stacks May 11, 2026
06a84b3
Merge branch 'develop' into feat/marf-squash-engine
francesco-stacks May 11, 2026
f1736df
crc: clear db and blobs on failure. improve logged info
francesco-stacks May 12, 2026
6e175b8
crc: use a single writer
francesco-stacks May 12, 2026
71ed7da
avoid copying hash when possible
francesco-stacks May 12, 2026
a642873
various nits
francesco-stacks May 12, 2026
e820a0e
crc: rename update_inline_child_ptrs -> resolve_inline_child_offsets
francesco-stacks May 12, 2026
a7e5652
simplify some gates for squashed marf
francesco-stacks May 12, 2026
20739d6
add read_squashed_block_root_hash_by_hash and give more consistent na…
francesco-stacks May 12, 2026
70a1aa1
crc: query Trie before SQL side tables
francesco-stacks May 12, 2026
db15da7
crc: remove redundant squash guard
francesco-stacks May 12, 2026
35f9d01
clippy
francesco-stacks May 12, 2026
cd0a433
crc: add cleanup log on error
francesco-stacks May 13, 2026
13186f1
crc: internalize reader into NodeStore
francesco-stacks May 13, 2026
2289f6b
Merge branch 'develop' into feat/marf-squash-engine
francesco-stacks May 13, 2026
fcb3f3a
crc: re-add comment
francesco-stacks May 13, 2026
820e32e
crc: remove old retry and open Reader at creation time
francesco-stacks May 13, 2026
6324b34
skip redundant seek in overwrite_node, collapse finish_writing and flush
francesco-stacks May 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/marf-squash-engine.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add MARF squash engine (`squash_to_path`) and squash-aware trie lookups for root hashes and block heights
57 changes: 54 additions & 3 deletions stackslib/src/chainstate/stacks/index/bits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ use std::io::{ErrorKind, Read, Seek, SeekFrom, Write};
use sha2::{Digest, Sha512_256 as TrieHasher};

use crate::chainstate::stacks::index::node::{
clear_compressed, clear_ctrl_bits, is_compressed, ptrs_fmt, ConsensusSerializable, TrieNode,
TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, TrieNodePatch, TrieNodeType,
TriePtr,
clear_compressed, clear_ctrl_bits, is_backptr, is_compressed, ptrs_fmt, ConsensusSerializable,
TrieNode, TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, TrieNodePatch,
TrieNodeType, TriePtr,
};
use crate::chainstate::stacks::index::storage::TrieStorageConnection;
use crate::chainstate::stacks::index::{BlockMap, Error, MarfTrieId, TrieLeaf};
Expand Down Expand Up @@ -107,6 +107,13 @@ pub fn get_ptrs_byte_len(ptrs: &[TriePtr]) -> usize {
node_id_len + ptrs.iter().map(TriePtr::encoded_size).sum::<usize>()
}

/// Tests whether a pointer refers to an inline child, i.e. it is neither
/// empty nor a backpointer into an ancestor block.
#[inline]
pub fn is_inline_child_ptr(ptr: &TriePtr) -> bool {
    // De Morgan form of "non-empty AND not a backptr".
    !(ptr.is_empty() || is_backptr(ptr.id()))
}

/// Helper to determine a sparse TriePtr list's bitmap size, given the node ID's numeric value.
Returns Some(size) if the identified node type has ptrs
/// Returns None if `id` is a `Leaf`, `Patch`, or `Empty` node, or is unrecognized.
Expand Down Expand Up @@ -789,6 +796,50 @@ pub fn get_node_byte_len_compressed(node: &TrieNodeType) -> usize {
hash_len + node_byte_len
}

/// Compute the worst-case on-disk size for a root node that is reserved before
/// its descendants are written.
///
/// The base size is calculated with the root's current child pointer values.
/// Each inline child pointer may later widen from u32 to u64 once its final
/// file offset is known.
pub fn reserved_root_size(base_len: usize, ptrs: &[TriePtr]) -> Result<u64, Error> {
let base_len = base_len as u64;
let inline_count = ptrs.iter().filter(|p| is_inline_child_ptr(p)).count() as u64;
let inline_ptr_growth = inline_count.checked_mul(4).ok_or(Error::OverflowError)?;

base_len
.checked_add(inline_ptr_growth)
.ok_or(Error::OverflowError)
}

/// Rewrite inline child pointers from in-memory node indices to blob-local
/// byte offsets. Backpointers and empty pointers are left untouched.
pub fn update_inline_child_ptrs(ptrs: &mut [TriePtr], file_offsets: &[u64]) -> Result<(), Error> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not to start bikeshedding, but ... any good idea for a name for this function that makes it more obvious what's happening? "Update" is pretty generic and meaningless, especially considering how consequential the data modification is that it performs.

(I know you didn't actually add this function here, you just moved it)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed in e820a0e

for ptr in ptrs.iter_mut() {
if !is_inline_child_ptr(ptr) {
continue;
}

let child_idx = ptr.try_ptr_into_usize()?;
let Some(&offset) = file_offsets.get(child_idx) else {
return Err(Error::CorruptionError(format!(
"inline child index {child_idx} out of bounds"
)));
};
// 0 is the sentinel for "not yet placed": valid offsets are always
// past the blob header.
if offset == 0 {
return Err(Error::CorruptionError(format!(
"inline child index {child_idx} has not been written"
)));
}

ptr.ptr = offset;
}

Ok(())
}

/// Write all the bytes for a node, including its hash, to the given Writeable object.
/// The list of child pointers will NOT be compressed.
/// Returns Ok(nw) on success, where `nw` is the number of bytes written.
Expand Down
70 changes: 69 additions & 1 deletion stackslib/src/chainstate/stacks/index/marf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use rusqlite::{Connection, Transaction};
use stacks_common::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE};
use stacks_common::util::hash::Sha512Trunc256Sum;

pub use super::squash::SquashStats;
use super::storage::ReopenedTrieStorageConnection;
use crate::chainstate::stacks::index::bits::{get_leaf_hash, get_node_hash};
use crate::chainstate::stacks::index::node::{
Expand All @@ -35,7 +36,9 @@ use crate::chainstate::stacks::index::storage::{
TrieStorageTransaction,
};
use crate::chainstate::stacks::index::trie::Trie;
use crate::chainstate::stacks::index::{Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof};
use crate::chainstate::stacks::index::{
trie_sql, Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof,
};
use crate::util_lib::db::Error as db_error;

pub const BLOCK_HASH_TO_HEIGHT_MAPPING_KEY: &str = "__MARF_BLOCK_HASH_TO_HEIGHT";
Expand Down Expand Up @@ -208,6 +211,10 @@ pub trait MarfConnection<T: MarfTrieId> {
key: &str,
) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> {
self.with_conn(|conn| {
// Squash-aware proofs are not currently supported.
if conn.is_squashed() {
return Err(Error::UnsupportedOnSquashedMarf("get_with_proof"));
}
let marf_value = match MARF::get_by_key(conn, block_hash, key)? {
None => return Ok(None),
Some(x) => x,
Expand All @@ -223,6 +230,10 @@ pub trait MarfConnection<T: MarfTrieId> {
hash: &TrieHash,
) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> {
self.with_conn(|conn| {
// Squash-aware proofs are not currently supported.
if conn.is_squashed() {
return Err(Error::UnsupportedOnSquashedMarf("get_with_proof_from_hash"));
}
let marf_value = match MARF::get_by_path(conn, block_hash, hash)? {
None => return Ok(None),
Some(x) => x,
Expand Down Expand Up @@ -1133,6 +1144,20 @@ impl<T: MarfTrieId> MARF<T> {
) -> Result<Option<TrieLeaf>, Error> {
trace!("MARF::get_path({block_hash:?}) {path:?}");

// In a squashed MARF, blocks below the squash height share the same blob, so reject
// historical reads from them.
if let Some(squash_height) = storage.squash_info().map(|info| info.height) {
if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)?
{
if h < squash_height {
return Err(Error::HistoricalReadInSquashedRange {
block_height: h,
squash_height,
});
}
}
}

// a NotFoundError _here_ means that a block didn't exist
storage.open_block(block_hash).inspect_err(|_e| {
test_debug!("Failed to open block {block_hash:?}: {_e:?}");
Expand Down Expand Up @@ -1341,6 +1366,16 @@ impl<T: MarfTrieId> MARF<T> {
}
}

// In a squashed MARF, OWN_BLOCK_HEIGHT_KEY returns the squash
// height H for every block in the squashed range. Use the
// side-table when available.
if storage.is_squashed() {
if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)?
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume that in the vast majority of cases, this function is going to be called with the chain tip or very close to it.

If that's true, wouldn't it be better if we tried the MARF first, and only fall back to SQL later?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is a valid point. In normal operations that will be the case: we'll mostly read from the squash during clarity lookups, block replay, and the RPC endpoints in general. Anyway, I made the change — it was a bit more involved than I expected, because doing the SQL lookup first removed a couple of edge cases. 70a1aa1

{
return Ok(Some(h));
}
}

let marf_value = if block_hash == current_block_hash {
MARF::get_by_key(storage, current_block_hash, OWN_BLOCK_HEIGHT_KEY)?
} else {
Expand Down Expand Up @@ -1390,6 +1425,19 @@ impl<T: MarfTrieId> MARF<T> {
return Ok(Some(current_block_hash.clone()));
}

// Squashed MARFs keep historical height -> block mappings in
// `marf_squashed_blocks`, not in per-height trie state. When the
// caller is inside the squashed range, answer from the side table
// and preserve the usual "no future blocks" behavior.
if let Some(squash_height) = storage.squash_info().map(|info| info.height) {
if current_block_height <= squash_height {
if height > current_block_height {
return Ok(None);
}
return trie_sql::read_squash_block_hash::<T>(storage.sqlite_conn(), height);
}
}

let height_key = format!("{}::{}", BLOCK_HEIGHT_TO_HASH_MAPPING_KEY, height);

MARF::get_by_key(storage, current_block_hash, &height_key)
Expand Down Expand Up @@ -1486,6 +1534,10 @@ impl<T: MarfTrieId> MARF<T> {
key: &str,
) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> {
let mut conn = self.storage.connection();
// Squash-aware proofs are not currently supported.
if conn.is_squashed() {
return Err(Error::UnsupportedOnSquashedMarf("get_with_proof"));
}
let marf_value = match MARF::get_by_key(&mut conn, block_hash, key)? {
None => return Ok(None),
Some(x) => x,
Expand All @@ -1500,6 +1552,10 @@ impl<T: MarfTrieId> MARF<T> {
path: &TrieHash,
) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> {
let mut conn = self.storage.connection();
// Squash-aware proofs are not currently supported.
if conn.is_squashed() {
return Err(Error::UnsupportedOnSquashedMarf("get_with_proof_from_hash"));
}
let marf_value = match MARF::get_by_path(&mut conn, block_hash, path)? {
None => return Ok(None),
Some(x) => x,
Expand Down Expand Up @@ -1765,6 +1821,18 @@ impl<T: MarfTrieId> MARF<T> {
where
F: FnMut(TrieHash, MARFValue) -> Result<(), Error>,
{
if let Some(squash_height) = storage.squash_info().map(|info| info.height) {
if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)?
{
if h < squash_height {
return Err(Error::HistoricalReadInSquashedRange {
block_height: h,
squash_height,
});
}
}
}

let (original_block_hash, original_block_id) = storage.get_cur_block_and_id();
let result = Self::for_each_leaf_inner(storage, block_hash, &mut handle_leaf);

Expand Down
27 changes: 27 additions & 0 deletions stackslib/src/chainstate/stacks/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub mod marf;
pub mod node;
pub mod profile;
pub mod proofs;
pub mod squash;
pub mod storage;
pub mod trie;
pub mod trie_sql;
Expand Down Expand Up @@ -265,6 +266,18 @@ pub enum Error {
OverflowError,
Patch(Option<TrieHash>, TrieNodePatch),
NodeTooDeep,
/// Read at a block strictly below the squash height of a squashed MARF.
/// The squashed MARF only retains the canonical state at H, so per-block
/// historical reads in `0..H` cannot be served.
HistoricalReadInSquashedRange {
block_height: u32,
squash_height: u32,
},
/// Operation is not supported on a squashed MARF (e.g. proof generation).
UnsupportedOnSquashedMarf(&'static str),
/// A destination path required to be empty already exists. Carries the
/// offending path.
DestinationExists(String),
}

impl From<io::Error> for Error {
Expand Down Expand Up @@ -335,6 +348,20 @@ impl fmt::Display for Error {
write!(f, "Read patch node instead of expected node: {p:?}")
}
Error::NodeTooDeep => write!(f, "Node is too deeply buried under patches"),
Error::HistoricalReadInSquashedRange {
block_height,
squash_height,
} => write!(
f,
"Historical read at height {block_height} below squash height {squash_height} \
is not supported on a squashed MARF"
),
Error::UnsupportedOnSquashedMarf(op) => {
write!(f, "Operation `{op}` is not supported on a squashed MARF")
}
Error::DestinationExists(ref p) => {
write!(f, "Destination path already exists: {p}")
}
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions stackslib/src/chainstate/stacks/index/proofs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,13 @@ impl<T: MarfTrieId> TrieMerkleProof<T> {
expected_value: &MARFValue,
root_block_header: &T,
) -> Result<TrieMerkleProof<T>, Error> {
// Squash-aware proofs are not currently supported.
if storage.is_squashed() {
return Err(Error::UnsupportedOnSquashedMarf(
"TrieMerkleProof::from_path",
));
}

// accumulate proofs in reverse order -- each proof will be from an earlier and earlier
// trie, so we'll reverse them in the end so the proof starts with the latest trie.
let mut segment_proofs = vec![];
Expand Down
Loading
Loading