diff --git a/changelog.d/marf-squash-engine.added b/changelog.d/marf-squash-engine.added new file mode 100644 index 0000000000..ed709c40e4 --- /dev/null +++ b/changelog.d/marf-squash-engine.added @@ -0,0 +1 @@ +Add MARF squash engine (`squash_to_path`) and squash-aware trie lookups for root hashes and block heights diff --git a/stackslib/src/chainstate/stacks/index/bits.rs b/stackslib/src/chainstate/stacks/index/bits.rs index d8579987ce..dcc4526218 100644 --- a/stackslib/src/chainstate/stacks/index/bits.rs +++ b/stackslib/src/chainstate/stacks/index/bits.rs @@ -20,9 +20,9 @@ use std::io::{ErrorKind, Read, Seek, SeekFrom, Write}; use sha2::{Digest, Sha512_256 as TrieHasher}; use crate::chainstate::stacks::index::node::{ - clear_compressed, clear_ctrl_bits, is_compressed, ptrs_fmt, ConsensusSerializable, TrieNode, - TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, TrieNodePatch, TrieNodeType, - TriePtr, + clear_compressed, clear_ctrl_bits, is_backptr, is_compressed, ptrs_fmt, ConsensusSerializable, + TrieNode, TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, TrieNodePatch, + TrieNodeType, TriePtr, }; use crate::chainstate::stacks::index::storage::TrieStorageConnection; use crate::chainstate::stacks::index::{BlockMap, Error, MarfTrieId, TrieLeaf}; @@ -107,6 +107,13 @@ pub fn get_ptrs_byte_len(ptrs: &[TriePtr]) -> usize { node_id_len + ptrs.iter().map(TriePtr::encoded_size).sum::() } +/// Returns `true` when a pointer is an inline child: non-empty and not a +/// backpointer to an ancestor block. +#[inline] +pub fn is_inline_child_ptr(ptr: &TriePtr) -> bool { + !ptr.is_empty() && !is_backptr(ptr.id()) +} + /// Helper to determine a sparse TriePtr list's bitmap size, given the node ID's numeric value. /// Returns Some(size) if the node identified node type has ptrs /// Returns None if `id` is a `Leaf`, `Patch`, or `Empty` node, or is unrecognized. 
@@ -789,6 +796,53 @@ pub fn get_node_byte_len_compressed(node: &TrieNodeType) -> usize { hash_len + node_byte_len } +/// Compute the worst-case on-disk size for a root node that is reserved before +/// its descendants are written. +/// +/// The base size is calculated with the root's current child pointer values. +/// Each inline child pointer may later widen from u32 to u64 once its final +/// file offset is known. +pub fn reserved_root_size(base_len: usize, ptrs: &[TriePtr]) -> Result { + let base_len = base_len as u64; + let inline_count = ptrs.iter().filter(|p| is_inline_child_ptr(p)).count() as u64; + let inline_ptr_growth = inline_count.checked_mul(4).ok_or(Error::OverflowError)?; + + base_len + .checked_add(inline_ptr_growth) + .ok_or(Error::OverflowError) +} + +/// Rewrite inline child pointers from in-memory node indices to blob-local +/// byte offsets. Backpointers and empty pointers are left untouched. +pub fn resolve_inline_child_offsets( + ptrs: &mut [TriePtr], + file_offsets: &[u64], +) -> Result<(), Error> { + for ptr in ptrs.iter_mut() { + if !is_inline_child_ptr(ptr) { + continue; + } + + let child_idx = ptr.try_ptr_into_usize()?; + let Some(&offset) = file_offsets.get(child_idx) else { + return Err(Error::CorruptionError(format!( + "inline child index {child_idx} out of bounds" + ))); + }; + // 0 is the sentinel for "not yet placed": valid offsets are always + // past the blob header. + if offset == 0 { + return Err(Error::CorruptionError(format!( + "inline child index {child_idx} has not been written" + ))); + } + + ptr.ptr = offset; + } + + Ok(()) +} + /// Write all the bytes for a node, including its hash, to the given Writeable object. /// The list of child pointers will NOT be compressed. /// Returns Ok(nw) on success, where `nw` is the number of bytes written. 
@@ -796,7 +850,7 @@ pub fn get_node_byte_len_compressed(node: &TrieNodeType) -> usize { pub fn write_nodetype_bytes( f: &mut F, node: &TrieNodeType, - hash: TrieHash, + hash: &TrieHash, ) -> Result { let start = f.stream_position().map_err(Error::IOError)?; f.write_all(hash.as_bytes())?; diff --git a/stackslib/src/chainstate/stacks/index/marf.rs b/stackslib/src/chainstate/stacks/index/marf.rs index 4d9b71325f..f5f63b2c29 100644 --- a/stackslib/src/chainstate/stacks/index/marf.rs +++ b/stackslib/src/chainstate/stacks/index/marf.rs @@ -24,6 +24,7 @@ use rusqlite::{Connection, Transaction}; use stacks_common::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; use stacks_common::util::hash::Sha512Trunc256Sum; +pub use super::squash::SquashStats; use super::storage::ReopenedTrieStorageConnection; use crate::chainstate::stacks::index::bits::{get_leaf_hash, get_node_hash}; use crate::chainstate::stacks::index::node::{ @@ -35,7 +36,9 @@ use crate::chainstate::stacks::index::storage::{ TrieStorageTransaction, }; use crate::chainstate::stacks::index::trie::Trie; -use crate::chainstate::stacks::index::{Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof}; +use crate::chainstate::stacks::index::{ + trie_sql, Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof, +}; use crate::util_lib::db::Error as db_error; pub const BLOCK_HASH_TO_HEIGHT_MAPPING_KEY: &str = "__MARF_BLOCK_HASH_TO_HEIGHT"; @@ -208,6 +211,10 @@ pub trait MarfConnection { key: &str, ) -> Result)>, Error> { self.with_conn(|conn| { + // Squash-aware proofs are not currently supported. + if conn.is_squashed() { + return Err(Error::UnsupportedOnSquashedMarf("get_with_proof")); + } let marf_value = match MARF::get_by_key(conn, block_hash, key)? { None => return Ok(None), Some(x) => x, @@ -223,6 +230,10 @@ pub trait MarfConnection { hash: &TrieHash, ) -> Result)>, Error> { self.with_conn(|conn| { + // Squash-aware proofs are not currently supported. 
+ if conn.is_squashed() { + return Err(Error::UnsupportedOnSquashedMarf("get_with_proof_from_hash")); + } let marf_value = match MARF::get_by_path(conn, block_hash, hash)? { None => return Ok(None), Some(x) => x, @@ -1133,6 +1144,8 @@ impl MARF { ) -> Result, Error> { trace!("MARF::get_path({block_hash:?}) {path:?}"); + storage.check_historical_read_allowed(block_hash)?; + // a NotFoundError _here_ means that a block didn't exist storage.open_block(block_hash).inspect_err(|_e| { test_debug!("Failed to open block {block_hash:?}: {_e:?}"); @@ -1326,12 +1339,43 @@ impl MARF { result.map(|option_result| option_result.map(|leaf| leaf.data)) } + /// Read `OWN_BLOCK_HEIGHT_KEY` for the block the caller is standing on. + /// + /// In a squashed MARF the shared squashed blob's `OWN_BLOCK_HEIGHT_KEY` + /// is pinned at the squash height for every block in the squashed range, + /// so the per-block height must come from `marf_squashed_blocks` instead. + /// A trie answer of `h <= squash_height` with no side-table entry means + /// the squash metadata is corrupted. + fn get_own_block_height( + storage: &mut TrieStorageConnection, + current_block_hash: &T, + ) -> Result, Error> { + let Some(squash_height) = storage.squash_height() else { + return MARF::get_by_key(storage, current_block_hash, OWN_BLOCK_HEIGHT_KEY) + .map(|value| value.map(u32::from)); + }; + + if let Some(h) = storage.squashed_block_height(current_block_hash)? 
{ + return Ok(Some(h)); + } + + let marf_height = + MARF::get_by_key(storage, current_block_hash, OWN_BLOCK_HEIGHT_KEY)?.map(u32::from); + if marf_height.is_some_and(|h| h <= squash_height) { + return Err(Error::CorruptionError(format!( + "squashed MARF inconsistency: trie reports block \ + {current_block_hash} at height <= squash height \ + {squash_height} but marf_squashed_blocks has no entry" + ))); + } + Ok(marf_height) + } + pub fn get_block_height_miner_tip( storage: &mut TrieStorageConnection, block_hash: &T, current_block_hash: &T, ) -> Result, Error> { - let hash_key = format!("{}::{}", BLOCK_HASH_TO_HEIGHT_MAPPING_KEY, block_hash); #[cfg(test)] { // used in testing in order to short-circuit block-height lookups @@ -1341,13 +1385,21 @@ impl MARF { } } - let marf_value = if block_hash == current_block_hash { - MARF::get_by_key(storage, current_block_hash, OWN_BLOCK_HEIGHT_KEY)? - } else { - MARF::get_by_key(storage, current_block_hash, &hash_key)? - }; + if block_hash == current_block_hash { + return MARF::get_own_block_height(storage, current_block_hash); + } - Ok(marf_value.map(u32::from)) + // Cross-block metadata remains MARF-backed. If the caller is standing + // on a squashed block, the trie read is rejected and the target block + // height must come from the squashed-block side table instead. + let hash_key = format!("{BLOCK_HASH_TO_HEIGHT_MAPPING_KEY}::{block_hash}"); + match MARF::get_by_key(storage, current_block_hash, &hash_key) { + Ok(value) => Ok(value.map(u32::from)), + Err(Error::HistoricalReadInSquashedRange { .. }) => { + storage.squashed_block_height(block_hash) + } + Err(e) => Err(e), + } } pub fn get_block_height( @@ -1390,6 +1442,22 @@ impl MARF { return Ok(Some(current_block_hash.clone())); } + // Squashed MARFs keep historical height -> block mappings in + // `marf_squashed_blocks`, not in per-height trie state. 
When the + // caller is inside the squashed range, answer from the side table + // and preserve the usual "no future blocks" behavior. + if let Some(squash_height) = storage.squash_height() { + if current_block_height <= squash_height { + if height > current_block_height { + return Ok(None); + } + return trie_sql::read_squashed_block_hash_by_height::( + storage.sqlite_conn(), + height, + ); + } + } + let height_key = format!("{}::{}", BLOCK_HEIGHT_TO_HASH_MAPPING_KEY, height); MARF::get_by_key(storage, current_block_hash, &height_key) @@ -1486,6 +1554,10 @@ impl MARF { key: &str, ) -> Result)>, Error> { let mut conn = self.storage.connection(); + // Squash-aware proofs are not currently supported. + if conn.is_squashed() { + return Err(Error::UnsupportedOnSquashedMarf("get_with_proof")); + } let marf_value = match MARF::get_by_key(&mut conn, block_hash, key)? { None => return Ok(None), Some(x) => x, @@ -1500,6 +1572,10 @@ impl MARF { path: &TrieHash, ) -> Result)>, Error> { let mut conn = self.storage.connection(); + // Squash-aware proofs are not currently supported. + if conn.is_squashed() { + return Err(Error::UnsupportedOnSquashedMarf("get_with_proof_from_hash")); + } let marf_value = match MARF::get_by_path(&mut conn, block_hash, path)? 
{ None => return Ok(None), Some(x) => x, @@ -1765,6 +1841,8 @@ impl MARF { where F: FnMut(TrieHash, MARFValue) -> Result<(), Error>, { + storage.check_historical_read_allowed(block_hash)?; + let (original_block_hash, original_block_id) = storage.get_cur_block_and_id(); let result = Self::for_each_leaf_inner(storage, block_hash, &mut handle_leaf); diff --git a/stackslib/src/chainstate/stacks/index/mod.rs b/stackslib/src/chainstate/stacks/index/mod.rs index 5c2d023c75..53d0717129 100644 --- a/stackslib/src/chainstate/stacks/index/mod.rs +++ b/stackslib/src/chainstate/stacks/index/mod.rs @@ -33,6 +33,7 @@ pub mod marf; pub mod node; pub mod profile; pub mod proofs; +pub mod squash; pub mod storage; pub mod trie; pub mod trie_sql; @@ -265,6 +266,18 @@ pub enum Error { OverflowError, Patch(Option, TrieNodePatch), NodeTooDeep, + /// Read at a block strictly below the squash height of a squashed MARF. + /// The squashed MARF only retains the canonical state at H, so per-block + /// historical reads in `0..H` cannot be served. + HistoricalReadInSquashedRange { + block_height: u32, + squash_height: u32, + }, + /// Operation is not supported on a squashed MARF (e.g. proof generation). + UnsupportedOnSquashedMarf(&'static str), + /// A destination path required to be empty already exists. Carries the + /// offending path. 
+ DestinationExists(String), } impl From for Error { @@ -335,6 +348,20 @@ impl fmt::Display for Error { write!(f, "Read patch node instead of expected node: {p:?}") } Error::NodeTooDeep => write!(f, "Node is too deeply buried under patches"), + Error::HistoricalReadInSquashedRange { + block_height, + squash_height, + } => write!( + f, + "Historical read at height {block_height} below squash height {squash_height} \ + is not supported on a squashed MARF" + ), + Error::UnsupportedOnSquashedMarf(op) => { + write!(f, "Operation `{op}` is not supported on a squashed MARF") + } + Error::DestinationExists(ref p) => { + write!(f, "Destination path already exists: {p}") + } } } } diff --git a/stackslib/src/chainstate/stacks/index/proofs.rs b/stackslib/src/chainstate/stacks/index/proofs.rs index 38299edb86..660ca88c1f 100644 --- a/stackslib/src/chainstate/stacks/index/proofs.rs +++ b/stackslib/src/chainstate/stacks/index/proofs.rs @@ -1401,6 +1401,13 @@ impl TrieMerkleProof { expected_value: &MARFValue, root_block_header: &T, ) -> Result, Error> { + // Squash-aware proofs are not currently supported. + if storage.is_squashed() { + return Err(Error::UnsupportedOnSquashedMarf( + "TrieMerkleProof::from_path", + )); + } + // accumulate proofs in reverse order -- each proof will be from an earlier and earlier // trie, so we'll reverse them in the end so the proof starts with the latest trie. let mut segment_proofs = vec![]; diff --git a/stackslib/src/chainstate/stacks/index/squash.rs b/stackslib/src/chainstate/stacks/index/squash.rs new file mode 100644 index 0000000000..ae5ec0bced --- /dev/null +++ b/stackslib/src/chainstate/stacks/index/squash.rs @@ -0,0 +1,820 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//! MARF squashing: offline snapshot creation and validation. +//! +//! A squashed MARF contains only the canonical state at a given +//! height H plus the metadata needed for ancestor hash lookups and +//! block-height resolution. + +use std::collections::HashMap; +use std::io::{BufWriter, Seek, SeekFrom, Write}; +use std::path::{Path, PathBuf}; +use std::time::{Duration, Instant}; + +use rusqlite::params; +use stacks_common::types::chainstate::TrieHash; + +use crate::chainstate::stacks::index::marf::{ + MARFOpenOpts, MarfConnection as _, BLOCK_HEIGHT_TO_HASH_MAPPING_KEY, MARF, +}; +use crate::chainstate::stacks::index::node::{clear_backptr, is_backptr, TrieNodeID, TriePtr}; +use crate::chainstate::stacks::index::storage::{ + SquashInfo, TrieFileStorage, TrieStorageConnection, +}; +use crate::chainstate::stacks::index::trie::Trie; +use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId}; + +mod node_store; +mod stream; + +pub(crate) use node_store::NodeStore; +#[cfg(test)] +pub(crate) use node_store::{deserialize_node, serialize_node}; +pub(crate) use stream::stream_squash_blob; +use stream::{recompute_content_hashes, BlobReader}; + +/// Classify a child pointer: resolve the `(block_id, byte_offset)` pair that +/// locates the child in blob storage. Backpointers carry the target block_id +/// directly; inline pointers belong to `origin_block_id`. +/// Returns `None` for empty pointers. 
+#[inline] +fn resolve_child_ptr(ptr: &TriePtr, origin_block_id: u32) -> Option<(u32, u64)> { + if ptr.id() == TrieNodeID::Empty as u8 { + return None; + } + if is_backptr(ptr.id()) { + Some((ptr.back_block(), ptr.from_backptr().ptr())) + } else { + Some((origin_block_id, ptr.ptr())) + } +} + +/// Format a `Duration` as `X.YZ secs` or `X min Y.ZW secs`. +fn fmt_duration(d: Duration) -> String { + let total_centis = d.as_millis() / 10; + let mins = total_centis / 6000; + let secs = (total_centis % 6000) as f64 / 100.0; + if mins == 0 { + format!("{secs:.2} secs") + } else { + format!("{mins} min {secs:.2} secs") + } +} + +/// Remap child pointers in a `NodeStore` for the squashed trie layout. +/// +/// For each non-leaf node, reads it from the temp file, remaps its child +/// pointers from source (block_id, offset) to sequential indices, and +/// writes the modified node back. +/// +/// Each child's `back_block` is set to the squashed equivalent of its origin +/// block via `block_id_map`. The annotation is needed for the squash blob so +/// that COW and hash computation preserve block identity when the squashed +/// MARF is later extended. 
+fn remap_child_ptrs( + store: &mut NodeStore, + source_to_idx: &HashMap<(u32, u64), usize>, + block_id_map: &HashMap, + label: &str, +) -> Result<(), Error> { + let remap_start = Instant::now(); + let node_count = store.len(); + + for idx in 0..node_count { + if idx > 0 && idx % 1_000_000 == 0 { + info!( + "[{label}] Remap trie pointers: {idx}/{node_count} nodes in {}", + fmt_duration(remap_start.elapsed()) + ); + } + + let mut node = store.read_node(idx)?; + let origin_block_id = store.block_id(idx); + + if node.is_leaf() { + continue; + } + + let ptrs = node.ptrs_mut(); + let mut modified = false; + for ptr in ptrs.iter_mut() { + let Some((child_block_id, read_ptr_val)) = resolve_child_ptr(ptr, origin_block_id) + else { + continue; + }; + + let source_key = (child_block_id, read_ptr_val); + let child_idx = *source_to_idx.get(&source_key).ok_or_else(|| { + Error::CorruptionError(format!( + "remap_child_ptrs: child {source_key:?} not in source_to_idx" + )) + })?; + + ptr.ptr = child_idx as u64; + ptr.id = clear_backptr(ptr.id); + + ptr.back_block = *block_id_map.get(&child_block_id).ok_or_else(|| { + Error::CorruptionError(format!( + "remap_child_ptrs: block_id {child_block_id} not in block_id_map" + )) + })?; + modified = true; + } + + if modified { + store.overwrite_node(idx, &node)?; + } + } + store.flush()?; + + info!( + "[{label}] Remap trie pointers complete: {node_count} nodes in {}", + fmt_duration(remap_start.elapsed()) + ); + Ok(()) +} + +/// Per-height block metadata: `(height, block_hash, root_hash)`. +type BlockInfo = (u32, T, TrieHash); + +/// Wall-clock duration of each squash step. +#[derive(Debug, Clone, Default)] +pub struct SquashStepDurations { + /// [1/8] Build the block_id -> blob offset map from `marf_data`. + pub load_block_map: Duration, + /// [2/8] Walk per-height keys and resolve each block's root hash. + pub build_height_index: Duration, + /// [3/8] DFS over the reachable trie nodes at the squash tip. 
+ pub collect_trie_nodes: Duration, + /// [4/8] Bulk-insert placeholder rows for blocks 0..H-1. + pub register_placeholders: Duration, + /// [5/8] Disk-backed remap of inline pointers and backpointers. + pub remap_pointers: Duration, + /// [6/8] Recompute leaf and internal node hashes. + pub recompute_hashes: Duration, + /// [7/8] Stream the squashed trie blob to the destination. + pub write_trie_blob: Duration, + /// [8/8] Persist squash metadata, broadcast blob offsets, commit. + pub persist_metadata: Duration, +} + +/// Summary statistics from a squashing run. +#[derive(Debug, Clone)] +pub struct SquashStats { + /// Total number of nodes collected into the squashed MARF. + pub node_count: u64, + /// Squash height (blocks 0..=height are squashed). + pub squash_height: u32, + /// Path to the destination MARF SQLite database. + pub dst_db_path: PathBuf, + /// Path to the destination `.blobs` file containing the shared trie. + pub dst_blobs_path: PathBuf, + /// Size in bytes of the squashed trie blob written to `.blobs`. + pub blob_size: u64, + /// Number of placeholder rows inserted for historical blocks 0..H-1. + pub historical_placeholder_count: u64, + /// Root hash of the archival MARF at `squash_height`. + pub source_root_hash: TrieHash, + /// Hash of the squashed trie root node. + pub squash_root_node_hash: TrieHash, + /// Per-step wall-clock durations. + pub step_durations: SquashStepDurations, + /// End-to-end wall-clock duration of `squash_to_path`. + pub total_duration: Duration, +} + +/// Step 1: Build an in-memory block_map from all `marf_data` entries. +fn collect_block_map(src: &MARF) -> Result, Error> { + let all_blocks = trie_sql::bulk_read_block_entries::(src.sqlite_conn())?; + Ok(all_blocks + .into_iter() + .map(|(id, bh, offset)| (bh, (id, offset))) + .collect()) +} + +/// Step 2: For each height 0..=H, resolve (block_hash, root_hash) via trie +/// walk + direct blob seek. 
+fn collect_per_height_metadata( + src: &mut MARF, + tip: &T, + block_map: &HashMap, + blob_reader: &mut BlobReader, + height: u32, + label: &str, +) -> Result>, Error> { + let mut block_info: Vec> = Vec::with_capacity((height + 1) as usize); + let mut last_log = Instant::now(); + let start = Instant::now(); + + for h in 0..=height { + let h_key = format!("{BLOCK_HEIGHT_TO_HASH_MAPPING_KEY}::{h}"); + let val = src + .with_conn(|conn| MARF::::get_by_key(conn, tip, &h_key))? + .ok_or_else(|| { + Error::CorruptionError(format!("Missing height mapping for height {h}")) + })?; + let bh = T::from(val); + + let &(block_id, blob_offset) = block_map.get(&bh).ok_or_else(|| { + Error::CorruptionError(format!( + "Missing block map entry for block hash at height {h}" + )) + })?; + + let rh = blob_reader.read_root_hash(block_id, blob_offset)?; + + block_info.push((h, bh, rh)); + + if last_log.elapsed().as_secs() >= 30 || (h > 0 && h % 100_000 == 0) { + info!( + "[{label}] [2/8] Build height index: {}/{} heights in {}", + h + 1, + height + 1, + fmt_duration(start.elapsed()) + ); + last_log = Instant::now(); + } + } + info!( + "[{label}] [2/8] Build height index: {} heights in {}", + height + 1, + fmt_duration(start.elapsed()) + ); + + Ok(block_info) +} + +/// Step 4: Bulk-insert `marf_data` placeholder rows for blocks 0..H-1. +/// +/// Returns a mapping from archival block_id to squashed block_id. +fn insert_placeholder_blocks( + conn: &rusqlite::Connection, + block_info: &[BlockInfo], + block_at_height: &T, + block_map: &HashMap, + label: &str, +) -> Result, Error> { + let start = Instant::now(); + let mut archival_to_squashed: HashMap = HashMap::new(); + let mut stmt = conn.prepare(PLACEHOLDER_INSERT_SQL)?; + for (h, bh, _) in block_info { + if bh == block_at_height { + continue; + } + let (archival_id, _) = block_map.get(bh).ok_or(Error::NotFoundError)?; + let empty_blob: &[u8] = &[]; + let squashed_id: u32 = stmt + .insert(params![bh.to_string(), empty_blob, 0i64, 0i64])? 
+ .try_into() + .expect("block_id overflow"); + archival_to_squashed.insert(*archival_id, squashed_id); + if *h % 100_000 == 0 && *h > 0 { + info!( + "[{label}] [4/8] Register placeholder blocks: {h} of {} in {}", + block_info.len(), + fmt_duration(start.elapsed()) + ); + } + } + info!( + "[{label}] [4/8] Register placeholder blocks: {} entries in {}", + archival_to_squashed.len(), + fmt_duration(start.elapsed()) + ); + Ok(archival_to_squashed) +} + +/// SQL used to insert an empty placeholder row into `marf_data`. +const PLACEHOLDER_INSERT_SQL: &str = + "INSERT INTO marf_data (block_hash, data, unconfirmed, external_offset, external_length) \ + VALUES (?1, ?2, 0, ?3, ?4)"; + +/// Step 6: Write all squash SQL metadata in one transaction scope. +fn persist_squash_metadata( + conn: &rusqlite::Connection, + block_info: &[BlockInfo], + source_root_hash: &TrieHash, + height: u32, +) -> Result<(), Error> { + let start = Instant::now(); + trie_sql::write_squash_info(conn, source_root_hash, height)?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO marf_squashed_blocks (height, block_hash, marf_root_hash) VALUES (?1, ?2, ?3)", + )?; + for (h, bh, rh) in block_info { + stmt.execute(params![ + i64::from(*h), + bh.as_bytes(), + rh.as_bytes().to_vec() + ])?; + } + info!( + "Squash: wrote {} root hashes and block heights in {}", + block_info.len(), + fmt_duration(start.elapsed()) + ); + Ok(()) +} + +/// Persist `squash_root_node_hash` and broadcast the tip blob offset to all +/// placeholder rows. 
+fn finalize_shared_blob_offsets( + conn: &rusqlite::Connection, + block_at_height: &T, + squash_root_node_hash: &TrieHash, +) -> Result { + trie_sql::update_squash_root_node_hash(conn, squash_root_node_hash)?; + + let start = Instant::now(); + let bh_id = trie_sql::get_block_identifier(conn, block_at_height)?; + let (offset, length) = trie_sql::get_external_trie_offset_length(conn, bh_id)?; + let updated = trie_sql::bulk_update_blob_offsets(conn, offset, length, block_at_height)?; + info!( + "Squash: updated {} placeholder blob offsets in {}", + updated, + fmt_duration(start.elapsed()) + ); + Ok(updated) +} + +impl MARF { + /// Squash the MARF at `height` into a new database at `dst_path`. + /// + /// Produces a hash-preserving squash: the squashed MARF contains a single + /// shared trie storage with all trie nodes reachable at `height`. Each historical + /// block (0..=height) has a `marf_data` row pointing at this shared trie storage so + /// that `get_block_hash_caching(local_id)` returns the correct original + /// `StacksBlockId`. + /// + /// Backpointer identity is preserved via `TriePtr.back_block` annotations. + /// Children that were backpointers in the archival MARF are stored inline in + /// the blob but with `back_block` set to the squashed DB's local_id for the + /// original block. When the squashed MARF is extended to height H+1, + /// `node_copy_update_ptrs` preserves these annotations, ensuring + /// that `inner_write_children_hashes` uses the same `StacksBlockId` values + /// as the archival MARF. This guarantees identical per-block root hashes. 
+ pub fn squash_to_path( + src_path: &str, + dst_path: &str, + open_opts: MARFOpenOpts, + tip: &T, + height: u32, + label: &str, + ) -> Result { + if open_opts.compress { + return Err(Error::CorruptionError( + "squash_to_path does not support compress=true; \ + the direct blob write path only emits uncompressed nodes" + .to_string(), + )); + } + + let dst_db_path = PathBuf::from(dst_path); + let dst_blobs_path = PathBuf::from(format!("{dst_path}.blobs")); + if dst_db_path.exists() { + return Err(Error::DestinationExists(dst_path.to_string())); + } + if dst_blobs_path.exists() { + return Err(Error::DestinationExists( + dst_blobs_path.to_string_lossy().into_owned(), + )); + } + + // Run the actual squash work. On any failure after this point we may + // have created `dst_db_path` and/or `dst_blobs_path`, so remove them + // before propagating the error. + let result = Self::squash_to_path_inner( + src_path, + &dst_db_path, + &dst_blobs_path, + open_opts, + tip, + height, + label, + ); + + if let Err(e) = &result { + error!("[{label}] squash failed: {e}; cleaning up partial output at {dst_path}"); + let _ = std::fs::remove_file(&dst_db_path); + let _ = std::fs::remove_file(&dst_blobs_path); + } + result + } + + fn squash_to_path_inner( + src_path: &str, + dst_db_path: &Path, + dst_blobs_path: &Path, + open_opts: MARFOpenOpts, + tip: &T, + height: u32, + label: &str, + ) -> Result { + let dst_path = dst_db_path.to_str().ok_or_else(|| { + Error::CorruptionError(format!( + "squash dst path is not valid UTF-8: {}", + dst_db_path.display() + )) + })?; + + let overall_start = Instant::now(); + let mut step_durations = SquashStepDurations::default(); + + // Step 1: bulk SQL block map + let src_storage = TrieFileStorage::open_readonly(src_path, open_opts.clone())?; + let mut src = MARF::from_storage(src_storage); + + let block_at_height = src + .get_block_at_height(height, tip)? 
+ .ok_or(Error::NotFoundError)?; + + let start = Instant::now(); + let block_map = collect_block_map(&src)?; + step_durations.load_block_map = start.elapsed(); + info!( + "[{label}] [1/8] Load block map: {} entries in {}", + block_map.len(), + fmt_duration(step_durations.load_block_map) + ); + + // [2/8] Build height index + info!( + "[{label}] [2/8] Build height index: reading {} heights...", + height + 1 + ); + let start = Instant::now(); + let mut blob_reader = BlobReader::new(src_path, open_opts.external_blobs)?; + let block_info = collect_per_height_metadata( + &mut src, + tip, + &block_map, + &mut blob_reader, + height, + label, + )?; + step_durations.build_height_index = start.elapsed(); + + // [3/8] Collect trie nodes (DFS walk) + // + // Derive the temp directory from dst_path: use the parent directory. + let tmp_dir = dst_db_path + .parent() + .filter(|p| !p.as_os_str().is_empty()) + .and_then(|p| p.to_str()) + .unwrap_or("."); + info!("[{label}] [3/8] Collect trie nodes: starting DFS..."); + let start = Instant::now(); + let (mut node_store, source_to_idx) = src.with_conn(|conn| { + MARF::::collect_reachable_nodes(conn, &block_at_height, tmp_dir) + })?; + let node_count = node_store.len() as u64; + step_durations.collect_trie_nodes = start.elapsed(); + info!( + "[{label}] [3/8] Collect trie nodes: {node_count} nodes in {}", + fmt_duration(step_durations.collect_trie_nodes) + ); + + let mut dst_open_opts = open_opts; + dst_open_opts.external_blobs = true; + + // Open destination MARF and begin transaction + let mut dst = MARF::from_path(dst_path, dst_open_opts)?; + let mut tx = dst.begin_tx()?; + tx.begin(&T::sentinel(), &block_at_height)?; + + // [4/8] Register placeholder blocks + let start = Instant::now(); + let mut archival_to_squashed = insert_placeholder_blocks( + tx.sqlite_tx(), + &block_info, + &block_at_height, + &block_map, + label, + )?; + let historical_placeholder_count = archival_to_squashed.len() as u64; + + // Build `block_id_map`: every 
archival `block_id` that appears + // as a node origin in the DFS must be mappable. insert_placeholder_blocks + // covers heights 0..H-1 but skips block_at_height and sentinel. + // Add them explicitly so `remap_child_ptrs` can resolve all children. + // + // Sentinel: flushed to marf_data by tx.begin() -> flush(). + let sentinel = T::sentinel(); + if let Some((archival_sentinel_id, _)) = block_map.get(&sentinel) { + let squashed_sentinel_id: u32 = tx.sqlite_tx().query_row( + "SELECT block_id FROM marf_data WHERE block_hash = ?1", + rusqlite::params![sentinel.to_string()], + |row| row.get(0), + )?; + archival_to_squashed.insert(*archival_sentinel_id, squashed_sentinel_id); + } + + // block_at_height: not yet in the destination `marf_data` (only in + // `block_extension_locks`). Insert an empty placeholder now to get a + // real `block_id`. Step [7/8] will UPDATE this row instead of + // inserting a new one via `update_external_trie_blob`. + let squashed_tip_placeholder_id = { + let (archival_tip_id, _) = block_map + .get(&block_at_height) + .ok_or(Error::NotFoundError)?; + let empty_blob: &[u8] = &[]; + let placeholder_id = tx + .sqlite_tx() + .prepare(PLACEHOLDER_INSERT_SQL)? + .insert(params![block_at_height.to_string(), empty_blob, 0i64, 0i64])? 
+ .try_into() + .expect("block_id overflow"); + archival_to_squashed.insert(*archival_tip_id, placeholder_id); + placeholder_id + }; + drop(block_map); + step_durations.register_placeholders = start.elapsed(); + + // [5/8] Remap trie pointers (disk-backed) + info!("[{label}] [5/8] Remap trie pointers: {node_count} nodes..."); + let start = Instant::now(); + remap_child_ptrs( + &mut node_store, + &source_to_idx, + &archival_to_squashed, + label, + )?; + step_durations.remap_pointers = start.elapsed(); + info!( + "[{label}] [5/8] Remap trie pointers: {node_count} nodes in {}", + fmt_duration(step_durations.remap_pointers) + ); + drop(source_to_idx); + drop(archival_to_squashed); + node_store.drop_block_ids(); // free ~200 MB + + // [6/8] Recompute node hashes (disk-backed) + info!("[{label}] [6/8] Recompute node hashes: {node_count} nodes..."); + let start = Instant::now(); + recompute_content_hashes(&mut node_store)?; + step_durations.recompute_hashes = start.elapsed(); + info!( + "[{label}] [6/8] Recompute node hashes: {node_count} nodes in {}", + fmt_duration(step_durations.recompute_hashes) + ); + + let squash_root_node_hash = if node_store.len() > 0 { + *node_store.hash(0) + } else { + return Err(Error::CorruptionError( + "No nodes in squash trie".to_string(), + )); + }; + + // [7/8] Write trie blob (compute offsets + stream to destination) + info!("[{label}] [7/8] Write trie blob: {node_count} nodes..."); + let start = Instant::now(); + let parent_hash = T::sentinel(); + + // Destination squash MARFs always use external blobs. + let (block_id, total_blob_size) = tx.storage.with_trie_blobs(|db, blobs| { + let Some(trie_file) = blobs else { + return Err(Error::CorruptionError( + "squash destination requires external .blobs file but handle is unavailable" + .to_string(), + )); + }; + let offset = trie_sql::get_external_blobs_length(db)?; + trie_file + .seek(SeekFrom::Start(offset)) + .map_err(Error::IOError)?; + // buffer size is 1 MiB, completely arbitrary. 
+ let mut buf_writer = BufWriter::with_capacity(1 << 20, trie_file); + let total_blob_size = + stream_squash_blob(&mut node_store, &parent_hash, &mut buf_writer)?; + buf_writer.flush().map_err(Error::IOError)?; + let trie_file = buf_writer.into_inner().map_err(|e| { + Error::IOError(std::io::Error::other(format!( + "failed to flush BufWriter: {e}" + ))) + })?; + trie_file.flush().map_err(Error::IOError)?; + trie_file.sync_data().map_err(Error::IOError)?; + trie_sql::update_external_trie_blob( + db, + &block_at_height, + offset, + total_blob_size, + squashed_tip_placeholder_id, + ) + .map(|block_id| (block_id, total_blob_size)) + })?; + step_durations.write_trie_blob = start.elapsed(); + info!( + "[{label}] [7/8] Write trie blob: block_id={block_id}, {total_blob_size} bytes in {}", + fmt_duration(step_durations.write_trie_blob) + ); + + drop(node_store); // free temp file + metadata + + // [8/8] Persist metadata, share blob offsets, and commit. + let step8_start = Instant::now(); + let source_root_hash = block_info + .iter() + .find(|(_, bh, _)| bh == &block_at_height) + .map(|(_, _, rh)| *rh) + .ok_or(Error::NotFoundError)?; + persist_squash_metadata(tx.sqlite_tx(), &block_info, &source_root_hash, height)?; + info!("[{label}] Squash root hash: {squash_root_node_hash}"); + + finalize_shared_blob_offsets(tx.sqlite_tx(), &block_at_height, &squash_root_node_hash)?; + + tx.set_squash_info(Some(SquashInfo { + archival_marf_root_hash: source_root_hash, + squash_root_node_hash, + height, + })); + + // Commit the SQL transaction without flushing TrieRAM (we already wrote the blob directly) + tx.commit_squash()?; + + step_durations.persist_metadata = step8_start.elapsed(); + info!( + "[{label}] [8/8] Persist metadata & commit: finished in {}", + fmt_duration(step_durations.persist_metadata) + ); + + let total_duration = overall_start.elapsed(); + info!( + "[{label}] Squash complete: {node_count} nodes, total time {}", + fmt_duration(total_duration) + ); + + Ok(SquashStats { 
+ node_count, + squash_height: height, + dst_db_path: dst_db_path.to_path_buf(), + dst_blobs_path: dst_blobs_path.to_path_buf(), + blob_size: total_blob_size, + historical_placeholder_count, + source_root_hash, + squash_root_node_hash, + step_durations, + total_duration, + }) + } + + /// DFS collection pass: gather all trie nodes reachable from `block_hash`. + /// + /// Uses a disk-backed `NodeStore` to avoid holding ~50M full node objects + /// in memory (~20 GB). Only lightweight metadata (hashes, block_ids, + /// file offsets) is kept in RAM (~4 GB). + /// + /// Uses iterative DFS instead of BFS. The DFS stack holds at most + /// `trie_height` frames (~32), each with one node's child pointer list. + /// Total stack memory is ~128 KB, compared to the BFS frontier which + /// could hold millions of entries (~GBs) for wide, hash-distributed tries. + /// + /// Nodes are pushed in DFS preorder (parent before children), which is + /// all the remap and hash-recompute passes require. + /// + /// Returns: + /// - `node_store`: disk-backed node data + in-memory metadata. + /// - `source_to_idx`: `(source_block_id, byte_offset) -> node index` map + /// needed by the remap pass. 
+ fn collect_reachable_nodes( + source: &mut TrieStorageConnection, + block_hash: &T, + tmp_dir: &str, + ) -> Result<(NodeStore, HashMap<(u32, u64), usize>), Error> { + source.open_block(block_hash)?; + let (root_node, root_hash) = Trie::read_root(source)?; + let root_block_id = source.get_cur_block_identifier()?; + + let mut store = NodeStore::new(tmp_dir)?; + let mut source_to_idx: HashMap<(u32, u64), usize> = HashMap::new(); + + let root_disk_ptr = TrieStorageConnection::::root_ptr_disk(); + source_to_idx.insert((root_block_id, root_disk_ptr), 0); + + let root_is_leaf = root_node.is_leaf(); + let root_ptrs: Vec = if root_is_leaf { + vec![] + } else { + root_node.ptrs().to_vec() + }; + store.push(&root_node, root_hash, root_block_id)?; + + // DFS stack frame: holds remaining child pointers for one node. + // Stack depth is bounded by trie height (~32), so total memory is + // ~32 * max_ptrs * sizeof(TriePtr) ≈ 128 KB - negligible. + struct DfsFrame { + origin_block_id: u32, + child_ptrs: Vec, + next_child: usize, + } + + let mut stack: Vec = Vec::new(); + if !root_is_leaf { + stack.push(DfsFrame { + origin_block_id: root_block_id, + child_ptrs: root_ptrs, + next_child: 0, + }); + } + + let dfs_start = Instant::now(); + let mut nodes_collected: u64 = 1; // root already counted + let mut last_log = Instant::now(); + + while !stack.is_empty() { + let stack_depth = stack.len(); + let frame = stack.last_mut().expect("stack is non-empty"); + // Scan this frame's remaining children for the next one to descend into. 
+ let mut descend_frame: Option = None; + + while frame.next_child < frame.child_ptrs.len() { + let ptr = *frame + .child_ptrs + .get(frame.next_child) + .expect("BUG: next_child within bounds"); + frame.next_child += 1; + + if ptr.id() == TrieNodeID::Empty as u8 { + continue; + } + + let (child_block_id, read_ptr) = if is_backptr(ptr.id()) { + (ptr.back_block(), ptr.from_backptr()) + } else { + (frame.origin_block_id, ptr) + }; + + let source_key = (child_block_id, read_ptr.ptr()); + if source_to_idx.contains_key(&source_key) { + continue; + } + + let child_bh = source.get_block_from_local_id(child_block_id)?.clone(); + source.open_block_maybe_id(&child_bh, Some(child_block_id))?; + let (child_node, child_hash) = source.read_nodetype(&read_ptr)?; + + let child_is_leaf = child_node.is_leaf(); + let child_ptrs_vec: Vec = if child_is_leaf { + vec![] + } else { + child_node.ptrs().to_vec() + }; + + source_to_idx.insert(source_key, store.len()); + store.push(&child_node, child_hash, child_block_id)?; + + nodes_collected += 1; + if last_log.elapsed().as_secs() >= 30 || nodes_collected % 1_000_000 == 0 { + info!( + "Trie DFS: {nodes_collected} nodes, stack depth {stack_depth}, {} elapsed", + fmt_duration(dfs_start.elapsed()) + ); + last_log = Instant::now(); + } + + // If internal node, descend into it (push frame and break). + // If leaf, continue scanning siblings. + if !child_is_leaf { + descend_frame = Some(DfsFrame { + origin_block_id: child_block_id, + child_ptrs: child_ptrs_vec, + next_child: 0, + }); + break; + } + } + + match descend_frame { + Some(new_frame) => stack.push(new_frame), + None => { + // All children of this frame processed, backtrack. 
+ stack.pop(); + } + } + } + + store.flush()?; + + info!( + "Trie DFS: {} nodes in {}", + store.len(), + fmt_duration(dfs_start.elapsed()) + ); + + Ok((store, source_to_idx)) + } +} diff --git a/stackslib/src/chainstate/stacks/index/squash/node_store.rs b/stackslib/src/chainstate/stacks/index/squash/node_store.rs new file mode 100644 index 0000000000..7b9c3b2d19 --- /dev/null +++ b/stackslib/src/chainstate/stacks/index/squash/node_store.rs @@ -0,0 +1,386 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::fs::File; +use std::io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write}; + +use stacks_common::types::chainstate::TrieHash; + +use crate::chainstate::stacks::index::node::{ + TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeType, TriePtr, +}; +use crate::chainstate::stacks::index::{Error, MARFValue, TrieLeaf}; + +/// Tag bytes for node serialization to the temp file. +const TAG_LEAF: u8 = 0; +const TAG_NODE4: u8 = 1; +const TAG_NODE16: u8 = 2; +const TAG_NODE48: u8 = 3; +const TAG_NODE256: u8 = 4; + +/// Serialize a single `TriePtr` to the writer. +fn write_trie_ptr(w: &mut W, p: &TriePtr) -> Result<(), Error> { + w.write_all(&[p.id, p.chr])?; + w.write_all(&p.ptr.to_le_bytes())?; + w.write_all(&p.back_block.to_le_bytes())?; + Ok(()) +} + +/// Deserialize a single `TriePtr` from the reader. 
+fn read_trie_ptr(r: &mut R) -> Result { + let mut buf2 = [0u8; 2]; + r.read_exact(&mut buf2)?; + let mut buf8 = [0u8; 8]; + r.read_exact(&mut buf8)?; + let ptr = u64::from_le_bytes(buf8); + let mut buf4 = [0u8; 4]; + r.read_exact(&mut buf4)?; + let back_block = u32::from_le_bytes(buf4); + Ok(TriePtr { + id: buf2[0], + chr: buf2[1], + ptr, + back_block, + }) +} + +/// Serialize a `TrieNodeType` to the writer in a compact binary format. +/// Format: [tag: u8] [path_len: u32] [path bytes] [variant data] +pub(crate) fn serialize_node(w: &mut W, node: &TrieNodeType) -> Result<(), Error> { + match node { + TrieNodeType::Leaf(leaf) => { + w.write_all(&[TAG_LEAF])?; + w.write_all(&(leaf.path.len() as u32).to_le_bytes())?; + w.write_all(&leaf.path)?; + w.write_all(&leaf.data.0)?; + } + TrieNodeType::Node4(n) => { + w.write_all(&[TAG_NODE4])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + TrieNodeType::Node16(n) => { + w.write_all(&[TAG_NODE16])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + TrieNodeType::Node48(n) => { + w.write_all(&[TAG_NODE48])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + let indexes = n.indexes.map(|idx| idx as u8); + w.write_all(&indexes)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + TrieNodeType::Node256(n) => { + w.write_all(&[TAG_NODE256])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + } + Ok(()) +} + +/// Deserialize a `TrieNodeType` from the reader. 
+pub(crate) fn deserialize_node(r: &mut R) -> Result { + let mut tag = [0u8; 1]; + r.read_exact(&mut tag)?; + let mut path_len_buf = [0u8; 4]; + r.read_exact(&mut path_len_buf)?; + let path_len = u32::from_le_bytes(path_len_buf) as usize; + let mut path = vec![0u8; path_len]; + if path_len > 0 { + r.read_exact(&mut path)?; + } + + match tag[0] { + TAG_LEAF => { + let mut data = [0u8; 40]; + r.read_exact(&mut data)?; + Ok(TrieNodeType::Leaf(TrieLeaf { + path, + data: MARFValue(data), + })) + } + TAG_NODE4 => { + let mut ptrs = [TriePtr::default(); 4]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node4(TrieNode4 { + path, + ptrs, + cowptr: None, + patches: vec![], + })) + } + TAG_NODE16 => { + let mut ptrs = [TriePtr::default(); 16]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node16(TrieNode16 { + path, + ptrs, + cowptr: None, + patches: vec![], + })) + } + TAG_NODE48 => { + let mut indexes_u8 = [0u8; 256]; + r.read_exact(&mut indexes_u8)?; + let indexes = indexes_u8.map(|idx| idx as i8); + let mut ptrs = [TriePtr::default(); 48]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node48(Box::new(TrieNode48 { + path, + indexes, + ptrs, + cowptr: None, + patches: vec![], + }))) + } + TAG_NODE256 => { + let mut ptrs = [TriePtr::default(); 256]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node256(Box::new(TrieNode256 { + path, + ptrs, + cowptr: None, + patches: vec![], + }))) + } + _ => Err(Error::CorruptionError(format!( + "NodeStore: invalid tag byte {0}", + tag[0] + ))), + } +} + +/// `Write`/`Seek` adapter that keeps the current stream position in memory. +pub(super) struct CountingWriter { + inner: W, + offset: u64, +} + +impl CountingWriter { + /// Create a writer whose inner stream is known to be positioned at 0. 
+ pub(super) fn new(inner: W) -> Self { + Self { inner, offset: 0 } + } + + /// Create a writer whose current position is already known. + pub(super) fn with_position(inner: W, offset: u64) -> Self { + Self { inner, offset } + } + + pub(super) fn position(&self) -> u64 { + self.offset + } +} + +impl Write for CountingWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + let written = self.inner.write(buf)?; + self.offset = self + .offset + .checked_add(written as u64) + .ok_or_else(|| std::io::Error::other("CountingWriter offset overflow"))?; + Ok(written) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.inner.flush() + } +} + +impl Seek for CountingWriter { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + let offset = self.inner.seek(pos)?; + self.offset = offset; + Ok(offset) + } + + fn stream_position(&mut self) -> std::io::Result { + Ok(self.offset) + } +} + +/// Disk-backed store for collected trie nodes. +/// +/// Full node data is serialized to a temporary file. Only lightweight +/// per-node metadata (hash, block_id, file offset) is kept in memory. +pub(crate) struct NodeStore { + /// Temp file holding serialized nodes (write handle). + writer: CountingWriter>, + /// Lazily-opened read handle to the temp file. Opened on first read. + reader: BufReader, + /// Path to the temp file (for re-opening as reader). + pub(crate) path: std::path::PathBuf, + /// Byte offset in the temp file for each node. + pub(crate) file_offsets: Vec, + /// Per-node hash. + hashes: Vec, + /// Per-node origin block ID. + block_ids: Vec, +} + +impl NodeStore { + pub(crate) fn new(dir: &str) -> Result { + let pid = std::process::id(); + // Try up to 16 times with atomic create_new to avoid collision. 
+        let nanos = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_nanos();
+        let mut path = std::path::PathBuf::from(dir);
+        path.push(format!(".squash_nodes_{pid}_{nanos}.tmp"));
+        let temp_file = File::options().write(true).create_new(true).open(&path)?;
+        Ok(NodeStore {
+            writer: CountingWriter::new(BufWriter::with_capacity(1 << 20, temp_file)),
+            reader: BufReader::new(File::open(&path)?),
+            path,
+            file_offsets: Vec::new(),
+            hashes: Vec::new(),
+            block_ids: Vec::new(),
+        })
+    }
+
+    /// Number of nodes stored so far.
+    pub(crate) fn len(&self) -> usize {
+        self.file_offsets.len()
+    }
+
+    /// Append a node. Returns the node's index.
+    pub(crate) fn push(
+        &mut self,
+        node: &TrieNodeType,
+        hash: TrieHash,
+        block_id: u32,
+    ) -> Result<usize, Error> {
+        let idx = self.file_offsets.len();
+
+        // Record the metadata before serializing so `position()` still
+        // reports the node's start offset.
+        self.file_offsets.push(self.writer.position());
+        self.hashes.push(hash);
+        self.block_ids.push(block_id);
+        serialize_node(&mut self.writer, node)?;
+        Ok(idx)
+    }
+
+    /// Overwrite the node at `idx` in place.
+    ///
+    /// Call only after `flush` and only when the new serialization
+    /// length matches the original. The write cursor is left at the end
+    /// of the rewritten node, so do not `push` afterwards.
+    pub(crate) fn overwrite_node(&mut self, idx: usize, node: &TrieNodeType) -> Result<(), Error> {
+        let offset = *self.file_offsets.get(idx).ok_or_else(|| {
+            Error::CorruptionError(format!("overwrite_node: index {idx} out of bounds"))
+        })?;
+        let next_offset = self.file_offsets.get(idx + 1);
+        if self.writer.position() != offset {
+            self.writer
+                .seek(SeekFrom::Start(offset))
+                .map_err(Error::IOError)?;
+        }
+        serialize_node(&mut self.writer, node)?;
+        // The length check is only possible when a successor node pins the
+        // expected end offset; the last node has no such bound.
+        if let Some(expected_end) = next_offset {
+            debug_assert_eq!(
+                self.writer.position(),
+                *expected_end,
+                "overwrite_node: re-serialized node {idx} changed length"
+            );
+        }
+        Ok(())
+    }
+
+    /// Flush buffered writes so subsequent reads see them.
+    pub(crate) fn flush(&mut self) -> Result<(), Error> {
+        self.writer.flush().map_err(Error::IOError)?;
+        Ok(())
+    }
+
+    /// Read the node at `idx`.
Lazily opens a shared `BufReader` on first call. + /// + /// Reads see only flushed writes; re-reading an overwritten node requires + /// a preceding `flush`. + pub(crate) fn read_node(&mut self, idx: usize) -> Result { + let offset = *self.file_offsets.get(idx).ok_or_else(|| { + Error::CorruptionError(format!("NodeStore: index {idx} out of bounds")) + })?; + + self.reader + .seek(SeekFrom::Start(offset)) + .map_err(Error::IOError)?; + deserialize_node(&mut self.reader) + } + + pub(crate) fn hash(&self, idx: usize) -> &TrieHash { + self.hashes.get(idx).unwrap_or_else(|| { + panic!( + "NodeStore::hash: index {idx} out of bounds (len={})", + self.hashes.len() + ) + }) + } + + pub(crate) fn set_hash(&mut self, idx: usize, hash: TrieHash) { + if let Some(slot) = self.hashes.get_mut(idx) { + *slot = hash; + } else { + panic!( + "NodeStore::set_hash: index {idx} out of bounds (len={})", + self.hashes.len() + ); + } + } + + pub(crate) fn block_id(&self, idx: usize) -> u32 { + self.block_ids.get(idx).copied().unwrap_or_else(|| { + panic!( + "NodeStore::block_id: index {idx} out of bounds (len={})", + self.block_ids.len() + ) + }) + } + + /// Drop the block_ids Vec to free memory after remap. + pub(crate) fn drop_block_ids(&mut self) { + self.block_ids = Vec::new(); + } + + /// Clean up the temp file. 
+ fn cleanup(&self) { + let _ = std::fs::remove_file(&self.path); + } +} + +impl Drop for NodeStore { + fn drop(&mut self) { + self.cleanup(); + } +} diff --git a/stackslib/src/chainstate/stacks/index/squash/stream.rs b/stackslib/src/chainstate/stacks/index/squash/stream.rs new file mode 100644 index 0000000000..483da05d2d --- /dev/null +++ b/stackslib/src/chainstate/stacks/index/squash/stream.rs @@ -0,0 +1,303 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::fs::File; +use std::io::{BufReader, Read as _, Seek, SeekFrom, Write}; +use std::time::Instant; + +use rusqlite::DatabaseName; +use sha2::Digest as _; +use stacks_common::types::chainstate::{ + StacksBlockId, TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE, TRIEHASH_ENCODED_SIZE, +}; + +use super::fmt_duration; +use super::node_store::{CountingWriter, NodeStore}; +use crate::chainstate::stacks::index::bits::{ + get_leaf_hash, get_node_byte_len, is_inline_child_ptr, reserved_root_size, + resolve_inline_child_offsets, write_nodetype_bytes, +}; +use crate::chainstate::stacks::index::node::{is_backptr, TrieNodeType}; +use crate::chainstate::stacks::index::{BlockMap, Error, MarfTrieId, TrieHasher}; + +/// Recompute content hashes using a `NodeStore`. +/// +/// Leaf hashes are computed by reading each leaf from the temp file. 
+/// Internal node hashes are computed bottom-up (reverse order) using
+/// the in-memory hashes Vec for child lookups and reading the node
+/// structure from the temp file.
+pub(super) fn recompute_content_hashes(store: &mut NodeStore) -> Result<(), Error> {
+    let empty_hash = TrieHash::EMPTY;
+    let node_count = store.len();
+    let start = Instant::now();
+
+    // Pass 1: compute leaf hashes
+    for idx in 0..node_count {
+        let node = store.read_node(idx)?;
+        if let TrieNodeType::Leaf(ref leaf) = node {
+            store.set_hash(idx, get_leaf_hash(leaf));
+        }
+    }
+    info!(
+        "Trie hash: leaf pass done in {}",
+        fmt_duration(start.elapsed())
+    );
+
+    // Pass 2: internal nodes in reverse order
+    for idx in (0..node_count).rev() {
+        let node = store.read_node(idx)?;
+        if node.is_leaf() {
+            continue;
+        }
+
+        // Collect child hashes
+        let ptrs = node.ptrs();
+        let mut child_hashes = Vec::with_capacity(ptrs.len());
+        for child_ptr in ptrs {
+            if !is_inline_child_ptr(child_ptr) {
+                // Empty slot: hash in the empty-trie placeholder.
+                child_hashes.push(empty_hash);
+            } else {
+                let child_idx = child_ptr.ptr() as usize;
+                if child_idx >= node_count {
+                    return Err(Error::CorruptionError(format!(
+                        "Invalid child index {child_idx} at node {idx}"
+                    )));
+                }
+                child_hashes.push(*store.hash(child_idx));
+            }
+        }
+
+        let new_hash = compute_node_hash(&node, &child_hashes);
+        store.set_hash(idx, new_hash);
+    }
+
+    info!(
+        "Trie hash: {node_count} nodes in {}",
+        fmt_duration(start.elapsed())
+    );
+    Ok(())
+}
+
+/// Stream the squash blob into an arbitrary `Write + Seek` sink.
+///
+/// Reads nodes one-at-a-time from the NodeStore temp file and serializes them
+/// directly into `sink`.
+///
+/// This mirrors `TrieRAM::dump_consume`: reserve worst-case root space at the
+/// front of the blob, write descendants in child-before-parent order so child
+/// offsets are known, then seek back and write the root.
+///
+/// The blob is written starting at the sink's current position.
+/// All internal offsets (header, node pointers) are relative to the blob
+/// start, not to the absolute file position.
+///
+/// Returns the number of bytes written.
+pub(crate) fn stream_squash_blob<T: MarfTrieId, F: Write + Seek>(
+    store: &mut NodeStore,
+    parent_hash: &T,
+    sink: &mut F,
+) -> Result<u64, Error> {
+    let n = store.len();
+    if n == 0 {
+        return Err(Error::CorruptionError(
+            "Cannot stream empty squash trie".to_string(),
+        ));
+    }
+
+    // Record the base offset so all writes are relative to blob start.
+    let base = sink.stream_position().map_err(Error::IOError)?;
+    let mut sink = CountingWriter::with_position(sink, base);
+    let header_size = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+
+    let root_node = store.read_node(0)?;
+    let root_reserved_size = reserved_root_size(get_node_byte_len(&root_node), root_node.ptrs())?;
+
+    // Write header: parent block hash + zero identifier
+    sink.write_all(parent_hash.as_bytes())
+        .map_err(Error::IOError)?;
+    sink.seek(SeekFrom::Start(
+        base + BLOCK_HEADER_HASH_ENCODED_SIZE as u64,
+    ))
+    .map_err(Error::IOError)?;
+    sink.write_all(&0u32.to_le_bytes())
+        .map_err(Error::IOError)?;
+
+    // Skip past the root's reserved slot; descendants are written first.
+    sink.seek(SeekFrom::Start(
+        base.checked_add(header_size)
+            .and_then(|x| x.checked_add(root_reserved_size))
+            .ok_or(Error::OverflowError)?,
+    ))
+    .map_err(Error::IOError)?;
+
+    // Map from NodeStore index to offset inside this blob. Offset 0 means
+    // "not written yet"; real node offsets always come after the header.
+    let mut blob_offsets = vec![0u64; n];
+
+    // NodeStore is collected in root-first DFS preorder. Reversing all
+    // descendants writes children before parents, so parent pointer remapping
+    // never needs a fixpoint pass.
+    for idx in (1..n).rev() {
+        let current = sink.position();
+        *blob_offsets
+            .get_mut(idx)
+            .ok_or_else(|| Error::CorruptionError("blob offset index out of bounds".into()))? =
+            current.checked_sub(base).ok_or(Error::OverflowError)?;
+
+        let mut node = store.read_node(idx)?;
+        let hash = store.hash(idx);
+
+        // Convert array-index pointers to byte offsets (relative to blob start)
+        if !node.is_leaf() {
+            resolve_inline_child_offsets(node.ptrs_mut(), &blob_offsets)?;
+        }
+
+        write_nodetype_bytes(&mut sink, &node, hash)?;
+    }
+
+    let end = sink.position();
+    let total_size = end.checked_sub(base).ok_or(Error::OverflowError)?;
+
+    // Write the root into its reserved slot.
+    *blob_offsets
+        .get_mut(0)
+        .ok_or_else(|| Error::CorruptionError("empty blob offset table".into()))? = header_size;
+    let mut root_node = store.read_node(0)?;
+    if !root_node.is_leaf() {
+        resolve_inline_child_offsets(root_node.ptrs_mut(), &blob_offsets)?;
+    }
+
+    sink.seek(SeekFrom::Start(
+        base.checked_add(header_size).ok_or(Error::OverflowError)?,
+    ))
+    .map_err(Error::IOError)?;
+    let root_written = write_nodetype_bytes(&mut sink, &root_node, store.hash(0))?;
+    debug_assert!(
+        root_written <= root_reserved_size,
+        "root wrote {root_written} bytes but only {root_reserved_size} were reserved"
+    );
+
+    // Leave the caller positioned at the end of the blob, as if the write had
+    // been a single forward stream.
+    sink.seek(SeekFrom::Start(
+        base.checked_add(total_size).ok_or(Error::OverflowError)?,
+    ))
+    .map_err(Error::IOError)?;
+
+    Ok(total_size)
+}
+
+/// Reads root hashes from either an external `.blobs` file or from SQLite
+/// internal `marf_data.data` BLOB columns.
+pub(super) enum BlobReader { + External(BufReader), + Internal(rusqlite::Connection), +} + +impl BlobReader { + pub(super) fn new(db_path: &str, external_blobs: bool) -> Result { + if external_blobs { + let blobs_path = format!("{db_path}.blobs"); + let file = File::open(&blobs_path).map_err(Error::IOError)?; + Ok(BlobReader::External(BufReader::with_capacity( + 64 * 1024, + file, + ))) + } else { + let conn = rusqlite::Connection::open_with_flags( + db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY, + )?; + Ok(BlobReader::Internal(conn)) + } + } + + /// Read the root hash for a block. + /// + /// For `External`, seeks to `blob_offset + root_ptr_offset` in the `.blobs` file. + /// For `Internal`, opens the SQLite blob for `block_id` and seeks within it. + pub(super) fn read_root_hash( + &mut self, + block_id: u32, + blob_offset: u64, + ) -> Result { + let root_ptr_offset = (BLOCK_HEADER_HASH_ENCODED_SIZE as u64) + 4; + let mut hash_bytes = [0u8; TRIEHASH_ENCODED_SIZE]; + match self { + BlobReader::External(reader) => { + reader.seek(SeekFrom::Start(blob_offset + root_ptr_offset))?; + reader.read_exact(&mut hash_bytes)?; + } + BlobReader::Internal(conn) => { + let mut blob = conn.blob_open( + DatabaseName::Main, + "marf_data", + "data", + block_id.into(), + true, // readonly + )?; + blob.seek(SeekFrom::Start(root_ptr_offset))?; + blob.read_exact(&mut hash_bytes)?; + } + } + Ok(TrieHash(hash_bytes)) + } +} + +/// A `BlockMap` for nodes whose backpointers were already made inline. +/// +/// `write_consensus_bytes` should never ask this map for a block hash; the +/// debug assertion in `compute_node_hash` checks that precondition. 
+struct BackptrFreeBlockMap;
+
+impl BlockMap for BackptrFreeBlockMap {
+    type TrieId = StacksBlockId;
+
+    // All lookup methods are unreachable by construction: consensus
+    // serialization only consults the block map for backpointer children,
+    // and the squash trie has none.
+    fn get_block_hash(&self, _id: u32) -> Result<Self::TrieId, Error> {
+        unreachable!("BackptrFreeBlockMap: no backpointers in squash trie")
+    }
+    fn get_block_hash_caching(&mut self, _id: u32) -> Result<&Self::TrieId, Error> {
+        unreachable!("BackptrFreeBlockMap: no backpointers in squash trie")
+    }
+    fn is_block_hash_cached(&self, _id: u32) -> bool {
+        false
+    }
+    fn get_block_id(&self, _bhh: &Self::TrieId) -> Result<u32, Error> {
+        unreachable!("BackptrFreeBlockMap: no backpointers in squash trie")
+    }
+    fn get_block_id_caching(&mut self, _bhh: &Self::TrieId) -> Result<u32, Error> {
+        unreachable!("BackptrFreeBlockMap: no backpointers in squash trie")
+    }
+}
+
+/// Compute the content hash of a `TrieNodeType` given pre-collected child hashes.
+///
+/// Equivalent to `bits::get_node_hash` but works on the `TrieNodeType` enum
+/// directly (which does not implement `ConsensusSerializable`).
+///
+/// Precondition: every child pointer of `node` has its backptr bit cleared.
+/// `BackptrFreeBlockMap` panics if that precondition is violated.
+fn compute_node_hash(node: &TrieNodeType, child_hashes: &[TrieHash]) -> TrieHash { + debug_assert!( + node.is_leaf() || node.ptrs().iter().all(|p| !is_backptr(p.id)), + "compute_node_hash precondition violated: node still has backpointer children" + ); + let mut hasher = TrieHasher::new(); + node.write_consensus_bytes(&mut BackptrFreeBlockMap, &mut hasher) + .expect("IO failure pushing to hasher"); + for h in child_hashes { + hasher.update(h.as_ref()); + } + TrieHash(hasher.finalize().into()) +} diff --git a/stackslib/src/chainstate/stacks/index/storage.rs b/stackslib/src/chainstate/stacks/index/storage.rs index 65fa2e4db9..5f304fcabe 100644 --- a/stackslib/src/chainstate/stacks/index/storage.rs +++ b/stackslib/src/chainstate/stacks/index/storage.rs @@ -26,8 +26,9 @@ use rusqlite::{Connection, OpenFlags, Transaction}; use sha2::Digest; use crate::chainstate::stacks::index::bits::{ - get_node_byte_len, get_node_byte_len_compressed, read_hash_bytes, read_nodetype, - read_root_hash, write_nodetype_bytes, write_nodetype_bytes_compressed, + get_node_byte_len, get_node_byte_len_compressed, is_inline_child_ptr, read_hash_bytes, + read_nodetype, read_root_hash, reserved_root_size, resolve_inline_child_offsets, + write_nodetype_bytes, write_nodetype_bytes_compressed, }; use crate::chainstate::stacks::index::cache::*; use crate::chainstate::stacks::index::file::{TrieFile, TrieFileNodeHashReader}; @@ -746,7 +747,7 @@ impl TrieRAM { // count get_nodetype load time for write_children_hashes_same_block benchmark, but // only if that code path will be exercised. for ptr in node.ptrs().iter() { - if !is_backptr(ptr.id()) && !ptr.is_empty() { + if is_inline_child_ptr(ptr) { if let Some(start_node_time) = start_node_time.take() { // count the time taken to load the root node in this case, // but only do so once. @@ -838,44 +839,6 @@ impl TrieRAM { } } - /// Compute the reserved on-disk size for a root written after its children. 
- fn reserved_root_size(base_len: usize, ptrs: &[TriePtr]) -> Result { - let base_len = base_len as u64; - let inline_count = ptrs - .iter() - .filter(|p| !p.is_empty() && !is_backptr(p.id)) - .count() as u64; - let inline_ptr_growth = inline_count.checked_mul(4).ok_or(Error::OverflowError)?; - base_len - .checked_add(inline_ptr_growth) - .ok_or(Error::OverflowError) - } - - /// Rewrite inline child pointers from in-memory indices to file offsets. - fn update_inline_child_ptrs(ptrs: &mut [TriePtr], file_offsets: &[u64]) -> Result<(), Error> { - for ptr in ptrs.iter_mut() { - if ptr.is_empty() || is_backptr(ptr.id) { - continue; - } - - let child_idx = ptr.try_ptr_into_usize()?; - let Some(&offset) = file_offsets.get(child_idx) else { - return Err(Error::CorruptionError("Child index out of range".into())); - }; - // 0 is the sentinel for "not yet placed": valid offsets are always - // past the 36-byte header + root reservation. - if offset == 0 { - return Err(Error::CorruptionError( - "Child offset not yet written".into(), - )); - } - - ptr.ptr = offset; - } - - Ok(()) - } - /// Walk through the buffered TrieNodes and dump them to f. /// This consumes this TrieRAM instance. /// @@ -897,7 +860,7 @@ impl TrieRAM { let (node, _) = self.get_nodetype(ptr)?; if !node.is_leaf() { for child in node.ptrs().iter() { - if !child.is_empty() && !is_backptr(child.id) { + if is_inline_child_ptr(child) { stack.push(child.try_ptr_into_u32()?); } } @@ -923,7 +886,7 @@ impl TrieRAM { // 4 * n_inline_children bytes) between the root and the first descendant. let root_reserved_size = { let (root_node, _) = self.get_nodetype(root_mem_ptr)?; - Self::reserved_root_size(get_node_byte_len(root_node), root_node.ptrs())? + reserved_root_size(get_node_byte_len(root_node), root_node.ptrs())? }; // Write the blob header (parent hash + reserved 4-byte block-id field set to 0). 
@@ -953,10 +916,10 @@ impl TrieRAM { })?; if !entry.0.is_leaf() { - Self::update_inline_child_ptrs(entry.0.ptrs_mut(), &file_offsets)?; + resolve_inline_child_offsets(entry.0.ptrs_mut(), &file_offsets)?; } - write_nodetype_bytes(f, &entry.0, entry.1)?; + write_nodetype_bytes(f, &entry.0, &entry.1)?; } let end_offset = f.stream_position()?; @@ -968,10 +931,10 @@ impl TrieRAM { .ok_or_else(|| Error::CorruptionError("Invalid root pointer in dump_consume".into()))?; if !entry.0.is_leaf() { - Self::update_inline_child_ptrs(entry.0.ptrs_mut(), &file_offsets)?; + resolve_inline_child_offsets(entry.0.ptrs_mut(), &file_offsets)?; } f.seek(SeekFrom::Start(header_size))?; - let root_written = write_nodetype_bytes(f, &entry.0, entry.1)?; + let root_written = write_nodetype_bytes(f, &entry.0, &entry.1)?; debug_assert!( root_written <= root_reserved_size, "root wrote {root_written} bytes but only {root_reserved_size} were reserved" @@ -1121,12 +1084,12 @@ impl TrieRAM { let node_inline = node .ptrs() .iter() - .filter(|p| !p.is_empty() && !is_backptr(p.id)) + .filter(|p| is_inline_child_ptr(p)) .map(|p| p.chr()); let diff_inline = patch_node .ptr_diff .iter() - .filter(|p| !p.is_empty() && !is_backptr(p.id)) + .filter(|p| is_inline_child_ptr(p)) .map(|p| p.chr()); node_inline.eq(diff_inline) } else { @@ -1137,7 +1100,7 @@ impl TrieRAM { // Push children onto the DFS stack. if !node.is_leaf() { for child in node.ptrs().iter() { - if !child.is_empty() && !is_backptr(child.id) { + if is_inline_child_ptr(child) { let idx = child.try_ptr_into_u32()?; stack.push(idx); } @@ -1169,10 +1132,10 @@ impl TrieRAM { // 4 * n_inline_children bytes) between the root and the first descendant. let root_reserved_size = { if let Some(patch) = root_dp.patch() { - Self::reserved_root_size(TRIEHASH_ENCODED_SIZE + patch.size(), &patch.ptr_diff)? + reserved_root_size(TRIEHASH_ENCODED_SIZE + patch.size(), &patch.ptr_diff)? 
} else { let (root_node, _) = self.get_nodetype(root_mem_ptr)?; - Self::reserved_root_size(get_node_byte_len_compressed(root_node), root_node.ptrs())? + reserved_root_size(get_node_byte_len_compressed(root_node), root_node.ptrs())? } }; @@ -1219,13 +1182,13 @@ impl TrieRAM { Error::CorruptionError("Node index out of range in dump_compressed_consume".into()) })? = f.stream_position()?; if let Some(patch) = dp.patch_mut() { - Self::update_inline_child_ptrs(patch.ptr_diff.as_mut_slice(), &file_offsets)?; + resolve_inline_child_offsets(patch.ptr_diff.as_mut_slice(), &file_offsets)?; } else { let entry = self.data.get_mut(dp_idx).ok_or_else(|| { Error::CorruptionError("Invalid node pointer in dump_compressed_consume".into()) })?; if !entry.0.is_leaf() { - Self::update_inline_child_ptrs(entry.0.ptrs_mut(), &file_offsets)?; + resolve_inline_child_offsets(entry.0.ptrs_mut(), &file_offsets)?; } } write_dump_ptr(f, dp, &self.data)?; @@ -1235,13 +1198,13 @@ impl TrieRAM { // Step 4: write the root node into its reserved space. 
if let Some(patch) = root_dp.patch_mut() { - Self::update_inline_child_ptrs(patch.ptr_diff.as_mut_slice(), &file_offsets)?; + resolve_inline_child_offsets(patch.ptr_diff.as_mut_slice(), &file_offsets)?; } else { let entry = self.data.get_mut(root_dp.ptr() as usize).ok_or_else(|| { Error::CorruptionError("Invalid root pointer in dump_compressed_consume".into()) })?; if !entry.0.is_leaf() { - Self::update_inline_child_ptrs(entry.0.ptrs_mut(), &file_offsets)?; + resolve_inline_child_offsets(entry.0.ptrs_mut(), &file_offsets)?; } } f.seek(SeekFrom::Start(header_size))?; @@ -1277,7 +1240,7 @@ impl TrieRAM { if let TrieNodeType::Node256(ref mut data) = root_node { // queue children in the same order we stored them for ptr in data.ptrs.iter_mut() { - if ptr.id() != TrieNodeID::Empty as u8 && !is_backptr(ptr.id()) { + if is_inline_child_ptr(ptr) { frontier.push_back(*ptr); // fix up ptrs @@ -1313,7 +1276,7 @@ impl TrieRAM { }; for ptr in ptrs { - if ptr.id() != TrieNodeID::Empty as u8 && !is_backptr(ptr.id()) { + if is_inline_child_ptr(ptr) { frontier.push_back(*ptr); // fix up ptrs @@ -2525,6 +2488,11 @@ impl TrieStorageConnection<'_, T> { self.data.squash_info.as_ref() } + /// Returns the configured squash height, if this storage is squashed. + pub fn squash_height(&self) -> Option { + self.squash_info().map(|info| info.height) + } + /// Set cached squashing metadata for this storage connection. pub(crate) fn set_squash_info(&mut self, squash_info: Option) { self.data.set_squash_info(squash_info); @@ -2535,6 +2503,52 @@ impl TrieStorageConnection<'_, T> { &self.db } + /// Read this block's height from the squashed-block side table. + /// + /// Returns `None` for archival MARFs and for blocks outside the squashed + /// range. 
+ pub fn squashed_block_height(&self, block_hash: &T) -> Result, Error> { + if !self.is_squashed() { + return Ok(None); + } + + trie_sql::read_squashed_block_height_by_hash(self.sqlite_conn(), block_hash) + } + + /// Read this block's archival MARF root hash from the squashed-block side + /// table. + /// + /// Returns `None` for archival MARFs and for blocks outside the squashed + /// range. + pub fn squashed_block_root_hash(&self, block_hash: &T) -> Result, Error> { + if !self.is_squashed() { + return Ok(None); + } + + trie_sql::read_squashed_block_root_hash_by_hash(self.sqlite_conn(), block_hash) + } + + /// Reject trie traversal below the squash height, where blocks share the + /// squash blob. + pub fn check_historical_read_allowed(&self, block_hash: &T) -> Result<(), Error> { + let Some(squash_height) = self.squash_height() else { + return Ok(()); + }; + + let Some(block_height) = self.squashed_block_height(block_hash)? else { + return Ok(()); + }; + + if block_height < squash_height { + return Err(Error::HistoricalReadInSquashedRange { + block_height, + squash_height, + }); + } + + Ok(()) + } + pub fn set_cached_ancestor_hashes_bytes(&mut self, bhh: &T, bytes: Vec) { self.data.trie_ancestor_hash_bytes_cache = Some((bhh.clone(), bytes)); } @@ -2544,6 +2558,11 @@ impl TrieStorageConnection<'_, T> { } pub fn get_root_hash_at(&mut self, tip: &T) -> Result { + // Squashed historical blocks keep their archival roots in SQL. + if let Some(root_hash) = self.squashed_block_root_hash(tip)? { + return Ok(root_hash); + } + let cur_block_hash = self.get_cur_block(); self.open_block(tip)?; @@ -2632,14 +2651,12 @@ impl TrieStorageConnection<'_, T> { /// Generate a mapping between Trie root hashes and the blocks that contain them. /// /// For squashed MARFs, blocks within the squashed range (0..=H) share a - /// single shared trie storage whose stored trie hash was computed at height H. 
The - /// standard blob-scanning approach would produce collisions (all blocks - /// get the same trie hash). Instead, for each squashed block at height - /// K we re-derive the trie hash by combining the squash trie's content - /// hash with the archival ancestor hashes at height K from the SQL - /// metadata. This mirrors what the proof verifier computes when it - /// processes a segment proof inside the squash trie and the subsequent - /// initial shunt. + /// single shared trie storage whose stored trie hash was computed at height H. + /// The standard blob-scanning approach would produce collisions (all blocks + /// get the same trie hash). For each squashed block at height K we + /// substitute the per-height archival root hash recorded in + /// `marf_squashed_blocks` so that the table maps each historical block to + /// its own archival root. #[cfg(test)] pub fn read_root_to_block_table(&mut self) -> Result, Error> { let mut ret = self.inner_read_persisted_root_to_blocks()?; @@ -2648,17 +2665,17 @@ impl TrieStorageConnection<'_, T> { // All blocks at heights 0..=H share a single squash trie, so // `inner_read_persisted_root_to_blocks` maps them all to the same // trie hash. Replace those entries with the per-height archival - // trie hashes stored during squashing. These are the hashes that - // the proof verifier expects (the squash shunt at idx = -1 injects - // the archival trie hash directly). + // trie hashes stored during squashing. if let Some(info) = self.data.squash_info.clone() { for h in 0..=info.height { - let Some(bh) = trie_sql::read_squash_block_hash::(self.sqlite_conn(), h)? else { + let Some(bh) = + trie_sql::read_squashed_block_hash_by_height::(self.sqlite_conn(), h)? + else { continue; }; let Some(archival_trie_hash) = - trie_sql::read_squash_archival_marf_root_hash(self.sqlite_conn(), h)? + trie_sql::read_squashed_block_root_hash_by_height(self.sqlite_conn(), h)? 
else { continue; }; diff --git a/stackslib/src/chainstate/stacks/index/test/marf.rs b/stackslib/src/chainstate/stacks/index/test/marf.rs index 40fec78797..cc33af3ea7 100644 --- a/stackslib/src/chainstate/stacks/index/test/marf.rs +++ b/stackslib/src/chainstate/stacks/index/test/marf.rs @@ -2209,14 +2209,14 @@ fn assert_metadata_keys_present( } } -/// Create a configurable multi-block MARF for `for_each_leaf` tests. +/// Create a configurable multi-block MARF for tests. /// /// `k1` is updated at every block (exercises backpointers at every depth). /// For each block at height h > 0, inserts `keys_per_block` new keys. /// Also creates: /// - 10 common keys updated at every block /// - 10 common keys updated only on some blocks -fn setup_for_each_leaf_marf( +pub(super) fn setup_marf( path: &str, num_blocks: usize, keys_per_block: usize, @@ -2299,7 +2299,7 @@ fn setup_for_each_leaf_marf( #[test] fn test_for_each_leaf_yields_all_keys() { - let (mut marf, blocks, expected_keys) = setup_for_each_leaf_marf(":memory:", 2, 1); + let (mut marf, blocks, expected_keys) = setup_marf(":memory:", 2, 1); let b1 = blocks[0].clone(); let b2 = blocks[1].clone(); @@ -2352,7 +2352,7 @@ fn test_for_each_leaf_yields_all_keys() { #[test] fn test_for_each_leaf_large_scale_resolves_backpointers_and_values() { - let (mut marf, blocks, expected_keys) = setup_for_each_leaf_marf(":memory:", 300, 150); + let (mut marf, blocks, expected_keys) = setup_marf(":memory:", 300, 150); let block_at_tip = &blocks[299]; @@ -2450,7 +2450,7 @@ fn test_for_each_leaf_single_block() { #[test] fn test_for_each_leaf_at_intermediate_height() { - let (mut marf, blocks, _expected_keys) = setup_for_each_leaf_marf(":memory:", 300, 150); + let (mut marf, blocks, _expected_keys) = setup_marf(":memory:", 300, 150); // Walk at height 4 (blocks[4]), NOT the tip. 
let block_at_4 = &blocks[4]; @@ -2514,8 +2514,8 @@ fn test_for_each_leaf_at_intermediate_height() { #[test] fn test_for_each_leaf_callback_error_propagates() { - let (mut marf, blocks, _expected_keys) = setup_for_each_leaf_marf(":memory:", 10, 10); - let tip = &blocks[9]; + let (mut marf, blocks, _expected_keys) = setup_marf(":memory:", 2, 1); + let tip = &blocks[1]; let call_count = Cell::new(0u64); let result = marf.with_conn(|conn| { diff --git a/stackslib/src/chainstate/stacks/index/test/mod.rs b/stackslib/src/chainstate/stacks/index/test/mod.rs index 91159a0d1d..6151366132 100644 --- a/stackslib/src/chainstate/stacks/index/test/mod.rs +++ b/stackslib/src/chainstate/stacks/index/test/mod.rs @@ -38,6 +38,7 @@ pub mod marf_perfs; pub mod node; pub mod node_patch; pub mod proofs; +pub mod squash; pub mod storage; pub mod trie; diff --git a/stackslib/src/chainstate/stacks/index/test/node.rs b/stackslib/src/chainstate/stacks/index/test/node.rs index 7de37ff5c4..af33b6932c 100644 --- a/stackslib/src/chainstate/stacks/index/test/node.rs +++ b/stackslib/src/chainstate/stacks/index/test/node.rs @@ -5135,6 +5135,46 @@ fn trieptr_compressed_roundtrip_non_backptr() { } } +#[test] +fn trieptr_compressed_roundtrip_inline_back_block_payload_u32() { + let mut ptr = TriePtr::new(TrieNodeID::Node16 as u8, 0x21, 777); + ptr.back_block = 42; + + let mut bytes = vec![]; + ptr.write_bytes_compressed(&mut bytes).unwrap(); + + assert_eq!(10, bytes.len()); + assert_eq!( + set_compressed(set_inline_back_block(TrieNodeID::Node16 as u8)), + bytes[0] + ); + assert_eq!(ptr, TriePtr::from_bytes_compressed(&bytes)); + assert_eq!( + ptr, + TriePtr::read_bytes_compressed(&mut Cursor::new(&bytes)).unwrap() + ); +} + +#[test] +fn trieptr_compressed_roundtrip_inline_back_block_payload_u64() { + let mut ptr = TriePtr::new(TrieNodeID::Node16 as u8, 0x22, u64::from(u32::MAX) + 9); + ptr.back_block = 314; + + let mut bytes = vec![]; + ptr.write_bytes_compressed(&mut bytes).unwrap(); + + assert_eq!(14, 
bytes.len()); + assert_eq!( + set_compressed(set_inline_back_block(set_u64_ptr(TrieNodeID::Node16 as u8))), + bytes[0] + ); + assert_eq!(ptr, TriePtr::from_bytes_compressed(&bytes)); + assert_eq!( + ptr, + TriePtr::read_bytes_compressed(&mut Cursor::new(&bytes)).unwrap() + ); +} + #[test] fn trieptr_compressed_roundtrip_backptr() { let ptr = TriePtr::new_backptr( @@ -5297,3 +5337,49 @@ fn ptrs_from_bytes_compressed_dense_mixed_width() { assert_eq!(expected, decoded); assert_eq!(expected_consumed, cursor_pos); } + +#[test] +fn test_node_copy_update_ptrs_preserves_nonzero_back_block() { + use crate::chainstate::stacks::index::node::node_copy_update_ptrs; + + // Inline pointer with back_block = 0 (normal archival case) - should be overwritten + let mut ptrs = [TriePtr::new(TrieNodeID::Node4 as u8, 0x10, 100)]; + assert_eq!(ptrs[0].back_block, 0); + node_copy_update_ptrs(&mut ptrs, 42); + assert!(is_backptr(ptrs[0].id())); + assert_eq!(ptrs[0].back_block, 42); + assert_eq!(ptrs[0].chr(), 0x10); + assert_eq!(ptrs[0].ptr(), 100); + + // Inline pointer with back_block != 0 (squash annotation) - should be preserved + let mut ptrs = [TriePtr { + id: TrieNodeID::Node4 as u8, + chr: 0x20, + ptr: 200, + back_block: 99, + }]; + node_copy_update_ptrs(&mut ptrs, 42); + assert!(is_backptr(ptrs[0].id())); + assert_eq!( + ptrs[0].back_block, 99, + "squash annotation must be preserved" + ); + assert_eq!(ptrs[0].chr(), 0x20); + assert_eq!(ptrs[0].ptr(), 200); + + // Empty pointer - should be untouched + let mut ptrs = [TriePtr::default()]; + node_copy_update_ptrs(&mut ptrs, 42); + assert_eq!(ptrs[0], TriePtr::default()); + + // Already a backptr - should be skipped entirely + let orig = TriePtr { + id: set_backptr(TrieNodeID::Node16 as u8), + chr: 0x30, + ptr: 300, + back_block: 7, + }; + let mut ptrs = [orig]; + node_copy_update_ptrs(&mut ptrs, 42); + assert_eq!(ptrs[0], orig, "existing backptr must not be touched"); +} diff --git 
a/stackslib/src/chainstate/stacks/index/test/node_patch.rs b/stackslib/src/chainstate/stacks/index/test/node_patch.rs index d6b3d2d4ad..49f06ca898 100644 --- a/stackslib/src/chainstate/stacks/index/test/node_patch.rs +++ b/stackslib/src/chainstate/stacks/index/test/node_patch.rs @@ -156,6 +156,28 @@ fn trie_node_patch_u64_ptr_roundtrip_ok() { assert_eq!(patch_node, decoded); } +#[test] +fn trie_node_patch_apply_node4_preserves_inline_payload_pointer_identity() { + let mut old_node = TrieNode4::new(&[]); + let mut inline_with_payload = TriePtr::new(TrieNodeID::Node16 as u8, 0x10, 1234); + inline_with_payload.back_block = 55; + assert!(old_node.insert(&inline_with_payload)); + + let patch = TrieNodePatch { + ptr: TriePtr::new_backptr(TrieNodeID::Node4 as u8, 0x00, 1, 7), + ptr_diff: vec![TriePtr::new(TrieNodeID::Node16 as u8, 0x20, 2345)], + }; + + let patched = patch + .apply_node4(old_node, 8, 99) + .expect("patch application should succeed"); + let patched_ptr = patched + .walk(0x10) + .expect("inline child with payload should still exist"); + assert!(is_backptr(patched_ptr.id())); + assert_eq!(patched_ptr.back_block(), 55); +} + #[test] fn trie_node_patch_u64_ptr_serialize_fails_with_ptr_diffs_len_0() { let patch_node = TrieNodePatch { diff --git a/stackslib/src/chainstate/stacks/index/test/squash.rs b/stackslib/src/chainstate/stacks/index/test/squash.rs new file mode 100644 index 0000000000..0397c30ce3 --- /dev/null +++ b/stackslib/src/chainstate/stacks/index/test/squash.rs @@ -0,0 +1,1884 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::io::{Cursor, Seek}; +use std::path::PathBuf; + +use stacks_common::types::chainstate::{StacksBlockId, TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE}; +use tempfile::tempdir; + +use super::marf::setup_marf; +use crate::chainstate::stacks::index::bits::{ + get_node_byte_len, read_nodetype, resolve_inline_child_offsets, +}; +use crate::chainstate::stacks::index::marf::{ + MARFOpenOpts, MarfConnection, SquashStats, MARF, OWN_BLOCK_HEIGHT_KEY, +}; +use crate::chainstate::stacks::index::node::{ + is_u64_ptr, set_backptr, TrieNode as _, TrieNode16, TrieNode256, TrieNode4, TrieNode48, + TrieNodeID, TrieNodeType, TriePtr, +}; +use crate::chainstate::stacks::index::squash::{ + deserialize_node, serialize_node, stream_squash_blob, NodeStore, +}; +use crate::chainstate::stacks::index::storage::TrieHashCalculationMode; +use crate::chainstate::stacks::index::{ + trie_sql, ClarityMarfTrieId, Error, MARFValue, TrieLeaf, TrieMerkleProof, +}; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn squash_helper( + src_path: &str, + dst_dir: &std::path::Path, + tip: &StacksBlockId, + height: u32, +) -> (PathBuf, SquashStats) { + std::fs::create_dir_all(dst_dir).unwrap(); + let dst_db_path = dst_dir.join("index.sqlite"); + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let stats = MARF::squash_to_path( + src_path, + dst_db_path.to_str().unwrap(), + open_opts, + tip, + height, + "test", + ) + .unwrap(); + (dst_db_path, stats) +} + +/// Build an archival 
MARF in `dir` and squash it at `squash_height`. +/// Returns the open archival MARF, the open squashed MARF, and the block list +/// from the archival source. Used by the hash-equivalence tests so they don't +/// have to repeat the same five-line scaffolding. +fn build_archival_and_squashed_marfs( + dir: &tempfile::TempDir, + num_blocks: usize, + keys_per_block: usize, + squash_height: u32, +) -> (MARF, MARF, Vec) { + let archival_path = dir.path().join("archival.sqlite"); + let (archival, blocks, _) = + setup_marf(archival_path.to_str().unwrap(), num_blocks, keys_per_block); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + (archival, squashed, blocks) +} + +/// Assert that the archival and squashed MARFs report identical root hashes +/// at every block in `blocks` and that no root is the zero hash. 
+fn assert_roots_match_at( + archival: &mut MARF, + squashed: &mut MARF, + blocks: &[StacksBlockId], + context: &str, +) { + for (i, bh) in blocks.iter().enumerate() { + let arch = archival.get_root_hash_at(bh).unwrap(); + let sq = squashed.get_root_hash_at(bh).unwrap(); + assert_eq!(arch, sq, "{context}: root hash mismatch at block #{i}"); + assert_ne!(arch, TrieHash::ZERO, "{context}: root #{i} is zero"); + } +} + +const STRESS_SQUASH_BLOCKS: usize = 128; +const STRESS_SQUASH_KEYS_PER_BLOCK: usize = 8; +const STRESS_SQUASH_HEIGHT: u32 = 96; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[test] +fn test_squash_to_path_outputs_data() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, stats) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 1, + ); + + assert_eq!(stats.node_count, 29); + assert!(dst_db_path.exists()); + assert!(PathBuf::from(format!("{}.blobs", dst_db_path.display())).exists()); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut dst = MARF::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + let k1 = dst.get(&blocks[1], "k1").unwrap().unwrap(); + assert_eq!(k1, MARFValue::from_value("v1_at_1")); + let own_height = dst.get(&blocks[1], OWN_BLOCK_HEIGHT_KEY).unwrap().unwrap(); + assert_eq!(own_height, MARFValue::from(1u32)); +} + +#[test] +fn test_squash_info_detected_on_open() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, _) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 1, + ); + + let 
open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + let tip = + trie_sql::get_latest_confirmed_block_hash::(squashed.sqlite_conn()).unwrap(); + + // Verify squash metadata was detected from the SQL table on open. + let (is_squashed, info_root, info_height) = squashed + .with_conn(|conn| -> Result<(bool, TrieHash, u32), Error> { + let info = conn.squash_info().expect("missing squash info"); + Ok(( + conn.is_squashed(), + info.archival_marf_root_hash, + info.height, + )) + }) + .unwrap(); + + // Cross-check with the SQL table directly. + let (sql_root, _sql_squash_root, sql_height) = + trie_sql::read_squash_info(squashed.sqlite_conn()) + .unwrap() + .expect("SQL squash info missing"); + + assert!(is_squashed); + assert_eq!(info_root, sql_root); + assert_eq!(info_height, sql_height); + assert_eq!(info_height, 1); +} + +#[test] +fn test_squash_info_absent_on_archival_open() { + let (mut marf, _blocks, _expected_keys) = setup_marf(":memory:", 2, 1); + + let (is_squashed, has_info) = marf + .with_conn(|conn| -> Result<(bool, bool), Error> { + Ok((conn.is_squashed(), conn.squash_info().is_some())) + }) + .unwrap(); + + assert!(!is_squashed); + assert!(!has_info); +} + +#[test] +fn test_squashed_marf_can_extend_past_snapshot_height() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, _) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 1, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + + let b2 = blocks[1].clone(); + let b3 = StacksBlockId::from_bytes(&[3u8; 32]).unwrap(); + let b4 = StacksBlockId::from_bytes(&[4u8; 32]).unwrap(); + 
+ squashed.begin(&b2, &b3).unwrap(); + squashed.insert("k3", MARFValue::from_value("v4")).unwrap(); + squashed.commit().unwrap(); + + squashed.begin(&b3, &b4).unwrap(); + squashed.insert("k4", MARFValue::from_value("v5")).unwrap(); + squashed.commit().unwrap(); + + let v4 = squashed.get(&b4, "k4").unwrap().unwrap(); + assert_eq!(v4, MARFValue::from_value("v5")); + let own_height = squashed.get(&b4, OWN_BLOCK_HEIGHT_KEY).unwrap().unwrap(); + assert_eq!(own_height, MARFValue::from(3u32)); +} + +/// Verify that `get_root_hash_at` and `get_block_height_of` return correct +/// per-height values for blocks *inside* the squashed range. Without the +/// squash-aware overrides these would return the shared blob's root hash +/// (wrong) and the squash height H (wrong) for every historical block. +#[test] +fn test_squashed_historical_root_hash_and_height() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _) = setup_marf(archival_path.to_str().unwrap(), 5, 1); + + // Collect archival root hashes and heights for blocks inside range. + let archival_roots: Vec = (0..=4) + .map(|i| archival.get_root_hash_at(&blocks[i]).unwrap()) + .collect(); + + // The archival roots should not all be identical (sanity). + assert_ne!(archival_roots[0], archival_roots[4]); + + // Squash at height 4 (blocks 0..=4 are in the squashed range). + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 4, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + // (a) get_root_hash_at must return the archival per-height root, not + // the shared squash blob root. 
+ for i in 0..=4 { + let sq_root = squashed.get_root_hash_at(&blocks[i]).unwrap(); + assert_eq!( + archival_roots[i], sq_root, + "root hash mismatch at height {i} (inside squashed range)" + ); + } + + // (b) get_block_height_of must return the correct per-block height, + // not the squash height (4) for all of them. + for i in 0..=4usize { + let h = squashed + .get_block_height_of(&blocks[i], &blocks[4]) + .unwrap() + .expect("height should be Some"); + assert_eq!( + h, i as u32, + "height mismatch for block at index {i}: expected {i}, got {h}" + ); + } +} + +/// Verify that `test_squash_info_detected_on_open` also asserts the +/// squash_root_node_hash from the SQL table. +#[test] +fn test_squash_info_sql_squash_root_asserted() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, _) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 1, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + + let (_, sql_squash_root, _) = trie_sql::read_squash_info(squashed.sqlite_conn()) + .unwrap() + .expect("SQL squash info missing"); + + let cached_root = squashed + .with_conn(|conn| -> Result { + Ok(conn.squash_info().unwrap().squash_root_node_hash) + }) + .unwrap(); + + let sql_root = sql_squash_root.expect("squash_root_node_hash should be set after squash"); + assert_eq!(sql_root, cached_root, "cached vs SQL squash root mismatch"); + + assert_ne!( + cached_root, + TrieHash::EMPTY, + "squash root node hash should be populated after squash" + ); +} + +#[test] +fn test_large_marf_squash_extend_root_hash_matches_archival() { + // Squash a 10-block MARF at height 8, then extend both the archival + // and squashed MARFs with the same data at heights 9 and 10. 
+ let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _expected_keys) = setup_marf(archival_path.to_str().unwrap(), 10, 1); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 8, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let b_new_9 = StacksBlockId::from_bytes(&[101u8; 32]).unwrap(); + let b_new_10 = StacksBlockId::from_bytes(&[102u8; 32]).unwrap(); + + // --- Extend archival --- + archival.begin(&blocks[8], &b_new_9).unwrap(); + archival + .insert("k_new_9", MARFValue::from_value("val9")) + .unwrap(); + archival.commit().unwrap(); + + archival.begin(&b_new_9, &b_new_10).unwrap(); + archival + .insert("k_new_10", MARFValue::from_value("val10")) + .unwrap(); + archival.commit().unwrap(); + + // --- Extend squashed --- + squashed.begin(&blocks[8], &b_new_9).unwrap(); + squashed + .insert("k_new_9", MARFValue::from_value("val9")) + .unwrap(); + squashed.commit().unwrap(); + + squashed.begin(&b_new_9, &b_new_10).unwrap(); + squashed + .insert("k_new_10", MARFValue::from_value("val10")) + .unwrap(); + squashed.commit().unwrap(); + + // (a) Data inserted at the extended heights is readable. + assert_eq!( + squashed.get(&b_new_9, "k_new_9").unwrap().unwrap(), + MARFValue::from_value("val9") + ); + assert_eq!( + squashed.get(&b_new_10, "k_new_10").unwrap().unwrap(), + MARFValue::from_value("val10") + ); + assert_eq!( + squashed.get(&b_new_10, "k1").unwrap().unwrap(), + MARFValue::from_value("v1_at_8") + ); + + // (b) MARF root hashes at the extended heights must match. 
+ let archival_root_9 = archival.get_root_hash_at(&b_new_9).unwrap(); + let squashed_root_9 = squashed.get_root_hash_at(&b_new_9).unwrap(); + assert_eq!( + archival_root_9, squashed_root_9, + "Root hash mismatch at height 9" + ); + + let archival_root_10 = archival.get_root_hash_at(&b_new_10).unwrap(); + let squashed_root_10 = squashed.get_root_hash_at(&b_new_10).unwrap(); + assert_eq!( + archival_root_10, squashed_root_10, + "Root hash mismatch at height 10" + ); + + assert_ne!(archival_root_9, TrieHash::ZERO, "root at 9 is zero"); + assert_ne!(archival_root_10, TrieHash::ZERO, "root at 10 is zero"); + assert_ne!( + archival_root_9, archival_root_10, + "roots at 9 and 10 should differ" + ); + + let own_h = squashed + .get(&b_new_10, OWN_BLOCK_HEIGHT_KEY) + .unwrap() + .unwrap(); + assert_eq!(own_h, MARFValue::from(10u32)); +} + +/// Squash a larger MARF at a deep height, then extend both MARFs through 10 additional +/// heights and verify hash equality at EVERY extended height. +#[test] +fn test_multi_height_extension_hash_equality() { + let dir = tempdir().unwrap(); + let (mut archival, mut squashed, blocks) = build_archival_and_squashed_marfs( + &dir, + STRESS_SQUASH_BLOCKS, + STRESS_SQUASH_KEYS_PER_BLOCK, + STRESS_SQUASH_HEIGHT, + ); + + let mut prev_block = blocks[STRESS_SQUASH_HEIGHT as usize].clone(); + let mut new_blocks: Vec = Vec::new(); + for i in 0..10u8 { + let new_bh = StacksBlockId::from_bytes(&[200 + i; 32]).unwrap(); + let key = format!("ext_k{i}"); + let val = format!("ext_v{i}"); + + archival.begin(&prev_block, &new_bh).unwrap(); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + archival.commit().unwrap(); + + squashed.begin(&prev_block, &new_bh).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.commit().unwrap(); + + new_blocks.push(new_bh.clone()); + prev_block = new_bh; + } + + assert_roots_match_at( + &mut archival, + &mut squashed, + &new_blocks, + "multi-height extension", + ); + + let last = 
new_blocks.last().unwrap(); + assert_eq!( + squashed.get(last, "k1").unwrap().unwrap(), + MARFValue::from_value(&format!("v1_at_{STRESS_SQUASH_HEIGHT}")), + ); + assert_eq!( + squashed.get(last, "ext_k9").unwrap().unwrap(), + MARFValue::from_value("ext_v9"), + ); +} + +/// Test that extending a squashed MARF with blocks that write MANY keys +/// per block produces the same root hashes as the archival MARF. +#[test] +fn test_dense_writes_after_squash_hash_equality() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _expected_keys) = setup_marf( + archival_path.to_str().unwrap(), + STRESS_SQUASH_BLOCKS, + STRESS_SQUASH_KEYS_PER_BLOCK, + ); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + STRESS_SQUASH_HEIGHT, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + // Extend with blocks that write MANY keys each - simulating a block + // with many contract calls (like mainnet block 201697 with 42 txs). + let keys_per_extension_block = 200; + let extension_blocks = 20; + + let mut prev_block = blocks[STRESS_SQUASH_HEIGHT as usize].clone(); + let mut new_blocks: Vec = Vec::new(); + + for blk in 0..extension_blocks { + let new_bh = StacksBlockId::from_bytes(&[200 + blk as u8; 32]).unwrap(); + + archival.begin(&prev_block, &new_bh).unwrap(); + squashed.begin(&prev_block, &new_bh).unwrap(); + + for k in 0..keys_per_extension_block { + let key = format!("dense_blk{blk}_k{k}"); + let val = format!("dense_blk{blk}_v{k}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Also overwrite some keys from the archival history to force + // COW copies of deeper trie nodes. 
+ for k in 0..STRESS_SQUASH_KEYS_PER_BLOCK { + let key = format!("k{k}"); + let val = format!("overwritten_blk{blk}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Simulate at-block: read a key from a post-squash block mid-transaction. + // This exercises the open_block/restore cycle on the squashed blob. + if blk > 0 { + let historical_block = &new_blocks[blk - 1]; + let _arch_val = archival.get(historical_block, "dense_blk0_k0").unwrap(); + let _sq_val = squashed.get(historical_block, "dense_blk0_k0").unwrap(); + assert_eq!(_arch_val, _sq_val, "at-block read mismatch at blk {blk}"); + + // Reading at a pre-squash block must be rejected on the squashed + // MARF. The archival side keeps working. + let old_block = &blocks[STRESS_SQUASH_HEIGHT as usize / 2]; + let _arch_val2 = archival.get(old_block, "k0").unwrap(); + match squashed.get(old_block, "k0") { + Err(Error::HistoricalReadInSquashedRange { .. }) => {} + other => { + panic!("expected HistoricalReadInSquashedRange at blk {blk}, got {other:?}") + } + } + } + + archival.commit().unwrap(); + squashed.commit().unwrap(); + + new_blocks.push(new_bh.clone()); + prev_block = new_bh; + } + + for (i, bh) in new_blocks.iter().enumerate() { + let arch_root = archival.get_root_hash_at(bh).unwrap(); + let sq_root = squashed.get_root_hash_at(bh).unwrap(); + assert_eq!( + arch_root, sq_root, + "Root hash mismatch at dense extension block {} (wrote {} keys + {} overwrites)", + i, keys_per_extension_block, STRESS_SQUASH_KEYS_PER_BLOCK + ); + } +} + +/// Reading a key at a pre-squash block on a squashed MARF must be rejected +/// explicitly. The squashed MARF only retains the canonical state at the +/// squash height; per-block historical reads cannot be served and the API +/// returns `HistoricalReadInSquashedRange` instead of plausible bad data. +/// Reads at the squash tip and at post-squash blocks remain valid. 
+#[test] +fn test_squash_historical_read_rejected() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + // 64 blocks, 4 keys per block, squash at height 48 + let (mut archival, blocks, _) = setup_marf(src_path.to_str().unwrap(), 64, 4); + + let squash_height: u32 = 48; + let (squashed_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let tip_block = &blocks[squash_height as usize]; + let early_block = &blocks[10]; + + // Archival still serves historical reads correctly. + let arch_val = archival.get(early_block, "k1").unwrap(); + assert_eq!( + arch_val, + Some(MARFValue::from_value("v1_at_10")), + "archival should return the historical value" + ); + + // Squashed MARF must reject the historical read. + match squashed.get(early_block, "k1") { + Err(Error::HistoricalReadInSquashedRange { + block_height, + squash_height: sh, + }) => { + assert_eq!(block_height, 10); + assert_eq!(sh, squash_height); + } + other => panic!("expected HistoricalReadInSquashedRange, got {other:?}"), + } + + // Reading at the squash tip is still valid. + let tip_val = squashed.get(tip_block, "k1").unwrap(); + assert_eq!(tip_val, Some(MARFValue::from_value("v1_at_48"))); +} + +/// Reads at any block strictly below the squash height are rejected, even +/// for keys that aren't written at every block. This complements +/// `test_squash_historical_read_rejected` to make sure the guard doesn't +/// depend on which key is being read - only on the block's height relative +/// to the squash height. 
+#[test] +fn test_squash_historical_read_intermittent_key_rejected() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + let (mut archival, blocks, _) = setup_marf(src_path.to_str().unwrap(), 64, 4); + + let squash_height: u32 = 48; + let (squashed_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let early_block = &blocks[10]; + + // Archival still resolves intermittent keys to the last-written value at/before block 10. + let arch_val = archival.get(early_block, "common_some_0").unwrap(); + assert_eq!( + arch_val, + Some(MARFValue::from_value("common_some_0_at_9")), + "archival should return value from height 9" + ); + + // Squashed MARF rejects the read regardless of the key. + match squashed.get(early_block, "common_some_0") { + Err(Error::HistoricalReadInSquashedRange { + block_height, + squash_height: sh, + }) => { + assert_eq!(block_height, 10); + assert_eq!(sh, squash_height); + } + other => panic!("expected HistoricalReadInSquashedRange, got {other:?}"), + } +} + +/// Extend a squashed MARF through enough blocks to exercise deep backpointer +/// chains and node promotions, then verify hash equality with the archival. +/// Uses 256 blocks, 32 keys/block at squash height 192 (leaving 64 extension blocks). 
+#[test] +fn test_deep_extension_hash_equality() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let num_blocks: usize = 256; + let keys_per_block: usize = 32; + let squash_height: u32 = 192; + let extension_blocks: usize = 20; + + let (mut archival, blocks, _) = + setup_marf(archival_path.to_str().unwrap(), num_blocks, keys_per_block); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let mut prev_block = blocks[squash_height as usize].clone(); + let mut new_blocks: Vec = Vec::new(); + + for blk in 0..extension_blocks { + let new_bh = { + let mut bytes = [0u8; 32]; + bytes[0] = 0xee; + bytes[24..28].copy_from_slice(&(blk as u32).to_be_bytes()); + StacksBlockId::from_bytes(&bytes).unwrap() + }; + + archival.begin(&prev_block, &new_bh).unwrap(); + squashed.begin(&prev_block, &new_bh).unwrap(); + + // Write many new keys (forces node promotions in the trie) + for k in 0..(keys_per_block * 4) { + let key = format!("ext_blk{blk}_k{k}"); + let val = format!("ext_blk{blk}_v{k}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Overwrite keys from across the squash range (deep COW walks) + for k in 0..keys_per_block { + let key_index = 2 + (squash_height as usize / 2) * keys_per_block + k; + let key = format!("k{key_index}"); + let val = format!("deep_overwrite_blk{blk}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Also overwrite common keys (causes COW of root-adjacent nodes) + for c in 0..10 { + let key = format!("common_all_{c}"); + let val = 
format!("common_all_{c}_ext_{blk}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + archival.commit().unwrap(); + squashed.commit().unwrap(); + + new_blocks.push(new_bh.clone()); + prev_block = new_bh; + } + + for (i, bh) in new_blocks.iter().enumerate() { + let arch_root = archival.get_root_hash_at(bh).unwrap(); + let sq_root = squashed.get_root_hash_at(bh).unwrap(); + assert_eq!( + arch_root, + sq_root, + "Root hash mismatch at deep extension block {i} (256 blocks, \ + 32 keys/block, squash at 192, {} new keys + {} overwrites per ext block)", + keys_per_block * 4, + keys_per_block + 10 + ); + } +} + +/// Verify that all historical marf_data entries share the same +/// external_offset (i.e. point to the single shared trie storage). +#[test] +fn test_marf_data_entries_share_blob_offset() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + let (_, blocks, _expected_keys) = setup_marf(src_path.to_str().unwrap(), 10, 1); + + let (dst_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 8, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let squashed = MARF::::from_path(dst_path.to_str().unwrap(), open_opts).unwrap(); + let conn = squashed.sqlite_conn(); + + let tip_id = trie_sql::get_block_identifier(conn, &blocks[8]).unwrap(); + let (tip_offset, tip_length) = trie_sql::get_external_trie_offset_length(conn, tip_id).unwrap(); + assert!(tip_length > 0, "blob length should be non-zero"); + + for i in 0..8 { + let blk_id = trie_sql::get_block_identifier(conn, &blocks[i]).unwrap(); + let (offset, length) = trie_sql::get_external_trie_offset_length(conn, blk_id).unwrap(); + assert_eq!(offset, tip_offset, "block {i} offset mismatch"); + assert_eq!(length, tip_length, "block {i} length mismatch"); + } +} + +/// Verify that walk_cow correctly follows 
annotated back_block values +/// when copying nodes from a squashed blob into a new block. +#[test] +fn test_walk_cow_preserves_backpointer_identity() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _expected_keys) = setup_marf(archival_path.to_str().unwrap(), 10, 1); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 8, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let b_new = StacksBlockId::from_bytes(&[250u8; 32]).unwrap(); + squashed.begin(&blocks[8], &b_new).unwrap(); + squashed + .insert("k1", MARFValue::from_value("v1_at_10")) + .unwrap(); + squashed + .insert("new_key", MARFValue::from_value("new_val")) + .unwrap(); + squashed.commit().unwrap(); + + for key in ["k2", "k5", "k9"] { + let result = squashed.get(&b_new, &key).unwrap(); + assert!(result.is_some(), "missing key {key} after extend"); + } + + assert_eq!( + squashed.get(&b_new, "k1").unwrap().unwrap(), + MARFValue::from_value("v1_at_10"), + ); + + assert_eq!( + squashed.get(&b_new, "new_key").unwrap().unwrap(), + MARFValue::from_value("new_val"), + ); + + archival.begin(&blocks[8], &b_new).unwrap(); + archival + .insert("k1", MARFValue::from_value("v1_at_10")) + .unwrap(); + archival + .insert("new_key", MARFValue::from_value("new_val")) + .unwrap(); + archival.commit().unwrap(); + + let arch_root = archival.get_root_hash_at(&b_new).unwrap(); + let sq_root = squashed.get_root_hash_at(&b_new).unwrap(); + assert_eq!(arch_root, sq_root, "Root hash mismatch after walk_cow"); +} + +#[test] +fn test_squash_internal_blobs_extend_with_compression() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("sort.sqlite"); + + let squash_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, 
"noop", false); + let mut src = MARF::from_path(src_db_path.to_str().unwrap(), squash_opts.clone()).unwrap(); + + let b1 = StacksBlockId::from_bytes(&[1u8; 32]).unwrap(); + let b2 = StacksBlockId::from_bytes(&[2u8; 32]).unwrap(); + let b3 = StacksBlockId::from_bytes(&[3u8; 32]).unwrap(); + + src.begin(&StacksBlockId::sentinel(), &b1).unwrap(); + for i in 0u8..32 { + src.insert( + &format!("k{i:02}"), + MARFValue::from_value(&format!("v1-{i:02}")), + ) + .unwrap(); + } + src.commit().unwrap(); + + src.begin(&b1, &b2).unwrap(); + for i in 0u8..32 { + src.insert( + &format!("k{i:02}"), + MARFValue::from_value(&format!("v2-{i:02}")), + ) + .unwrap(); + } + src.commit().unwrap(); + drop(src); + + let dst_dir = dir.path().join("squashed-compressed"); + std::fs::create_dir_all(&dst_dir).unwrap(); + let dst_db_path = dst_dir.join("sort.sqlite"); + + MARF::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + squash_opts, + &b2, + 1, + "test", + ) + .unwrap(); + + let compressed_opts = + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true).with_compression(true); + let mut squashed = MARF::from_path(dst_db_path.to_str().unwrap(), compressed_opts).unwrap(); + + squashed.begin(&b2, &b3).unwrap(); + squashed + .insert("k_extra", MARFValue::from_value("v3-extra")) + .unwrap(); + squashed.commit().unwrap(); + + let value = squashed.get(&b3, "k_extra").unwrap().unwrap(); + assert_eq!(value, MARFValue::from_value("v3-extra")); +} + +// --------------------------------------------------------------------------- +// Targeted unit tests for the disk-backed squash mechanisms +// --------------------------------------------------------------------------- + +/// Helper: build a leaf node for tests. 
+fn make_test_leaf(path: &[u8], value_byte: u8) -> TrieNodeType { + let mut data = [0u8; 40]; + data[0] = value_byte; + TrieNodeType::Leaf(TrieLeaf { + path: path.to_vec(), + data: MARFValue(data), + }) +} + +/// Helper: build a Node4 with the given child pointers. +fn make_test_node4(path: &[u8], ptrs: [TriePtr; 4]) -> TrieNodeType { + TrieNodeType::Node4(TrieNode4 { + path: path.to_vec(), + ptrs, + cowptr: None, + patches: vec![], + }) +} + +#[test] +fn test_node_store_roundtrip_all_variants() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + + let mut store = NodeStore::new(dir_str).unwrap(); + + // Leaf + let leaf = make_test_leaf(&[1, 2, 3], 0xAA); + let leaf_hash = TrieHash::from_data(&[1]); + store.push(&leaf, leaf_hash, 10).unwrap(); + + // Node4 + let n4 = make_test_node4( + &[4, 5], + [ + TriePtr::new(1, b'a', 100), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + let n4_hash = TrieHash::from_data(&[2]); + store.push(&n4, n4_hash, 20).unwrap(); + + // Node16 + let mut ptrs16 = [TriePtr::default(); 16]; + ptrs16[0] = TriePtr::new(2, b'b', 200); + let n16 = TrieNodeType::Node16(TrieNode16 { + path: vec![6, 7, 8], + ptrs: ptrs16, + cowptr: None, + patches: vec![], + }); + let n16_hash = TrieHash::from_data(&[3]); + store.push(&n16, n16_hash, 30).unwrap(); + + // Node48 + let mut indexes48 = [-1i8; 256]; + indexes48[b'c' as usize] = 0; + let mut ptrs48 = [TriePtr::default(); 48]; + ptrs48[0] = TriePtr::new(3, b'c', 300); + let n48 = TrieNodeType::Node48(Box::new(TrieNode48 { + path: vec![9, 10], + indexes: indexes48, + ptrs: ptrs48, + cowptr: None, + patches: vec![], + })); + let n48_hash = TrieHash::from_data(&[4]); + store.push(&n48, n48_hash, 40).unwrap(); + + // Node256 + let mut ptrs256 = [TriePtr::default(); 256]; + ptrs256[b'd' as usize] = TriePtr::new(4, b'd', 400); + let n256 = TrieNodeType::Node256(Box::new(TrieNode256 { + path: vec![11], + ptrs: ptrs256, + cowptr: None, + patches: vec![], 
+ })); + let n256_hash = TrieHash::from_data(&[5]); + store.push(&n256, n256_hash, 50).unwrap(); + + store.flush().unwrap(); + assert_eq!(store.len(), 5); + + // Leaf round-trip + let rt_leaf = store.read_node(0).unwrap(); + assert!(rt_leaf.is_leaf()); + assert_eq!(rt_leaf.path_bytes(), &[1, 2, 3]); + assert_eq!(*store.hash(0), leaf_hash); + assert_eq!(store.block_id(0), 10); + + // Node4 round-trip + let rt_n4 = store.read_node(1).unwrap(); + assert_eq!(rt_n4.ptrs()[0].chr(), b'a'); + assert_eq!(rt_n4.ptrs()[0].ptr(), 100); + + // Node16 round-trip + let rt_n16 = store.read_node(2).unwrap(); + assert_eq!(rt_n16.ptrs()[0].chr(), b'b'); + assert_eq!(rt_n16.ptrs()[0].ptr(), 200); + + // Node48 round-trip + let rt_n48 = store.read_node(3).unwrap(); + assert_eq!(rt_n48.ptrs()[0].chr(), b'c'); + assert_eq!(rt_n48.ptrs()[0].ptr(), 300); + + // Node256 round-trip + let rt_n256 = store.read_node(4).unwrap(); + assert_eq!(rt_n256.ptrs()[b'd' as usize].chr(), b'd'); + assert_eq!(rt_n256.ptrs()[b'd' as usize].ptr(), 400); +} + +#[test] +fn test_node_store_spill_file_cleaned_on_drop() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + + let spill_path; + { + let mut store = NodeStore::new(dir_str).unwrap(); + spill_path = store.path.clone(); + + let leaf = make_test_leaf(&[1], 0x01); + store.push(&leaf, TrieHash::EMPTY, 0).unwrap(); + store.flush().unwrap(); + + // File should exist while store is alive + assert!(spill_path.exists(), "spill file should exist before drop"); + } + // After drop, file should be cleaned up + assert!( + !spill_path.exists(), + "spill file should be removed after drop" + ); +} + +#[test] +fn test_node_store_unique_temp_file_names() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + + let store1 = NodeStore::new(dir_str).unwrap(); + // Ensure different nanos by adding a tiny sleep + std::thread::sleep(std::time::Duration::from_millis(1)); + let store2 = NodeStore::new(dir_str).unwrap(); + 
+ assert_ne!( + store1.path, store2.path, + "concurrent NodeStores should have different temp file paths" + ); +} + +#[test] +fn test_serialize_deserialize_node_roundtrip() { + // Test the raw serialize/deserialize functions independently of NodeStore + let nodes: Vec = vec![ + make_test_leaf(&[1, 2, 3, 4], 0xFF), + make_test_node4( + &[10, 20], + [ + TriePtr::new(1, b'x', 42), + TriePtr::new(1, b'y', 99), + TriePtr::default(), + TriePtr::default(), + ], + ), + ]; + + for original in &nodes { + let mut buf = Vec::new(); + serialize_node(&mut buf, original).unwrap(); + + let mut cursor = Cursor::new(&buf); + let roundtripped = deserialize_node(&mut cursor).unwrap(); + + assert_eq!(original.path_bytes(), roundtripped.path_bytes()); + assert_eq!(original.ptrs().len(), roundtripped.ptrs().len()); + for (a, b) in original.ptrs().iter().zip(roundtripped.ptrs().iter()) { + assert_eq!(a.id(), b.id()); + assert_eq!(a.chr(), b.chr()); + assert_eq!(a.ptr(), b.ptr()); + assert_eq!(a.back_block(), b.back_block()); + } + } +} + +/// Build a branching trie with mixed node types and verify that +/// `stream_squash_blob` writes a readable child-before-parent blob. +/// +/// Trie layout (indices 0–6): +/// 0: Node16 (root) -> children 1, 2 +/// 1: Node4 -> children 3, 4 +/// 2: Node4 -> child 5 +/// 3: Leaf +/// 4: Leaf +/// 5: Node4 -> child 6 +/// 6: Leaf +#[test] +fn test_stream_squash_blob_mixed_node_types() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + let mut store = NodeStore::new(dir_str).unwrap(); + let h = TrieHash([0; 32]); + + // Index 0: Node16 root with two forward children. 
+ let mut root_ptrs = [TriePtr::default(); 16]; + root_ptrs[0] = TriePtr::new(TrieNodeID::Node4 as u8, b'a', 1); + root_ptrs[1] = TriePtr::new(TrieNodeID::Node4 as u8, b'b', 2); + let root = TrieNodeType::Node16(TrieNode16 { + path: vec![0], + ptrs: root_ptrs, + cowptr: None, + patches: vec![], + }); + store.push(&root, h, 0).unwrap(); + + // Index 1: Node4 with two forward children. + store + .push( + &make_test_node4( + &[1], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'c', 3), + TriePtr::new(TrieNodeID::Leaf as u8, b'd', 4), + TriePtr::default(), + TriePtr::default(), + ], + ), + h, + 0, + ) + .unwrap(); + + // Index 2: Node4 with one forward child. + store + .push( + &make_test_node4( + &[2], + [ + TriePtr::new(TrieNodeID::Node4 as u8, b'e', 5), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ), + h, + 0, + ) + .unwrap(); + + // Index 3: Leaf + store.push(&make_test_leaf(&[3, 4], 0xAA), h, 0).unwrap(); + // Index 4: Leaf + store.push(&make_test_leaf(&[5, 6], 0xBB), h, 0).unwrap(); + + // Index 5: Node4 with one forward child (deeper subtree). + store + .push( + &make_test_node4( + &[7], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'f', 6), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ), + h, + 0, + ) + .unwrap(); + + // Index 6: Leaf + store.push(&make_test_leaf(&[8, 9], 0xCC), h, 0).unwrap(); + + store.flush().unwrap(); + + let parent_hash = StacksBlockId::sentinel(); + let mut output = Cursor::new(Vec::new()); + let bytes_written = stream_squash_blob(&mut store, &parent_hash, &mut output).unwrap(); + + // Verify blob header. + let blob = output.into_inner(); + assert_eq!(blob.len() as u64, bytes_written); + assert_eq!(&blob[..32], parent_hash.as_bytes()); + assert_eq!( + &blob[BLOCK_HEADER_HASH_ENCODED_SIZE..BLOCK_HEADER_HASH_ENCODED_SIZE + 4], + &0u32.to_le_bytes() + ); + + // Verify the root was written at the canonical root position and its child + // pointers target readable descendants. 
+ let header_size = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + let mut cursor = Cursor::new(blob.as_slice()); + let root_ptr = TriePtr::new(TrieNodeID::Node16 as u8, 0, header_size); + let (root_node, _) = read_nodetype(&mut cursor, &root_ptr).unwrap(); + assert_eq!(root_node.ptrs()[0].id(), TrieNodeID::Node4 as u8); + assert_eq!(root_node.ptrs()[1].id(), TrieNodeID::Node4 as u8); + assert!(root_node.ptrs()[0].ptr() > header_size); + assert!(root_node.ptrs()[1].ptr() > header_size); + + let first_child_ptr = root_node.ptrs()[0]; + let (first_child, _) = read_nodetype(&mut cursor, &first_child_ptr).unwrap(); + assert_eq!(first_child.ptrs()[0].id(), TrieNodeID::Leaf as u8); +} + +/// Verify that writing the blob at a non-zero sink offset doesn't corrupt +/// the output. bytes_written equals total_size and the prefix is untouched. +#[test] +fn test_stream_squash_blob_at_nonzero_offset() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + let mut store = NodeStore::new(dir_str).unwrap(); + + let leaf = make_test_leaf(&[1, 2], 0xBB); + let root = make_test_node4( + &[0], + [ + TriePtr::new(1, b'a', 1), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + store.push(&root, TrieHash::from_data(&[0xAA]), 0).unwrap(); + store.push(&leaf, TrieHash::from_data(&[0xBB]), 0).unwrap(); + store.flush().unwrap(); + + let parent_hash = StacksBlockId::sentinel(); + + // Write to a sink that already has 1000 bytes of garbage prefix. 
+ let prefix_len: u64 = 1000; + let mut buf = vec![0xFFu8; prefix_len as usize]; + let mut output = Cursor::new(&mut buf); + output.seek(std::io::SeekFrom::End(0)).unwrap(); + + let bytes_written = stream_squash_blob(&mut store, &parent_hash, &mut output).unwrap(); + + let total_buf = output.into_inner(); + assert_eq!(total_buf.len() as u64, prefix_len + bytes_written); + assert!(total_buf[..prefix_len as usize].iter().all(|&b| b == 0xFF)); + assert_eq!( + &total_buf[prefix_len as usize..prefix_len as usize + 32], + parent_hash.as_bytes() + ); +} + +/// Test `resolve_inline_child_offsets` directly: verify it replaces forward +/// child pointers with their blob offsets, leaves back/empty pointers +/// untouched, and returns CorruptionError for out-of-bounds indices. +#[test] +fn test_resolve_inline_child_offsets() { + // Build a Node4 with a mix of pointer types: + // slot 0: forward ptr to child index 1 + // slot 1: back ptr (should be left untouched) + // slot 2: empty (should be left untouched) + // slot 3: forward ptr to child index 2 + let back_id = set_backptr(TrieNodeID::Node4 as u8); + let mut node = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'a', 1), + TriePtr { + id: back_id, + chr: b'x', + ptr: 999, + back_block: 5, + }, + TriePtr::default(), + TriePtr::new(TrieNodeID::Leaf as u8, b'b', 2), + ], + ); + + let offsets: Vec = vec![100, 200, 300]; + resolve_inline_child_offsets(node.ptrs_mut(), &offsets).unwrap(); + + let ptrs = node.ptrs(); + // Forward ptrs remapped to blob offsets. + assert_eq!(ptrs[0].ptr(), 200); // child_idx 1 -> offset 200 + assert_eq!(ptrs[3].ptr(), 300); // child_idx 2 -> offset 300 + // Back ptr untouched. + assert_eq!(ptrs[1].ptr(), 999); + assert_eq!(ptrs[1].back_block(), 5); + // Empty ptr untouched. + assert_eq!(ptrs[2].ptr(), 0); + + // Empty pointer slices are a no-op. 
+ let mut empty: [TriePtr; 0] = []; + resolve_inline_child_offsets(&mut empty, &offsets).unwrap(); + + // Out-of-bounds child index returns CorruptionError. + let mut bad_node = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'a', 99), // index 99 > offsets.len() + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + assert!(resolve_inline_child_offsets(bad_node.ptrs_mut(), &offsets).is_err()); +} + +/// Verify that `resolve_inline_child_offsets` with offsets > u32::MAX causes +/// the node's serialized size to grow (u32 -> u64 pointer encoding). +#[test] +fn test_resolve_inline_child_offsets_u64_encoding_widens_node() { + let mut node = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'a', 0), // child_idx 0 + TriePtr::new(TrieNodeID::Leaf as u8, b'b', 1), // child_idx 1 + TriePtr::default(), + TriePtr::default(), + ], + ); + + let size_before = get_node_byte_len(&node); + + // One offset below u32::MAX, one above -> mixed encoding. + let offsets: Vec = vec![1000, u64::from(u32::MAX) + 1]; + resolve_inline_child_offsets(node.ptrs_mut(), &offsets).unwrap(); + + let size_after = get_node_byte_len(&node); + + // Exactly one pointer widened from u32 (4 bytes) to u64 (8 bytes) -> +4 bytes. + assert_eq!( + size_after - size_before, + 4, + "one u64 pointer should add exactly 4 bytes" + ); + + // The ptr that stayed below u32::MAX should still use u32 encoding. + assert_eq!(node.ptrs()[0].ptr(), 1000); + assert!(!is_u64_ptr(node.ptrs()[0].encoded_id())); + + // The ptr that crossed u32::MAX should use u64 encoding. 
+ assert_eq!(node.ptrs()[1].ptr(), u64::from(u32::MAX) + 1); + assert!(is_u64_ptr(node.ptrs()[1].encoded_id())); +} + +#[test] +fn test_squash_rejects_proof_generation() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_path.to_str().unwrap(), 4, 1); + + let (squashed_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 2, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + // Squash-aware proofs are out of scope for this PR. The squashed MARF + // must reject both `get_with_proof` entry points so callers don't get + // proofs that commit to the wrong (H-state) leaf for historical blocks. + let tip = &blocks[2]; + match squashed.get_with_proof(tip, "k1") { + Err(Error::UnsupportedOnSquashedMarf(op)) => assert_eq!(op, "get_with_proof"), + other => panic!("expected UnsupportedOnSquashedMarf, got {other:?}"), + } + + let path = TrieHash::from_data(b"k1"); + match squashed.get_with_proof_from_hash(tip, &path) { + Err(Error::UnsupportedOnSquashedMarf(op)) => { + assert_eq!(op, "get_with_proof_from_hash") + } + other => panic!("expected UnsupportedOnSquashedMarf, got {other:?}"), + } +} + +/// `MARF::get_with_proof` rejects squashed MARFs, but a caller holding a +/// `TrieStorageConnection` could previously bypass that by calling +/// `TrieMerkleProof::from_path` (or `from_entry` / `from_raw_entry`, which +/// delegate to it). Direct invocation must also be rejected so squashed +/// MARFs cannot produce silently-wrong proofs by any code path. 
+#[test] +fn test_trie_merkle_proof_from_path_rejects_squashed_marf() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_path.to_str().unwrap(), 4, 1); + + let (squashed_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + 2, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let tip = blocks[2].clone(); + let value = MARFValue::from_value("v1_at_2"); + let path = TrieHash::from_key("k1"); + + let result = squashed.with_conn(|conn| TrieMerkleProof::from_path(conn, &path, &value, &tip)); + match result { + Err(Error::UnsupportedOnSquashedMarf(op)) => { + assert_eq!(op, "TrieMerkleProof::from_path"); + } + other => panic!("expected UnsupportedOnSquashedMarf, got {other:?}"), + } +} + +/// `get_block_at_height` is chain metadata - the answer is in +/// `marf_squashed_blocks` even when the caller is standing on a pre-squash +/// block. Without the squash-aware short-circuit the lookup would route +/// through `MARF::get_path` and trip the historical-read guard. +#[test] +fn test_get_block_at_height_works_for_pre_squash_caller() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_path.to_str().unwrap(), 16, 1); + + let squash_height: u32 = 12; + let (squashed_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + // Stand on a pre-squash block and ask for an even earlier height. 
The + // answer must come from `marf_squashed_blocks`, not from a trie metadata + // read that would be rejected. + let standing_block = &blocks[8]; + let resolved = squashed + .get_bhh_at_height(standing_block, 4) + .unwrap() + .expect("squashed MARF must resolve metadata height -> block"); + assert_eq!(resolved, blocks[4]); + + // Asking for a height equal to the standing block returns that block. + let same = squashed + .get_bhh_at_height(standing_block, 8) + .unwrap() + .expect("height == current_block_height short-circuits"); + assert_eq!(same, blocks[8]); + + // Asking for a height strictly greater than the standing block returns + // None (the future is unknown from that vantage point). + assert!(squashed + .get_bhh_at_height(standing_block, 9) + .unwrap() + .is_none()); + + // Standing on the squash tip still resolves earlier heights via the + // side-table. + let tip = &blocks[squash_height as usize]; + let resolved_tip = squashed.get_bhh_at_height(tip, 0).unwrap().unwrap(); + assert_eq!(resolved_tip, blocks[0]); +} + +#[test] +fn test_get_block_height_of_same_pre_squash_block() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_path.to_str().unwrap(), 16, 1); + + let squash_height: u32 = 12; + let (squashed_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + blocks.last().unwrap(), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = MARF::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let standing_block = &blocks[8]; + let height = squashed + .get_block_height_of(standing_block, standing_block) + .unwrap(); + + assert_eq!(height, Some(8)); +} + +/// `squash_to_path` must follow the explicit `tip` argument, not the +/// highest `block_id` in `marf_data`. 
Build a forked MARF where the +/// canonical tip is inserted *before* the non-canonical fork, so picking by +/// insertion order would land on the wrong fork. Then squash with the +/// canonical tip and verify the resulting MARF reflects canonical state at +/// the squash height. +#[test] +fn test_squash_follows_explicit_tip_on_forked_marf() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut src = MARF::from_path(src_path.to_str().unwrap(), open_opts.clone()).unwrap(); + + let g = StacksBlockId::from_bytes(&[0x01; 32]).unwrap(); + let canonical_tip = StacksBlockId::from_bytes(&[0x02; 32]).unwrap(); + let fork_tip = StacksBlockId::from_bytes(&[0x03; 32]).unwrap(); + + // Genesis - height 0. + src.begin(&StacksBlockId::sentinel(), &g).unwrap(); + src.insert("shared", MARFValue::from_value("genesis")) + .unwrap(); + src.commit().unwrap(); + + // Canonical fork at height 1 (inserted FIRST → lower block_id). + src.begin(&g, &canonical_tip).unwrap(); + src.insert("contested", MARFValue::from_value("canonical_v1")) + .unwrap(); + src.commit().unwrap(); + + // Non-canonical fork at height 1 (inserted SECOND → higher block_id). + // get_latest_confirmed_block_hash would pick this one. + src.begin(&g, &fork_tip).unwrap(); + src.insert("contested", MARFValue::from_value("fork_v1")) + .unwrap(); + src.commit().unwrap(); + + drop(src); + + // Squash with the explicit canonical tip - must NOT pick the fork. + let (dst_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + &canonical_tip, + 1, + ); + + let mut squashed = MARF::from_path( + dst_path.to_str().unwrap(), + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true), + ) + .unwrap(); + + // The squash root must reflect canonical state, not the fork. 
+ let value = squashed + .get(&canonical_tip, "contested") + .unwrap() + .expect("contested key must be present at canonical tip"); + assert_eq!( + value, + MARFValue::from_value("canonical_v1"), + "squash followed the wrong fork (insertion-order tip selection regression)" + ); + + // The fork's tip should NOT be in the squashed range - its height (1) + // would be ambiguous, so confirm it isn't recorded as the height-1 block. + let h1 = squashed + .get_bhh_at_height(&canonical_tip, 1) + .unwrap() + .expect("height 1 must resolve"); + assert_eq!(h1, canonical_tip); + assert_ne!(h1, fork_tip); +} + +#[test] +fn test_squash_rejects_existing_destination() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let dst_db_path = dir.path().join("dst.sqlite"); + // Pre-create an empty file at the destination path: the guard must trip. + std::fs::write(&dst_db_path, b"").unwrap(); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let result = MARF::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + open_opts, + blocks.last().unwrap(), + 1, + "test", + ); + match result { + Err(Error::DestinationExists(path)) => assert_eq!(path, dst_db_path.to_str().unwrap()), + other => panic!("expected DestinationExists for the .sqlite collision, got {other:?}"), + } + + // Same check for an existing .blobs sibling without the .sqlite file. 
+ std::fs::remove_file(&dst_db_path).unwrap(); + let dst_blobs_path = dir.path().join("dst.sqlite.blobs"); + std::fs::write(&dst_blobs_path, b"").unwrap(); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let result = MARF::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + open_opts, + blocks.last().unwrap(), + 1, + "test", + ); + match result { + Err(Error::DestinationExists(path)) => assert_eq!(path, dst_blobs_path.to_str().unwrap()), + other => panic!("expected DestinationExists for the .blobs collision, got {other:?}"), + } +} + +#[test] +fn test_squash_rejects_compress_true() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let dst_dir = dir.path().join("squashed"); + std::fs::create_dir_all(&dst_dir).unwrap(); + let dst_db_path = dst_dir.join("index.sqlite"); + + let mut open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + open_opts.compress = true; + + let result = MARF::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + open_opts, + blocks.last().unwrap(), + 1, + "test", + ); + assert!(result.is_err(), "compress=true should be rejected"); + let err_msg = format!("{}", result.unwrap_err()); + assert!( + err_msg.contains("compress=true"), + "error should mention compress=true: {err_msg}" + ); +} + +/// `stream_squash_blob` relies on NodeStore's root-first DFS preorder. If a +/// parent appears after its child in the temp store, reverse iteration would +/// try to write the parent before the child's offset is known. Make sure this +/// invariant fails loudly instead of writing a bogus offset 0 pointer. 
+#[test] +fn test_stream_squash_blob_rejects_non_preorder_nodes() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + let mut store = NodeStore::new(dir_str).unwrap(); + let h = TrieHash([0; 32]); + + // Invalid order for the child-before-parent writer: + // 0: root -> parent at index 2 + // 1: leaf + // 2: parent -> leaf at index 1 + // A valid root-first DFS preorder would have placed index 2 before index 1. + let root = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Node4 as u8, b'a', 2), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + store.push(&root, h, 0).unwrap(); + store.push(&make_test_leaf(&[2, 3], 0xAA), h, 0).unwrap(); + + let inner = make_test_node4( + &[1], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'b', 1), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + store.push(&inner, h, 0).unwrap(); + store.flush().unwrap(); + + let parent_hash = StacksBlockId::sentinel(); + let mut output = Cursor::new(Vec::new()); + let err = stream_squash_blob(&mut store, &parent_hash, &mut output).unwrap_err(); + assert!( + format!("{err}").contains("has not been written"), + "expected unwritten child offset error, got {err}" + ); +} + +/// Build a synthetic >4 GiB squash blob so the root's first child pointer +/// crosses `u32::MAX` and is emitted with the u64-width encoding bit. 
+#[test] +#[ignore = "synthetic large-offset regression"] +fn stream_squash_blob_large_offset_sets_u64_ptr_bit() { + let dir = tempdir().expect("create temp dir"); + let dir_str = dir.path().to_str().unwrap(); + let path = dir + .path() + .join("stream_squash_blob_large_offset_sets_u64_ptr_bit.bin"); + + let mut store = NodeStore::new(dir_str).expect("create node store"); + let template = TrieNodeType::Node256(Box::new(TrieNode256::new(&[]))); + let per_node_size = u64::try_from(get_node_byte_len(&template)).expect("infallible"); + let required_nodes = u64::from(u32::MAX) / per_node_size + 2; + let hash = TrieHash([0; 32]); + for i in 0..required_nodes { + let mut node = TrieNode256::new(&[]); + if i + 1 < required_nodes { + assert!(node.insert(&TriePtr::new(TrieNodeID::Node256 as u8, 0x00, i + 1))); + } + store + .push(&TrieNodeType::Node256(Box::new(node)), hash, 0) + .expect("push trie node"); + } + store.flush().expect("flush node store"); + + let parent_hash = StacksBlockId([0x55; 32]); + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(&path) + .expect("create temp squash blob"); + let bytes_written = + stream_squash_blob(&mut store, &parent_hash, &mut file).expect("stream squash blob"); + assert!(bytes_written > u64::from(u32::MAX)); + + let header_size = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + let root_ptr = TriePtr::new(TrieNodeID::Node256 as u8, 0, header_size); + let (root_node, _) = read_nodetype(&mut file, &root_ptr).expect("read root node"); + let child_ptr = root_node.ptrs()[0]; + assert!(child_ptr.ptr() > u64::from(u32::MAX)); + assert!(is_u64_ptr(child_ptr.encoded_id())); +} + +/// Extending a squashed MARF must correctly serialize patch nodes even when +/// the squash tip has many inline (forward-ptr) children that become +/// backpointers in the new block. 
+/// +/// This test uses `insert_raw` with controlled `TrieHash` paths to build a +/// deterministic wide Node256 root (64 children in distinct chr() slots). +/// After squashing and extending with a single-key modification, the root is +/// COW'd as a patch: 1 forward child + 63 inherited backpointers. +/// +/// The test verifies: +/// 1. The `assert!(node_forward.eq(diff_forward))` in `dump_compressed_consume` +/// does not panic - the forward-ptr sequence filtering is correct. +/// 2. The root of b3's blob is actually stored as a `TrieNodeID::Patch`, +/// proving the patch path was exercised (not silently skipped). +/// 3. Both archival and squashed MARFs produce identical data when extended +/// with the same operations. +/// +/// Regression test for the `assert_eq!(num_new_nodes, patch_node.ptr_diff.len())` +/// panic that occurred when extending squashed mainnet MARFs. +#[test] +fn test_squash_extend_many_keys_patch_backptr_regression() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("src.sqlite"); + + // Compression MUST be enabled for the patch path in dump_compressed_consume. + let open_opts = + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", false).with_compression(true); + + // Build a controlled trie path for each first-byte value. + // 64 distinct first bytes guarantees a Node256 root (>48 children). 
+ let make_path = |first_byte: u8| -> TrieHash { + let mut bytes = [0u8; 32]; + bytes[0] = first_byte; + TrieHash(bytes) + }; + let make_leaf = |val: u8| -> TrieLeaf { + let mut data = [0u8; 40]; + data[0] = val; + TrieLeaf { + path: vec![], + data: MARFValue(data), + } + }; + let num_keys: u8 = 64; + + let b1 = StacksBlockId::from_bytes(&[1u8; 32]).unwrap(); + let b2 = StacksBlockId::from_bytes(&[2u8; 32]).unwrap(); + let b3 = StacksBlockId::from_bytes(&[3u8; 32]).unwrap(); + + // --- Build archival source MARF --- + let mut src = MARF::from_path(src_db_path.to_str().unwrap(), open_opts.clone()).unwrap(); + + src.begin(&StacksBlockId::sentinel(), &b1).unwrap(); + for i in 0..num_keys { + src.insert_raw(make_path(i), make_leaf(i)).unwrap(); + } + src.commit().unwrap(); + + src.begin(&b1, &b2).unwrap(); + for i in 0..num_keys { + src.insert_raw(make_path(i), make_leaf(i.wrapping_add(100))) + .unwrap(); + } + src.commit().unwrap(); + + // Extend archival to b3: modify ONE key so the root is COW'd with + // 1 changed child + (num_keys - 1) inherited backpointers. + src.begin(&b2, &b3).unwrap(); + src.insert_raw(make_path(0), make_leaf(255)).unwrap(); + src.commit().unwrap(); + + // Collect archival values at b3 for later comparison. + let archival_val_0 = src + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(0))) + .unwrap(); + let archival_val_1 = src + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(1))) + .unwrap(); + let archival_val_63 = src + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(num_keys - 1))) + .unwrap(); + drop(src); + + // --- Squash at height 1 (= b2) --- + let dst_dir = dir.path().join("squashed"); + std::fs::create_dir_all(&dst_dir).unwrap(); + let dst_db_path = dst_dir.join("dst.sqlite"); + + // squash_to_path requires compress=false; compression is for the extend step. 
+ let squash_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", false); + MARF::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + squash_opts, + &b2, + 1, + "test", + ) + .unwrap(); + + // --- Extend squashed MARF to b3 with compression enabled --- + // Compression enables the patch-node path in dump_compressed_consume. + let squashed_opts = + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true).with_compression(true); + let mut squashed = + MARF::from_path(dst_db_path.to_str().unwrap(), squashed_opts.clone()).unwrap(); + + squashed.begin(&b2, &b3).unwrap(); + squashed.insert_raw(make_path(0), make_leaf(255)).unwrap(); + // The commit exercises dump_compressed_consume with a COW'd Node256 + // root where most children are backpointers. The forward-ptr sequence + // assertion must pass for this to succeed. + squashed.commit().unwrap(); + + // --- Verify patch node was actually emitted --- + // b3's blob is in the .blobs file. The root node starts at + // blob_offset + HEADER (36 bytes). Its type ID byte is at +68. + // TrieNodeID::Patch = 6 proves patch encoding was used, not Normal. 
+ let b3_block_id = trie_sql::get_block_identifier(squashed.sqlite_conn(), &b3).unwrap(); + let (b3_blob_offset, b3_blob_length) = + trie_sql::get_external_trie_offset_length(squashed.sqlite_conn(), b3_block_id).unwrap(); + assert!(b3_blob_length > 0, "b3 blob should have non-zero length"); + + let blobs_path = format!("{}.blobs", dst_db_path.display()); + let mut blobs_file = std::fs::File::open(&blobs_path).unwrap(); + // Root node type ID is at: blob_offset + 32 (parent hash) + 4 (identifier) + 32 (node hash) + let root_type_offset = b3_blob_offset + (BLOCK_HEADER_HASH_ENCODED_SIZE as u64) + 4 + 32; + blobs_file + .seek(std::io::SeekFrom::Start(root_type_offset)) + .unwrap(); + let mut type_byte = [0u8; 1]; + std::io::Read::read_exact(&mut blobs_file, &mut type_byte).unwrap(); + assert_eq!( + type_byte[0], + TrieNodeID::Patch as u8, + "Root of b3 should be stored as a Patch node (type {}), got type {}. \ + This means dump_compressed_consume fell back to Normal serialization.", + TrieNodeID::Patch as u8, + type_byte[0] + ); + + // --- Verify data matches archival MARF --- + let squashed_val_0 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(0))) + .unwrap(); + let squashed_val_1 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(1))) + .unwrap(); + let squashed_val_63 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(num_keys - 1))) + .unwrap(); + + assert_eq!(archival_val_0, squashed_val_0, "modified key mismatch"); + assert_eq!(archival_val_1, squashed_val_1, "inherited key mismatch"); + assert_eq!( + archival_val_63, squashed_val_63, + "last inherited key mismatch" + ); + + // Pre-squash data still readable through the squash tip. 
+ let val_at_b2 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b2, &make_path(1))) + .unwrap(); + assert!(val_at_b2.is_some(), "data at b2 should still be readable"); +} diff --git a/stackslib/src/chainstate/stacks/index/trie.rs b/stackslib/src/chainstate/stacks/index/trie.rs index fec9bb99e9..d6588168f1 100644 --- a/stackslib/src/chainstate/stacks/index/trie.rs +++ b/stackslib/src/chainstate/stacks/index/trie.rs @@ -24,7 +24,7 @@ use crate::chainstate::stacks::index::node::{ TrieNode4, TrieNode48, TrieNodeID, TrieNodeType, TriePtr, }; use crate::chainstate::stacks::index::storage::{TrieHashCalculationMode, TrieStorageConnection}; -use crate::chainstate::stacks::index::{Error, MarfTrieId, TrieHasher, TrieLeaf}; +use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId, TrieHasher, TrieLeaf}; use crate::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; use crate::util::macros::is_trace; @@ -747,6 +747,26 @@ impl Trie { } } + /// Resolve the height of `block_header` from the MARF's own height-mapping + /// keys (the trie path). + fn block_height_from_trie( + storage: &mut TrieStorageConnection, + block_header: &T, + ) -> Result { + MARF::get_block_height_miner_tip(storage, block_header, block_header) + .map_err(|e| match e { + Error::NotFoundError => Error::CorruptionError(format!( + "Could not obtain block height for block {block_header}: not found" + )), + x => x, + })? + .ok_or_else(|| { + Error::CorruptionError(format!( + "Could not obtain block height for block {block_header}: got None" + )) + }) + } + /// Perform the reads, lookups, etc. for computing the ancestor byte vector. /// This method _does not_ restore the previously open block on failure, the caller will do that. fn inner_get_trie_ancestor_hashes_bytes( @@ -761,40 +781,40 @@ impl Trie { // here is where some mind-bending things begin to happen. // we want to find the block at a given _height_. but how to do so? // use the data stored already in the MARF. 
- let cur_block_height = - MARF::get_block_height_miner_tip(storage, &cur_block_header, &cur_block_header) - .map_err(|e| match e { - Error::NotFoundError => Error::CorruptionError(format!( - "Could not obtain block height for block {}: not found", - &cur_block_header - )), - x => x, - })? - .ok_or_else(|| { - Error::CorruptionError(format!( - "Could not obtain block height for block {}: got None", - &cur_block_header - )) - })?; + let cur_block_height = Self::block_height_from_trie(storage, &cur_block_header)?; let mut log_depth = 0; while log_depth < 32 && (1u32 << log_depth) <= cur_block_height { - let prev_block_header = MARF::get_block_at_height( - storage, - cur_block_height - (1u32 << log_depth), - &cur_block_header, - )? - .ok_or_else(|| { - Error::CorruptionError(format!( - "Could not obtain block hash at block height {}", - cur_block_height - (1u32 << log_depth) - )) - })?; - - storage.open_block(&prev_block_header)?; - - let root_ptr = storage.root_trieptr(); - let ancestor_hash = storage.read_node_hash_bytes(&root_ptr)?; + let ancestor_height = cur_block_height - (1u32 << log_depth); + let prev_block_header = + MARF::get_block_at_height(storage, ancestor_height, &cur_block_header)? + .ok_or_else(|| { + Error::CorruptionError(format!( + "Could not obtain block hash at block height {ancestor_height}" + )) + })?; + + // Use the per-height archival root from the side-table when the + // ancestor falls inside the squashed range; otherwise fall back to + // opening the ancestor's own trie blob. + let ancestor_hash = if storage + .squash_height() + .is_some_and(|h| ancestor_height <= h) + { + trie_sql::read_squashed_block_root_hash_by_height( + storage.sqlite_conn(), + ancestor_height, + )? + .ok_or_else(|| { + Error::CorruptionError(format!( + "Could not obtain squashed root hash at height {ancestor_height}" + )) + })? + } else { + storage.open_block(&prev_block_header)?; + let root_ptr = storage.root_trieptr(); + storage.read_node_hash_bytes(&root_ptr)? 
+ }; trace!( "Include root hash {} from block {} in ancestor #{}", diff --git a/stackslib/src/chainstate/stacks/index/trie_sql.rs b/stackslib/src/chainstate/stacks/index/trie_sql.rs index 9f46416d61..62475ce16d 100644 --- a/stackslib/src/chainstate/stacks/index/trie_sql.rs +++ b/stackslib/src/chainstate/stacks/index/trie_sql.rs @@ -212,7 +212,7 @@ pub fn update_squash_root_node_hash(conn: &Connection, hash: &TrieHash) -> Resul /// Read the stored root hash for a given height from the squashed blocks table. /// Returns `None` if the height is not present (archival MARF or height /// outside the squashed range). -pub fn read_squash_archival_marf_root_hash( +pub fn read_squashed_block_root_hash_by_height( conn: &Connection, height: u32, ) -> Result, Error> { @@ -239,10 +239,39 @@ pub fn read_squash_archival_marf_root_hash( } } +/// Read the stored archival root hash for a squashed block hash. +/// Returns `None` if the block is not in the squashed range. +pub fn read_squashed_block_root_hash_by_hash( + conn: &Connection, + block_hash: &T, +) -> Result, Error> { + let result: Option> = conn + .query_row( + "SELECT marf_root_hash FROM marf_squashed_blocks WHERE block_hash = ?1", + params![block_hash.as_bytes()], + |row| row.get(0), + ) + .optional()?; + + match result { + Some(bytes) => { + if bytes.len() != TRIEHASH_ENCODED_SIZE { + return Err(Error::CorruptionError( + "Invalid squash root hash length".to_string(), + )); + } + Ok(Some(TrieHash::from_bytes(&bytes).ok_or_else(|| { + Error::CorruptionError("Invalid squash root hash bytes".to_string()) + })?)) + } + None => Ok(None), + } +} + /// Read the stored height for a block hash from the squashed blocks table. /// Returns `None` if the block hash is not present (archival MARF or block /// outside the squashed range). 
-pub fn read_squash_block_height( +pub fn read_squashed_block_height_by_hash( conn: &Connection, block_hash: &T, ) -> Result, Error> { @@ -264,8 +293,7 @@ pub fn read_squash_block_height( /// Read the block hash for a given height from the squashed blocks table. /// Returns `None` if the height is not in the squashed range. -#[cfg(test)] -pub fn read_squash_block_hash( +pub fn read_squashed_block_hash_by_height( conn: &Connection, height: u32, ) -> Result, Error> { @@ -319,8 +347,7 @@ pub fn bulk_read_block_entries( } /// Bulk-update all `marf_data` entries to share the same blob offset/length, -/// except for the tip block. Used post-commit in the squash pipeline to -/// point all historical placeholder entries at the shared squash trie storage. +/// except for the tip block. Called during squash finalization. pub fn bulk_update_blob_offsets( conn: &Connection, offset: u64,