diff --git a/changelog.d/marf-snapshot-framework.added b/changelog.d/marf-snapshot-framework.added
new file mode 100644
index 00000000000..716eef3343d
--- /dev/null
+++ b/changelog.d/marf-snapshot-framework.added
@@ -0,0 +1 @@
+Add snapshot framework for copying chainstate index and SPV side-tables into squashed output
diff --git a/changelog.d/marf-squash-engine.added b/changelog.d/marf-squash-engine.added
new file mode 100644
index 00000000000..ed709c40e49
--- /dev/null
+++ b/changelog.d/marf-squash-engine.added
@@ -0,0 +1 @@
+Add MARF squash engine (`squash_to_path`) and squash-aware trie lookups for root hashes and block heights
diff --git a/changelog.d/marf-squash-foundation.added b/changelog.d/marf-squash-foundation.added
new file mode 100644
index 00000000000..61dc95db510
--- /dev/null
+++ b/changelog.d/marf-squash-foundation.added
@@ -0,0 +1 @@
+Add squash metadata SQL tables and storage foundation for MARF squashing support
diff --git a/changelog.d/marf-u64-offset-pointers.added b/changelog.d/marf-u64-offset-pointers.added
new file mode 100644
index 00000000000..f71a49df8ce
--- /dev/null
+++ b/changelog.d/marf-u64-offset-pointers.added
@@ -0,0 +1 @@
+Add support for u64 children pointer offsets in the MARF trie, using a mixed u32/u64 encoding with a 0x20 bit flag for backward compatibility
\ No newline at end of file
diff --git a/stackslib/src/burnchains/bitcoin/spv.rs b/stackslib/src/burnchains/bitcoin/spv.rs
index c46baf007c0..3e88e24f5f2 100644
--- a/stackslib/src/burnchains/bitcoin/spv.rs
+++ b/stackslib/src/burnchains/bitcoin/spv.rs
@@ -56,7 +56,7 @@ const BLOCK_DIFFICULTY_INTERVAL: u32 = 14 * 24 * 60 * 60; // two weeks, in secon
pub const SPV_DB_VERSION: &str = "3";
-const SPV_INITIAL_SCHEMA: &[&str] = &[
+pub(crate) const SPV_INITIAL_SCHEMA: &[&str] = &[
r#"
CREATE TABLE headers(
version INTEGER NOT NULL,
@@ -75,7 +75,7 @@ const SPV_INITIAL_SCHEMA: &[&str] = &[
// unlike the `headers` table, this table will never be deleted from, since we use it to determine
// whether or not newly-arrived headers represent a better chain than the best-known chain. The
// only way to _replace_ a row is to find a header difficulty interval with a _higher_ work score.
-const SPV_SCHEMA_2: &[&str] = &[r#"
+pub(crate) const SPV_SCHEMA_2: &[&str] = &[r#"
CREATE TABLE chain_work(
interval INTEGER PRIMARY KEY,
work TEXT NOT NULL -- 32-byte (256-bit) integer
@@ -83,7 +83,7 @@ const SPV_SCHEMA_2: &[&str] = &[r#"
"#];
// force the node to go and store the burnchain block header hash as well
-const SPV_SCHEMA_3: &[&str] = &[
+pub(crate) const SPV_SCHEMA_3: &[&str] = &[
r#"
DROP TABLE headers;
"#,
diff --git a/stackslib/src/chainstate/stacks/db/mod.rs b/stackslib/src/chainstate/stacks/db/mod.rs
index 7d5c094c19b..821040bd05e 100644
--- a/stackslib/src/chainstate/stacks/db/mod.rs
+++ b/stackslib/src/chainstate/stacks/db/mod.rs
@@ -86,6 +86,7 @@ pub mod accounts;
pub mod blocks;
pub mod contracts;
pub mod headers;
+pub mod snapshot;
pub mod transactions;
pub mod unconfirmed;
diff --git a/stackslib/src/chainstate/stacks/db/snapshot/common.rs b/stackslib/src/chainstate/stacks/db/snapshot/common.rs
new file mode 100644
index 00000000000..c8e529d8d18
--- /dev/null
+++ b/stackslib/src/chainstate/stacks/db/snapshot/common.rs
@@ -0,0 +1,343 @@
+// Copyright (C) 2026 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use std::collections::HashSet;
+use std::time::Instant;
+
+use rusqlite::{params, Connection};
+use stacks_common::util::hash::to_hex;
+
+use crate::chainstate::stacks::index::marf::{MARFOpenOpts, MarfConnection, MARF};
+use crate::chainstate::stacks::index::storage::{TrieFileStorage, TrieHashCalculationMode};
+use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId};
+
+/// A spec for copying a single table from the ATTACHed `src` database.
+///
+/// The `source_sql` is the exact `SELECT` used to filter source rows.
+/// Copy uses plain `INSERT ... SELECT` (no `OR IGNORE`) so that unexpected
+/// pre-population in the destination fails loudly.
+pub struct TableCopySpec {
+ pub table: &'static str,
+ /// The exact SELECT for the source side, e.g.
+ /// `"SELECT * FROM src.snapshots WHERE sortition_id IN (SELECT sortition_id FROM canonical_sortitions)"`.
+ pub source_sql: String,
+}
+
+/// Clone table and index schemas from the source DB (via `sqlite_master`) into the
+/// destination connection. This avoids duplicating any CREATE TABLE / ALTER TABLE /
+/// CREATE INDEX statements and is always in sync with whatever migration version the
+/// source is at.
+///
+/// Expects the source DB to be ATTACHed as `src`.
+pub fn clone_schemas_from_source(conn: &Connection, tables: &[&str]) -> Result<(), Error> {
+ let mut stmts: Vec<String> = Vec::new();
+
+ for table in tables {
+ let sql: Option<String> = conn
+ .query_row(
+ "SELECT sql FROM src.sqlite_master WHERE type='table' AND name=?1",
+ params![table],
+ |row| row.get(0),
+ )
+ .ok();
+
+ if let Some(create_sql) = sql {
+ let safe_sql = if create_sql.contains("IF NOT EXISTS") {
+ create_sql
+ } else {
+ create_sql.replacen("CREATE TABLE", "CREATE TABLE IF NOT EXISTS", 1)
+ };
+ stmts.push(safe_sql);
+ }
+
+ let mut idx_stmt = conn
+ .prepare("SELECT sql FROM src.sqlite_master WHERE type='index' AND tbl_name=?1 AND sql IS NOT NULL")
+ .map_err(Error::SQLError)?;
+ let idx_rows = idx_stmt
+ .query_map(params![table], |row| row.get::<_, String>(0))
+ .map_err(Error::SQLError)?;
+ for idx_sql in idx_rows {
+ let idx_sql = idx_sql.map_err(Error::SQLError)?;
+ let safe_sql = if idx_sql.contains("IF NOT EXISTS") {
+ idx_sql
+ } else {
+ idx_sql.replacen("CREATE INDEX", "CREATE INDEX IF NOT EXISTS", 1)
+ };
+ stmts.push(safe_sql);
+ }
+ }
+
+ for stmt in &stmts {
+ conn.execute_batch(stmt).map_err(Error::SQLError)?;
+ }
+
+ Ok(())
+}
+
+/// Clone schemas only for tables that exist in the source DB.
+/// Returns the list of tables that were actually cloned.
+pub fn clone_optional_schemas_from_source(
+ conn: &Connection,
+ tables: &[&str],
+) -> Result<Vec<String>, Error> {
+ let mut present = Vec::new();
+ for table in tables {
+ let exists: bool = conn
+ .query_row(
+ "SELECT COUNT(*) > 0 FROM src.sqlite_master WHERE type='table' AND name=?1",
+ params![table],
+ |row| row.get(0),
+ )
+ .map_err(Error::SQLError)?;
+ if exists {
+ clone_schemas_from_source(conn, &[table])?;
+ present.push(table.to_string());
+ }
+ }
+ Ok(present)
+}
+
+/// Check if a table exists in the given schema prefix (empty for main, "src" for attached).
+pub fn table_exists(conn: &Connection, schema: &str, table: &str) -> bool {
+ let master = if schema.is_empty() {
+ "sqlite_master".to_string()
+ } else {
+ format!("{schema}.sqlite_master")
+ };
+ conn.query_row(
+ &format!("SELECT COUNT(*) > 0 FROM {master} WHERE type='table' AND name=?1"),
+ params![table],
+ |row| row.get(0),
+ )
+ .unwrap_or(false)
+}
+
+/// Check bidirectional full-row EXCEPT equality.
+/// Returns true if the two result sets are identical.
+pub fn full_row_except_match(conn: &Connection, dst_sql: &str, src_sql: &str) -> bool {
+ let extra_in_dst: i64 = conn
+ .query_row(
+ &format!("SELECT COUNT(*) FROM ({dst_sql} EXCEPT {src_sql})"),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(1);
+ let extra_in_src: i64 = conn
+ .query_row(
+ &format!("SELECT COUNT(*) FROM ({src_sql} EXCEPT {dst_sql})"),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(1);
+ extra_in_dst == 0 && extra_in_src == 0
+}
+
+/// One-directional subset check: every row in `dst_sql` must exist in
+/// `src_sql`, but `src_sql` may contain additional rows. Use this for
+/// non-consensus tables that grow after the snapshot (e.g. signer_stats,
+/// matured_rewards).
+pub fn dst_subset_of_src(conn: &Connection, dst_sql: &str, src_sql: &str) -> bool {
+ let extra_in_dst: i64 = conn
+ .query_row(
+ &format!("SELECT COUNT(*) FROM ({dst_sql} EXCEPT {src_sql})"),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(1);
+ extra_in_dst == 0
+}
+
+/// Execute a slice of copy specs inside the current transaction.
+/// Returns a vec of (table_name, rows_copied).
+pub fn execute_copy_specs(
+ conn: &Connection,
+ specs: &[TableCopySpec],
+) -> Result<Vec<(&'static str, u64)>, Error> {
+ let mut results = Vec::with_capacity(specs.len());
+ for spec in specs {
+ let t = Instant::now();
+ let sql = format!("INSERT INTO {} {}", spec.table, spec.source_sql);
+ let rows = conn.execute(&sql, []).map_err(Error::SQLError)? as u64;
+ info!(
+ " copy: {} ({} rows) in {:?}",
+ spec.table,
+ rows,
+ t.elapsed()
+ );
+ results.push((spec.table, rows));
+ }
+ Ok(results)
+}
+
+/// Check an optional table's match status.
+/// Returns None if absent in both, Some(false) if present in one but not other,
+/// Some(true/false) from full-row EXCEPT if present in both.
+pub fn check_optional_table_match(
+ conn: &Connection,
+ table: &str,
+ src_filter: Option<&str>,
+) -> Option<bool> {
+ let in_dst = table_exists(conn, "", table);
+ let in_src = table_exists(conn, "src", table);
+
+ match (in_dst, in_src) {
+ (false, false) => None,
+ (true, false) | (false, true) => Some(false),
+ (true, true) => {
+ let src_sql = match src_filter {
+ Some(filter) => format!("SELECT * FROM src.{table} {filter}"),
+ None => format!("SELECT * FROM src.{table}"),
+ };
+ Some(full_row_except_match(
+ conn,
+ &format!("SELECT * FROM {table}"),
+ &src_sql,
+ ))
+ }
+ }
+}
+
+/// Collect the hex-encoded `MARFValue` of every leaf in the squashed trie.
+///
+/// Opens the MARF at `db_path` read-only, resolves the tip, and walks the
+/// trie via `for_each_leaf`. Auto-detects external blobs.
+///
+/// Returns `(tip_block_hash, leaf_value_hashes)`.
+pub fn collect_leaf_value_hashes<T: MarfTrieId>(
+ db_path: &str,
+) -> Result<(T, HashSet<String>), Error> {
+ let external_blobs = std::path::Path::new(&format!("{db_path}.blobs")).exists();
+ let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", external_blobs);
+ let storage = TrieFileStorage::open_readonly(db_path, open_opts)?;
+ let mut marf = MARF::<T>::from_storage(storage);
+ let tip = trie_sql::get_latest_confirmed_block_hash::<T>(marf.sqlite_conn())?;
+
+ let mut hashes = HashSet::new();
+ marf.with_conn(|conn| {
+ MARF::for_each_leaf(conn, &tip, |_hash, value| {
+ hashes.insert(to_hex(&value.to_vec()));
+ Ok(())
+ })
+ })?;
+
+ Ok((tip, hashes))
+}
+
+/// Copy only the `__fork_storage` rows that are referenced by leaf nodes
+/// in the squashed MARF trie. Non-canonical entries from forks are excluded.
+///
+/// Opens the squashed MARF read-only and walks the trie via `for_each_leaf`
+/// to collect canonical leaf value hashes, then copies only the matching
+/// `__fork_storage` rows from the source.
+///
+/// Falls back to a full copy if `marf_data` is absent (e.g. in test
+/// fixtures that don't go through `squash_to_path`).
+///
+/// Returns the number of rows copied.
+pub fn copy_canonical_fork_storage<T: MarfTrieId>(
+ conn: &Connection,
+ dst_path: &str,
+) -> Result<u64, Error> {
+ // Check if the source even has __fork_storage (test fixtures may not).
+ let src_has_table: bool = conn
+ .query_row(
+ "SELECT COUNT(*) > 0 FROM src.sqlite_master WHERE type='table' AND name='__fork_storage'",
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(false);
+
+ if !src_has_table {
+ info!(" copy_canonical_fork_storage: source has no __fork_storage, skipping");
+ return Ok(0);
+ }
+
+ // Ensure the destination table exists (clone schema from source).
+ clone_schemas_from_source(conn, &["__fork_storage"])?;
+
+ // If marf_data doesn't exist, fall back to full copy.
+ let has_marf_data: bool = conn
+ .query_row(
+ "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='marf_data'",
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(false);
+
+ if !has_marf_data {
+ let rows = conn
+ .execute(
+ "INSERT OR REPLACE INTO __fork_storage SELECT * FROM src.__fork_storage",
+ [],
+ )
+ .map_err(Error::SQLError)? as u64;
+ info!(" copy_canonical_fork_storage: no marf_data table, full copy ({rows} rows)");
+ return Ok(rows);
+ }
+
+ let t = Instant::now();
+
+ let (_tip, leaf_hashes) = collect_leaf_value_hashes::<T>(dst_path)?;
+ let insert_count = leaf_hashes.len() as u64;
+
+ // Build a temp table of canonical leaf value hashes.
+ conn.execute_batch("CREATE TEMP TABLE __squash_leaf_values (value_hash TEXT PRIMARY KEY)")
+ .map_err(Error::SQLError)?;
+
+ {
+ let mut stmt = conn
+ .prepare("INSERT OR IGNORE INTO __squash_leaf_values (value_hash) VALUES (?1)")
+ .map_err(Error::SQLError)?;
+ for hash in &leaf_hashes {
+ stmt.execute(params![hash]).map_err(Error::SQLError)?;
+ }
+ }
+ drop(leaf_hashes);
+
+ info!(
+ " copy_canonical_fork_storage: extracted {insert_count} leaf hashes in {:?}",
+ t.elapsed()
+ );
+
+ // Copy only the referenced rows.
+ let t2 = Instant::now();
+ let rows = conn
+ .execute(
+ "INSERT OR REPLACE INTO __fork_storage \
+ SELECT f.* FROM src.__fork_storage f \
+ INNER JOIN __squash_leaf_values lv ON f.value_hash = lv.value_hash",
+ [],
+ )
+ .map_err(Error::SQLError)? as u64;
+
+ conn.execute_batch("DROP TABLE IF EXISTS __squash_leaf_values")
+ .map_err(Error::SQLError)?;
+
+ info!(
+ " copy_canonical_fork_storage: copied {rows} rows (from {insert_count} leaves) in {:?}",
+ t2.elapsed()
+ );
+
+ Ok(rows)
+}
+
+pub fn checkpoint_destination_wal(conn: &Connection) -> Result<(), Error> {
+ let _: (i64, i64, i64) = conn
+ .query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
+ Ok((row.get(0)?, row.get(1)?, row.get(2)?))
+ })
+ .map_err(Error::SQLError)?;
+ Ok(())
+}
diff --git a/stackslib/src/chainstate/stacks/db/snapshot/index.rs b/stackslib/src/chainstate/stacks/db/snapshot/index.rs
new file mode 100644
index 00000000000..1c5492df4ab
--- /dev/null
+++ b/stackslib/src/chainstate/stacks/db/snapshot/index.rs
@@ -0,0 +1,695 @@
+// Copyright (C) 2026 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use std::time::Instant;
+
+use rusqlite::{params, Connection, OptionalExtension};
+use stacks_common::types::chainstate::StacksBlockId;
+
+use super::common::{
+ checkpoint_destination_wal, clone_schemas_from_source, collect_leaf_value_hashes,
+ copy_canonical_fork_storage, dst_subset_of_src, execute_copy_specs, full_row_except_match,
+ table_exists, TableCopySpec,
+};
+use crate::burnchains::PoxConstants;
+use crate::chainstate::stacks::index::Error;
+
+/// Required table names that must be present in the squashed index DB.
+const REQUIRED_TABLES: &[&str] = &[
+ "db_config",
+ "block_headers",
+ "nakamoto_block_headers",
+ "payments",
+ "transactions",
+ "nakamoto_tenure_events",
+ "nakamoto_reward_sets",
+ "signer_stats",
+ "matured_rewards",
+ "burnchain_txids",
+ "epoch_transitions",
+ "staging_blocks",
+ "staging_microblocks",
+ "staging_microblocks_data",
+ // Schema fidelity: these tables exist in archival nodes but are expected
+ // unused in a Nakamoto-era GSS node. Included to prevent missing-table
+ // crashes if any code path references them.
+ "invalidated_microblocks_data", // Epoch 2.x block orphaning only (blocks.rs:2189)
+ "user_supporters", // Dead table: zero runtime references
+];
+
+/// Row-count statistics returned by [`copy_index_side_tables`].
+#[derive(Debug, Clone)]
+pub struct IndexSideTableStats {
+ pub block_headers_rows: u64,
+ pub nakamoto_block_headers_rows: u64,
+ pub payments_rows: u64,
+ pub transactions_rows: u64,
+ pub nakamoto_tenure_events_rows: u64,
+ pub nakamoto_reward_sets_rows: u64,
+ pub signer_stats_rows: u64,
+ pub matured_rewards_rows: u64,
+ pub burnchain_txids_rows: u64,
+ pub epoch_transitions_rows: u64,
+ pub staging_blocks_rows: u64,
+ pub fork_storage_rows: u64,
+}
+
+/// Validation result for index side tables in a squashed DB.
+#[derive(Debug, Clone)]
+pub struct IndexSideTableValidation {
+ pub tables_present: bool,
+ pub db_config_matches: bool,
+ pub fork_storage_match: bool,
+ pub block_headers_count_match: bool,
+ pub nakamoto_headers_count_match: bool,
+ pub payments_count_match: bool,
+ pub transactions_count_match: bool,
+ pub nakamoto_tenure_events_count_match: bool,
+ pub nakamoto_reward_sets_match: bool,
+ pub signer_stats_match: bool,
+ pub matured_rewards_match: bool,
+ pub burnchain_txids_match: bool,
+ pub epoch_transitions_match: bool,
+ pub staging_blocks_match: bool,
+ pub invalidated_microblocks_data_empty: bool,
+ pub transactions_no_extra_blocks: bool,
+ pub tenure_events_no_extra_blocks: bool,
+}
+
+impl IndexSideTableValidation {
+ pub fn is_valid(&self) -> bool {
+ self.tables_present
+ && self.db_config_matches
+ && self.fork_storage_match
+ && self.block_headers_count_match
+ && self.nakamoto_headers_count_match
+ && self.payments_count_match
+ && self.transactions_count_match
+ && self.nakamoto_tenure_events_count_match
+ && self.nakamoto_reward_sets_match
+ && self.signer_stats_match
+ && self.matured_rewards_match
+ && self.burnchain_txids_match
+ && self.epoch_transitions_match
+ && self.staging_blocks_match
+ && self.invalidated_microblocks_data_empty
+ && self.transactions_no_extra_blocks
+ && self.tenure_events_no_extra_blocks
+ }
+}
+
+/// Populate a temp table with the canonical block hashes from the squashed MARF's
+/// `marf_squash_block_heights` metadata.
+fn populate_canonical_blocks(conn: &Connection) -> Result<(), Error> {
+ conn.execute_batch("CREATE TEMP TABLE canonical_blocks (index_block_hash TEXT PRIMARY KEY)")
+ .map_err(Error::SQLError)?;
+ conn.execute(
+ "INSERT OR IGNORE INTO canonical_blocks (index_block_hash) \
+ SELECT block_hash FROM marf_squash_block_heights",
+ [],
+ )
+ .map_err(Error::SQLError)?;
+ Ok(())
+}
+
+/// Derive the maximum reward cycle from the canonical squashed tip's burn height.
+fn derive_max_reward_cycle(
+ conn: &Connection,
+ first_burn_height: u64,
+ reward_cycle_len: u64,
+) -> Result<Option<u64>, Error> {
+ let tip_burn_height: Option<i64> = conn
+ .query_row(
+ "SELECT nh.burn_header_height \
+ FROM marf_squash_block_heights mh \
+ JOIN src.nakamoto_block_headers nh ON nh.index_block_hash = mh.block_hash \
+ ORDER BY mh.height DESC LIMIT 1",
+ [],
+ |row| row.get::<_, i64>(0),
+ )
+ .optional()
+ .map_err(Error::SQLError)?
+ .map(|h| h as u64);
+
+ match tip_burn_height {
+ Some(tbh) => {
+ let cycle = PoxConstants::static_block_height_to_reward_cycle(
+ tbh,
+ first_burn_height,
+ reward_cycle_len,
+ )
+ .ok_or_else(|| {
+ Error::CorruptionError(format!(
+ "cannot derive reward cycle: tip_burn_height={tbh}, \
+ first_burn_height={first_burn_height}, reward_cycle_len={reward_cycle_len}"
+ ))
+ })?;
+ info!(" derive_max_reward_cycle: {cycle} (tip_burn_height={tbh})");
+ Ok(Some(cycle))
+ }
+ None => Ok(None),
+ }
+}
+
+/// Build the copy specs for descriptor-driven index tables.
+/// These are the uniform `index_block_hash IN canonical_blocks` tables.
+fn index_copy_specs() -> Vec<TableCopySpec> {
+ let cb = "SELECT index_block_hash FROM canonical_blocks";
+ vec![
+ TableCopySpec {
+ table: "block_headers",
+ source_sql: format!("SELECT * FROM src.block_headers WHERE index_block_hash IN ({cb})"),
+ },
+ TableCopySpec {
+ table: "nakamoto_block_headers",
+ source_sql: format!(
+ "SELECT * FROM src.nakamoto_block_headers WHERE index_block_hash IN ({cb})"
+ ),
+ },
+ TableCopySpec {
+ table: "payments",
+ source_sql: format!("SELECT * FROM src.payments WHERE index_block_hash IN ({cb})"),
+ },
+ TableCopySpec {
+ table: "transactions",
+ source_sql: format!("SELECT * FROM src.transactions WHERE index_block_hash IN ({cb})"),
+ },
+ TableCopySpec {
+ table: "nakamoto_tenure_events",
+ source_sql: format!(
+ "SELECT * FROM src.nakamoto_tenure_events WHERE block_id IN ({cb})"
+ ),
+ },
+ TableCopySpec {
+ table: "nakamoto_reward_sets",
+ source_sql: format!(
+ "SELECT * FROM src.nakamoto_reward_sets WHERE index_block_hash IN ({cb})"
+ ),
+ },
+ TableCopySpec {
+ table: "matured_rewards",
+ source_sql: format!(
+ "SELECT * FROM src.matured_rewards WHERE child_index_block_hash IN ({cb})"
+ ),
+ },
+ TableCopySpec {
+ table: "burnchain_txids",
+ source_sql: format!(
+ "SELECT * FROM src.burnchain_txids WHERE index_block_hash IN ({cb})"
+ ),
+ },
+ TableCopySpec {
+ table: "epoch_transitions",
+ source_sql: format!("SELECT * FROM src.epoch_transitions WHERE block_id IN ({cb})"),
+ },
+ ]
+}
+
+/// Copy required non-MARF tables from the source `index.sqlite` into the
+/// squashed destination. Only canonical rows (determined by the squashed MARF's
+/// `marf_squash_block_heights`) are included, excluding non-canonical fork data.
+pub fn copy_index_side_tables(
+ src_path: &str,
+ dst_path: &str,
+ first_burn_height: u64,
+ reward_cycle_len: u64,
+) -> Result<IndexSideTableStats, Error> {
+ let conn = Connection::open(dst_path).map_err(Error::SQLError)?;
+
+ conn.execute("ATTACH DATABASE ?1 AS src", params![src_path])
+ .map_err(Error::SQLError)?;
+
+ conn.execute_batch("BEGIN IMMEDIATE")
+ .map_err(Error::SQLError)?;
+
+ if let Err(e) = clone_schemas_from_source(&conn, REQUIRED_TABLES) {
+ let _ = conn.execute_batch("ROLLBACK");
+ let _ = conn.execute_batch("DETACH DATABASE src");
+ return Err(e);
+ }
+
+ let result = copy_tables_inner(&conn, dst_path, first_burn_height, reward_cycle_len);
+
+ match result {
+ Ok(stats) => {
+ conn.execute_batch("COMMIT").map_err(Error::SQLError)?;
+ conn.execute_batch("DETACH DATABASE src")
+ .map_err(Error::SQLError)?;
+ checkpoint_destination_wal(&conn)?;
+ Ok(stats)
+ }
+ Err(e) => {
+ let _ = conn.execute_batch("ROLLBACK");
+ let _ = conn.execute_batch("DETACH DATABASE src");
+ Err(e)
+ }
+ }
+}
+
+fn copy_tables_inner(
+ conn: &Connection,
+ dst_path: &str,
+ first_burn_height: u64,
+ reward_cycle_len: u64,
+) -> Result<IndexSideTableStats, Error> {
+ let total_start = Instant::now();
+
+ // Copy db_config verbatim.
+ let t = Instant::now();
+ conn.execute(
+ "INSERT OR REPLACE INTO db_config SELECT * FROM src.db_config",
+ [],
+ )
+ .map_err(Error::SQLError)?;
+ info!(" copy_side_tables: db_config done in {:?}", t.elapsed());
+
+ // Copy only canonical __fork_storage rows - the squashed MARF trie
+ // leaves reference these by value_hash. Non-canonical fork entries
+ // are excluded.
+ let fork_storage_rows = copy_canonical_fork_storage::<StacksBlockId>(conn, dst_path)?;
+
+ // Build canonical block set from squash metadata.
+ let t = Instant::now();
+ populate_canonical_blocks(conn)?;
+ info!(
+ " copy_side_tables: canonical_blocks temp table built in {:?}",
+ t.elapsed()
+ );
+
+ // Execute descriptor-driven copies for uniform tables.
+ let specs = index_copy_specs();
+ let results = execute_copy_specs(conn, &specs)?;
+
+ let get = |name: &str| -> u64 {
+ results
+ .iter()
+ .find(|(t, _)| *t == name)
+ .map(|(_, r)| *r)
+ .unwrap_or(0)
+ };
+
+ // Custom: signer_stats filtered by derived reward cycle.
+ let max_reward_cycle = derive_max_reward_cycle(conn, first_burn_height, reward_cycle_len)?;
+
+ let t = Instant::now();
+ let signer_stats_rows = match max_reward_cycle {
+ Some(cycle) => conn
+ .execute(
+ "INSERT INTO signer_stats SELECT * FROM src.signer_stats \
+ WHERE reward_cycle <= ?1",
+ params![cycle as i64],
+ )
+ .map_err(Error::SQLError)? as u64,
+ None => conn
+ .execute(
+ "INSERT INTO signer_stats SELECT * FROM src.signer_stats",
+ [],
+ )
+ .map_err(Error::SQLError)? as u64,
+ };
+ info!(
+ " copy_side_tables: signer_stats ({signer_stats_rows} rows) in {:?}",
+ t.elapsed()
+ );
+
+ // Custom: staging_blocks with semantic predicate.
+ let t = Instant::now();
+ let staging_blocks_rows = conn
+ .execute(
+ "INSERT INTO staging_blocks \
+ SELECT s.* FROM src.staging_blocks s \
+ WHERE s.index_block_hash IN (SELECT index_block_hash FROM canonical_blocks) \
+ AND s.processed = 1 \
+ AND s.orphaned = 0",
+ [],
+ )
+ .map_err(Error::SQLError)? as u64;
+ info!(
+ " copy_side_tables: staging_blocks ({staging_blocks_rows} rows) in {:?}",
+ t.elapsed()
+ );
+
+ conn.execute_batch("DROP TABLE IF EXISTS canonical_blocks")
+ .map_err(Error::SQLError)?;
+
+ info!(
+ " copy_side_tables: all tables done in {:?}",
+ total_start.elapsed()
+ );
+
+ Ok(IndexSideTableStats {
+ block_headers_rows: get("block_headers"),
+ nakamoto_block_headers_rows: get("nakamoto_block_headers"),
+ payments_rows: get("payments"),
+ transactions_rows: get("transactions"),
+ nakamoto_tenure_events_rows: get("nakamoto_tenure_events"),
+ nakamoto_reward_sets_rows: get("nakamoto_reward_sets"),
+ signer_stats_rows,
+ matured_rewards_rows: get("matured_rewards"),
+ burnchain_txids_rows: get("burnchain_txids"),
+ epoch_transitions_rows: get("epoch_transitions"),
+ staging_blocks_rows,
+ fork_storage_rows,
+ })
+}
+
+/// Validate that the squashed index DB has the correct side tables by
+/// comparing against the source.
+pub fn validate_index_side_tables(
+ src_path: &str,
+ dst_path: &str,
+ first_burn_height: u64,
+ reward_cycle_len: u64,
+) -> Result<IndexSideTableValidation, Error> {
+ let conn = Connection::open(dst_path).map_err(Error::SQLError)?;
+ conn.execute("ATTACH DATABASE ?1 AS src", params![src_path])
+ .map_err(Error::SQLError)?;
+
+ // Check all required tables exist.
+ let tables_present = REQUIRED_TABLES.iter().all(|table| {
+ conn.query_row(
+ "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
+ params![table],
+ |row| row.get::<_, i64>(0),
+ )
+ .unwrap_or(0)
+ > 0
+ });
+
+ // db_config verbatim match.
+ let db_config_matches = conn
+ .query_row(
+ "SELECT COUNT(*) FROM (
+ SELECT version, mainnet, chain_id FROM db_config
+ EXCEPT
+ SELECT version, mainnet, chain_id FROM src.db_config
+ )",
+ [],
+ |row| row.get::<_, i64>(0),
+ )
+ .unwrap_or(1)
+ == 0
+ && conn
+ .query_row(
+ "SELECT COUNT(*) FROM (
+ SELECT version, mainnet, chain_id FROM src.db_config
+ EXCEPT
+ SELECT version, mainnet, chain_id FROM db_config
+ )",
+ [],
+ |row| row.get::<_, i64>(0),
+ )
+ .unwrap_or(1)
+ == 0;
+
+ // __fork_storage: canonical-only copy. Validate against the canonical
+ // filtered source set (same leaf-hash filter used by copy_canonical_fork_storage).
+ let fork_storage_match = {
+ let dst_has = table_exists(&conn, "", "__fork_storage");
+ let src_has = table_exists(&conn, "src", "__fork_storage");
+ match (dst_has, src_has) {
+ (false, false) => true,
+ (true, true) => {
+ let has_marf_data = table_exists(&conn, "", "marf_data");
+
+ if has_marf_data {
+ let (_tip, leaf_hashes) = collect_leaf_value_hashes::<StacksBlockId>(dst_path)?;
+
+ conn.execute_batch(
+ "CREATE TEMP TABLE val_fork_leaf_values (value_hash TEXT PRIMARY KEY)",
+ )
+ .map_err(Error::SQLError)?;
+
+ {
+ let mut stmt = conn
+ .prepare(
+ "INSERT OR IGNORE INTO val_fork_leaf_values (value_hash) VALUES (?1)",
+ )
+ .map_err(Error::SQLError)?;
+ for hash in &leaf_hashes {
+ stmt.execute([hash]).map_err(Error::SQLError)?;
+ }
+ }
+
+ let ok = full_row_except_match(
+ &conn,
+ "SELECT * FROM __fork_storage",
+ "SELECT f.* FROM src.__fork_storage f \
+ INNER JOIN val_fork_leaf_values lv ON f.value_hash = lv.value_hash",
+ );
+
+ conn.execute_batch("DROP TABLE IF EXISTS val_fork_leaf_values")
+ .map_err(Error::SQLError)?;
+
+ ok
+ } else {
+ // fixture fallback, matching copy_canonical_fork_storage()
+ full_row_except_match(
+ &conn,
+ "SELECT * FROM __fork_storage",
+ "SELECT * FROM src.__fork_storage",
+ )
+ }
+ }
+ _ => false,
+ }
+ };
+
+ // Build canonical block set.
+ let _ = conn.execute_batch(
+ "CREATE TEMP TABLE IF NOT EXISTS val_canonical_blocks (index_block_hash TEXT PRIMARY KEY)",
+ );
+ let _ = conn.execute(
+ "INSERT OR IGNORE INTO val_canonical_blocks (index_block_hash) \
+ SELECT block_hash FROM marf_squash_block_heights",
+ [],
+ );
+
+ let cb = "SELECT index_block_hash FROM val_canonical_blocks";
+
+ // Count-match validations (cheaper for large tables).
+ let block_headers_count_match = {
+ let src_count: i64 = conn
+ .query_row(
+ &format!("SELECT COUNT(*) FROM src.block_headers WHERE index_block_hash IN ({cb})"),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(-1);
+ let dst_count: i64 = conn
+ .query_row("SELECT COUNT(*) FROM block_headers", [], |row| row.get(0))
+ .unwrap_or(-2);
+ src_count == dst_count
+ };
+
+ let nakamoto_headers_count_match = {
+ let src_count: i64 = conn
+ .query_row(
+ &format!(
+ "SELECT COUNT(*) FROM src.nakamoto_block_headers \
+ WHERE index_block_hash IN ({cb})"
+ ),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(-1);
+ let dst_count: i64 = conn
+ .query_row("SELECT COUNT(*) FROM nakamoto_block_headers", [], |row| {
+ row.get(0)
+ })
+ .unwrap_or(-2);
+ src_count == dst_count
+ };
+
+ let payments_count_match = {
+ let src_count: i64 = conn
+ .query_row(
+ &format!("SELECT COUNT(*) FROM src.payments WHERE index_block_hash IN ({cb})"),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(-1);
+ let dst_count: i64 = conn
+ .query_row("SELECT COUNT(*) FROM payments", [], |row| row.get(0))
+ .unwrap_or(-2);
+ src_count == dst_count
+ };
+
+ let transactions_count_match = {
+ let src_count: i64 = conn
+ .query_row(
+ &format!("SELECT COUNT(*) FROM src.transactions WHERE index_block_hash IN ({cb})"),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(-1);
+ let dst_count: i64 = conn
+ .query_row("SELECT COUNT(*) FROM transactions", [], |row| row.get(0))
+ .unwrap_or(-2);
+ src_count == dst_count
+ };
+
+ let nakamoto_tenure_events_count_match = {
+ let src_count: i64 = conn
+ .query_row(
+ &format!(
+ "SELECT COUNT(*) FROM src.nakamoto_tenure_events WHERE block_id IN ({cb})"
+ ),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(-1);
+ let dst_count: i64 = conn
+ .query_row("SELECT COUNT(*) FROM nakamoto_tenure_events", [], |row| {
+ row.get(0)
+ })
+ .unwrap_or(-2);
+ src_count == dst_count
+ };
+
+ // No out-of-range rows leaked.
+ let transactions_no_extra_blocks = conn
+ .query_row(
+ &format!(
+ "SELECT COUNT(*) FROM transactions \
+ WHERE index_block_hash NOT IN ({cb})"
+ ),
+ [],
+ |row| row.get::<_, i64>(0),
+ )
+ .unwrap_or(1)
+ == 0;
+
+ let tenure_events_no_extra_blocks = conn
+ .query_row(
+ &format!(
+ "SELECT COUNT(*) FROM nakamoto_tenure_events \
+ WHERE block_id NOT IN ({cb})"
+ ),
+ [],
+ |row| row.get::<_, i64>(0),
+ )
+ .unwrap_or(1)
+ == 0;
+
+ // staging_blocks: bidirectional full-row EXCEPT against canonical source rows.
+ let staging_blocks_match = full_row_except_match(
+ &conn,
+ "SELECT * FROM staging_blocks",
+ &format!(
+ "SELECT s.* FROM src.staging_blocks s \
+ WHERE s.index_block_hash IN ({cb}) \
+ AND s.processed = 1 AND s.orphaned = 0"
+ ),
+ );
+
+ // Schema-fidelity tables should be empty.
+ let invalidated_microblocks_data_empty = conn
+ .query_row(
+ "SELECT COUNT(*) FROM invalidated_microblocks_data",
+ [],
+ |row| row.get::<_, i64>(0),
+ )
+ .unwrap_or(1)
+ == 0;
+
+ // Canonical-filtered tables: bidirectional full-row EXCEPT match.
+ let nakamoto_reward_sets_match = full_row_except_match(
+ &conn,
+ "SELECT * FROM nakamoto_reward_sets",
+ &format!("SELECT * FROM src.nakamoto_reward_sets WHERE index_block_hash IN ({cb})"),
+ );
+
+ let max_reward_cycle = derive_max_reward_cycle(&conn, first_burn_height, reward_cycle_len)?;
+
+ // signer_stats is a non-consensus counter table whose only writer uses
+ // INSERT ... ON CONFLICT DO UPDATE SET blocks_signed = blocks_signed + 1.
+ // After the snapshot the source keeps incrementing, so we check:
+ // 1. every (public_key, reward_cycle) key in dst exists in filtered src
+ // 2. dst.blocks_signed <= src.blocks_signed
+ let signer_stats_match = {
+ let cycle_filter = match max_reward_cycle {
+ Some(cycle) => format!(" WHERE reward_cycle <= {cycle}"),
+ None => String::new(),
+ };
+ // No fabricated keys.
+ let keys_ok = dst_subset_of_src(
+ &conn,
+ "SELECT public_key, reward_cycle FROM signer_stats",
+ &format!("SELECT public_key, reward_cycle FROM src.signer_stats{cycle_filter}"),
+ );
+ // No inflated counters.
+ let counters_ok: i64 = conn
+ .query_row(
+ &format!(
+ "SELECT COUNT(*) FROM signer_stats d \
+ JOIN src.signer_stats s \
+ ON d.public_key = s.public_key AND d.reward_cycle = s.reward_cycle \
+ WHERE d.blocks_signed > s.blocks_signed"
+ ),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(1);
+ keys_ok && counters_ok == 0
+ };
+
+ // matured_rewards is a non-consensus cache populated as new blocks
+ // trigger maturation of older canonical blocks' rewards. The source
+ // legitimately gains rows after the snapshot, so we only verify no
+ // fabricated rows exist in the destination.
+ let matured_rewards_match = dst_subset_of_src(
+ &conn,
+ "SELECT * FROM matured_rewards",
+ &format!("SELECT * FROM src.matured_rewards WHERE child_index_block_hash IN ({cb})"),
+ );
+
+ let burnchain_txids_match = full_row_except_match(
+ &conn,
+ "SELECT * FROM burnchain_txids",
+ &format!("SELECT * FROM src.burnchain_txids WHERE index_block_hash IN ({cb})"),
+ );
+
+ let epoch_transitions_match = full_row_except_match(
+ &conn,
+ "SELECT * FROM epoch_transitions",
+ &format!("SELECT * FROM src.epoch_transitions WHERE block_id IN ({cb})"),
+ );
+
+ let _ = conn.execute_batch("DROP TABLE IF EXISTS val_canonical_blocks");
+
+ conn.execute_batch("DETACH DATABASE src")
+ .map_err(Error::SQLError)?;
+
+ Ok(IndexSideTableValidation {
+ tables_present,
+ db_config_matches,
+ fork_storage_match,
+ block_headers_count_match,
+ nakamoto_headers_count_match,
+ payments_count_match,
+ transactions_count_match,
+ nakamoto_tenure_events_count_match,
+ nakamoto_reward_sets_match,
+ signer_stats_match,
+ matured_rewards_match,
+ burnchain_txids_match,
+ epoch_transitions_match,
+ staging_blocks_match,
+ invalidated_microblocks_data_empty,
+ transactions_no_extra_blocks,
+ tenure_events_no_extra_blocks,
+ })
+}
diff --git a/stackslib/src/chainstate/stacks/db/snapshot/mod.rs b/stackslib/src/chainstate/stacks/db/snapshot/mod.rs
new file mode 100644
index 00000000000..65d1af33978
--- /dev/null
+++ b/stackslib/src/chainstate/stacks/db/snapshot/mod.rs
@@ -0,0 +1,27 @@
+// Copyright (C) 2026 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+pub mod common;
+pub mod index;
+pub mod spv;
+
+#[cfg(test)]
+mod tests;
+
+pub use index::{
+ copy_index_side_tables, validate_index_side_tables, IndexSideTableStats,
+ IndexSideTableValidation,
+};
+pub use spv::{copy_spv_headers, validate_spv_headers, SpvHeadersCopyStats, SpvHeadersValidation};
diff --git a/stackslib/src/chainstate/stacks/db/snapshot/spv.rs b/stackslib/src/chainstate/stacks/db/snapshot/spv.rs
new file mode 100644
index 00000000000..ad82ca9f8a5
--- /dev/null
+++ b/stackslib/src/chainstate/stacks/db/snapshot/spv.rs
@@ -0,0 +1,222 @@
+// Copyright (C) 2026 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use std::fs;
+use std::path::Path;
+
+use rusqlite::{params, Connection, OpenFlags};
+
+use super::common::{
+ clone_optional_schemas_from_source, clone_schemas_from_source, full_row_except_match,
+ table_exists,
+};
+use crate::chainstate::stacks::db::snapshot::common::checkpoint_destination_wal;
+use crate::chainstate::stacks::index::Error;
+
+/// Tables required in all headers.sqlite versions.
+const REQUIRED_TABLES: &[&str] = &["headers", "db_config"];
+
+/// Tables present only in SPV schema v2+ (may be absent in very old DBs).
+const OPTIONAL_TABLES: &[&str] = &[
+ "chain_work", // Added in SPV_SCHEMA_2
+];
+
+/// Bitcoin difficulty chunk size (2016 blocks per difficulty interval).
+const DIFFICULTY_CHUNK_SIZE: u32 = 2016;
+
+/// Row-count statistics returned by [`copy_spv_headers`].
+#[derive(Debug, Clone)]
+pub struct SpvHeadersCopyStats {
+ pub headers_rows: u64,
+ pub chain_work_rows: u64,
+}
+
+/// Validation result for a copied headers.sqlite.
+#[derive(Debug, Clone)]
+pub struct SpvHeadersValidation {
+ pub headers_match: bool,
+ pub chain_work_match: bool,
+ pub db_config_match: bool,
+ pub no_extra_headers: bool,
+}
+
+impl SpvHeadersValidation {
+ pub fn is_valid(&self) -> bool {
+ self.headers_match && self.chain_work_match && self.db_config_match && self.no_extra_headers
+ }
+}
+
+/// Copy canonical SPV headers up to `burn_height` into a new destination.
+///
+/// Returns an error if the source file does not exist.
+pub fn copy_spv_headers(
+ src_path: &str,
+ dst_path: &str,
+ burn_height: u32,
+) -> Result<SpvHeadersCopyStats, Error> {
+ if !Path::new(src_path).exists() {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::NotFound,
+ format!("SPV headers source not found: {src_path}"),
+ )));
+ }
+
+ if let Some(parent) = Path::new(dst_path).parent() {
+ fs::create_dir_all(parent).map_err(Error::IOError)?;
+ }
+
+ // Remove stale destination to ensure a clean copy.
+ let dst = Path::new(dst_path);
+ if dst.exists() {
+ fs::remove_file(dst).map_err(Error::IOError)?;
+ }
+
+ let conn = Connection::open(dst_path).map_err(Error::SQLError)?;
+
+ // Match the journal mode used by stacks-node (WAL) so the database can be
+ // opened later without needing write access to switch modes.
+ conn.pragma_update(None, "journal_mode", "WAL")
+ .map_err(Error::SQLError)?;
+
+ conn.execute("ATTACH DATABASE ?1 AS src", params![src_path])
+ .map_err(Error::SQLError)?;
+
+ conn.execute_batch("BEGIN IMMEDIATE")
+ .map_err(Error::SQLError)?;
+
+ let result = copy_spv_headers_inner(&conn, burn_height);
+
+ match result {
+ Ok(stats) => {
+ conn.execute_batch("COMMIT").map_err(Error::SQLError)?;
+ conn.execute_batch("DETACH DATABASE src")
+ .map_err(Error::SQLError)?;
+ checkpoint_destination_wal(&conn)?;
+ Ok(stats)
+ }
+ Err(e) => {
+ let _ = conn.execute_batch("ROLLBACK");
+ let _ = conn.execute_batch("DETACH DATABASE src");
+ Err(e)
+ }
+ }
+}
+
+fn copy_spv_headers_inner(
+ conn: &Connection,
+ burn_height: u32,
+) -> Result<SpvHeadersCopyStats, Error> {
+ clone_schemas_from_source(conn, REQUIRED_TABLES)?;
+ let optional_present = clone_optional_schemas_from_source(conn, OPTIONAL_TABLES)?;
+ let has_chain_work = optional_present.contains(&"chain_work".to_string());
+
+ conn.execute("INSERT INTO db_config SELECT * FROM src.db_config", [])
+ .map_err(Error::SQLError)?;
+
+ let headers_rows = conn
+ .execute(
+ "INSERT INTO headers SELECT * FROM src.headers WHERE height <= ?1",
+ params![burn_height],
+ )
+ .map_err(Error::SQLError)? as u64;
+
+ // Copy chain_work for complete intervals only.
+ let chain_work_rows = if has_chain_work {
+ conn.execute(
+ "INSERT INTO chain_work SELECT * FROM src.chain_work \
+ WHERE (interval + 1) * ?1 - 1 <= ?2",
+ params![DIFFICULTY_CHUNK_SIZE, burn_height],
+ )
+ .map_err(Error::SQLError)? as u64
+ } else {
+ 0
+ };
+
+ Ok(SpvHeadersCopyStats {
+ headers_rows,
+ chain_work_rows,
+ })
+}
+
+/// Validate a copied headers.sqlite against its source.
+pub fn validate_spv_headers(
+ src_path: &str,
+ dst_path: &str,
+ burn_height: u32,
+) -> Result<SpvHeadersValidation, Error> {
+ if !Path::new(src_path).exists() {
+ return Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::NotFound,
+ format!("SPV headers source not found: {src_path}"),
+ )));
+ }
+ if !Path::new(dst_path).exists() {
+ return Err(Error::NotFoundError);
+ }
+
+ let conn = Connection::open_with_flags(dst_path, OpenFlags::SQLITE_OPEN_READ_ONLY)
+ .map_err(Error::SQLError)?;
+
+ conn.execute("ATTACH DATABASE ?1 AS src", params![src_path])
+ .map_err(Error::SQLError)?;
+
+ let db_config_match = full_row_except_match(
+ &conn,
+ "SELECT * FROM db_config",
+ "SELECT * FROM src.db_config",
+ );
+
+ let headers_match = full_row_except_match(
+ &conn,
+ "SELECT * FROM headers",
+ &format!("SELECT * FROM src.headers WHERE height <= {burn_height}"),
+ );
+
+ let has_src_cw = table_exists(&conn, "src", "chain_work");
+ let has_dst_cw = table_exists(&conn, "", "chain_work");
+
+ let chain_work_match = match (has_src_cw, has_dst_cw) {
+ (false, false) => true,
+ (true, true) => full_row_except_match(
+ &conn,
+ "SELECT * FROM chain_work",
+ &format!(
+ "SELECT * FROM src.chain_work \
+ WHERE (interval + 1) * {DIFFICULTY_CHUNK_SIZE} - 1 <= {burn_height}"
+ ),
+ ),
+ _ => false,
+ };
+
+ // No headers above burn_height in destination.
+ let extra_above: i64 = conn
+ .query_row(
+ &format!("SELECT COUNT(*) FROM headers WHERE height > {burn_height}"),
+ [],
+ |row| row.get(0),
+ )
+ .unwrap_or(1);
+ let no_extra_headers = extra_above == 0;
+
+ conn.execute_batch("DETACH DATABASE src")
+ .map_err(Error::SQLError)?;
+
+ Ok(SpvHeadersValidation {
+ headers_match,
+ chain_work_match,
+ db_config_match,
+ no_extra_headers,
+ })
+}
diff --git a/stackslib/src/chainstate/stacks/db/snapshot/tests/mod.rs b/stackslib/src/chainstate/stacks/db/snapshot/tests/mod.rs
new file mode 100644
index 00000000000..e32211516dd
--- /dev/null
+++ b/stackslib/src/chainstate/stacks/db/snapshot/tests/mod.rs
@@ -0,0 +1,902 @@
+use rusqlite::{params, Connection};
+use tempfile::tempdir;
+
+use super::index::{copy_index_side_tables, validate_index_side_tables};
+use crate::burnchains::bitcoin::spv::{
+ SPV_DB_VERSION, SPV_INITIAL_SCHEMA, SPV_SCHEMA_2, SPV_SCHEMA_3,
+};
+use crate::chainstate::nakamoto::{
+ NAKAMOTO_CHAINSTATE_SCHEMA_1, NAKAMOTO_CHAINSTATE_SCHEMA_2, NAKAMOTO_CHAINSTATE_SCHEMA_3,
+ NAKAMOTO_CHAINSTATE_SCHEMA_4, NAKAMOTO_CHAINSTATE_SCHEMA_5, NAKAMOTO_CHAINSTATE_SCHEMA_6,
+ NAKAMOTO_CHAINSTATE_SCHEMA_7, NAKAMOTO_CHAINSTATE_SCHEMA_8,
+};
+use crate::chainstate::stacks::db::{
+ CHAINSTATE_INDEXES, CHAINSTATE_INITIAL_SCHEMA, CHAINSTATE_SCHEMA_2, CHAINSTATE_SCHEMA_3,
+ CHAINSTATE_SCHEMA_4, CHAINSTATE_SCHEMA_5,
+};
+
+/// Create a source `index.sqlite` with the full chainstate schema by replaying
+/// the real migration pipeline. Returns the connection for inserting test data.
+fn create_source_db(path: &std::path::Path) -> Connection {
+ let conn = Connection::open(path).unwrap();
+
+ for cmd in CHAINSTATE_INITIAL_SCHEMA {
+ conn.execute_batch(cmd).unwrap();
+ }
+ conn.execute(
+ "INSERT INTO db_config (version, mainnet, chain_id) VALUES (?1, ?2, ?3)",
+ params!["1", 1i64, 1i64],
+ )
+ .unwrap();
+
+ // Apply all migrations in order (same as StacksChainState::apply_schema_migrations).
+ for cmd in CHAINSTATE_SCHEMA_2 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in CHAINSTATE_SCHEMA_3 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_1.iter() {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_2 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_3 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_4 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_5 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in CHAINSTATE_SCHEMA_4 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_6 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in CHAINSTATE_SCHEMA_5 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_7 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_8 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in CHAINSTATE_INDEXES {
+ conn.execute_batch(cmd).unwrap();
+ }
+
+ conn
+}
+
+/// Create a destination DB that simulates a squashed MARF by adding the
+/// `marf_squash_block_heights` table with the given canonical block hashes.
+fn create_dest_db_with_canonical_blocks(path: &std::path::Path, canonical: &[&str]) {
+ let conn = Connection::open(path).unwrap();
+ conn.execute_batch(
+ "CREATE TABLE IF NOT EXISTS marf_squash_block_heights (block_hash TEXT NOT NULL, height INTEGER NOT NULL)",
+ )
+ .unwrap();
+ for (h, bh) in canonical.iter().enumerate() {
+ conn.execute(
+ "INSERT INTO marf_squash_block_heights (block_hash, height) VALUES (?1, ?2)",
+ params![bh, h as i64],
+ )
+ .unwrap();
+ }
+}
+
+/// Insert a block_headers row at the given height.
+fn insert_block_header(conn: &Connection, height: u32, suffix: &str) {
+ conn.execute(
+ "INSERT INTO block_headers (version, total_burn, total_work, proof, parent_block, \
+ parent_microblock, parent_microblock_sequence, tx_merkle_root, state_index_root, \
+ microblock_pubkey_hash, block_hash, index_block_hash, block_height, index_root, \
+ consensus_hash, burn_header_hash, burn_header_height, burn_header_timestamp, \
+ parent_block_id, cost, block_size) \
+ VALUES (1,'0','0','p','par','mb',0,'mr','sr','mph',?1,?2,?3,'ir',?4,'bhh',?3,0,'pid','0','0')",
+ params![
+ format!("bh{suffix}"),
+ format!("ibh{suffix}"),
+ height,
+ format!("ch{suffix}"),
+ ],
+ )
+ .unwrap();
+}
+
+/// Insert a payment row at the given height.
+fn insert_payment(conn: &Connection, height: u32, suffix: &str) {
+ conn.execute(
+ "INSERT INTO payments (address, block_hash, consensus_hash, parent_block_hash, \
+ parent_consensus_hash, coinbase, tx_fees_anchored, tx_fees_streamed, stx_burns, \
+ burnchain_commit_burn, burnchain_sortition_burn, miner, stacks_block_height, \
+ index_block_hash, vtxindex, recipient, schedule_type) \
+ VALUES ('addr',?1,?2,'pbh','pch','100','0','0','0',0,0,1,?3,?4,0,NULL,'Epoch2')",
+ params![
+ format!("bh{suffix}"),
+ format!("ch{suffix}"),
+ height,
+ format!("ibh{suffix}"),
+ ],
+ )
+ .unwrap();
+}
+
+/// Insert a transaction row for the given index_block_hash.
+fn insert_transaction(conn: &Connection, id: i64, ibh: &str) {
+ conn.execute(
+ "INSERT INTO transactions (id, txid, index_block_hash, tx_hex, result) \
+ VALUES (?1, ?2, ?3, '0x00', 'ok')",
+ params![id, format!("tx{id}"), ibh],
+ )
+ .unwrap();
+}
+#[test]
+fn test_copy_index_side_tables_round_trip() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ // Insert test data at heights 1, 2, 3.
+ for (h, s) in [(1, "1"), (2, "2"), (3, "3")] {
+ insert_block_header(&conn, h, s);
+ insert_payment(&conn, h, s);
+ insert_transaction(&conn, h as i64, &format!("ibh{s}"));
+ }
+ conn.execute(
+ "INSERT INTO nakamoto_tenure_events (tenure_id_consensus_hash, prev_tenure_id_consensus_hash, \
+ burn_view_consensus_hash, cause, block_hash, block_id, coinbase_height, num_blocks_confirmed) \
+ VALUES ('ch1','ch0','bv1',0,'bh1','ibh1',1,0)",
+ [],
+ )
+ .unwrap();
+ conn.execute(
+ "INSERT INTO nakamoto_reward_sets (index_block_hash, reward_set) VALUES ('ibh1','{}')",
+ [],
+ )
+ .unwrap();
+ drop(conn);
+
+ // Destination: canonical blocks are ibh1, ibh2 (height 0, 1) - ibh3 is NOT canonical.
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1", "ibh2"]);
+
+ // Copy: only canonical blocks ibh1 and ibh2 should be included.
+ let stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert_eq!(stats.block_headers_rows, 2, "2 canonical block_headers");
+ assert_eq!(stats.payments_rows, 2, "2 canonical payments");
+ assert_eq!(stats.transactions_rows, 2, "2 canonical transactions");
+ assert_eq!(
+ stats.nakamoto_tenure_events_rows, 1,
+ "1 tenure event for ibh1"
+ );
+ assert_eq!(stats.nakamoto_reward_sets_rows, 1);
+
+ // Validate.
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert!(
+ validation.is_valid(),
+ "validation should pass: {validation:?}"
+ );
+ assert!(validation.tables_present);
+ assert!(validation.db_config_matches);
+ assert!(validation.block_headers_count_match);
+ assert!(validation.payments_count_match);
+ assert!(validation.transactions_count_match);
+ assert!(validation.nakamoto_tenure_events_count_match);
+ assert!(validation.transactions_no_extra_blocks);
+ assert!(validation.tenure_events_no_extra_blocks);
+ assert!(validation.staging_blocks_match);
+ assert!(validation.invalidated_microblocks_data_empty);
+}
+
+#[test]
+fn test_copy_excludes_fork_rows() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ // Insert canonical block at height 1.
+ insert_block_header(&conn, 1, "1_canonical");
+ insert_transaction(&conn, 1, "ibh1_canonical");
+ // Insert fork block at same height 1 (different consensus hash).
+ insert_block_header(&conn, 1, "1_fork");
+ insert_transaction(&conn, 2, "ibh1_fork");
+ drop(conn);
+
+ // Only ibh1_canonical is in the canonical set.
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1_canonical"]);
+
+ let stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ // Only canonical block should be copied, not the fork.
+ assert_eq!(stats.block_headers_rows, 1, "only canonical block_headers");
+ assert_eq!(stats.transactions_rows, 1, "only canonical transactions");
+
+ // Validate passes - fork rows excluded.
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+ assert!(
+ validation.is_valid(),
+ "validation should pass without fork rows: {validation:?}"
+ );
+}
+
+#[test]
+fn test_validate_index_side_tables_detects_extra_rows() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ // Insert one block + transaction.
+ insert_block_header(&conn, 1, "1");
+ insert_transaction(&conn, 1, "ibh1");
+ drop(conn);
+
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);
+
+ let _stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ // Inject a transaction for a block NOT in the canonical set.
+ {
+ let conn = Connection::open(&dst_path).unwrap();
+ conn.execute(
+ "INSERT INTO transactions VALUES (99, 'tx_bad', 'ibh_UNKNOWN', '0x00', 'ok')",
+ [],
+ )
+ .unwrap();
+ }
+
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert!(
+ !validation.transactions_no_extra_blocks,
+ "should detect extra block"
+ );
+ assert!(
+ !validation.transactions_count_match,
+ "count should mismatch"
+ );
+ assert!(!validation.is_valid(), "validation must fail");
+}
+
+#[test]
+fn test_all_required_tables_exist() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let _conn = create_source_db(&src_path);
+ drop(_conn);
+
+ let dst_path = dir.path().join("dst.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &[]);
+
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1).unwrap();
+
+ let dst_conn = Connection::open(&dst_path).unwrap();
+
+ // Verify all required tables exist including the newly added ones.
+ for table in &[
+ "staging_blocks",
+ "staging_microblocks",
+ "staging_microblocks_data",
+ "invalidated_microblocks_data",
+ "user_supporters",
+ ] {
+ let count: i64 = dst_conn
+ .query_row(
+ "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
+ params![table],
+ |row| row.get(0),
+ )
+ .unwrap();
+ assert_eq!(count, 1, "table '{table}' should exist");
+ }
+
+ // invalidated_microblocks_data should be empty.
+ let count: i64 = dst_conn
+ .query_row(
+ "SELECT COUNT(*) FROM invalidated_microblocks_data",
+ [],
+ |row| row.get(0),
+ )
+ .unwrap();
+ assert_eq!(count, 0, "invalidated_microblocks_data should be empty");
+}
+
+/// Insert a minimal nakamoto_block_headers row into the source DB.
+fn insert_nakamoto_header(conn: &Connection, ibh: &str, burn_height: u32) {
+ conn.execute(
+ "INSERT INTO nakamoto_block_headers ( \
+ block_height, index_root, burn_header_hash, burn_header_height, \
+ burn_header_timestamp, block_size, version, chain_length, burn_spent, \
+ consensus_hash, parent_block_id, tx_merkle_root, state_index_root, \
+ miner_signature, signer_signature, signer_bitvec, header_type, \
+ block_hash, index_block_hash, cost, total_tenure_cost, tenure_changed, \
+ tenure_tx_fees, vrf_proof, timestamp, burn_view, height_in_tenure, \
+ total_tenure_size) \
+ VALUES (?1,'ir','bhh',?2,0,'0',1,?1,0,'ch','pid','mr','sr','ms','ss','bv', \
+ 'nakamoto','bh',?3,'0','0',0,'0',NULL,0,NULL,0,0)",
+ params![burn_height, burn_height, ibh],
+ )
+ .unwrap();
+}
+
+#[test]
+fn test_signer_stats_validates_with_source_drift() {
+ // signer_stats is a non-consensus counter table. After the squash, the
+ // source node continues running and increments blocks_signed for existing
+ // (public_key, reward_cycle) pairs. Validation should still pass because
+ // we only check that the destination keys are a subset of the source keys.
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ insert_block_header(&conn, 1, "1");
+ // Nakamoto header so derive_max_reward_cycle can compute a cycle.
+ insert_nakamoto_header(&conn, "ibh1", 10);
+ conn.execute(
+ "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
+ VALUES ('pk1', 1, 5), ('pk2', 1, 3)",
+ [],
+ )
+ .unwrap();
+ drop(conn);
+
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);
+
+ // Copy with first_burn_height=0, reward_cycle_len=1 so max_cycle = 10/1 = 10,
+ // which covers the test row at reward_cycle=1.
+ let _stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ // Simulate source drift: increment blocks_signed counters.
+ {
+ let src_conn = Connection::open(&src_path).unwrap();
+ src_conn
+ .execute("UPDATE signer_stats SET blocks_signed = 100", [])
+ .unwrap();
+ }
+
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert!(
+ validation.signer_stats_match,
+ "signer_stats should pass with drifted counter values"
+ );
+ assert!(
+ validation.is_valid(),
+ "overall validation should pass: {validation:?}"
+ );
+}
+
+#[test]
+fn test_signer_stats_detects_fabricated_keys() {
+ // If the destination has a (public_key, reward_cycle) pair that doesn't
+ // exist in the source at all, validation must fail.
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ insert_block_header(&conn, 1, "1");
+ insert_nakamoto_header(&conn, "ibh1", 10);
+ conn.execute(
+ "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
+ VALUES ('pk1', 1, 5)",
+ [],
+ )
+ .unwrap();
+ drop(conn);
+
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);
+
+ let _stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ // Inject a fabricated signer key into the destination.
+ {
+ let dst_conn = Connection::open(&dst_path).unwrap();
+ dst_conn
+ .execute(
+ "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
+ VALUES ('pk_FAKE', 1, 99)",
+ [],
+ )
+ .unwrap();
+ }
+
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert!(
+ !validation.signer_stats_match,
+ "signer_stats should fail with fabricated key"
+ );
+ assert!(!validation.is_valid());
+}
+
+#[test]
+fn test_signer_stats_detects_inflated_counters() {
+ // If the destination has blocks_signed > source for an existing key,
+ // validation must fail (the counter is monotonically increasing).
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ insert_block_header(&conn, 1, "1");
+ insert_nakamoto_header(&conn, "ibh1", 10);
+ conn.execute(
+ "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
+ VALUES ('pk1', 1, 5)",
+ [],
+ )
+ .unwrap();
+ drop(conn);
+
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);
+
+ let _stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ // Inflate the counter in the destination beyond the source value.
+ {
+ let dst_conn = Connection::open(&dst_path).unwrap();
+ dst_conn
+ .execute(
+ "UPDATE signer_stats SET blocks_signed = 999 WHERE public_key = 'pk1'",
+ [],
+ )
+ .unwrap();
+ }
+
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert!(
+ !validation.signer_stats_match,
+ "signer_stats should fail with inflated counter"
+ );
+ assert!(!validation.is_valid());
+}
+
+#[test]
+fn test_matured_rewards_validates_with_source_growth() {
+ // matured_rewards is a non-consensus cache. After the squash, new blocks
+ // on the source trigger maturation of rewards for older canonical blocks,
+ // adding rows that match the canonical filter. Validation should still
+ // pass because we only check dst ⊆ filtered-src.
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ insert_block_header(&conn, 1, "1");
+ insert_nakamoto_header(&conn, "ibh1", 10);
+ conn.execute(
+ "INSERT INTO matured_rewards (address, recipient, vtxindex, coinbase, \
+ tx_fees_anchored, tx_fees_streamed_confirmed, tx_fees_streamed_produced, \
+ child_index_block_hash, parent_index_block_hash) \
+ VALUES ('addr1', NULL, 0, '100', '0', '0', '0', 'ibh1', 'pibh0')",
+ [],
+ )
+ .unwrap();
+ drop(conn);
+
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);
+
+ let _stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ // Simulate source growth: add a new matured_rewards row for a canonical block.
+ {
+ let src_conn = Connection::open(&src_path).unwrap();
+ src_conn
+ .execute(
+ "INSERT INTO matured_rewards (address, recipient, vtxindex, coinbase, \
+ tx_fees_anchored, tx_fees_streamed_confirmed, tx_fees_streamed_produced, \
+ child_index_block_hash, parent_index_block_hash) \
+ VALUES ('addr2', NULL, 0, '0', '0', '0', '0', 'ibh1', 'pibh0')",
+ [],
+ )
+ .unwrap();
+ }
+
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert!(
+ validation.matured_rewards_match,
+ "matured_rewards should pass when source has grown"
+ );
+ assert!(
+ validation.is_valid(),
+ "overall validation should pass: {validation:?}"
+ );
+}
+
+#[test]
+fn test_matured_rewards_detects_fabricated_rows() {
+ // If the destination has a matured_rewards row not in the filtered source,
+ // validation must fail.
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_index.sqlite");
+ let conn = create_source_db(&src_path);
+
+ insert_block_header(&conn, 1, "1");
+ insert_nakamoto_header(&conn, "ibh1", 10);
+ drop(conn);
+
+ let dst_path = dir.path().join("dst_index.sqlite");
+ create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);
+
+ let _stats =
+ copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ // Inject a fabricated matured_rewards row.
+ {
+ let dst_conn = Connection::open(&dst_path).unwrap();
+ dst_conn
+ .execute(
+ "INSERT INTO matured_rewards (address, recipient, vtxindex, coinbase, \
+ tx_fees_anchored, tx_fees_streamed_confirmed, tx_fees_streamed_produced, \
+ child_index_block_hash, parent_index_block_hash) \
+ VALUES ('addr_FAKE', NULL, 0, '999', '0', '0', '0', 'ibh1', 'pibh0')",
+ [],
+ )
+ .unwrap();
+ }
+
+ let validation =
+ validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
+ .unwrap();
+
+ assert!(
+ !validation.matured_rewards_match,
+ "matured_rewards should fail with fabricated row"
+ );
+ assert!(!validation.is_valid());
+}
+
+/// Create a source headers.sqlite (SPV v3 schema with chain_work).
+/// Replays the real SPV migration pipeline: INITIAL -> SCHEMA_2 -> SCHEMA_3.
+fn create_spv_headers_db(path: &std::path::Path) -> Connection {
+ let conn = Connection::open(path).unwrap();
+ for cmd in SPV_INITIAL_SCHEMA {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in SPV_SCHEMA_2 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ for cmd in SPV_SCHEMA_3 {
+ conn.execute_batch(cmd).unwrap();
+ }
+ conn.execute(
+ &format!("INSERT INTO db_config (version) VALUES ('{SPV_DB_VERSION}')"),
+ [],
+ )
+ .unwrap();
+ conn
+}
+
+#[test]
+fn test_spv_headers_copy_and_validate() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src_headers.sqlite");
+ let dst_path = dir.path().join("dst_headers.sqlite");
+
+ let src = create_spv_headers_db(&src_path);
+ // Insert headers at heights 0..=5000.
+ for h in 0..=5000u32 {
+ src.execute(
+ "INSERT INTO headers VALUES (1, 'prev', 'merkle', 0, 0, 0, ?1, ?2)",
+ params![h, format!("hash_{h}")],
+ )
+ .unwrap();
+ }
+ // Insert chain_work for intervals 0, 1, 2.
+ src.execute("INSERT INTO chain_work VALUES (0, 'work_0')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (1, 'work_1')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (2, 'work_2')", [])
+ .unwrap();
+ drop(src);
+
+ let stats =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 4500)
+ .unwrap();
+
+ // Headers 0..=4500 = 4501 rows.
+ assert_eq!(stats.headers_rows, 4501);
+ // Interval 0: (0+1)*2016-1=2015 <= 4500 ✓
+ // Interval 1: (1+1)*2016-1=4031 <= 4500 ✓
+ // Interval 2: (2+1)*2016-1=6047 <= 4500 ✗
+ assert_eq!(stats.chain_work_rows, 2);
+
+ let v = super::spv::validate_spv_headers(
+ src_path.to_str().unwrap(),
+ dst_path.to_str().unwrap(),
+ 4500,
+ )
+ .unwrap();
+ assert!(v.is_valid(), "validation failed: {v:?}");
+}
+
+#[test]
+fn test_spv_headers_chain_work_boundary_0() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let dst_path = dir.path().join("dst.sqlite");
+
+ let src = create_spv_headers_db(&src_path);
+ src.execute(
+ "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, 0, 'h0')",
+ [],
+ )
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (0, 'w0')", [])
+ .unwrap();
+ drop(src);
+
+ let stats =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0)
+ .unwrap();
+
+ assert_eq!(stats.headers_rows, 1);
+ // (0+1)*2016-1 = 2015 > 0 -> no intervals included.
+ assert_eq!(stats.chain_work_rows, 0);
+}
+
+#[test]
+fn test_spv_headers_chain_work_boundary_2015() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let dst_path = dir.path().join("dst.sqlite");
+
+ let src = create_spv_headers_db(&src_path);
+ for h in 0..=2015u32 {
+ src.execute(
+ "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)",
+ params![h, format!("h{h}")],
+ )
+ .unwrap();
+ }
+ src.execute("INSERT INTO chain_work VALUES (0, 'w0')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (1, 'w1')", [])
+ .unwrap();
+ drop(src);
+
+ let stats =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 2015)
+ .unwrap();
+
+ assert_eq!(stats.headers_rows, 2016);
+ // (0+1)*2016-1 = 2015 <= 2015 ✓ -> 1 interval.
+ assert_eq!(stats.chain_work_rows, 1);
+}
+
+#[test]
+fn test_spv_headers_chain_work_boundary_2016() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let dst_path = dir.path().join("dst.sqlite");
+
+ let src = create_spv_headers_db(&src_path);
+ for h in 0..=2016u32 {
+ src.execute(
+ "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)",
+ params![h, format!("h{h}")],
+ )
+ .unwrap();
+ }
+ src.execute("INSERT INTO chain_work VALUES (0, 'w0')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (1, 'w1')", [])
+ .unwrap();
+ drop(src);
+
+ let stats =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 2016)
+ .unwrap();
+
+ assert_eq!(stats.headers_rows, 2017);
+ // (0+1)*2016-1 = 2015 <= 2016 ✓
+ // (1+1)*2016-1 = 4031 <= 2016 ✗
+ assert_eq!(stats.chain_work_rows, 1);
+}
+
+#[test]
+fn test_spv_headers_chain_work_boundary_4031() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let dst_path = dir.path().join("dst.sqlite");
+
+ let src = create_spv_headers_db(&src_path);
+ for h in 0..=4031u32 {
+ src.execute(
+ "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)",
+ params![h, format!("h{h}")],
+ )
+ .unwrap();
+ }
+ src.execute("INSERT INTO chain_work VALUES (0, 'w0')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (1, 'w1')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (2, 'w2')", [])
+ .unwrap();
+ drop(src);
+
+ let stats =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 4031)
+ .unwrap();
+
+ assert_eq!(stats.headers_rows, 4032);
+ // (0+1)*2016-1 = 2015 <= 4031 ✓
+ // (1+1)*2016-1 = 4031 <= 4031 ✓
+ // (2+1)*2016-1 = 6047 <= 4031 ✗
+ assert_eq!(stats.chain_work_rows, 2);
+}
+
+#[test]
+fn test_spv_headers_chain_work_boundary_4032() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let dst_path = dir.path().join("dst.sqlite");
+
+ let src = create_spv_headers_db(&src_path);
+ for h in 0..=4032u32 {
+ src.execute(
+ "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)",
+ params![h, format!("h{h}")],
+ )
+ .unwrap();
+ }
+ src.execute("INSERT INTO chain_work VALUES (0, 'w0')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (1, 'w1')", [])
+ .unwrap();
+ src.execute("INSERT INTO chain_work VALUES (2, 'w2')", [])
+ .unwrap();
+ drop(src);
+
+ let stats =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 4032)
+ .unwrap();
+
+ assert_eq!(stats.headers_rows, 4033);
+ // (2+1)*2016-1 = 6047 <= 4032 ✗ -> still only 2 intervals.
+ assert_eq!(stats.chain_work_rows, 2);
+}
+
+#[test]
+fn test_spv_headers_missing_source_is_error() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("nonexistent.sqlite");
+ let dst_path = dir.path().join("dst.sqlite");
+
+ let result =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 100);
+ assert!(result.is_err(), "missing source should error");
+}
+
+#[test]
+fn test_spv_headers_validate_source_present_dest_missing_fails() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let dst_path = dir.path().join("nonexistent.sqlite");
+
+ create_spv_headers_db(&src_path);
+
+ let result = super::spv::validate_spv_headers(
+ src_path.to_str().unwrap(),
+ dst_path.to_str().unwrap(),
+ 100,
+ );
+ assert!(result.is_err());
+}
+
+#[test]
+fn test_spv_headers_validate_both_absent_is_error() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("no_src.sqlite");
+ let dst_path = dir.path().join("no_dst.sqlite");
+
+ let result = super::spv::validate_spv_headers(
+ src_path.to_str().unwrap(),
+ dst_path.to_str().unwrap(),
+ 100,
+ );
+ assert!(result.is_err(), "both absent should error");
+}
+
+#[test]
+fn test_spv_headers_stale_destination_errors_when_source_absent() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("nonexistent.sqlite");
+ let dst_path = dir.path().join("stale_headers.sqlite");
+
+ // Create a stale destination file (simulates reused output dir).
+ std::fs::write(&dst_path, b"stale data").unwrap();
+ assert!(dst_path.exists());
+
+ let result =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 100);
+ assert!(
+ result.is_err(),
+ "missing source should error even with stale destination"
+ );
+}
+
+#[test]
+fn test_spv_headers_reused_output_dir() {
+ let dir = tempdir().unwrap();
+ let src_path = dir.path().join("src.sqlite");
+ let dst_path = dir.path().join("dst.sqlite");
+
+ let src = create_spv_headers_db(&src_path);
+ for h in 0..=10u32 {
+ src.execute(
+ "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)",
+ params![h, format!("h{h}")],
+ )
+ .unwrap();
+ }
+ drop(src);
+
+ // First copy.
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 10)
+ .unwrap();
+
+ // Second copy into the same destination (reused output dir).
+ let stats =
+ super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 10)
+ .unwrap();
+
+ assert_eq!(stats.headers_rows, 11);
+
+ // Validate to confirm no duplicate rows.
+ let v = super::spv::validate_spv_headers(
+ src_path.to_str().unwrap(),
+ dst_path.to_str().unwrap(),
+ 10,
+ )
+ .unwrap();
+ assert!(
+ v.is_valid(),
+ "reused output dir should produce valid copy: {v:?}"
+ );
+}
diff --git a/stackslib/src/chainstate/stacks/index/bits.rs b/stackslib/src/chainstate/stacks/index/bits.rs
index 8eb4647c966..e68167fcfd0 100644
--- a/stackslib/src/chainstate/stacks/index/bits.rs
+++ b/stackslib/src/chainstate/stacks/index/bits.rs
@@ -22,7 +22,7 @@ use sha2::{Digest, Sha512_256 as TrieHasher};
use crate::chainstate::stacks::index::node::{
clear_compressed, clear_ctrl_bits, is_compressed, ptrs_fmt, ConsensusSerializable, TrieNode,
TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, TrieNodePatch, TrieNodeType,
- TriePtr, TRIEPTR_SIZE,
+ TriePtr,
};
use crate::chainstate::stacks::index::storage::TrieStorageConnection;
use crate::chainstate::stacks::index::{BlockMap, Error, MarfTrieId, TrieLeaf};
@@ -101,10 +101,10 @@ fn node_id_to_ptr_count(node_id: u8) -> usize {
}
}
-/// Helper to determine the maximum number of bytes a Trie node's child pointers will take to encode.
+/// Helper to determine how many bytes a Trie node's child pointers will take to encode.
pub fn get_ptrs_byte_len(ptrs: &[TriePtr]) -> usize {
let node_id_len = 1;
- node_id_len + TRIEPTR_SIZE * ptrs.len()
+ node_id_len + ptrs.iter().map(TriePtr::encoded_size).sum::<usize>()
}
/// Helper to determine a sparse TriePtr list's bitmap size, given the node ID's numeric value.
@@ -238,7 +238,9 @@ pub fn ptrs_from_bytes(
ptrs_start_disk_ptr
);
- let mut bytes = vec![0u8; 1 + num_ptrs * TRIEPTR_SIZE];
+ let max_ptr_size = TriePtr::max_encoded_size();
+ let patch_overhead = max_ptr_size + 1;
+ let mut bytes = vec![0u8; 1 + num_ptrs * max_ptr_size + patch_overhead];
let mut offset = 0;
loop {
let nr = match r.read(
@@ -269,7 +271,11 @@ pub fn ptrs_from_bytes(
offset = offset.checked_add(nr).ok_or_else(|| Error::OverflowError)?;
}
- trace!("Read bytes ({}) {}", bytes.len(), &to_hex(&bytes));
+ let bytes = bytes
+ .get(0..offset)
+ .ok_or_else(|| Error::CorruptionError("Failed to trim bytes array".into()))?;
+
+ trace!("Read bytes ({}) {}", bytes.len(), &to_hex(bytes));
// verify the id is correct
let nid = bytes
@@ -445,15 +451,30 @@ pub fn ptrs_from_bytes(
}
} else {
// ptrs list is not compressed
- // iterate over the read-in bytes in chunks of TRIEPTR_SIZE and store them
- // to `ptrs_buf`
+ // iterate over the read-in bytes one pointer at a time since each encoded pointer
+ // can independently choose u32 or u64 storage.
trace!("Node {} has uncompressed ptrs", cleared_nid);
- let reading_ptrs = ptr_bytes
- .chunks_exact(TRIEPTR_SIZE)
- .zip(ptrs_buf.iter_mut());
- for (next_ptr_bytes, ptr_slot) in reading_ptrs {
- *ptr_slot = TriePtr::from_bytes(next_ptr_bytes);
+ let mut cursor = 0;
+ for ptr_slot in ptrs_buf.iter_mut() {
+ let ptr_id = *ptr_bytes
+ .get(cursor)
+ .ok_or_else(|| Error::CorruptionError("ptr_bytes runs short".into()))?;
+ *ptr_slot = TriePtr::from_bytes(
+ ptr_bytes
+ .get(cursor..)
+ .ok_or_else(|| Error::CorruptionError("ptr_bytes runs short".into()))?,
+ );
+ cursor = cursor
+ .checked_add(TriePtr::encoded_size_for_id(ptr_id))
+ .ok_or_else(|| Error::OverflowError)?;
}
+ let seek_target = u64::try_from(cursor)
+ .ok()
+ .and_then(|c| c.checked_add(1))
+ .and_then(|c| ptrs_start_disk_ptr.checked_add(c))
+ .ok_or(Error::OverflowError)?;
+ r.seek(SeekFrom::Start(seek_target))
+ .inspect_err(|e| error!("Failed to seek to the end of the uncompressed ptrs: {e:?}"))?;
}
Ok(clear_compressed(*nid))
@@ -568,8 +589,7 @@ pub fn read_node_hash_bytes(
f: &mut F,
ptr: &TriePtr,
) -> Result<[u8; TRIEHASH_ENCODED_SIZE], Error> {
- f.seek(SeekFrom::Start(ptr.ptr() as u64))
- .map_err(Error::IOError)?;
+ f.seek(SeekFrom::Start(ptr.ptr())).map_err(Error::IOError)?;
read_hash_bytes(f)
}
@@ -601,8 +621,7 @@ pub fn read_nodetype(
f: &mut F,
ptr: &TriePtr,
) -> Result<(TrieNodeType, TrieHash), Error> {
- f.seek(SeekFrom::Start(ptr.ptr() as u64))
- .map_err(Error::IOError)?;
+ f.seek(SeekFrom::Start(ptr.ptr())).map_err(Error::IOError)?;
trace!("read_nodetype at {:?}", ptr);
read_nodetype_at_head(f, ptr.id())
}
@@ -615,8 +634,7 @@ pub fn read_nodetype_nohash(
f: &mut F,
ptr: &TriePtr,
) -> Result<TrieNodeType, Error> {
- f.seek(SeekFrom::Start(ptr.ptr() as u64))
- .map_err(Error::IOError)?;
+ f.seek(SeekFrom::Start(ptr.ptr())).map_err(Error::IOError)?;
trace!("read_nodetype_nohash at {:?}", ptr);
read_nodetype_at_head_nohash(f, ptr.id())
}
diff --git a/stackslib/src/chainstate/stacks/index/file.rs b/stackslib/src/chainstate/stacks/index/file.rs
index a23cdf9cbff..76d187fdb7b 100644
--- a/stackslib/src/chainstate/stacks/index/file.rs
+++ b/stackslib/src/chainstate/stacks/index/file.rs
@@ -106,6 +106,15 @@ impl TrieFile {
}
}
+ /// Durably sync blob data to disk.
+ /// No-op for RAM-backed TrieFiles.
+ pub fn sync_data(&mut self) -> Result<(), io::Error> {
+ if let TrieFile::Disk(ref mut data) = self {
+ data.fd.sync_data()?;
+ }
+ Ok(())
+ }
+
/// Get a copy of the path to this TrieFile.
/// If in RAM, then the path will be ":memory:"
pub fn get_path(&self) -> String {
@@ -326,8 +335,7 @@ impl<'a> TrieFileNodeHashReader<'a> {
impl NodeHashReader for TrieFileNodeHashReader<'_> {
fn read_node_hash_bytes<W: Write>(&mut self, ptr: &TriePtr, w: &mut W) -> Result<(), Error> {
let trie_offset = self.file.get_trie_offset(self.db, self.block_id)?;
- self.file
- .seek(SeekFrom::Start(trie_offset + (ptr.ptr() as u64)))?;
+ self.file.seek(SeekFrom::Start(trie_offset + (ptr.ptr())))?;
let hash_buff = read_hash_bytes(self.file)?;
w.write_all(&hash_buff).map_err(|e| e.into())
}
@@ -362,7 +370,7 @@ impl TrieFile {
ptr: &TriePtr,
) -> Result<TrieHash, Error> {
let offset = self.get_trie_offset(db, block_id)?;
- self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?;
+ self.seek(SeekFrom::Start(offset + (ptr.ptr())))?;
let hash_buff = read_hash_bytes(self)?;
Ok(TrieHash(hash_buff))
}
@@ -376,7 +384,7 @@ impl TrieFile {
ptr: &TriePtr,
) -> Result<(TrieNodeType, TrieHash), Error> {
let offset = self.get_trie_offset(db, block_id)?;
- self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?;
+ self.seek(SeekFrom::Start(offset + (ptr.ptr())))?;
read_nodetype_at_head(self, ptr.id())
}
@@ -388,7 +396,7 @@ impl TrieFile {
ptr: &TriePtr,
) -> Result<TrieNodeType, Error> {
let offset = self.get_trie_offset(db, block_id)?;
- self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?;
+ self.seek(SeekFrom::Start(offset + (ptr.ptr())))?;
read_nodetype_at_head_nohash(self, ptr.id())
}
@@ -401,7 +409,7 @@ impl TrieFile {
ptr: &TriePtr,
) -> Result<TrieHash, Error> {
let (offset, _length) = trie_sql::get_external_trie_offset_length_by_bhh(db, bhh)?;
- self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?;
+ self.seek(SeekFrom::Start(offset + (ptr.ptr())))?;
let hash_buff = read_hash_bytes(self)?;
Ok(TrieHash(hash_buff))
}
@@ -443,10 +451,7 @@ impl TrieFile {
self.seek(SeekFrom::Start(offset))?;
self.write_all(buf)?;
self.flush()?;
-
- if let TrieFile::Disk(ref mut data) = self {
- data.fd.sync_data()?;
- }
+ self.sync_data()?;
Ok(offset)
}
}
diff --git a/stackslib/src/chainstate/stacks/index/marf.rs b/stackslib/src/chainstate/stacks/index/marf.rs
index 9a12f23f121..a34c5674c9e 100644
--- a/stackslib/src/chainstate/stacks/index/marf.rs
+++ b/stackslib/src/chainstate/stacks/index/marf.rs
@@ -13,6 +13,8 @@
//
// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+#[cfg(test)]
+use std::collections::HashSet;
use std::ops::DerefMut;
#[cfg(any(test, feature = "testing"))]
use std::sync::LazyLock;
@@ -24,6 +26,9 @@ use rusqlite::{Connection, Transaction};
use stacks_common::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE};
use stacks_common::util::hash::Sha512Trunc256Sum;
+pub use super::squash::{
+ SquashStats, MARF_SQUASHED_BLOCK_ROOT_HASH_KEY, MARF_SQUASH_HEIGHT_KEY, MARF_SQUASH_ROOT_KEY,
+};
use super::storage::ReopenedTrieStorageConnection;
use crate::chainstate::stacks::index::bits::{get_leaf_hash, get_node_hash};
use crate::chainstate::stacks::index::node::{
@@ -31,10 +36,13 @@ use crate::chainstate::stacks::index::node::{
TrieCursor, TrieNode256, TrieNodeID, TrieNodeType, TriePtr,
};
use crate::chainstate::stacks::index::storage::{
- TrieFileStorage, TrieHashCalculationMode, TrieStorageConnection, TrieStorageTransaction,
+ SquashInfo, TrieFileStorage, TrieHashCalculationMode, TrieStorageConnection,
+ TrieStorageTransaction,
};
use crate::chainstate::stacks::index::trie::Trie;
-use crate::chainstate::stacks::index::{Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof};
+use crate::chainstate::stacks::index::{
+ trie_sql, Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof,
+};
use crate::util_lib::db::Error as db_error;
pub const BLOCK_HASH_TO_HEIGHT_MAPPING_KEY: &str = "__MARF_BLOCK_HASH_TO_HEIGHT";
@@ -101,7 +109,7 @@ pub struct MARF {
}
pub struct MarfTransaction<'a, T: MarfTrieId> {
- storage: TrieStorageTransaction<'a, T>,
+ pub(crate) storage: TrieStorageTransaction<'a, T>,
open_chain_tip: &'a mut Option<WriteChainTip<T>>,
}
@@ -409,6 +417,42 @@ impl<'a, T: MarfTrieId> MarfTransaction<'a, T> {
self.storage.sqlite_tx_mut()
}
+ /// Commit the SQL transaction without flushing TrieRAM to disk.
+ ///
+ /// Used by `squash_to_path` which writes the blob directly, bypassing
+ /// the normal TrieRAM flush path.
+ pub(crate) fn commit_squash(mut self) -> Result<(), Error> {
+ if self.storage.readonly() {
+ return Err(Error::ReadOnlyError);
+ }
+ self.open_chain_tip.take();
+ self.storage.drop_extending_trie();
+ self.storage.commit_tx();
+ Ok(())
+ }
+
+ /// Set squash metadata on the underlying storage connection.
+ ///
+ /// Called during `squash_to_path` so the ancestor-hash computation can
+ /// use stored root hashes instead of opening pruned historical blocks.
+ pub(crate) fn set_squash_info(&mut self, info: Option) {
+ self.storage.set_squash_info(info);
+ }
+
+ /// Write a trie node directly to the uncommitted TrieRAM at `slot`.
+ ///
+ /// Used by `squash_to_path` to populate the TrieRAM with a
+ /// structure-preserving deep copy of the source trie, bypassing the
+ /// normal walk-cow insertion path.
+ pub(crate) fn write_node_direct(
+ &mut self,
+ slot: u64,
+ node: &TrieNodeType,
+ hash: TrieHash,
+ ) -> Result<(), Error> {
+ self.storage.write_nodetype(slot, node, hash)
+ }
+
/// Reopen this MARF transaction with readonly storage.
/// NOTE: any pending operations in the SQLite transaction _will not_
/// have materialized in the reopened view.
@@ -596,6 +640,22 @@ impl<'a, T: MarfTrieId> MarfTransaction<'a, T> {
Ok(())
}
+ pub fn insert_raw(&mut self, path: TrieHash, marf_leaf: TrieLeaf) -> Result<(), Error> {
+ if self.storage.readonly() {
+ return Err(Error::ReadOnlyError);
+ }
+ let block_hash = match self.open_chain_tip {
+ None => Err(Error::WriteNotBegunError),
+ Some(WriteChainTip { ref block_hash, .. }) => Ok(block_hash.clone()),
+ }?;
+
+ let (cur_block_hash, cur_block_id) = self.storage.get_cur_block_and_id();
+ let result = MARF::insert_leaf(&mut self.storage, &block_hash, &path, &marf_leaf);
+ self.storage
+ .open_block_maybe_id(&cur_block_hash, cur_block_id)?;
+ result
+ }
+
/// Begin extending the MARF to an unconfirmed trie. The resulting trie will have a block hash
/// equal to MARF::make_unconfirmed_block_hash(chain_tip) to avoid collision
/// and block hash reuse.
@@ -746,16 +806,17 @@ impl MARF {
}
}
+ /// Copy a node forward from an ancestor trie by converting its inline children into
+ /// back-pointers. Returns the node hash (leaf hash for leaves, empty hash for internal
+ /// nodes whose hash will be computed at commit time).
fn node_copy_update(node: &mut TrieNodeType, child_block_id: u32) -> TrieHash {
- let hash = match node {
+ match node {
TrieNodeType::Leaf(leaf) => get_leaf_hash(leaf),
_ => {
node_copy_update_ptrs(node.ptrs_mut(), child_block_id);
TrieHash::EMPTY
}
- };
-
- hash
+ }
}
/// Given a node, and the chr of one of its children, go find the last instance of that child in
@@ -1317,6 +1378,16 @@ impl MARF {
}
}
+ // In a squashed MARF, OWN_BLOCK_HEIGHT_KEY returns the squash
+ // height H for every block in the squashed range. Use the
+ // side-table when available.
+ if storage.squash_info().is_some() {
+ if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)?
+ {
+ return Ok(Some(h));
+ }
+ }
+
let marf_value = if block_hash == current_block_hash {
MARF::get_by_key(storage, current_block_hash, OWN_BLOCK_HEIGHT_KEY)?
} else {
@@ -1659,6 +1730,21 @@ impl MARF {
self.storage.connection()
}
+ /// Build the set of trusted squash trie root-node hashes from this
+ /// MARF's squash metadata. Returns an empty set for archival
+ /// (non-squashed) MARFs.
+ #[cfg(test)]
+ pub fn trusted_squash_node_hashes(&self) -> HashSet<TrieHash> {
+ let mut set = HashSet::new();
+ if let Some(info) = self.storage.squash_info() {
+ let h = info.squash_root_node_hash;
+ if h != TrieHash::from_data(&[]) {
+ set.insert(h);
+ }
+ }
+ set
+ }
+
#[cfg(test)]
pub fn borrow_storage_transaction(&mut self) -> TrieStorageTransaction<'_, T> {
self.storage.transaction().unwrap()
@@ -1726,7 +1812,7 @@ impl MARF {
}
}
-// --- Leaf traversal -----------------------------------------------------------
+// Leaf traversal
impl<T: MarfTrieId> MARF<T> {
/// Walk all leaves in the trie at `block_hash`, yielding full paths and values.
@@ -1736,13 +1822,13 @@ impl MARF {
pub(crate) fn for_each_leaf(
storage: &mut TrieStorageConnection,
block_hash: &T,
- handle_leaf: F,
+ mut handle_leaf: F,
) -> Result
where
- F: Fn(TrieHash, MARFValue) -> Result<(), Error>,
+ F: FnMut(TrieHash, MARFValue) -> Result<(), Error>,
{
let (original_block_hash, original_block_id) = storage.get_cur_block_and_id();
- let result = Self::inner_each_leaf(storage, block_hash, &handle_leaf);
+ let result = Self::inner_each_leaf(storage, block_hash, &mut handle_leaf);
storage
.open_block_maybe_id(&original_block_hash, original_block_id)
@@ -1762,10 +1848,10 @@ impl MARF {
fn inner_each_leaf(
storage: &mut TrieStorageConnection,
block_hash: &T,
- handle_leaf: &F,
+ handle_leaf: &mut F,
) -> Result
where
- F: Fn(TrieHash, MARFValue) -> Result<(), Error>,
+ F: FnMut(TrieHash, MARFValue) -> Result<(), Error>,
{
storage.open_block(block_hash)?;
let (root_node, _root_hash) = Trie::read_root(storage)?;
@@ -1774,11 +1860,11 @@ impl MARF {
let mut stack: Vec<(TriePtr, Vec<u8>, T, Option<u32>)> = Vec::new();
// Process a node: emit leaf or push children onto the stack.
- let process_node = |node: TrieNodeType,
- prefix: Vec,
- block_hash: T,
- block_id: Option,
- stack: &mut Vec<(TriePtr, Vec, T, Option)>|
+ let mut process_node = |node: TrieNodeType,
+ prefix: Vec<u8>,
+ block_hash: T,
+ block_id: Option<u32>,
+ stack: &mut Vec<(TriePtr, Vec<u8>, T, Option<u32>)>|
-> Result {
let mut full_prefix = prefix;
full_prefix.extend_from_slice(node.path_bytes());
diff --git a/stackslib/src/chainstate/stacks/index/mod.rs b/stackslib/src/chainstate/stacks/index/mod.rs
index 5c2d023c752..88a33d3d107 100644
--- a/stackslib/src/chainstate/stacks/index/mod.rs
+++ b/stackslib/src/chainstate/stacks/index/mod.rs
@@ -33,6 +33,7 @@ pub mod marf;
pub mod node;
pub mod profile;
pub mod proofs;
+pub mod squash;
pub mod storage;
pub mod trie;
pub mod trie_sql;
diff --git a/stackslib/src/chainstate/stacks/index/node.rs b/stackslib/src/chainstate/stacks/index/node.rs
index 2b3f44ca999..64bdc60808b 100644
--- a/stackslib/src/chainstate/stacks/index/node.rs
+++ b/stackslib/src/chainstate/stacks/index/node.rs
@@ -53,11 +53,11 @@ impl error::Error for CursorError {
}
// All numeric values of a Trie node when encoded.
-// They are all 6-bit numbers
-// * the 8th bit is used to indicate whether or not the value
-// identifies a back-pointer to be followed.
-// * the 7th bit is used to indicate whether or not the ptrs
-// are compressed. This bit is cleared on read.
+// They are all 4-bit numbers (values 0-6)
+// * the 8th bit (0x80) indicates a back-pointer to be followed
+// * the 7th bit (0x40) indicates the ptrs are compressed. Cleared on read.
+// * the 6th bit (0x20) indicates the ptr offset is encoded as u64, instead of u32. Cleared on read.
+// * the 5th bit (0x10) indicates a compressed inline pointer contains a back_block payload. Cleared on read.
define_u8_enum!(TrieNodeID {
Empty = 0,
Leaf = 1,
@@ -98,9 +98,46 @@ pub fn clear_compressed(id: u8) -> u8 {
id & 0xbf
}
-/// Clear all control bits (backptr and compressed)
+/// Is this compressed inline pointer flagged to carry `back_block` payload bytes?
+/// This bit is wire-format-only metadata and is cleared after decoding.
+pub fn has_inline_back_block(id: u8) -> bool {
+ id & 0x10 != 0
+}
+
+/// Set the compressed inline `back_block` payload bit.
+pub fn set_inline_back_block(id: u8) -> u8 {
+ id | 0x10
+}
+
+/// Clear the compressed inline `back_block` payload bit.
+pub fn clear_inline_back_block(id: u8) -> u8 {
+ id & 0xef
+}
+
+/// True if a compressed pointer with this encoded id includes a back_block payload.
+#[inline]
+fn has_back_block_payload_bytes(id: u8) -> bool {
+ is_backptr(id) || has_inline_back_block(id)
+}
+
+/// Is this pointer encoded with a u64 offset?
+pub const fn is_u64_ptr(id: u8) -> bool {
+ id & 0x20 != 0
+}
+
+/// Set the u64-pointer bit
+pub const fn set_u64_ptr(id: u8) -> u8 {
+ id | 0x20
+}
+
+/// Clear the u64-pointer bit
+pub const fn clear_u64_ptr(id: u8) -> u8 {
+ id & 0xdf
+}
+
+/// Clear all control bits (backptr, compressed, u64-pointer, annotation)
pub fn clear_ctrl_bits(id: u8) -> u8 {
- id & 0x3f
+ id & 0x0f
}
// Byte writing operations for pointer lists, paths.
@@ -370,18 +407,30 @@ impl ConsensusSerializable for T {
}
}
-/// Child pointer
+/// Child pointer within a MARF trie node.
+///
+/// `back_block` has two modes depending on the backptr flag in `id`:
+///
+/// * Back-pointer (`id & 0x80 != 0`): the child lives in a different block's trie.
+/// `back_block` is the `marf_data` row ID of that block, and `ptr` is the byte offset
+/// within that block's trie storage.
+///
+/// * Inline (`id & 0x80 == 0`): the child lives in the same trie storage.
+/// `back_block` is normally 0. In a squashed MARF, a non-zero `back_block` is a
+/// squash annotation: it records the original archival block ID.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TriePtr {
- pub id: u8, // ID of the child. Will have bit 0x80 set if the child is a back-pointer (in which case, back_block will be nonzero)
- pub chr: u8, // Path character at which this child resides
- pub ptr: u32, // Storage-specific pointer to where the child's encoded bytes can be found
- pub back_block: u32, // Pointer back to the block that contains the child, if it's not in this trie
+ /// Node type ID of the child (see [`TrieNodeID`]). Bit 0x80 marks a back-pointer.
+ pub id: u8,
+ /// Path character at which this child resides.
+ pub chr: u8,
+ /// Byte offset of the child's encoded data within the trie storage.
+ pub ptr: u64,
+ /// Block ID of the trie containing the child. Zero for same-block inline children
+ /// (unless carrying a squash annotation).
+ pub back_block: u32,
}
-pub const TRIEPTR_SIZE: usize = 10; // full size of a TriePtr
-pub const TRIEPTR_SIZE_COMPRESSED: usize = 6; // full size of a compressed TriePtr
-
pub fn ptrs_fmt(ptrs: &[TriePtr]) -> String {
let mut strs = vec![];
for ptr in ptrs.iter() {
@@ -409,7 +458,7 @@ impl Default for TriePtr {
impl TriePtr {
#[inline]
- pub fn new(id: u8, chr: u8, ptr: u32) -> TriePtr {
+ pub fn new(id: u8, chr: u8, ptr: u64) -> TriePtr {
TriePtr {
id,
chr,
@@ -420,7 +469,7 @@ impl TriePtr {
/// Create a back-pointer version of a [`TriePtr`]
#[cfg(test)]
- pub fn new_backptr(id: u8, chr: u8, ptr: u32, back_block: u32) -> TriePtr {
+ pub fn new_backptr(id: u8, chr: u8, ptr: u64, back_block: u32) -> TriePtr {
TriePtr {
id: set_backptr(id),
chr,
@@ -446,10 +495,24 @@ impl TriePtr {
}
#[inline]
- pub fn ptr(&self) -> u32 {
+ pub fn ptr(&self) -> u64 {
self.ptr
}
+ /// Convert `self.ptr()` to a `u32` in-memory index, or return an error
+ /// if the value exceeds `u32::MAX`.
+ #[inline]
+ pub fn ptr_as_u32(&self) -> Result<u32, Error> {
+ u32::try_from(self.ptr).map_err(|_| Error::OverflowError)
+ }
+
+ /// Convert `self.ptr()` to a `usize` in-memory index, or return an error
+ /// if the value exceeds `usize::MAX`.
+ #[inline]
+ pub fn ptr_as_usize(&self) -> Result<usize, Error> {
+ usize::try_from(self.ptr).map_err(|_| Error::OverflowError)
+ }
+
#[inline]
pub fn back_block(&self) -> u32 {
self.back_block
@@ -465,19 +528,75 @@ impl TriePtr {
}
}
+ /// Return the identifier byte that will be emitted on disk for this pointer.
+ ///
+ /// This preserves the logical node kind while setting or clearing the `0x20`
+ /// control bit to match the encoded pointer width.
+ #[inline]
+ pub fn encoded_id(&self) -> u8 {
+ if self.ptr() > u64::from(u32::MAX) {
+ set_u64_ptr(self.id())
+ } else {
+ clear_u64_ptr(self.id())
+ }
+ }
+
+ /// Return the uncompressed encoded size, in bytes, for a pointer with the
+ /// given on-disk identifier byte.
+ ///
+ /// The `0x20` control bit determines whether the pointer payload is encoded
+ /// as `u32` or `u64`.
+ #[inline]
+ pub const fn encoded_size_for_id(node_id: u8) -> usize {
+ 1 + 1 + if is_u64_ptr(node_id) { 8 } else { 4 } + 4
+ }
+
+ /// Return the maximum possible uncompressed encoded size for any `TriePtr`.
+ #[inline]
+ pub const fn max_encoded_size() -> usize {
+ Self::encoded_size_for_id(set_u64_ptr(TrieNodeID::Empty as u8))
+ }
+
+ /// Return the compressed encoded size, in bytes, for a pointer with the
+ /// given on-disk identifier byte.
+ ///
+ /// The `0x20` control bit determines whether the pointer payload is encoded
+ /// as `u32` or `u64`.
+ #[inline]
+ pub const fn encoded_size_compressed_for_id(node_id: u8) -> usize {
+ 1 + 1 + if is_u64_ptr(node_id) { 8 } else { 4 }
+ }
+
#[inline]
pub fn write_bytes<W: Write>(&self, w: &mut W) -> Result<(), Error> {
- w.write_all(&[self.id(), self.chr()])?;
- w.write_all(&self.ptr().to_be_bytes())?;
+ let encoded_id = self.encoded_id();
+ w.write_all(&[encoded_id, self.chr()])?;
+ if is_u64_ptr(encoded_id) {
+ w.write_all(&self.ptr().to_be_bytes())?;
+ } else {
+ let ptr32 = u32::try_from(self.ptr()).map_err(|_| Error::OverflowError)?;
+ w.write_all(&ptr32.to_be_bytes())?;
+ }
w.write_all(&self.back_block().to_be_bytes())?;
Ok(())
}
#[inline]
pub fn write_bytes_compressed<W: Write>(&self, w: &mut W) -> Result<(), Error> {
- w.write_all(&[set_compressed(self.id()), self.chr()])?;
- w.write_all(&self.ptr().to_be_bytes())?;
- if is_backptr(self.id()) {
+ // Preserve squash annotation payload on disk for inline pointers that
+ // carry a non-zero back_block, without changing backptr semantics.
+ let mut encoded_id = set_compressed(self.encoded_id());
+ if !is_backptr(self.id()) && self.back_block() != 0 {
+ encoded_id = set_inline_back_block(encoded_id);
+ }
+ w.write_all(&[encoded_id, self.chr()])?;
+ if is_u64_ptr(encoded_id) {
+ w.write_all(&self.ptr().to_be_bytes())?;
+ } else {
+ let ptr32 = u32::try_from(self.ptr()).map_err(|_| Error::OverflowError)?;
+ w.write_all(&ptr32.to_be_bytes())?;
+ }
+ if has_back_block_payload_bytes(encoded_id) {
w.write_all(&self.back_block().to_be_bytes())?;
}
Ok(())
@@ -508,12 +627,24 @@ impl TriePtr {
#[inline]
#[allow(clippy::indexing_slicing)]
+ /// Deserialize a pointer from raw bytes using the encoded width bit.
pub fn from_bytes(bytes: &[u8]) -> TriePtr {
- assert!(bytes.len() >= TRIEPTR_SIZE);
- let id = bytes[0];
+ let encoded_id = bytes[0];
+ let min_len = TriePtr::encoded_size_for_id(encoded_id);
+ assert!(bytes.len() >= min_len);
+ let id = clear_u64_ptr(encoded_id);
let chr = bytes[1];
- let ptr = u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]);
- let back_block = u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]);
+ let (ptr, back_block) = if is_u64_ptr(encoded_id) {
+ let ptr = u64::from_be_bytes([
+ bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9],
+ ]);
+ let back_block = u32::from_be_bytes([bytes[10], bytes[11], bytes[12], bytes[13]]);
+ (ptr, back_block)
+ } else {
+ let ptr = u64::from(u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]));
+ let back_block = u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]);
+ (ptr, back_block)
+ };
TriePtr {
id,
@@ -524,19 +655,39 @@ impl TriePtr {
}
/// Load up this TriePtr from a slice of bytes, assuming that they represent a compressed
- /// TriePtr. A TriePtr that is compressed will not have a stored `back_block` field if the
- /// node ID does not have the backptr bit set.
+ /// TriePtr.
+ ///
+ /// A compressed TriePtr stores `back_block` bytes if either:
+ /// * it is a back-pointer (`is_backptr(id)`), or
+ /// * it is an inline pointer with back_block payload
+ /// (`has_inline_back_block(id)`).
+ ///
+ /// The annotation bit is wire metadata and is cleared on read.
#[inline]
#[allow(clippy::indexing_slicing)]
pub fn from_bytes_compressed(bytes: &[u8]) -> TriePtr {
- assert!(bytes.len() >= TRIEPTR_SIZE_COMPRESSED);
- let id = clear_compressed(bytes[0]);
+ let encoded_id = clear_compressed(bytes[0]);
+ assert!(bytes.len() >= TriePtr::compressed_size_for_id(encoded_id));
+ let id = clear_u64_ptr(clear_inline_back_block(encoded_id));
let chr = bytes[1];
- let ptr = u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]);
+ let ptr = if is_u64_ptr(encoded_id) {
+ u64::from_be_bytes([
+ bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9],
+ ])
+ } else {
+ u64::from(u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]))
+ };
- let back_block = if is_backptr(id) {
- assert!(bytes.len() >= TRIEPTR_SIZE);
- u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]])
+ let back_block = if has_back_block_payload_bytes(encoded_id) {
+ // Backpointers and squash annotations append a 4-byte `back_block` after the compressed ptr payload.
+ let back_block_offset = TriePtr::encoded_size_compressed_for_id(encoded_id);
+ assert!(bytes.len() >= back_block_offset + 4);
+ u32::from_be_bytes([
+ bytes[back_block_offset],
+ bytes[back_block_offset + 1],
+ bytes[back_block_offset + 2],
+ bytes[back_block_offset + 3],
+ ])
} else {
0
};
@@ -555,11 +706,18 @@ impl TriePtr {
#[inline]
pub fn read_bytes_compressed<R: Read>(fd: &mut R) -> Result<TriePtr, Error> {
let id_bits: u8 = read_next(fd)?;
- let id = clear_compressed(id_bits);
+ let encoded_id = clear_compressed(id_bits);
+ let id = clear_u64_ptr(clear_inline_back_block(encoded_id));
let chr: u8 = read_next(fd)?;
- let ptr_be_bytes: [u8; 4] = read_next(fd)?;
- let ptr = u32::from_be_bytes(ptr_be_bytes);
- let back_block = if is_backptr(id) {
+ let ptr = if is_u64_ptr(encoded_id) {
+ let hi: [u8; 4] = read_next(fd)?;
+ let lo: [u8; 4] = read_next(fd)?;
+ u64::from_be_bytes([hi[0], hi[1], hi[2], hi[3], lo[0], lo[1], lo[2], lo[3]])
+ } else {
+ let ptr_be_bytes: [u8; 4] = read_next(fd)?;
+ u64::from(u32::from_be_bytes(ptr_be_bytes))
+ };
+ let back_block = if has_back_block_payload_bytes(encoded_id) {
let bytes: [u8; 4] = read_next(fd)?;
u32::from_be_bytes(bytes)
} else {
@@ -574,21 +732,32 @@ impl TriePtr {
})
}
+ /// Size of this TriePtr on disk.
+ #[inline]
+ pub fn encoded_size(&self) -> usize {
+ Self::encoded_size_for_id(self.encoded_id())
+ }
+
/// Size of this TriePtr on disk, if compression is to be used.
#[inline]
pub fn compressed_size(&self) -> usize {
- Self::compressed_size_for_id(self.id)
+ let encoded_id = self.encoded_id();
+ if !is_backptr(self.id) && self.back_block != 0 {
+ Self::encoded_size_for_id(encoded_id)
+ } else {
+ Self::compressed_size_for_id(encoded_id)
+ }
}
/// Returns the size, in bytes, that a node occupies on disk, taking compression into account.
- /// In this case, non-backpointer nodes use a smaller size (`TRIEPTR_SIZE_COMPRESSED`),
- /// while backpointer nodes use the full size (`TRIEPTR_SIZE`).
+ /// Pointers without a `back_block` payload omit it, while backpointers and
+ /// inline-annotation pointers store it.
#[inline]
pub fn compressed_size_for_id(node_id: u8) -> usize {
- if !is_backptr(node_id) {
- TRIEPTR_SIZE_COMPRESSED
+ if !has_back_block_payload_bytes(node_id) {
+ Self::encoded_size_compressed_for_id(node_id)
} else {
- TRIEPTR_SIZE
+ Self::encoded_size_for_id(node_id)
}
}
}
@@ -982,7 +1151,7 @@ impl TrieNode16 {
#[derive(Clone)]
pub struct TrieNode48 {
pub path: Vec<u8>,
- indexes: [i8; 256], // indexes[i], if non-negative, is an index into ptrs.
+ pub(crate) indexes: [i8; 256], // indexes[i], if non-negative, is an index into ptrs.
pub ptrs: [TriePtr; 48],
/// If this node was created by copy-on-write, then this points to the node it was copied from.
pub cowptr: Option,
@@ -1229,17 +1398,19 @@ impl StacksMessageCodec for TrieNodePatch {
}
}
-/// Turn each non-empty, non-backptr in `ptrs` into a backptr pointing at `child_block_id`
+/// Turn each non-empty, non-backptr in `ptrs` into a backptr.
+/// If `back_block` is already non-zero (squash annotation), it is preserved;
+/// otherwise it is set to `child_block_id`.
pub(crate) fn node_copy_update_ptrs(ptrs: &mut [TriePtr], child_block_id: u32) {
for pointer in ptrs.iter_mut() {
// if the node is empty, do nothing, if it's a back pointer,
if pointer.id() == TrieNodeID::Empty as u8 || is_backptr(pointer.id()) {
continue;
- } else {
- // make backptr
+ }
+ if pointer.back_block == 0 {
pointer.back_block = child_block_id;
- pointer.id = set_backptr(pointer.id());
}
+ pointer.id = set_backptr(pointer.id());
}
}
@@ -1545,15 +1716,6 @@ impl TrieNodePatch {
}
sz
}
-
- /// Load a TrieNodePatch from a Read object
- /// Returns Ok(Self) on success
- /// Returns Err(codec_error::*) on failure to decode the bytes
- /// Returns Err(IOError(..)) on disk I/O failure
- pub fn from_bytes(f: &mut R) -> Result {
- Self::consensus_deserialize(f)
- .map_err(|e| Error::CorruptionError(format!("Codec error: {e:?}")))
- }
}
impl TrieNode for TrieNode4 {
@@ -1679,7 +1841,6 @@ impl TrieNode for TrieNode16 {
fn from_bytes<R: Read + Seek>(r: &mut R) -> Result<TrieNode16, Error> {
let mut ptrs_slice = [TriePtr::default(); 16];
ptrs_from_bytes(TrieNodeID::Node16 as u8, r, &mut ptrs_slice)?;
-
let path = path_from_bytes(r)?;
Ok(TrieNode16 {
diff --git a/stackslib/src/chainstate/stacks/index/squash.rs b/stackslib/src/chainstate/stacks/index/squash.rs
new file mode 100644
index 00000000000..f9a01c24134
--- /dev/null
+++ b/stackslib/src/chainstate/stacks/index/squash.rs
@@ -0,0 +1,1502 @@
+// Copyright (C) 2026 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+//! MARF squashing: offline snapshot creation and validation.
+//!
+//! A squashed MARF contains only the canonical state at a given
+//! height H plus the metadata needed for ancestor hash lookups and
+//! block-height resolution.
+
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::{BufReader, BufWriter, Read as _, Seek, SeekFrom, Write};
+use std::time::{Duration, Instant};
+
+use rusqlite::{params, DatabaseName};
+use sha2::Digest as _;
+use stacks_common::types::chainstate::{
+ StacksBlockId, TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE, TRIEHASH_ENCODED_SIZE,
+};
+
+use crate::chainstate::stacks::index::bits::{
+ get_leaf_hash, get_node_byte_len, write_nodetype_bytes,
+};
+use crate::chainstate::stacks::index::marf::{
+ MARFOpenOpts, MarfConnection, BLOCK_HEIGHT_TO_HASH_MAPPING_KEY, MARF,
+};
+use crate::chainstate::stacks::index::node::{
+ clear_backptr, is_backptr, TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID,
+ TrieNodeType, TriePtr,
+};
+use crate::chainstate::stacks::index::storage::{
+ SquashInfo, TrieFileStorage, TrieStorageConnection,
+};
+use crate::chainstate::stacks::index::trie::Trie;
+use crate::chainstate::stacks::index::{
+ trie_sql, BlockMap, Error, MARFValue, MarfTrieId, TrieHasher, TrieLeaf,
+};
+
+/// Classify a child pointer: resolve the `(block_id, byte_offset)` pair that
+/// locates the child in blob storage. Backpointers carry the target block_id
+/// directly; inline pointers belong to `origin_block_id`.
+/// Returns `None` for empty pointers.
+#[inline]
+fn resolve_child_ptr(ptr: &TriePtr, origin_block_id: u32) -> Option<(u32, u64)> {
+ if ptr.id() == TrieNodeID::Empty as u8 {
+ return None;
+ }
+ if is_backptr(ptr.id()) {
+ Some((ptr.back_block(), ptr.from_backptr().ptr()))
+ } else {
+ Some((origin_block_id, ptr.ptr()))
+ }
+}
+
+/// Returns `true` when a pointer is an inline child (non-empty, non-backptr)
+/// — i.e. it points to a node in the same blob, not to an ancestor block.
+#[inline]
+fn is_inline_child_ptr(ptr: &TriePtr) -> bool {
+ ptr.id() != TrieNodeID::Empty as u8 && !is_backptr(ptr.id())
+}
+
+/// Format a `Duration` as `X.YZ secs` or `X min Y.ZW secs`.
+fn fmt_duration(d: Duration) -> String {
+ let total_centis = d.as_millis() / 10;
+ let mins = total_centis / 6000;
+ let secs = (total_centis % 6000) as f64 / 100.0;
+ if mins == 0 {
+ format!("{secs:.2} secs")
+ } else {
+ format!("{mins} min {secs:.2} secs")
+ }
+}
+
+// ---------------------------------------------------------------------------
+// NodeStore: disk-backed storage for collected trie nodes.
+//
+// Instead of holding all 50M+ collected nodes in a giant in-memory vector,
+// this stores the full node data in a temporary file and keeps only
+// lightweight per-node metadata in memory (~4 GB).
+// ---------------------------------------------------------------------------
+
+/// Tag bytes for node serialization to the temp file.
+const TAG_LEAF: u8 = 0;
+const TAG_NODE4: u8 = 1;
+const TAG_NODE16: u8 = 2;
+const TAG_NODE48: u8 = 3;
+const TAG_NODE256: u8 = 4;
+
+/// Serialize a single `TriePtr` to the writer.
+fn write_trie_ptr<W: Write>(w: &mut W, p: &TriePtr) -> Result<(), Error> {
+ w.write_all(&[p.id, p.chr])?;
+ w.write_all(&p.ptr.to_le_bytes())?;
+ w.write_all(&p.back_block.to_le_bytes())?;
+ Ok(())
+}
+
+/// Deserialize a single `TriePtr` from the reader.
+fn read_trie_ptr<R: std::io::Read>(r: &mut R) -> Result<TriePtr, Error> {
+ let mut buf2 = [0u8; 2];
+ r.read_exact(&mut buf2)?;
+ let mut buf8 = [0u8; 8];
+ r.read_exact(&mut buf8)?;
+ let ptr = u64::from_le_bytes(buf8);
+ let mut buf4 = [0u8; 4];
+ r.read_exact(&mut buf4)?;
+ let back_block = u32::from_le_bytes(buf4);
+ Ok(TriePtr {
+ id: buf2[0],
+ chr: buf2[1],
+ ptr,
+ back_block,
+ })
+}
+
+/// Serialize a `TrieNodeType` to the writer in a compact binary format.
+/// Format: [tag: u8] [path_len: u32] [path bytes] [variant data]
+pub(crate) fn serialize_node<W: Write>(w: &mut W, node: &TrieNodeType) -> Result<(), Error> {
+ match node {
+ TrieNodeType::Leaf(leaf) => {
+ w.write_all(&[TAG_LEAF])?;
+ w.write_all(&(leaf.path.len() as u32).to_le_bytes())?;
+ w.write_all(&leaf.path)?;
+ w.write_all(&leaf.data.0)?;
+ }
+ TrieNodeType::Node4(n) => {
+ w.write_all(&[TAG_NODE4])?;
+ w.write_all(&(n.path.len() as u32).to_le_bytes())?;
+ w.write_all(&n.path)?;
+ for p in &n.ptrs {
+ write_trie_ptr(w, p)?;
+ }
+ }
+ TrieNodeType::Node16(n) => {
+ w.write_all(&[TAG_NODE16])?;
+ w.write_all(&(n.path.len() as u32).to_le_bytes())?;
+ w.write_all(&n.path)?;
+ for p in &n.ptrs {
+ write_trie_ptr(w, p)?;
+ }
+ }
+ TrieNodeType::Node48(n) => {
+ w.write_all(&[TAG_NODE48])?;
+ w.write_all(&(n.path.len() as u32).to_le_bytes())?;
+ w.write_all(&n.path)?;
+ // Write the 256-byte indexes array
+ let indexes = n.indexes.map(|idx| idx as u8);
+ w.write_all(&indexes)?;
+ for p in &n.ptrs {
+ write_trie_ptr(w, p)?;
+ }
+ }
+ TrieNodeType::Node256(n) => {
+ w.write_all(&[TAG_NODE256])?;
+ w.write_all(&(n.path.len() as u32).to_le_bytes())?;
+ w.write_all(&n.path)?;
+ for p in &n.ptrs {
+ write_trie_ptr(w, p)?;
+ }
+ }
+ }
+ Ok(())
+}
+
+/// Deserialize a `TrieNodeType` from the reader.
+pub(crate) fn deserialize_node<R: std::io::Read>(r: &mut R) -> Result<TrieNodeType, Error> {
+ let mut tag = [0u8; 1];
+ r.read_exact(&mut tag)?;
+ let mut path_len_buf = [0u8; 4];
+ r.read_exact(&mut path_len_buf)?;
+ let path_len = u32::from_le_bytes(path_len_buf) as usize;
+ let mut path = vec![0u8; path_len];
+ if path_len > 0 {
+ r.read_exact(&mut path)?;
+ }
+
+ match tag[0] {
+ TAG_LEAF => {
+ let mut data = [0u8; 40];
+ r.read_exact(&mut data)?;
+ Ok(TrieNodeType::Leaf(TrieLeaf {
+ path,
+ data: MARFValue(data),
+ }))
+ }
+ TAG_NODE4 => {
+ let mut ptrs = [TriePtr::default(); 4];
+ for p in ptrs.iter_mut() {
+ *p = read_trie_ptr(r)?;
+ }
+ Ok(TrieNodeType::Node4(TrieNode4 {
+ path,
+ ptrs,
+ cowptr: None,
+ patches: vec![],
+ }))
+ }
+ TAG_NODE16 => {
+ let mut ptrs = [TriePtr::default(); 16];
+ for p in ptrs.iter_mut() {
+ *p = read_trie_ptr(r)?;
+ }
+ Ok(TrieNodeType::Node16(TrieNode16 {
+ path,
+ ptrs,
+ cowptr: None,
+ patches: vec![],
+ }))
+ }
+ TAG_NODE48 => {
+ let mut indexes_u8 = [0u8; 256];
+ r.read_exact(&mut indexes_u8)?;
+ let indexes = indexes_u8.map(|idx| idx as i8);
+ let mut ptrs = [TriePtr::default(); 48];
+ for p in ptrs.iter_mut() {
+ *p = read_trie_ptr(r)?;
+ }
+ Ok(TrieNodeType::Node48(Box::new(TrieNode48 {
+ path,
+ indexes,
+ ptrs,
+ cowptr: None,
+ patches: vec![],
+ })))
+ }
+ TAG_NODE256 => {
+ let mut ptrs = [TriePtr::default(); 256];
+ for p in ptrs.iter_mut() {
+ *p = read_trie_ptr(r)?;
+ }
+ Ok(TrieNodeType::Node256(Box::new(TrieNode256 {
+ path,
+ ptrs,
+ cowptr: None,
+ patches: vec![],
+ })))
+ }
+ _ => Err(Error::CorruptionError(format!(
+ "NodeStore: invalid tag byte {0}",
+ tag[0]
+ ))),
+ }
+}
+
+/// Disk-backed store for collected trie nodes.
+///
+/// Full node data is serialized to a temporary file. Only lightweight
+/// per-node metadata (hash, block_id, file offset) is kept in memory.
+pub(crate) struct NodeStore {
+ /// Temp file holding serialized nodes (write handle).
+ writer: BufWriter<File>,
+ /// Path to the temp file (for re-opening as reader).
+ pub(crate) path: std::path::PathBuf,
+ /// Byte offset in the temp file for each node.
+ pub(crate) file_offsets: Vec<u64>,
+ /// Per-node hash.
+ hashes: Vec<TrieHash>,
+ /// Per-node origin block ID.
+ block_ids: Vec<u32>,
+}
+
+impl NodeStore {
+ pub(crate) fn new(dir: &str) -> Result<NodeStore, Error> {
+ let pid = std::process::id();
+ // Try up to 16 times with atomic create_new to avoid collision.
+ for attempt in 0u32..16 {
+ let nanos = std::time::SystemTime::now()
+ .duration_since(std::time::UNIX_EPOCH)
+ .unwrap_or_default()
+ .as_nanos();
+ let path = std::path::PathBuf::from(format!(
+ "{}/.squash_nodes_{pid}_{nanos}_{attempt}.tmp",
+ dir
+ ));
+ match File::options().write(true).create_new(true).open(&path) {
+ Ok(file) => {
+ return Ok(NodeStore {
+ writer: BufWriter::with_capacity(1 << 20, file),
+ path,
+ file_offsets: Vec::new(),
+ hashes: Vec::new(),
+ block_ids: Vec::new(),
+ });
+ }
+ Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => continue,
+ Err(e) => return Err(Error::IOError(e)),
+ }
+ }
+ Err(Error::IOError(std::io::Error::new(
+ std::io::ErrorKind::AlreadyExists,
+ "failed to create unique NodeStore temp file after 16 attempts",
+ )))
+ }
+
+ pub(crate) fn len(&self) -> usize {
+ self.file_offsets.len()
+ }
+
+ /// Append a node. Returns the node's index.
+ pub(crate) fn push(
+ &mut self,
+ node: &TrieNodeType,
+ hash: TrieHash,
+ block_id: u32,
+ ) -> Result<usize, Error> {
+ let idx = self.file_offsets.len();
+ let offset = self.writer.stream_position().map_err(Error::IOError)?;
+ self.file_offsets.push(offset);
+ self.hashes.push(hash);
+ self.block_ids.push(block_id);
+ serialize_node(&mut self.writer, node)?;
+ Ok(idx)
+ }
+
+ /// Flush the writer and return a sequential reader over all nodes.
+ pub(crate) fn finish_writing(&mut self) -> Result<(), Error> {
+ self.writer.flush().map_err(Error::IOError)?;
+ Ok(())
+ }
+
+ /// Open a reader for random-access reads.
+ pub(crate) fn open_reader(&self) -> Result<BufReader<File>, Error> {
+ let file = File::open(&self.path).map_err(Error::IOError)?;
+ Ok(BufReader::with_capacity(1 << 20, file))
+ }
+
+ /// Read a node from the temp file using the given reader.
+ pub(crate) fn read_node_with(
+ &self,
+ reader: &mut BufReader<File>,
+ idx: usize,
+ ) -> Result<TrieNodeType, Error> {
+ let offset = *self.file_offsets.get(idx).ok_or_else(|| {
+ Error::CorruptionError(format!("NodeStore: index {idx} out of bounds"))
+ })?;
+ reader
+ .seek(SeekFrom::Start(offset))
+ .map_err(Error::IOError)?;
+ deserialize_node(reader)
+ }
+
+ pub(crate) fn hash(&self, idx: usize) -> TrieHash {
+ self.hashes.get(idx).copied().unwrap_or_else(|| {
+ panic!(
+ "NodeStore::hash: index {idx} out of bounds (len={})",
+ self.hashes.len()
+ )
+ })
+ }
+
+ pub(crate) fn set_hash(&mut self, idx: usize, hash: TrieHash) {
+ if let Some(slot) = self.hashes.get_mut(idx) {
+ *slot = hash;
+ } else {
+ panic!(
+ "NodeStore::set_hash: index {idx} out of bounds (len={})",
+ self.hashes.len()
+ );
+ }
+ }
+
+ pub(crate) fn block_id(&self, idx: usize) -> u32 {
+ self.block_ids.get(idx).copied().unwrap_or_else(|| {
+ panic!(
+ "NodeStore::block_id: index {idx} out of bounds (len={})",
+ self.block_ids.len()
+ )
+ })
+ }
+
+ /// Drop the block_ids Vec to free memory after remap.
+ fn drop_block_ids(&mut self) {
+ self.block_ids = Vec::new();
+ }
+
+ /// Clean up the temp file.
+ fn cleanup(&self) {
+ let _ = std::fs::remove_file(&self.path);
+ }
+}
+
+impl Drop for NodeStore {
+ fn drop(&mut self) {
+ self.cleanup();
+ }
+}
+
+/// Remap child pointers in a `NodeStore` for the squashed trie layout.
+///
+/// For each non-leaf node, reads it from the temp file, remaps its child
+/// pointers from source (block_id, offset) to sequential indices, and
+/// writes the modified node back.
+///
+/// When `block_id_map` is `Some`, each child's `back_block` is set to the
+/// squashed equivalent of its origin block (needed for the real squash blob
+/// so that COW and hash computation preserve block identity). When `None`,
+/// `back_block` is zeroed (used by `recompute_squash_root_node_hash` where
+/// block identity is irrelevant).
+fn remap_child_ptrs(
+ store: &mut NodeStore,
+ source_to_idx: &HashMap<(u32, u64), usize>,
+ block_id_map: Option<&HashMap<u32, u32>>,
+ label: &str,
+) -> Result<(), Error> {
+ let remap_start = Instant::now();
+ let node_count = store.len();
+ let mut reader = store.open_reader()?;
+
+ let write_file = std::fs::OpenOptions::new()
+ .write(true)
+ .open(&store.path)
+ .map_err(Error::IOError)?;
+ let mut writer = BufWriter::with_capacity(1 << 20, write_file);
+
+ for idx in 0..node_count {
+ if idx > 0 && idx % 1_000_000 == 0 {
+ info!(
+ "[{label}] Remap trie pointers: {idx}/{node_count} nodes in {}",
+ fmt_duration(remap_start.elapsed())
+ );
+ }
+
+ let mut node = store.read_node_with(&mut reader, idx)?;
+ let origin_block_id = store.block_id(idx);
+
+ if node.is_leaf() {
+ continue;
+ }
+
+ let ptrs = node.ptrs_mut();
+ let mut modified = false;
+ for ptr in ptrs.iter_mut() {
+ let Some((child_block_id, read_ptr_val)) = resolve_child_ptr(ptr, origin_block_id)
+ else {
+ continue;
+ };
+
+ let source_key = (child_block_id, read_ptr_val);
+ let child_idx = *source_to_idx.get(&source_key).ok_or_else(|| {
+ Error::CorruptionError(format!(
+ "remap_child_ptrs: child {source_key:?} not in source_to_idx"
+ ))
+ })?;
+
+ ptr.ptr = child_idx as u64;
+ ptr.id = clear_backptr(ptr.id);
+
+ ptr.back_block = match block_id_map {
+ Some(map) => *map.get(&child_block_id).ok_or_else(|| {
+ Error::CorruptionError(format!(
+ "remap_child_ptrs: block_id {child_block_id} not in block_id_map"
+ ))
+ })?,
+ None => 0,
+ };
+ modified = true;
+ }
+
+ if modified {
+ let offset = *store.file_offsets.get(idx).ok_or_else(|| {
+ Error::CorruptionError(format!("remap: file_offsets index {idx} out of bounds"))
+ })?;
+ writer
+ .seek(SeekFrom::Start(offset))
+ .map_err(Error::IOError)?;
+ serialize_node(&mut writer, &node)?;
+ }
+ }
+ writer.flush().map_err(Error::IOError)?;
+
+ info!(
+ "[{label}] Remap trie pointers complete: {node_count} nodes in {}",
+ fmt_duration(remap_start.elapsed())
+ );
+ Ok(())
+}
+
+/// Recompute content hashes using a `NodeStore`.
+///
+/// Leaf hashes are computed by reading each leaf from the temp file.
+/// Internal node hashes are computed bottom-up (reverse order) using
+/// the in-memory hashes Vec for child lookups and reading the node
+/// structure from the temp file.
+fn recompute_content_hashes(store: &mut NodeStore) -> Result<(), Error> {
+ let empty_hash = TrieHash::from_data(&[]);
+ let node_count = store.len();
+ let mut reader = store.open_reader()?;
+ let start = Instant::now();
+
+ // Pass 1: compute leaf hashes
+ for idx in 0..node_count {
+ let node = store.read_node_with(&mut reader, idx)?;
+ if let TrieNodeType::Leaf(ref leaf) = node {
+ store.set_hash(idx, get_leaf_hash(leaf));
+ }
+ }
+ info!(
+ "Trie hash: leaf pass done in {}",
+ fmt_duration(start.elapsed())
+ );
+
+ // Pass 2: internal nodes in reverse order
+ for idx in (0..node_count).rev() {
+ let node = store.read_node_with(&mut reader, idx)?;
+ if node.is_leaf() {
+ continue;
+ }
+
+ // Collect child hashes
+ let ptrs = node.ptrs();
+ let mut child_hashes = Vec::with_capacity(ptrs.len());
+ for child_ptr in ptrs {
+ if !is_inline_child_ptr(child_ptr) {
+ child_hashes.push(empty_hash);
+ } else {
+ let child_idx = child_ptr.ptr() as usize;
+ if child_idx >= node_count {
+ return Err(Error::CorruptionError(format!(
+ "Invalid child index {child_idx} at node {idx}"
+ )));
+ }
+ child_hashes.push(store.hash(child_idx));
+ }
+ }
+
+ let new_hash = compute_node_hash(&node, &child_hashes);
+ store.set_hash(idx, new_hash);
+ }
+
+ info!(
+ "Trie hash: {node_count} nodes in {}",
+ fmt_duration(start.elapsed())
+ );
+ Ok(())
+}
+
+/// Replace array-index child pointers in `node` with the corresponding
+/// blob byte offsets from `blob_offsets`. Only forward (non-back, non-empty)
+/// pointers are remapped.
+pub(crate) fn remap_ptrs_to_blob_offsets(
+ node: &mut TrieNodeType,
+ blob_offsets: &[u64],
+) -> Result<(), Error> {
+ if node.is_leaf() {
+ return Ok(());
+ }
+ for ptr in node.ptrs_mut() {
+ if is_inline_child_ptr(ptr) {
+ let child_idx = ptr.ptr() as usize;
+ ptr.ptr = *blob_offsets.get(child_idx).ok_or_else(|| {
+ Error::CorruptionError(format!(
+ "blob offset remap: child index {child_idx} out of bounds"
+ ))
+ })?;
+ }
+ }
+ Ok(())
+}
+
+/// Compute per-node byte offsets within the serialized blob.
+///
+/// Returns `(blob_offsets, total_size)` where `blob_offsets[i]` is the byte
+/// position where node `i` starts in the blob (after the header).
+pub(crate) fn compute_blob_offsets(store: &mut NodeStore) -> Result<(Vec<u64>, u64), Error> {
+ compute_blob_offsets_inner(store, u32::MAX as u64)
+}
+
+/// Inner implementation with a configurable early-exit threshold.
+/// When `current_offset <= early_exit_threshold` after pass 1, the fixpoint
+/// loop is skipped because no pointer will switch to u64 encoding.
+pub(crate) fn compute_blob_offsets_inner(
+ store: &mut NodeStore,
+ early_exit_threshold: u64,
+) -> Result<(Vec<u64>, u64), Error> {
+ let n = store.len();
+ let mut reader = store.open_reader()?;
+ let header_size = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+ let mut blob_offsets: Vec<u64> = Vec::with_capacity(n);
+ let mut current_offset = header_size;
+ let mut forward_ptr_count: usize = 0;
+
+ // Per-node byte lengths cached during Pass 1. For nodes without
+ // forward pointers the length is constant across fixpoint passes,
+ // so we can skip re-reading them from disk entirely.
+ let mut byte_lens: Vec<u64> = Vec::with_capacity(n);
+ // True when a node has forward pointers (must be re-read in fixpoint).
+ let mut has_forward_ptrs: Vec<bool> = Vec::with_capacity(n);
+
+ // Pass 1: compute offsets using original (array-index) pointer values.
+ for idx in 0..n {
+ blob_offsets.push(current_offset);
+ let node = store.read_node_with(&mut reader, idx)?;
+ let mut has_fwd = false;
+ if !node.is_leaf() {
+ for ptr in node.ptrs() {
+ if is_inline_child_ptr(ptr) {
+ forward_ptr_count = forward_ptr_count
+ .checked_add(1)
+ .ok_or(Error::OverflowError)?;
+ has_fwd = true;
+ }
+ }
+ }
+ has_forward_ptrs.push(has_fwd);
+ let byte_len = get_node_byte_len(&node) as u64;
+ byte_lens.push(byte_len);
+ current_offset += byte_len;
+ }
+
+ // If the blob fits in 4 GiB, no pointer will switch to u64 encoding.
+ if current_offset <= early_exit_threshold {
+ return Ok((blob_offsets, current_offset));
+ }
+
+ // Pass 2+: recompute with blob-offset pointer values until stable.
+ // Each forward pointer widens from u32 to u64 at most once, so
+ // `forward_ptr_count + 2` bounds convergence (same as dump_consume).
+ let max_passes = forward_ptr_count.saturating_add(2);
+ let mut converged = false;
+ for _ in 0..max_passes {
+ let prev_total = current_offset;
+ current_offset = header_size;
+
+ for idx in 0..n {
+ // Temporary mutable borrow - released at the semicolon so
+ // `remap_ptrs_to_blob_offsets` can borrow `blob_offsets` immutably.
+ *blob_offsets.get_mut(idx).ok_or_else(|| {
+ Error::CorruptionError("blob offset index out of bounds".into())
+ })? = current_offset;
+
+ let has_fwd = *has_forward_ptrs.get(idx).ok_or_else(|| {
+ Error::CorruptionError("has_forward_ptrs index out of bounds".into())
+ })?;
+ if has_fwd {
+ let mut node = store.read_node_with(&mut reader, idx)?;
+ remap_ptrs_to_blob_offsets(&mut node, &blob_offsets)?;
+ *byte_lens.get_mut(idx).ok_or_else(|| {
+ Error::CorruptionError("byte_lens index out of bounds".into())
+ })? = get_node_byte_len(&node) as u64;
+ }
+
+ current_offset += *byte_lens
+ .get(idx)
+ .ok_or_else(|| Error::CorruptionError("byte_lens index out of bounds".into()))?;
+ }
+
+ if current_offset == prev_total {
+ converged = true;
+ break;
+ }
+ }
+ if !converged {
+ return Err(Error::CorruptionError(format!(
+ "compute_blob_offsets layout did not converge after {max_passes} passes"
+ )));
+ }
+
+ Ok((blob_offsets, current_offset))
+}
+
+/// Stream the squash blob into an arbitrary `Write + Seek` sink.
+///
+/// Reads nodes one-at-a-time from the NodeStore temp file, converts
+/// array-index child pointers to byte offsets, and serializes directly
+/// into `sink`. No intermediate `Vec` is allocated for the full blob.
+///
+/// The blob is written starting at the sink's current position.
+/// All internal offsets (header, node pointers) are relative to the blob
+/// start, not to the absolute file position, so this works correctly when
+/// appending to a `.blobs` file that already contains data.
+///
+/// Returns the number of bytes written.
+pub(crate) fn stream_squash_blob<T: MarfTrieId, F: Write + Seek>(
+ store: &mut NodeStore,
+ parent_hash: &T,
+ blob_offsets: &[u64],
+ sink: &mut F,
+) -> Result<u64, Error> {
+ let n = store.len();
+ let mut reader = store.open_reader()?;
+
+ // Record the base offset so all writes are relative to blob start.
+ let base = sink.stream_position().map_err(Error::IOError)?;
+
+ // Write header: parent block hash + zero identifier
+ sink.write_all(parent_hash.as_bytes())
+ .map_err(Error::IOError)?;
+ sink.seek(SeekFrom::Start(
+ base + BLOCK_HEADER_HASH_ENCODED_SIZE as u64,
+ ))
+ .map_err(Error::IOError)?;
+ sink.write_all(&0u32.to_le_bytes())
+ .map_err(Error::IOError)?;
+
+ for idx in 0..n {
+ let mut node = store.read_node_with(&mut reader, idx)?;
+ let hash = store.hash(idx);
+
+ // Convert array-index pointers to byte offsets (relative to blob start)
+ remap_ptrs_to_blob_offsets(&mut node, blob_offsets)?;
+
+ write_nodetype_bytes(sink, &node, hash)?;
+ }
+
+ let end = sink.stream_position().map_err(Error::IOError)?;
+ Ok(end - base)
+}
+
+/// Per-height block metadata: `(height, block_hash, root_hash)`.
+type BlockInfo<T> = (u32, T, TrieHash);
+
+/// Reads root hashes from either an external `.blobs` file or from SQLite
+/// internal `marf_data.data` BLOB columns.
+enum BlobReader {
+ External(BufReader<File>),
+ Internal(rusqlite::Connection),
+}
+
+impl BlobReader {
+ fn new(db_path: &str, external_blobs: bool) -> Result<BlobReader, Error> {
+ if external_blobs {
+ let blobs_path = format!("{db_path}.blobs");
+ let file = File::open(&blobs_path).map_err(Error::IOError)?;
+ Ok(BlobReader::External(BufReader::with_capacity(
+ 64 * 1024,
+ file,
+ )))
+ } else {
+ let conn = rusqlite::Connection::open_with_flags(
+ db_path,
+ rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY,
+ )?;
+ Ok(BlobReader::Internal(conn))
+ }
+ }
+
+ /// Read the root hash for a block.
+ ///
+ /// For `External`, seeks to `blob_offset + root_ptr_offset` in the `.blobs` file.
+ /// For `Internal`, opens the SQLite blob for `block_id` and seeks within it.
+ fn read_root_hash(&mut self, block_id: u32, blob_offset: u64) -> Result<TrieHash, Error> {
+ let root_ptr_offset = (BLOCK_HEADER_HASH_ENCODED_SIZE as u64) + 4;
+ let mut hash_bytes = [0u8; TRIEHASH_ENCODED_SIZE];
+ match self {
+ BlobReader::External(reader) => {
+ reader.seek(SeekFrom::Start(blob_offset + root_ptr_offset))?;
+ reader.read_exact(&mut hash_bytes)?;
+ }
+ BlobReader::Internal(conn) => {
+ let mut blob = conn.blob_open(
+ DatabaseName::Main,
+ "marf_data",
+ "data",
+ block_id.into(),
+ true, // readonly
+ )?;
+ blob.seek(SeekFrom::Start(root_ptr_offset))?;
+ blob.read_exact(&mut hash_bytes)?;
+ }
+ }
+ Ok(TrieHash(hash_bytes))
+ }
+}
+
+/// A `BlockMap` adapter for trie nodes that have no backpointer children.
+///
+/// After the remap pass all pointers in the squash blob are inline.
+/// `write_consensus_bytes` writes zeroed block hashes for non-backptr
+/// children and never queries the `BlockMap`, so every method here is
+/// unreachable.
+struct InlineOnlyBlockMap;
+
+impl BlockMap for InlineOnlyBlockMap {
+ type TrieId = StacksBlockId;
+
+ fn get_block_hash(&self, _id: u32) -> Result<Self::TrieId, Error> {
+ unreachable!("InlineOnlyBlockMap: no backpointers in squash trie")
+ }
+ fn get_block_hash_caching(&mut self, _id: u32) -> Result<&Self::TrieId, Error> {
+ unreachable!("InlineOnlyBlockMap: no backpointers in squash trie")
+ }
+ fn is_block_hash_cached(&self, _id: u32) -> bool {
+ false
+ }
+ fn get_block_id(&self, _bhh: &Self::TrieId) -> Result<u32, Error> {
+ unreachable!("InlineOnlyBlockMap: no backpointers in squash trie")
+ }
+ fn get_block_id_caching(&mut self, _bhh: &Self::TrieId) -> Result<u32, Error> {
+ unreachable!("InlineOnlyBlockMap: no backpointers in squash trie")
+ }
+}
+
+/// Compute the content hash of a `TrieNodeType` given pre-collected child hashes.
+///
+/// Equivalent to `bits::get_node_hash` but works on the `TrieNodeType` enum
+/// directly (which does not implement `ConsensusSerializable`).
+fn compute_node_hash(node: &TrieNodeType, child_hashes: &[TrieHash]) -> TrieHash {
+ let mut hasher = TrieHasher::new();
+ node.write_consensus_bytes(&mut InlineOnlyBlockMap, &mut hasher)
+ .expect("IO failure pushing to hasher");
+ for h in child_hashes {
+ hasher.update(h.as_ref());
+ }
+ TrieHash(hasher.finalize().into())
+}
+
+fn read_proc_status_kib(field: &str) -> Option<u64> {
+ let status = std::fs::read_to_string("/proc/self/status").ok()?;
+ let line = status.lines().find(|line| line.starts_with(field))?;
+ let mut parts = line.split_whitespace();
+ let _ = parts.next()?;
+ parts.next()?.parse::<u64>().ok()
+}
+
+fn log_memory_snapshot(stage: &str) {
+ let rss_kib = read_proc_status_kib("VmRSS:");
+ let hwm_kib = read_proc_status_kib("VmHWM:");
+
+ match (rss_kib, hwm_kib) {
+ (Some(rss), Some(hwm)) => info!(
+ "Squash memory ({stage}): VmRSS={} MiB, VmHWM={} MiB",
+ rss / 1024,
+ hwm / 1024
+ ),
+ (Some(rss), None) => info!("Squash memory ({stage}): VmRSS={} MiB", rss / 1024),
+ _ => info!("Squash memory ({stage}): unavailable"),
+ }
+}
+
+/// Key that stores the squashed root hash at the snapshot tip.
+pub const MARF_SQUASH_ROOT_KEY: &str = "__MARF_SQUASH_ROOT";
+/// Key that stores the snapshot height for a squashed MARF.
+pub const MARF_SQUASH_HEIGHT_KEY: &str = "__MARF_SQUASH_HEIGHT";
+/// Prefix for per-height root hashes preserved in squashed MARFs.
+/// Each key has the form `__MARF_SQUASHED_BLOCK_ROOT_HASH::`.
+pub const MARF_SQUASHED_BLOCK_ROOT_HASH_KEY: &str = "__MARF_SQUASHED_BLOCK_ROOT_HASH";
+
+/// Summary statistics from a squashing run.
+#[derive(Debug, Clone)]
+pub struct SquashStats {
+ /// Total number of nodes collected into the squashed MARF.
+ pub node_count: u64,
+}
+
+/// Summary statistics from a validation run.
+///
+/// The default validation checks:
+/// - Per-height root hashes stored in `marf_squash_archival_marf_roots` match the
+/// archival source (guarantees correct ancestor hash computation for the
+/// skip-list at blocks > H).
+/// - Squash metadata (`marf_squash_info`) is present and correct.
+/// - All historical `marf_data` entries share the tip block's blob offset.
+///
+/// When `full_leaf_scan` is enabled, the validator additionally walks every
+/// leaf in both MARFs and cross-checks them, which is O(leaf_count) and much
+/// slower but useful for debugging.
+#[derive(Debug, Clone)]
+pub struct SquashValidationStats {
+ // --- Fast-path (always populated) ---
+ /// Whether the squashed root key was found in the SQL metadata.
+ pub archival_root_present: bool,
+ /// Whether the stored archival root hash at the squash height
+ /// matches the source MARF's root hash at that height.
+ pub archival_root_matches: bool,
+ /// Per-height root hashes missing from the SQL table.
+ pub root_hash_missing: u64,
+ /// Per-height root hashes with mismatched values.
+ pub root_hash_mismatches: u64,
+ /// Number of historical `marf_data` entries that do NOT share the
+ /// tip block's blob offset (should be 0 for a correct squash).
+ pub blob_offset_mismatches: u64,
+ /// Whether the `squash_root_node_hash` was found in SQL metadata
+ /// (a `TrieHash::from_data(&[])` value counts as absent).
+ pub squash_node_hash_present: bool,
+ /// Whether the stored `squash_root_node_hash` matches the value
+ /// recomputed from the committed squash trie blob (DFS walk + bottom-up hash).
+ pub squash_node_hash_matches: bool,
+
+ // --- Full leaf scan (only populated when full_leaf_scan = true) ---
+ /// Total keys compared from the source MARF (0 when fast-only).
+ pub source_keys_checked: u64,
+ /// Total keys compared from the squashed MARF (0 when fast-only).
+ pub squashed_keys_checked: u64,
+ /// Keys present in source but missing in squashed (0 when fast-only).
+ pub missing_in_squashed: u64,
+ /// Keys present in squashed but missing in source (0 when fast-only).
+ pub missing_in_source: u64,
+ /// Keys present in both but with different values (0 when fast-only).
+ pub value_mismatches: u64,
+}
+
+impl SquashValidationStats {
+ /// Returns `true` if all validation checks passed.
+ pub fn is_valid(&self) -> bool {
+ let fast_valid = self.archival_root_present
+ && self.archival_root_matches
+ && self.squash_node_hash_present
+ && self.squash_node_hash_matches
+ && self.root_hash_missing == 0
+ && self.root_hash_mismatches == 0
+ && self.blob_offset_mismatches == 0;
+
+ // If a full leaf scan was performed (either direction checked any keys),
+ // also validate the leaf-level results.
+ let full_scan_performed = self.source_keys_checked > 0 || self.squashed_keys_checked > 0;
+ let leaf_valid = !full_scan_performed
+ || (self.missing_in_squashed == 0
+ && self.missing_in_source == 0
+ && self.value_mismatches == 0);
+
+ fast_valid && leaf_valid
+ }
+}
+
+/// Step 1: Build an in-memory block_map from all `marf_data` entries.
+fn collect_block_map<T: MarfTrieId>(src: &MARF<T>) -> Result<HashMap<T, (u32, u64)>, Error> {
+ let all_blocks = trie_sql::bulk_read_block_entries::<T>(src.sqlite_conn())?;
+ Ok(all_blocks
+ .into_iter()
+ .map(|(id, bh, offset)| (bh, (id, offset)))
+ .collect())
+}
+
+/// Step 2: For each height 0..=H, resolve (block_hash, root_hash) via trie
+/// walk + direct blob seek.
+fn collect_per_height_metadata<T: MarfTrieId>(
+ src: &mut MARF<T>,
+ source_tip: &T,
+ block_map: &HashMap<T, (u32, u64)>,
+ blob_reader: &mut BlobReader,
+ height: u32,
+ label: &str,
+) -> Result<Vec<BlockInfo<T>>, Error> {
+ let mut block_info: Vec<BlockInfo<T>> = Vec::with_capacity((height + 1) as usize);
+ let mut last_log = Instant::now();
+ let start = Instant::now();
+
+ for h in 0..=height {
+ let h_key = format!("{BLOCK_HEIGHT_TO_HASH_MAPPING_KEY}::{h}");
+ let val = src
+ .with_conn(|conn| MARF::<T>::get_by_key(conn, source_tip, &h_key))?
+ .ok_or_else(|| {
+ Error::CorruptionError(format!("Missing height mapping for height {h}"))
+ })?;
+ let bh = T::from(val);
+
+ let &(block_id, blob_offset) = block_map.get(&bh).ok_or_else(|| {
+ Error::CorruptionError(format!(
+ "Missing block map entry for block hash at height {h}"
+ ))
+ })?;
+
+ let rh = blob_reader.read_root_hash(block_id, blob_offset)?;
+
+ block_info.push((h, bh, rh));
+
+ if last_log.elapsed().as_secs() >= 30 || (h > 0 && h % 100_000 == 0) {
+ info!(
+ "[{label}] [2/8] Build height index: {}/{} heights in {}",
+ h + 1,
+ height + 1,
+ fmt_duration(start.elapsed())
+ );
+ last_log = Instant::now();
+ }
+ }
+ info!(
+ "[{label}] [2/8] Build height index: {} heights in {}",
+ height + 1,
+ fmt_duration(start.elapsed())
+ );
+
+ Ok(block_info)
+}
+
+/// Step 4: Bulk-insert `marf_data` placeholder rows for blocks 0..H-1.
+///
+/// Returns a mapping from archival block_id to squashed block_id.
+fn insert_placeholder_blocks<T: MarfTrieId>(
+ conn: &rusqlite::Connection,
+ block_info: &[BlockInfo<T>],
+ block_at_height: &T,
+ block_map: &HashMap<T, (u32, u64)>,
+ label: &str,
+) -> Result<HashMap<u32, u32>, Error> {
+ let start = Instant::now();
+ let mut archival_to_squashed: HashMap<u32, u32> = HashMap::new();
+ let mut stmt = conn.prepare(PLACEHOLDER_INSERT_SQL)?;
+ for (h, bh, _) in block_info {
+ if bh == block_at_height {
+ continue;
+ }
+ let (archival_id, _) = block_map.get(bh).ok_or(Error::NotFoundError)?;
+ let empty_blob: &[u8] = &[];
+ let squashed_id: u32 = stmt
+ .insert(params![bh.to_string(), empty_blob, 0i64, 0i64])?
+ .try_into()
+ .expect("block_id overflow");
+ archival_to_squashed.insert(*archival_id, squashed_id);
+ if *h % 100_000 == 0 && *h > 0 {
+ info!(
+ "[{label}] [4/8] Register placeholder blocks: {h} of {} in {}",
+ block_info.len(),
+ fmt_duration(start.elapsed())
+ );
+ }
+ }
+ info!(
+ "[{label}] [4/8] Register placeholder blocks: {} entries in {}",
+ archival_to_squashed.len(),
+ fmt_duration(start.elapsed())
+ );
+ Ok(archival_to_squashed)
+}
+
+/// SQL used to insert an empty placeholder row into `marf_data`.
+const PLACEHOLDER_INSERT_SQL: &str =
+ "INSERT INTO marf_data (block_hash, data, unconfirmed, external_offset, external_length) \
+ VALUES (?1, ?2, 0, ?3, ?4)";
+
+/// Step 6: Write all squash SQL metadata in one transaction scope.
+fn persist_squash_metadata<T: MarfTrieId>(
+    conn: &rusqlite::Connection,
+    block_info: &[BlockInfo<T>],
+ source_root_hash: &TrieHash,
+ height: u32,
+) -> Result<(), Error> {
+ let start = Instant::now();
+ trie_sql::write_squash_info(conn, source_root_hash, height)?;
+ let mut stmt = conn.prepare(
+ "INSERT OR REPLACE INTO marf_squash_archival_marf_roots (height, marf_root_hash) VALUES (?1, ?2)",
+ )?;
+ let mut stmt_bh = conn.prepare(
+ "INSERT OR REPLACE INTO marf_squash_block_heights (block_hash, height) VALUES (?1, ?2)",
+ )?;
+ for (h, bh, rh) in block_info {
+ stmt.execute(params![*h as i64, rh.as_bytes().to_vec()])?;
+ stmt_bh.execute(params![bh.to_string(), *h as i64])?;
+ }
+ info!(
+ "Squash: wrote {} root hashes and block heights in {}",
+ block_info.len(),
+ fmt_duration(start.elapsed())
+ );
+ Ok(())
+}
+
+/// Post-commit: persist `squash_root_node_hash` and share blob offsets.
+fn finalize_shared_blob_offsets<T: MarfTrieId>(
+    dst: &mut MARF<T>,
+    block_at_height: &T,
+    squash_root_node_hash: &TrieHash,
+) -> Result<u64, Error> {
+ // Persist squash_root_node_hash to SQL.
+ {
+ let conn = dst.sqlite_conn();
+ conn.execute_batch("BEGIN IMMEDIATE")
+ .map_err(|e| Error::CorruptionError(format!("BEGIN squash_root_node_hash: {e}")))?;
+ trie_sql::update_squash_root_node_hash(conn, squash_root_node_hash)?;
+ conn.execute_batch("COMMIT")
+ .map_err(|e| Error::CorruptionError(format!("COMMIT squash_root_node_hash: {e}")))?;
+ }
+
+ // Bulk-update placeholders to share the tip block's blob offset.
+ let start = Instant::now();
+ let conn = dst.sqlite_conn();
+ let bh_id = trie_sql::get_block_identifier(conn, block_at_height)?;
+ let (offset, length) = trie_sql::get_external_trie_offset_length(conn, bh_id)?;
+
+ conn.execute_batch("BEGIN IMMEDIATE")
+ .map_err(|e| Error::CorruptionError(format!("BEGIN: {e}")))?;
+ let updated = trie_sql::bulk_update_blob_offsets(conn, offset, length, block_at_height)?;
+ conn.execute_batch("COMMIT")
+ .map_err(|e| Error::CorruptionError(format!("COMMIT: {e}")))?;
+ info!(
+ "Squash: updated {} placeholder blob offsets in {}",
+ updated,
+ fmt_duration(start.elapsed())
+ );
+ Ok(updated)
+}
+
+impl<T: MarfTrieId> MARF<T> {
+ /// Squash the MARF at `height` into a new database at `dst_path`.
+ ///
+ /// Produces a hash-preserving squash: the squashed MARF contains a single
+ /// shared trie storage with all trie nodes reachable at `height`. Each historical
+ /// block (0..=height) has a `marf_data` row pointing at this shared trie storage so
+ /// that `get_block_hash_caching(local_id)` returns the correct original
+ /// `StacksBlockId`.
+ ///
+ /// Backpointer identity is preserved via `TriePtr.back_block` annotations.
+ /// Children that were backpointers in the archival MARF are stored inline in
+ /// the blob but with `back_block` set to the squashed DB's local_id for the
+ /// original block. When the squashed MARF is extended to height H+1,
+ /// `node_copy_update_ptrs` preserves these annotations, ensuring
+ /// that `inner_write_children_hashes` uses the same `StacksBlockId` values
+ /// as the archival MARF. This guarantees identical per-block root hashes.
+ pub fn squash_to_path(
+ src_path: &str,
+ dst_path: &str,
+ open_opts: MARFOpenOpts,
+ height: u32,
+ label: &str,
+    ) -> Result<SquashStats, Error> {
+ if open_opts.compress {
+ return Err(Error::CorruptionError(
+ "squash_to_path does not support compress=true; \
+ the direct blob write path only emits uncompressed nodes"
+ .to_string(),
+ ));
+ }
+
+ let overall_start = Instant::now();
+
+ // Step 1: bulk SQL block map
+ let src_storage = TrieFileStorage::open_readonly(src_path, open_opts.clone())?;
+ let mut src = MARF::from_storage(src_storage);
+
+        let tip = trie_sql::get_latest_confirmed_block_hash::<T>(src.sqlite_conn())?;
+ let block_at_height = src
+ .get_block_at_height(height, &tip)?
+ .ok_or(Error::NotFoundError)?;
+
+ let start = Instant::now();
+ let block_map = collect_block_map(&src)?;
+ info!(
+ "[{label}] [1/8] Load block map: {} entries in {}",
+ block_map.len(),
+ fmt_duration(start.elapsed())
+ );
+
+ // [2/8] Build height index
+ info!(
+ "[{label}] [2/8] Build height index: reading {} heights...",
+ height + 1
+ );
+ let mut blob_reader = BlobReader::new(src_path, open_opts.external_blobs)?;
+ let block_info = collect_per_height_metadata(
+ &mut src,
+ &tip,
+ &block_map,
+ &mut blob_reader,
+ height,
+ label,
+ )?;
+
+ // [3/8] Collect trie nodes (DFS walk)
+ //
+ // Derive the temp directory from dst_path: use the parent directory.
+ let tmp_dir = std::path::Path::new(dst_path)
+ .parent()
+ .filter(|p| !p.as_os_str().is_empty())
+ .and_then(|p| p.to_str())
+ .unwrap_or(".");
+ log_memory_snapshot("before trie DFS");
+ info!("[{label}] [3/8] Collect trie nodes: starting DFS...");
+ let start = Instant::now();
+ let (mut node_store, source_to_idx) = src.with_conn(|conn| {
+            MARF::<T>::collect_reachable_nodes(conn, &block_at_height, tmp_dir)
+ })?;
+ let node_count = node_store.len() as u64;
+ info!(
+ "[{label}] [3/8] Collect trie nodes: {node_count} nodes in {}",
+ fmt_duration(start.elapsed())
+ );
+ log_memory_snapshot("after trie DFS");
+
+ let mut dst_open_opts = open_opts.clone();
+ dst_open_opts.external_blobs = true;
+
+ // Open destination MARF and begin transaction
+ let mut dst = MARF::from_path(dst_path, dst_open_opts.clone())?;
+ let mut tx = dst.begin_tx()?;
+ tx.begin(&T::sentinel(), &block_at_height)?;
+
+ // [4/8] Register placeholder blocks
+ let mut archival_to_squashed = insert_placeholder_blocks(
+ tx.sqlite_tx(),
+ &block_info,
+ &block_at_height,
+ &block_map,
+ label,
+ )?;
+
+ // Build `block_id_map`: every archival `block_id` that appears
+ // as a node origin in the DFS must be mappable. insert_placeholder_blocks
+ // covers heights 0..H-1 but skips block_at_height and sentinel.
+ // Add them explicitly so `remap_child_ptrs` can resolve all children.
+ //
+ // Sentinel: flushed to marf_data by tx.begin() -> flush().
+ let sentinel = T::sentinel();
+ if let Some((archival_sentinel_id, _)) = block_map.get(&sentinel) {
+ let squashed_sentinel_id: u32 = tx.sqlite_tx().query_row(
+ "SELECT block_id FROM marf_data WHERE block_hash = ?1",
+ rusqlite::params![sentinel.to_string()],
+ |row| row.get(0),
+ )?;
+ archival_to_squashed.insert(*archival_sentinel_id, squashed_sentinel_id);
+ }
+
+ // block_at_height: not yet in the destination `marf_data` (only in
+ // `block_extension_locks`). Insert an empty placeholder now to get a
+ // real `block_id`. Step [7/8] will UPDATE this row instead of
+ // inserting a new one via `update_external_trie_blob`.
+ let squashed_tip_placeholder_id = {
+ let (archival_tip_id, _) = block_map
+ .get(&block_at_height)
+ .ok_or(Error::NotFoundError)?;
+ let empty_blob: &[u8] = &[];
+ let placeholder_id = tx
+ .sqlite_tx()
+ .prepare(PLACEHOLDER_INSERT_SQL)?
+ .insert(params![block_at_height.to_string(), empty_blob, 0i64, 0i64])?
+ .try_into()
+ .expect("block_id overflow");
+ archival_to_squashed.insert(*archival_tip_id, placeholder_id);
+ placeholder_id
+ };
+ drop(block_map);
+
+ // [5/8] Remap trie pointers (disk-backed)
+ log_memory_snapshot("before pointer remap");
+ info!("[{label}] [5/8] Remap trie pointers: {node_count} nodes...");
+ let start = Instant::now();
+ remap_child_ptrs(
+ &mut node_store,
+ &source_to_idx,
+ Some(&archival_to_squashed),
+ label,
+ )?;
+ info!(
+ "[{label}] [5/8] Remap trie pointers: {node_count} nodes in {}",
+ fmt_duration(start.elapsed())
+ );
+ drop(source_to_idx);
+ drop(archival_to_squashed);
+ node_store.drop_block_ids(); // free ~200 MB
+ log_memory_snapshot("after pointer remap");
+
+ // [6/8] Recompute node hashes (disk-backed)
+ log_memory_snapshot("before hash recompute");
+ info!("[{label}] [6/8] Recompute node hashes: {node_count} nodes...");
+ let start = Instant::now();
+ recompute_content_hashes(&mut node_store)?;
+ info!(
+ "[{label}] [6/8] Recompute node hashes: {node_count} nodes in {}",
+ fmt_duration(start.elapsed())
+ );
+ log_memory_snapshot("after hash recompute");
+
+ let squash_root_node_hash = if node_store.len() > 0 {
+ node_store.hash(0)
+ } else {
+ return Err(Error::CorruptionError(
+ "No nodes in squash trie".to_string(),
+ ));
+ };
+
+ // [7/8] Write trie blob (compute offsets + stream to destination)
+ log_memory_snapshot("before blob write");
+ info!("[{label}] [7/8] Write trie blob: {node_count} nodes...");
+ let start = Instant::now();
+ let parent_hash = T::sentinel();
+
+ let (blob_offsets, total_blob_size) = compute_blob_offsets(&mut node_store)?;
+ // Destination squash MARFs always use external blobs.
+ let block_id = tx.storage.with_trie_blobs(|db, blobs| {
+ let Some(trie_file) = blobs else {
+ return Err(Error::CorruptionError(
+ "squash destination requires external .blobs file but handle is unavailable"
+ .to_string(),
+ ));
+ };
+ let offset = trie_sql::get_external_blobs_length(db)?;
+ trie_file
+ .seek(SeekFrom::Start(offset))
+ .map_err(Error::IOError)?;
+ // buffer size is 1 MiB, completely arbitrary.
+ let mut buf_writer = BufWriter::with_capacity(1 << 20, trie_file);
+ stream_squash_blob(
+ &mut node_store,
+ &parent_hash,
+ &blob_offsets,
+ &mut buf_writer,
+ )?;
+ buf_writer.flush().map_err(Error::IOError)?;
+ let trie_file = buf_writer.into_inner().map_err(|e| {
+ Error::IOError(std::io::Error::other(format!(
+ "failed to flush BufWriter: {e}"
+ )))
+ })?;
+ trie_file.flush().map_err(Error::IOError)?;
+ trie_file.sync_data().map_err(Error::IOError)?;
+ trie_sql::update_external_trie_blob(
+ db,
+ &block_at_height,
+ offset,
+ total_blob_size,
+ squashed_tip_placeholder_id,
+ )
+ })?;
+ info!(
+ "[{label}] [7/8] Write trie blob: block_id={block_id}, {total_blob_size} bytes in {}",
+ fmt_duration(start.elapsed())
+ );
+ drop(blob_offsets);
+ drop(node_store); // free temp file + metadata
+ log_memory_snapshot("after blob write");
+
+ // [8/8] Persist metadata & commit
+ let step8_start = Instant::now();
+ let source_root_hash = block_info
+ .iter()
+ .find(|(_, bh, _)| bh == &block_at_height)
+ .map(|(_, _, rh)| *rh)
+ .ok_or(Error::NotFoundError)?;
+ persist_squash_metadata(tx.sqlite_tx(), &block_info, &source_root_hash, height)?;
+ info!("[{label}] Squash root hash: {squash_root_node_hash}");
+
+ tx.set_squash_info(Some(SquashInfo {
+ archival_marf_root_hash: source_root_hash,
+ squash_root_node_hash,
+ height,
+ }));
+
+ // Commit the SQL transaction without flushing TrieRAM (we already wrote the blob directly)
+ tx.commit_squash()?;
+
+ // Post-commit: share blob offsets across placeholder blocks
+ finalize_shared_blob_offsets(&mut dst, &block_at_height, &squash_root_node_hash)?;
+
+ info!(
+ "[{label}] [8/8] Persist metadata & commit: finished in {}",
+ fmt_duration(step8_start.elapsed())
+ );
+
+ info!(
+ "[{label}] Squash complete: {node_count} nodes, total time {}",
+ fmt_duration(overall_start.elapsed())
+ );
+
+ Ok(SquashStats { node_count })
+ }
+
+ /// DFS collection pass: gather all trie nodes reachable from `block_hash`.
+ ///
+ /// Uses a disk-backed `NodeStore` to avoid holding ~50M full node objects
+ /// in memory (~20 GB). Only lightweight metadata (hashes, block_ids,
+ /// file offsets) is kept in RAM (~4 GB).
+ ///
+ /// Uses iterative DFS instead of BFS. The DFS stack holds at most
+ /// `trie_height` frames (~32), each with one node's child pointer list.
+ /// Total stack memory is ~128 KB, compared to the BFS frontier which
+ /// could hold millions of entries (~GBs) for wide, hash-distributed tries.
+ ///
+ /// Nodes are pushed in DFS preorder (parent before children), which is
+ /// all the remap and hash-recompute passes require.
+ ///
+ /// Returns:
+ /// - `node_store`: disk-backed node data + in-memory metadata.
+ /// - `source_to_idx`: `(source_block_id, byte_offset) -> node index` map
+ /// needed by the remap pass.
+ fn collect_reachable_nodes(
+        source: &mut TrieStorageConnection<T>,
+ block_hash: &T,
+ tmp_dir: &str,
+ ) -> Result<(NodeStore, HashMap<(u32, u64), usize>), Error> {
+ source.open_block(block_hash)?;
+ let (root_node, root_hash) = Trie::read_root(source)?;
+ let root_block_id = source.get_cur_block_identifier()?;
+
+ let mut store = NodeStore::new(tmp_dir)?;
+ let mut source_to_idx: HashMap<(u32, u64), usize> = HashMap::new();
+
+        let root_disk_ptr = TrieStorageConnection::<T>::root_ptr_disk();
+ source_to_idx.insert((root_block_id, root_disk_ptr), 0);
+
+ let root_is_leaf = root_node.is_leaf();
+        let root_ptrs: Vec<TriePtr> = if root_is_leaf {
+ vec![]
+ } else {
+ root_node.ptrs().to_vec()
+ };
+ store.push(&root_node, root_hash, root_block_id)?;
+
+ // DFS stack frame: holds remaining child pointers for one node.
+ // Stack depth is bounded by trie height (~32), so total memory is
+ // ~32 * max_ptrs * sizeof(TriePtr) ≈ 128 KB - negligible.
+ struct DfsFrame {
+ origin_block_id: u32,
+            child_ptrs: Vec<TriePtr>,
+ next_child: usize,
+ }
+
+        let mut stack: Vec<DfsFrame> = Vec::new();
+ if !root_is_leaf {
+ stack.push(DfsFrame {
+ origin_block_id: root_block_id,
+ child_ptrs: root_ptrs,
+ next_child: 0,
+ });
+ }
+
+ let dfs_start = Instant::now();
+ let mut nodes_collected: u64 = 1; // root already counted
+ let mut last_log = Instant::now();
+
+ while !stack.is_empty() {
+ let stack_depth = stack.len();
+ let frame = stack.last_mut().expect("stack is non-empty");
+ // Scan this frame's remaining children for the next one to descend into.
+            let mut descend_frame: Option<DfsFrame> = None;
+
+ while frame.next_child < frame.child_ptrs.len() {
+ let ptr = *frame
+ .child_ptrs
+ .get(frame.next_child)
+ .expect("BUG: next_child within bounds");
+ frame.next_child += 1;
+
+ if ptr.id() == TrieNodeID::Empty as u8 {
+ continue;
+ }
+
+ let (child_block_id, read_ptr) = if is_backptr(ptr.id()) {
+ (ptr.back_block(), ptr.from_backptr())
+ } else {
+ (frame.origin_block_id, ptr)
+ };
+
+ let source_key = (child_block_id, read_ptr.ptr());
+ if source_to_idx.contains_key(&source_key) {
+ continue;
+ }
+
+ let child_bh = source.get_block_from_local_id(child_block_id)?.clone();
+ source.open_block_maybe_id(&child_bh, Some(child_block_id))?;
+ let (child_node, child_hash) = source.read_nodetype(&read_ptr)?;
+
+ let child_is_leaf = child_node.is_leaf();
+                let child_ptrs_vec: Vec<TriePtr> = if child_is_leaf {
+ vec![]
+ } else {
+ child_node.ptrs().to_vec()
+ };
+
+ source_to_idx.insert(source_key, store.len());
+ store.push(&child_node, child_hash, child_block_id)?;
+
+ nodes_collected += 1;
+ if last_log.elapsed().as_secs() >= 30 || nodes_collected % 1_000_000 == 0 {
+ info!(
+ "Trie DFS: {nodes_collected} nodes, stack depth {stack_depth}, {} elapsed",
+ fmt_duration(dfs_start.elapsed())
+ );
+ last_log = Instant::now();
+ }
+
+ // If internal node, descend into it (push frame and break).
+ // If leaf, continue scanning siblings.
+ if !child_is_leaf {
+ descend_frame = Some(DfsFrame {
+ origin_block_id: child_block_id,
+ child_ptrs: child_ptrs_vec,
+ next_child: 0,
+ });
+ break;
+ }
+ }
+
+ match descend_frame {
+ Some(new_frame) => stack.push(new_frame),
+ None => {
+ // All children of this frame processed, backtrack.
+ stack.pop();
+ }
+ }
+ }
+
+ store.finish_writing()?;
+
+ info!(
+ "Trie DFS: {} nodes in {}",
+ store.len(),
+ fmt_duration(dfs_start.elapsed())
+ );
+
+ Ok((store, source_to_idx))
+ }
+}
diff --git a/stackslib/src/chainstate/stacks/index/storage.rs b/stackslib/src/chainstate/stacks/index/storage.rs
index cab8b0c1ab8..6b282cb4392 100644
--- a/stackslib/src/chainstate/stacks/index/storage.rs
+++ b/stackslib/src/chainstate/stacks/index/storage.rs
@@ -256,7 +256,7 @@ impl UncommittedState {
/// Panics of the UncommittedState is sealed already.
pub fn write_nodetype(
&mut self,
- node_array_ptr: u32,
+ node_array_ptr: u64,
node: &TrieNodeType,
hash: TrieHash,
) -> Result<(), Error> {
@@ -284,7 +284,7 @@ impl UncommittedState {
}
/// Get the last pointer (i.e. last slot) of the TrieRAM
-    pub fn last_ptr(&mut self) -> Result<u32, Error> {
+    pub fn last_ptr(&mut self) -> Result<u64, Error> {
self.trie_ram_mut().last_ptr()
}
@@ -379,6 +379,12 @@ impl UncommittedState {
/// In-RAM trie storage.
/// Used by TrieFileStorage to buffer the next trie being built.
+///
+/// Pointers in `TrieRAM` are index-based, not disk-offset-based:
+/// `TriePtr::ptr()` is treated as an in-memory node index into `data`, and
+/// traversal/indexing paths are intentionally bounded to `u32`.
+/// Large `u64` byte offsets are only materialized when serializing this trie
+/// to persistent storage (see `dump_consume`/`write_trie_indirect`).
#[derive(Clone)]
 pub struct TrieRAM<T: MarfTrieId> {
data: Vec<(TrieNodeType, TrieHash)>,
@@ -626,7 +632,7 @@ impl TrieRAM {
f: &mut F,
node_data_order: &[u32],
node_data: &[(TrieNodeType, TrieHash)],
- offsets: &[u32],
+ offsets: &[u64],
parent_hash: &T,
) -> Result<(), Error> {
assert_eq!(node_data_order.len(), offsets.len());
@@ -652,7 +658,7 @@ impl TrieRAM {
let next_offset = *offsets.get(ix).ok_or_else(|| {
Error::CorruptionError("node_data_order.len() != offsets.len()".into())
})?;
- f.seek(SeekFrom::Start(next_offset.into()))?;
+ f.seek(SeekFrom::Start(next_offset))?;
}
Ok(())
@@ -668,7 +674,7 @@ impl TrieRAM {
f: &mut F,
node_data_order: &[DumpPtr],
node_data: &[(TrieNodeType, TrieHash)],
- offsets: &[u32],
+ offsets: &[u64],
parent_hash: &T,
) -> Result<(), Error> {
assert_eq!(node_data_order.len(), offsets.len());
@@ -711,7 +717,7 @@ impl TrieRAM {
let next_offset = *offsets.get(ix).ok_or_else(|| {
Error::CorruptionError("node_data_order.len() != offsets.len()".into())
})?;
- f.seek(SeekFrom::Start(u64::from(next_offset)))?;
+ f.seek(SeekFrom::Start(next_offset))?;
}
Ok(())
@@ -818,11 +824,11 @@ impl TrieRAM {
fn calculate_node_hashes(
&mut self,
         storage_tx: &mut TrieStorageTransaction<T>,
- node_ptr: u64,
+ node_ptr: u32, // in-memory index is always a u32
     ) -> Result<TrieHash, Error> {
let start_time = storage_tx.bench.write_children_hashes_start();
let mut start_node_time = Some(storage_tx.bench.write_children_hashes_same_block_start());
- let (node, node_hash) = self.get_nodetype(node_ptr as u32)?.to_owned();
+ let (node, node_hash) = self.get_nodetype(node_ptr)?.to_owned();
if node.is_leaf() {
// base case: we already have the hash of the leaf, so return it.
Ok(node_hash)
@@ -863,7 +869,7 @@ impl TrieRAM {
.write_children_hashes_empty_finish(start_time);
} else if !is_backptr(ptr.id()) {
// hash is the hash of this node's children
- let node_hash = self.calculate_node_hashes(storage_tx, ptr.ptr() as u64)?;
+ let node_hash = self.calculate_node_hashes(storage_tx, ptr.ptr_as_u32()?)?;
// count the time taken to store the hash towards the
// write_children_hashes_same_benchmark
@@ -882,7 +888,7 @@ impl TrieRAM {
&& ptr.id() != TrieNodeID::Leaf as u8
{
// need to store this hash too, since we deferred calculation
- self.write_node_hash(ptr.ptr(), node_hash)?;
+ self.write_node_hash(ptr.ptr_as_u32()?, node_hash)?;
}
storage_tx
@@ -930,63 +936,119 @@ impl TrieRAM {
/// Walk through the buffered TrieNodes and dump them to f.
/// This consumes this TrieRAM instance.
-    fn dump_consume<F: Write + Seek>(mut self, f: &mut F) -> Result<u64, Error> {
-        // step 1: write out each node in breadth-first order to get their ptr offsets
+    pub(crate) fn dump_consume<F: Write + Seek>(mut self, f: &mut F) -> Result<u64, Error> {
+ // step 1: determine breadth-first node order
         let mut frontier: VecDeque<u32> = VecDeque::new();
-
let mut node_data = vec![];
- let mut offsets = vec![];
+ let mut forward_ptr_count = 0usize;
+ // True when a node has forward pointers whose encoding may widen.
+ let mut has_forward_ptrs = vec![];
let start = TriePtr::new(TrieNodeID::Node256 as u8, 0, 0).ptr();
- frontier.push_back(start);
-
- // first 32 bytes is reserved for the parent block hash
- // next 4 bytes is the local block identifier
- let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+ frontier.push_back(
+ u32::try_from(start)
+ .map_err(|_| Error::CorruptionError("Root pointer exceeds u32::MAX".into()))?,
+ );
while let Some(pointer) = frontier.pop_front() {
let (node, _node_hash) = self.get_nodetype(pointer)?;
- // calculate size
- let num_written = get_node_byte_len(node);
- ptr += num_written as u64;
// queue each child
+ let mut has_fwd = false;
if !node.is_leaf() {
for ptr in node.ptrs().iter() {
if !ptr.is_empty() && !is_backptr(ptr.id) {
- frontier.push_back(ptr.ptr());
+ let idx = ptr.ptr_as_u32()?;
+ frontier.push_back(idx);
+ forward_ptr_count = forward_ptr_count
+ .checked_add(1)
+ .ok_or_else(|| Error::OverflowError)?;
+ has_fwd = true;
}
}
}
+ has_forward_ptrs.push(has_fwd);
node_data.push(pointer);
- offsets.push(ptr as u32);
}
- assert_eq!(offsets.len(), node_data.len());
+ // step 2: repeatedly lay out nodes until serialized offsets stabilize
+ // The first 32 bytes are reserved for the parent block hash,
+ // and the next 4 bytes for the local block identifier.
+ let mut end_offset = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+ let mut offsets = Vec::with_capacity(node_data.len());
+ // Cached byte lengths: nodes without forward pointers have constant
+ // sizes across passes, so we only recompute nodes with forward ptrs.
+ let mut byte_lens = node_data
+ .iter()
+ .map(|p| {
+ let (node, _) = self.get_nodetype(*p)?;
+ u64::try_from(get_node_byte_len(node)).map_err(|_| Error::OverflowError)
+ })
+            .collect::<Result<Vec<_>, Error>>()?;
+ // The first pass replaces in-memory indices with serialized offsets.
+ // Afterwards, each mutable child pointer can widen from u32 to u64 at most once.
+ // A pass that changes offsets without introducing any new wide pointers is the final
+ // settling pass, so `forward_ptr_count + 2` bounds convergence.
+ let max_layout_passes = forward_ptr_count.saturating_add(2);
+ let mut converged = false;
+ for _ in 0..max_layout_passes {
+ offsets.clear();
+ let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+ for ((&pointer, &has_fwd), blen) in node_data
+ .iter()
+ .zip(has_forward_ptrs.iter())
+ .zip(byte_lens.iter_mut())
+ {
+ if has_fwd {
+ let (node, _) = self.get_nodetype(pointer)?;
+ *blen =
+ u64::try_from(get_node_byte_len(node)).map_err(|_| Error::OverflowError)?;
+ }
+ ptr += *blen;
+ offsets.push(ptr);
+ }
+ end_offset = ptr;
- // step 2: update ptrs in all nodes
- let mut i = 0;
- for node_data_ptr in node_data.iter() {
- let next_node = &mut self
- .data
- .get_mut(*node_data_ptr as usize)
- .ok_or_else(|| Error::CorruptionError("Miscalculated dump_consume pointer".into()))?
- .0;
- if !next_node.is_leaf() {
+ let mut changed = false;
+ let mut i = 0;
+ for (&node_data_ptr, &has_fwd) in node_data.iter().zip(has_forward_ptrs.iter()) {
+ if !has_fwd {
+ continue;
+ }
+ let next_node = &mut self
+ .data
+ .get_mut(usize::try_from(node_data_ptr).map_err(|_| Error::OverflowError)?)
+ .ok_or_else(|| {
+ Error::CorruptionError("Miscalculated dump_consume pointer".into())
+ })?
+ .0;
let ptrs = next_node.ptrs_mut();
for ptr in ptrs.iter_mut() {
if !ptr.is_empty() && !is_backptr(ptr.id) {
- ptr.ptr = *offsets.get(i).ok_or_else(|| {
+ let next_offset = *offsets.get(i).ok_or_else(|| {
Error::CorruptionError("Miscalculated dump_consume offsets".into())
})?;
+ if ptr.ptr != next_offset {
+ ptr.ptr = next_offset;
+ changed = true;
+ }
i += 1;
}
}
}
+ if !changed {
+ converged = true;
+ break;
+ }
+ }
+ if !converged {
+ return Err(Error::CorruptionError(format!(
+ "dump_consume layout did not converge after {max_layout_passes} passes"
+ )));
}
- // step 3: write out each node (now that they have the write ptrs)
+ // step 3: write out each node (now that they have stable write ptrs)
TrieRAM::write_trie_indirect(
f,
&node_data,
@@ -995,7 +1057,7 @@ impl TrieRAM {
&self.parent,
)?;
- Ok(ptr)
+ Ok(end_offset)
}
fn make_node_patch(
@@ -1078,23 +1140,23 @@ impl TrieRAM {
///
/// Returns Ok(len) to report number of bytes written
/// Returns Err(..) if we fail to write
-    fn dump_compressed_consume<F: Write + Seek>(
+    pub(crate) fn dump_compressed_consume<F: Write + Seek>(
         mut self,
         storage_tx: &mut TrieStorageTransaction<T>,
         f: &mut F,
     ) -> Result<u64, Error> {
- // step 1: write out each node in breadth-first order to get their ptr offsets
+ // step 1: determine breadth-first node order and any patch payloads
         let mut frontier: VecDeque<u32> = VecDeque::new();
let mut node_data = vec![];
- let mut offsets = vec![];
+ let mut forward_ptr_count = 0usize;
+ let mut has_forward_ptrs = vec![];
let start = TriePtr::new(TrieNodeID::Node256 as u8, 0, 0).ptr();
- frontier.push_back(start);
-
- // first 32 bytes is reserved for the parent block hash
- // next 4 bytes is the local block identifier
- let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+ frontier.push_back(
+ u32::try_from(start)
+ .map_err(|_| Error::CorruptionError("Root pointer exceeds u32::MAX".into()))?,
+ );
while let Some(pointer) = frontier.pop_front() {
let (node, node_hash) = self.get_nodetype(pointer)?;
@@ -1146,97 +1208,160 @@ impl TrieRAM {
None
};
- // calculate size
if let Some((_, patch_node)) = patch_node_opt.as_ref() {
- // IMPROVEMENT: don't store a copy of a node that was copied forward via
- // MARF::walk_cow(). Instead, store only the new ptrs in the copied node, and store
- // a pointer to the original node in the ancestral trie.
- // TRIEHASH_ENCODED_SIZE accounts for the trie hash bytes written before the patch
- trace!(
- "Patch node {:?} for {:?} to be written at {}",
- &patch_node,
- &node,
- ptr
- );
- let num_written = TRIEHASH_ENCODED_SIZE + patch_node.size();
- ptr += num_written as u64;
-
- let mut num_new_nodes = 0;
- if !node.is_leaf() {
- for ptr in node.ptrs().iter() {
- if !ptr.is_empty() && !is_backptr(ptr.id) {
- num_new_nodes += 1;
- }
- }
- }
- assert_eq!(num_new_nodes, patch_node.ptr_diff.len());
- } else {
- // IMPROVEMENT: don't store backptr block ID if it's 0
- trace!("Normal node {:?} to be written at {}", &node, ptr);
- let num_written = get_node_byte_len_compressed(node);
- ptr += num_written as u64;
+ // The BFS frontier and the convergence loop must visit the
+ // exact same forward children in the same order. Compare the
+ // chr() sequence of forward pointers in the full node against
+ // the patch diff to guarantee this.
+ let node_forward = node
+ .ptrs()
+ .iter()
+ .filter(|p| !p.is_empty() && !is_backptr(p.id))
+ .map(|p| p.chr());
+ let diff_forward = patch_node
+ .ptr_diff
+ .iter()
+ .filter(|p| !p.is_empty() && !is_backptr(p.id))
+ .map(|p| p.chr());
+ assert!(node_forward.eq(diff_forward));
}
// queue each child
+ let mut has_fwd = false;
if !node.is_leaf() {
for ptr in node.ptrs().iter() {
if !ptr.is_empty() && !is_backptr(ptr.id) {
- frontier.push_back(ptr.ptr());
+ let idx = u32::try_from(ptr.ptr()).map_err(|_| {
+ Error::CorruptionError(format!(
+ "In-memory node index {} exceeds u32::MAX",
+ ptr.ptr()
+ ))
+ })?;
+ frontier.push_back(idx);
+ forward_ptr_count = forward_ptr_count
+ .checked_add(1)
+ .ok_or_else(|| Error::OverflowError)?;
+ has_fwd = true;
}
}
}
+ // Nodes with forward ptrs need re-measurement each layout
+ // pass because child offsets can widen from u32 to u64.
if let Some((hash_bytes, patch)) = patch_node_opt.take() {
node_data.push(DumpPtr::Patch(pointer, hash_bytes, patch));
} else {
node_data.push(DumpPtr::Normal(pointer));
}
- offsets.push(ptr as u32);
+ has_forward_ptrs.push(has_fwd);
}
- assert_eq!(offsets.len(), node_data.len());
+ // step 2: repeatedly lay out nodes until serialized offsets stabilize
+ // The first 32 bytes are reserved for the parent block hash,
+ // and the next 4 bytes for the local block identifier.
+ let mut end_offset = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+ let mut offsets = vec![];
+ // Cached byte lengths: leaf / pointer-free / patch node sizes are
+ // constant across passes, so we only recompute non-leaf nodes.
+ let mut byte_lens = node_data
+ .iter()
+ .map(|dp| {
+ let byte_len = if let Some(patch) = dp.patch() {
+ TRIEHASH_ENCODED_SIZE + patch.size()
+ } else {
+ let (node, _) = self.get_nodetype(dp.ptr())?;
+ get_node_byte_len_compressed(node)
+ };
+ u64::try_from(byte_len).map_err(|_| Error::OverflowError)
+ })
+            .collect::<Result<Vec<_>, Error>>()?;
+ // The first pass replaces in-memory indices with serialized offsets.
+ // Afterwards, each mutable child pointer can widen from u32 to u64 at most once.
+ // A pass that changes offsets without introducing any new wide pointers is the final
+ // settling pass, so `forward_ptr_count + 2` bounds convergence.
+ let max_layout_passes = forward_ptr_count.saturating_add(2);
+ let mut converged = false;
+ for _pass in 0..max_layout_passes {
+ offsets.clear();
+ let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
+ for (node_data_ptr, (&has_fwd, blen)) in node_data
+ .iter()
+ .zip(has_forward_ptrs.iter().zip(byte_lens.iter_mut()))
+ {
+ if has_fwd {
+ let new_len = if let Some(patch) = node_data_ptr.patch() {
+ TRIEHASH_ENCODED_SIZE + patch.size()
+ } else {
+ let (node, _) = self.get_nodetype(node_data_ptr.ptr())?;
+ get_node_byte_len_compressed(node)
+ };
+ *blen = u64::try_from(new_len).map_err(|_| Error::OverflowError)?;
+ }
+ ptr += *blen;
+ offsets.push(ptr);
+ }
+ end_offset = ptr;
- // step 2: update ptrs in all nodes
- let mut i = 0;
- for node_data_ptr in node_data.iter_mut() {
- if let Some(patch) = node_data_ptr.patch_mut() {
- for ptr in patch.ptr_diff.iter_mut() {
- if !ptr.is_empty() && !is_backptr(ptr.id) {
- ptr.ptr = *offsets.get(i).ok_or_else(|| {
- Error::CorruptionError(
- "Miscalculated dump_compressed_consume offsets".into(),
- )
- })?;
- i += 1;
+ let mut changed = false;
+ let mut i = 0;
+ for (node_data_ptr, &has_fwd) in node_data.iter_mut().zip(has_forward_ptrs.iter()) {
+ if let Some(patch) = node_data_ptr.patch_mut() {
+ for ptr in patch.ptr_diff.iter_mut() {
+ if !ptr.is_empty() && !is_backptr(ptr.id) {
+ let next_offset = *offsets.get(i).ok_or_else(|| {
+ Error::CorruptionError(
+ "Miscalculated dump_compressed_consume offsets".into(),
+ )
+ })?;
+ if ptr.ptr != next_offset {
+ ptr.ptr = next_offset;
+ changed = true;
+ }
+ i += 1;
+ }
}
- }
- } else {
- let next_node = &mut self
- .data
- .get_mut(node_data_ptr.ptr() as usize)
- .ok_or_else(|| {
- Error::CorruptionError(
- "Miscalculated dump_compressed_consume pointer".into(),
+ } else if has_fwd {
+ let next_node = &mut self
+ .data
+ .get_mut(
+ usize::try_from(node_data_ptr.ptr())
+ .map_err(|_| Error::OverflowError)?,
)
- })?
- .0;
- if !next_node.is_leaf() {
+ .ok_or_else(|| {
+ Error::CorruptionError(
+ "Miscalculated dump_compressed_consume pointer".into(),
+ )
+ })?
+ .0;
let ptrs = next_node.ptrs_mut();
for ptr in ptrs.iter_mut() {
if !ptr.is_empty() && !is_backptr(ptr.id) {
- ptr.ptr = *offsets.get(i).ok_or_else(|| {
+ let next_offset = *offsets.get(i).ok_or_else(|| {
Error::CorruptionError(
"Miscalculated dump_compressed_consume offsets".into(),
)
})?;
+ if ptr.ptr != next_offset {
+ ptr.ptr = next_offset;
+ changed = true;
+ }
i += 1;
}
}
}
}
+ if !changed {
+ converged = true;
+ break;
+ }
+ }
+ if !converged {
+ return Err(Error::CorruptionError(format!(
+ "dump_compressed_consume layout did not converge after {max_layout_passes} passes"
+ )));
}
- // step 3: write out each node (now that they have the write ptrs)
+ // step 3: write out each node (now that they have stable write ptrs)
TrieRAM::write_trie_indirect_compressed(
f,
&node_data,
@@ -1245,14 +1370,14 @@ impl TrieRAM {
&self.parent,
)?;
- Ok(ptr)
+ Ok(end_offset)
}
/// load the trie from F.
/// The trie will have the same structure as the on-disk trie, but it may have nodes in a
/// different order.
     pub fn load<F: Read + Seek>(f: &mut F, bhh: &T) -> Result<TrieRAM<T>, Error> {
- let mut data = vec![];
+ let mut data: Vec<(TrieNodeType, TrieHash)> = vec![];
let mut frontier = VecDeque::new();
// read parent
@@ -1262,7 +1387,7 @@ impl TrieRAM {
let root_disk_ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4;
- let root_ptr = TriePtr::new(TrieNodeID::Node256 as u8, 0, root_disk_ptr as u32);
+ let root_ptr = TriePtr::new(TrieNodeID::Node256 as u8, 0, root_disk_ptr);
let (mut root_node, root_hash) = read_nodetype(f, &root_ptr)
.inspect_err(|e| error!("Failed to read root node info for {bhh:?}: {e:?}"))?;
@@ -1343,7 +1468,8 @@ impl TrieRAM {
/// Read a node's hash from the TrieRAM. ptr.ptr() is an array index.
pub fn read_node_hash(&self, ptr: &TriePtr) -> Result {
- let (_, node_trie_hash) = self.data.get(ptr.ptr() as usize).ok_or_else(|| {
+ let idx = ptr.ptr_as_usize()?;
+ let (_, node_trie_hash) = self.data.get(idx).ok_or_else(|| {
error!(
"TrieRAM: Failed to read node bytes: {} >= {}",
ptr.ptr(),
@@ -1388,7 +1514,8 @@ impl TrieRAM {
self.read_node_count += 1;
}
- if let Some(node) = self.data.get(ptr.ptr() as usize) {
+ let idx = ptr.ptr_as_usize()?;
+ if let Some(node) = self.data.get(idx) {
Ok(node.clone())
} else {
error!(
@@ -1405,7 +1532,7 @@ impl TrieRAM {
/// Store a node and its hash to the TrieRAM at the given slot.
pub fn write_nodetype(
&mut self,
- node_array_ptr: u32,
+ node_array_ptr: u64,
node: &TrieNodeType,
hash: TrieHash,
) -> Result<(), Error> {
@@ -1432,10 +1559,13 @@ impl TrieRAM {
}
}
- if let Some(existing_node) = self.data.get_mut(node_array_ptr as usize) {
+ let node_index = usize::try_from(node_array_ptr).map_err(|_| Error::NotFoundError)?;
+ if let Some(existing_node) = self.data.get_mut(node_index) {
*existing_node = (node.clone(), hash);
Ok(())
- } else if node_array_ptr == (self.data.len() as u32) {
+ } else if node_array_ptr
+ == u64::try_from(self.data.len()).map_err(|_| Error::OverflowError)?
+ {
self.data.push((node.clone(), hash));
self.total_bytes += get_node_byte_len(node);
Ok(())
@@ -1460,7 +1590,8 @@ impl TrieRAM {
);
// can only set the hash of an existing node
- if let Some(existing_node) = self.data.get_mut(node_array_ptr as usize) {
+ let node_index = usize::try_from(node_array_ptr).map_err(|_| Error::NotFoundError)?;
+ if let Some(existing_node) = self.data.get_mut(node_index) {
existing_node.1 = hash;
Ok(())
} else {
@@ -1470,8 +1601,8 @@ impl TrieRAM {
}
/// Get the next ptr value for a node to store.
- pub fn last_ptr(&mut self) -> Result {
- Ok(self.data.len() as u32)
+ pub fn last_ptr(&mut self) -> Result {
+ u64::try_from(self.data.len()).map_err(|_| Error::OverflowError)
}
#[cfg(test)]
@@ -1489,7 +1620,8 @@ impl TrieRAM {
impl NodeHashReader for TrieRAM {
fn read_node_hash_bytes(&mut self, ptr: &TriePtr, w: &mut W) -> Result<(), Error> {
- let (_, node_trie_hash) = self.data.get(ptr.ptr() as usize).ok_or_else(|| {
+ let idx = ptr.ptr_as_usize()?;
+ let (_, node_trie_hash) = self.data.get(idx).ok_or_else(|| {
error!(
"TrieRAM: Failed to read node bytes: {} >= {}",
ptr.ptr(),
@@ -1634,6 +1766,26 @@ pub struct TrieStorageTransientData {
/// Does this trie represent unconfirmed state?
unconfirmed: bool,
+
+ /// Snapshot metadata if this MARF is squashed.
+ squash_info: Option,
+}
+
+/// Snapshot metadata cached at open time for squashed MARFs.
+///
+/// Contains the archival root hash, squash root node hash, height, and
+/// block hash at which the MARF was squashed. This is populated once
+/// when the MARF is opened and used by the ancestor-hash computation to
+/// avoid opening pruned historical blocks.
+#[derive(Clone, Debug)]
+pub struct SquashInfo {
+ /// Archival MARF root hash committed to the chain at the squash height.
+ pub archival_marf_root_hash: TrieHash,
+ /// Root node hash of the squash trie. i.e. `hash(consensus_bytes(root) || children_content_hashes)`
+ /// `TrieHash::from_data(&[])` if not yet computed.
+ pub squash_root_node_hash: TrieHash,
+ /// Height at which the MARF was squashed.
+ pub height: u32,
}
// disk-backed Trie.
@@ -1679,6 +1831,10 @@ impl TrieStorageTransientData {
fn clear_block_id(&mut self) {
self.cur_block_id = None;
}
+
+ fn set_squash_info(&mut self, squash_info: Option) {
+ self.squash_info = squash_info;
+ }
}
pub struct ReopenedTrieStorageConnection<'a, T: MarfTrieId> {
@@ -1728,6 +1884,33 @@ impl<'a, T: MarfTrieId> ReopenedTrieStorageConnection<'a, T> {
}
impl TrieFileStorage {
+ /// Detect whether this MARF was produced by a squash operation and, if
+ /// so, cache the squash metadata [`SquashInfo`].
+ ///
+ /// The metadata is read from the `marf_squash_info` SQL table
+ fn init_squash_info(&mut self) -> Result<(), Error> {
+ let squash_info = match trie_sql::read_squash_info(&self.db)? {
+ Some((archival_marf_root_hash, squash_root_node_hash_opt, height)) => {
+ Some(SquashInfo {
+ archival_marf_root_hash,
+ // While creating a squash, this may still be empty.
+ squash_root_node_hash: squash_root_node_hash_opt
+ .unwrap_or_else(|| TrieHash::from_data(&[])),
+ height,
+ })
+ }
+ None => None,
+ };
+
+ self.data.set_squash_info(squash_info);
+ Ok(())
+ }
+
+ /// Returns cached squashing metadata, if present.
+ pub fn squash_info(&self) -> Option<&SquashInfo> {
+ self.data.squash_info.as_ref()
+ }
+
pub fn connection(&mut self) -> TrieStorageConnection<'_, T> {
TrieStorageConnection {
db: SqliteConnection::ConnRef(&self.db),
@@ -1768,6 +1951,8 @@ impl TrieFileStorage {
readonly: true,
unconfirmed: self.unconfirmed(),
+
+ squash_info: self.data.squash_info.clone(),
};
// perf note: should we attempt to clone the cache
let cache = TrieCache::default();
@@ -1881,17 +2066,21 @@ impl TrieFileStorage {
None
};
- let prev_schema_version = trie_sql::migrate_tables_if_needed::(&mut db)?;
- if prev_schema_version != trie_sql::SQL_MARF_SCHEMA_VERSION || marf_opts.force_db_migrate {
- if let Some(blobs) = blobs.as_mut() {
- if TrieFile::exists(&db_path)? {
- // migrate blobs out of the old DB
- blobs.export_trie_blobs::(&db, &db_path)?;
+ let prev_schema_version = trie_sql::migrate_tables_if_needed::(&mut db, readonly)?;
+ if !readonly {
+ if prev_schema_version != trie_sql::SQL_MARF_SCHEMA_VERSION
+ || marf_opts.force_db_migrate
+ {
+ if let Some(blobs) = blobs.as_mut() {
+ if TrieFile::exists(&db_path)? {
+ // migrate blobs out of the old DB
+ blobs.export_trie_blobs::(&db, &db_path)?;
+ }
}
}
- }
- if trie_sql::detect_partial_migration(&db)? {
- panic!("PARTIAL MIGRATION DETECTED! This is an irrecoverable error. You will need to restart your node from genesis.");
+ if trie_sql::detect_partial_migration(&db)? {
+ panic!("PARTIAL MIGRATION DETECTED! This is an irrecoverable error. You will need to restart your node from genesis.");
+ }
}
debug!(
@@ -1902,7 +2091,7 @@ impl TrieFileStorage {
let cache = TrieCache::new(&marf_opts.cache_strategy);
- let ret = TrieFileStorage {
+ let mut ret = TrieFileStorage {
db_path,
db,
cache,
@@ -1929,6 +2118,8 @@ impl TrieFileStorage {
readonly,
unconfirmed,
+
+ squash_info: None,
},
// used in testing in order to short-circuit block-height lookups
@@ -1937,6 +2128,7 @@ impl TrieFileStorage {
test_genesis_block: None,
};
+ ret.init_squash_info()?;
Ok(ret)
}
@@ -1992,7 +2184,7 @@ impl TrieFileStorage {
trace!("Make read-only view of TrieFileStorage: {}", &self.db_path);
// TODO: borrow self.uncommitted_writes; don't copy them
- let ret = TrieFileStorage {
+ let mut ret = TrieFileStorage {
db_path: self.db_path.clone(),
db,
blobs,
@@ -2019,6 +2211,8 @@ impl TrieFileStorage {
readonly: true,
unconfirmed: self.unconfirmed(),
+
+ squash_info: self.data.squash_info.clone(),
},
// used in testing in order to short-circuit block-height lookups
@@ -2027,6 +2221,7 @@ impl TrieFileStorage {
test_genesis_block: self.test_genesis_block.clone(),
};
+ ret.init_squash_info()?;
Ok(ret)
}
@@ -2062,7 +2257,7 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> {
let cache = TrieCache::default();
// TODO: borrow self.uncommitted_writes; don't copy them
- let ret = TrieFileStorage {
+ let mut ret = TrieFileStorage {
db_path: self.db_path.to_string(),
db,
blobs,
@@ -2089,6 +2284,8 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> {
readonly: true,
unconfirmed: self.unconfirmed(),
+
+ squash_info: self.data.squash_info.clone(),
},
// used in testing in order to short-circuit block-height lookups
@@ -2097,11 +2294,12 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> {
test_genesis_block: self.test_genesis_block.clone(),
};
+ ret.init_squash_info()?;
Ok(ret)
}
/// Run `cls` with a mutable reference to the inner trie blobs opt.
- fn with_trie_blobs(&mut self, cls: F) -> R
+ pub(crate) fn with_trie_blobs(&mut self, cls: F) -> R
where
F: FnOnce(&Connection, &mut Option<&mut TrieFile>) -> R,
{
@@ -2433,6 +2631,26 @@ impl TrieStorageConnection<'_, T> {
self.data.unconfirmed
}
+ /// Returns true when this storage represents a squashed MARF.
+ pub fn is_squashed(&self) -> bool {
+ self.data.squash_info.is_some()
+ }
+
+ /// Returns cached squashing metadata, if present.
+ pub fn squash_info(&self) -> Option<&SquashInfo> {
+ self.data.squash_info.as_ref()
+ }
+
+ /// Set cached squashing metadata for this storage connection.
+ pub(crate) fn set_squash_info(&mut self, squash_info: Option