diff --git a/changelog.d/marf-snapshot-framework.added b/changelog.d/marf-snapshot-framework.added new file mode 100644 index 00000000000..716eef3343d --- /dev/null +++ b/changelog.d/marf-snapshot-framework.added @@ -0,0 +1 @@ +Add snapshot framework for copying chainstate index and SPV side-tables into squashed output diff --git a/changelog.d/marf-squash-engine.added b/changelog.d/marf-squash-engine.added new file mode 100644 index 00000000000..ed709c40e49 --- /dev/null +++ b/changelog.d/marf-squash-engine.added @@ -0,0 +1 @@ +Add MARF squash engine (`squash_to_path`) and squash-aware trie lookups for root hashes and block heights diff --git a/changelog.d/marf-squash-foundation.added b/changelog.d/marf-squash-foundation.added new file mode 100644 index 00000000000..61dc95db510 --- /dev/null +++ b/changelog.d/marf-squash-foundation.added @@ -0,0 +1 @@ +Add squash metadata SQL tables and storage foundation for MARF squashing support diff --git a/changelog.d/marf-u64-offset-pointers.added b/changelog.d/marf-u64-offset-pointers.added new file mode 100644 index 00000000000..f71a49df8ce --- /dev/null +++ b/changelog.d/marf-u64-offset-pointers.added @@ -0,0 +1 @@ +Add support for u64 children pointer offsets in the MARF trie, using a mixed u32/u64 encoding with a 0x20 bit flag for backward compatibility \ No newline at end of file diff --git a/stackslib/src/burnchains/bitcoin/spv.rs b/stackslib/src/burnchains/bitcoin/spv.rs index c46baf007c0..3e88e24f5f2 100644 --- a/stackslib/src/burnchains/bitcoin/spv.rs +++ b/stackslib/src/burnchains/bitcoin/spv.rs @@ -56,7 +56,7 @@ const BLOCK_DIFFICULTY_INTERVAL: u32 = 14 * 24 * 60 * 60; // two weeks, in secon pub const SPV_DB_VERSION: &str = "3"; -const SPV_INITIAL_SCHEMA: &[&str] = &[ +pub(crate) const SPV_INITIAL_SCHEMA: &[&str] = &[ r#" CREATE TABLE headers( version INTEGER NOT NULL, @@ -75,7 +75,7 @@ const SPV_INITIAL_SCHEMA: &[&str] = &[ // unlike the `headers` table, this table will never be deleted from, since we use it to 
determine // whether or not newly-arrived headers represent a better chain than the best-known chain. The // only way to _replace_ a row is to find a header difficulty interval with a _higher_ work score. -const SPV_SCHEMA_2: &[&str] = &[r#" +pub(crate) const SPV_SCHEMA_2: &[&str] = &[r#" CREATE TABLE chain_work( interval INTEGER PRIMARY KEY, work TEXT NOT NULL -- 32-byte (256-bit) integer @@ -83,7 +83,7 @@ const SPV_SCHEMA_2: &[&str] = &[r#" "#]; // force the node to go and store the burnchain block header hash as well -const SPV_SCHEMA_3: &[&str] = &[ +pub(crate) const SPV_SCHEMA_3: &[&str] = &[ r#" DROP TABLE headers; "#, diff --git a/stackslib/src/chainstate/stacks/db/mod.rs b/stackslib/src/chainstate/stacks/db/mod.rs index 7d5c094c19b..821040bd05e 100644 --- a/stackslib/src/chainstate/stacks/db/mod.rs +++ b/stackslib/src/chainstate/stacks/db/mod.rs @@ -86,6 +86,7 @@ pub mod accounts; pub mod blocks; pub mod contracts; pub mod headers; +pub mod snapshot; pub mod transactions; pub mod unconfirmed; diff --git a/stackslib/src/chainstate/stacks/db/snapshot/common.rs b/stackslib/src/chainstate/stacks/db/snapshot/common.rs new file mode 100644 index 00000000000..c8e529d8d18 --- /dev/null +++ b/stackslib/src/chainstate/stacks/db/snapshot/common.rs @@ -0,0 +1,343 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +use std::collections::HashSet; +use std::time::Instant; + +use rusqlite::{params, Connection}; +use stacks_common::util::hash::to_hex; + +use crate::chainstate::stacks::index::marf::{MARFOpenOpts, MarfConnection, MARF}; +use crate::chainstate::stacks::index::storage::{TrieFileStorage, TrieHashCalculationMode}; +use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId}; + +/// A spec for copying a single table from the ATTACHed `src` database. +/// +/// The `source_sql` is the exact `SELECT` used to filter source rows. +/// Copy uses plain `INSERT ... SELECT` (no `OR IGNORE`) so that unexpected +/// pre-population in the destination fails loudly. +pub struct TableCopySpec { + pub table: &'static str, + /// The exact SELECT for the source side, e.g. + /// `"SELECT * FROM src.snapshots WHERE sortition_id IN (SELECT sortition_id FROM canonical_sortitions)"`. + pub source_sql: String, +} + +/// Clone table and index schemas from the source DB (via `sqlite_master`) into the +/// destination connection. This avoids duplicating any CREATE TABLE / ALTER TABLE / +/// CREATE INDEX statements and is always in sync with whatever migration version the +/// source is at. +/// +/// Expects the source DB to be ATTACHed as `src`. 
+pub fn clone_schemas_from_source(conn: &Connection, tables: &[&str]) -> Result<(), Error> { + let mut stmts: Vec = Vec::new(); + + for table in tables { + let sql: Option = conn + .query_row( + "SELECT sql FROM src.sqlite_master WHERE type='table' AND name=?1", + params![table], + |row| row.get(0), + ) + .ok(); + + if let Some(create_sql) = sql { + let safe_sql = if create_sql.contains("IF NOT EXISTS") { + create_sql + } else { + create_sql.replacen("CREATE TABLE", "CREATE TABLE IF NOT EXISTS", 1) + }; + stmts.push(safe_sql); + } + + let mut idx_stmt = conn + .prepare("SELECT sql FROM src.sqlite_master WHERE type='index' AND tbl_name=?1 AND sql IS NOT NULL") + .map_err(Error::SQLError)?; + let idx_rows = idx_stmt + .query_map(params![table], |row| row.get::<_, String>(0)) + .map_err(Error::SQLError)?; + for idx_sql in idx_rows { + let idx_sql = idx_sql.map_err(Error::SQLError)?; + let safe_sql = if idx_sql.contains("IF NOT EXISTS") { + idx_sql + } else { + idx_sql.replacen("CREATE INDEX", "CREATE INDEX IF NOT EXISTS", 1) + }; + stmts.push(safe_sql); + } + } + + for stmt in &stmts { + conn.execute_batch(stmt).map_err(Error::SQLError)?; + } + + Ok(()) +} + +/// Clone schemas only for tables that exist in the source DB. +/// Returns the list of tables that were actually cloned. +pub fn clone_optional_schemas_from_source( + conn: &Connection, + tables: &[&str], +) -> Result, Error> { + let mut present = Vec::new(); + for table in tables { + let exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM src.sqlite_master WHERE type='table' AND name=?1", + params![table], + |row| row.get(0), + ) + .map_err(Error::SQLError)?; + if exists { + clone_schemas_from_source(conn, &[table])?; + present.push(table.to_string()); + } + } + Ok(present) +} + +/// Check if a table exists in the given schema prefix (empty for main, "src" for attached). 
+pub fn table_exists(conn: &Connection, schema: &str, table: &str) -> bool { + let master = if schema.is_empty() { + "sqlite_master".to_string() + } else { + format!("{schema}.sqlite_master") + }; + conn.query_row( + &format!("SELECT COUNT(*) > 0 FROM {master} WHERE type='table' AND name=?1"), + params![table], + |row| row.get(0), + ) + .unwrap_or(false) +} + +/// Check bidirectional full-row EXCEPT equality. +/// Returns true if the two result sets are identical. +pub fn full_row_except_match(conn: &Connection, dst_sql: &str, src_sql: &str) -> bool { + let extra_in_dst: i64 = conn + .query_row( + &format!("SELECT COUNT(*) FROM ({dst_sql} EXCEPT {src_sql})"), + [], + |row| row.get(0), + ) + .unwrap_or(1); + let extra_in_src: i64 = conn + .query_row( + &format!("SELECT COUNT(*) FROM ({src_sql} EXCEPT {dst_sql})"), + [], + |row| row.get(0), + ) + .unwrap_or(1); + extra_in_dst == 0 && extra_in_src == 0 +} + +/// One-directional subset check: every row in `dst_sql` must exist in +/// `src_sql`, but `src_sql` may contain additional rows. Use this for +/// non-consensus tables that grow after the snapshot (e.g. signer_stats, +/// matured_rewards). +pub fn dst_subset_of_src(conn: &Connection, dst_sql: &str, src_sql: &str) -> bool { + let extra_in_dst: i64 = conn + .query_row( + &format!("SELECT COUNT(*) FROM ({dst_sql} EXCEPT {src_sql})"), + [], + |row| row.get(0), + ) + .unwrap_or(1); + extra_in_dst == 0 +} + +/// Execute a slice of copy specs inside the current transaction. +/// Returns a vec of (table_name, rows_copied). +pub fn execute_copy_specs( + conn: &Connection, + specs: &[TableCopySpec], +) -> Result, Error> { + let mut results = Vec::with_capacity(specs.len()); + for spec in specs { + let t = Instant::now(); + let sql = format!("INSERT INTO {} {}", spec.table, spec.source_sql); + let rows = conn.execute(&sql, []).map_err(Error::SQLError)? 
as u64; + info!( + " copy: {} ({} rows) in {:?}", + spec.table, + rows, + t.elapsed() + ); + results.push((spec.table, rows)); + } + Ok(results) +} + +/// Check an optional table's match status. +/// Returns None if absent in both, Some(false) if present in one but not other, +/// Some(true/false) from full-row EXCEPT if present in both. +pub fn check_optional_table_match( + conn: &Connection, + table: &str, + src_filter: Option<&str>, +) -> Option { + let in_dst = table_exists(conn, "", table); + let in_src = table_exists(conn, "src", table); + + match (in_dst, in_src) { + (false, false) => None, + (true, false) | (false, true) => Some(false), + (true, true) => { + let src_sql = match src_filter { + Some(filter) => format!("SELECT * FROM src.{table} {filter}"), + None => format!("SELECT * FROM src.{table}"), + }; + Some(full_row_except_match( + conn, + &format!("SELECT * FROM {table}"), + &src_sql, + )) + } + } +} + +/// Collect the hex-encoded `MARFValue` of every leaf in the squashed trie. +/// +/// Opens the MARF at `db_path` read-only, resolves the tip, and walks the +/// trie via `for_each_leaf`. Auto-detects external blobs. +/// +/// Returns `(tip_block_hash, leaf_value_hashes)`. +pub fn collect_leaf_value_hashes( + db_path: &str, +) -> Result<(T, HashSet), Error> { + let external_blobs = std::path::Path::new(&format!("{db_path}.blobs")).exists(); + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", external_blobs); + let storage = TrieFileStorage::open_readonly(db_path, open_opts)?; + let mut marf = MARF::::from_storage(storage); + let tip = trie_sql::get_latest_confirmed_block_hash::(marf.sqlite_conn())?; + + let mut hashes = HashSet::new(); + marf.with_conn(|conn| { + MARF::for_each_leaf(conn, &tip, |_hash, value| { + hashes.insert(to_hex(&value.to_vec())); + Ok(()) + }) + })?; + + Ok((tip, hashes)) +} + +/// Copy only the `__fork_storage` rows that are referenced by leaf nodes +/// in the squashed MARF trie. 
Non-canonical entries from forks are excluded. +/// +/// Opens the squashed MARF read-only and walks the trie via `for_each_leaf` +/// to collect canonical leaf value hashes, then copies only the matching +/// `__fork_storage` rows from the source. +/// +/// Falls back to a full copy if `marf_data` is absent (e.g. in test +/// fixtures that don't go through `squash_to_path`). +/// +/// Returns the number of rows copied. +pub fn copy_canonical_fork_storage( + conn: &Connection, + dst_path: &str, +) -> Result { + // Check if the source even has __fork_storage (test fixtures may not). + let src_has_table: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM src.sqlite_master WHERE type='table' AND name='__fork_storage'", + [], + |row| row.get(0), + ) + .unwrap_or(false); + + if !src_has_table { + info!(" copy_canonical_fork_storage: source has no __fork_storage, skipping"); + return Ok(0); + } + + // Ensure the destination table exists (clone schema from source). + clone_schemas_from_source(conn, &["__fork_storage"])?; + + // If marf_data doesn't exist, fall back to full copy. + let has_marf_data: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='marf_data'", + [], + |row| row.get(0), + ) + .unwrap_or(false); + + if !has_marf_data { + let rows = conn + .execute( + "INSERT OR REPLACE INTO __fork_storage SELECT * FROM src.__fork_storage", + [], + ) + .map_err(Error::SQLError)? as u64; + info!(" copy_canonical_fork_storage: no marf_data table, full copy ({rows} rows)"); + return Ok(rows); + } + + let t = Instant::now(); + + let (_tip, leaf_hashes) = collect_leaf_value_hashes::(dst_path)?; + let insert_count = leaf_hashes.len() as u64; + + // Build a temp table of canonical leaf value hashes. 
+ conn.execute_batch("CREATE TEMP TABLE __squash_leaf_values (value_hash TEXT PRIMARY KEY)") + .map_err(Error::SQLError)?; + + { + let mut stmt = conn + .prepare("INSERT OR IGNORE INTO __squash_leaf_values (value_hash) VALUES (?1)") + .map_err(Error::SQLError)?; + for hash in &leaf_hashes { + stmt.execute(params![hash]).map_err(Error::SQLError)?; + } + } + drop(leaf_hashes); + + info!( + " copy_canonical_fork_storage: extracted {insert_count} leaf hashes in {:?}", + t.elapsed() + ); + + // Copy only the referenced rows. + let t2 = Instant::now(); + let rows = conn + .execute( + "INSERT OR REPLACE INTO __fork_storage \ + SELECT f.* FROM src.__fork_storage f \ + INNER JOIN __squash_leaf_values lv ON f.value_hash = lv.value_hash", + [], + ) + .map_err(Error::SQLError)? as u64; + + conn.execute_batch("DROP TABLE IF EXISTS __squash_leaf_values") + .map_err(Error::SQLError)?; + + info!( + " copy_canonical_fork_storage: copied {rows} rows (from {insert_count} leaves) in {:?}", + t2.elapsed() + ); + + Ok(rows) +} + +pub fn checkpoint_destination_wal(conn: &Connection) -> Result<(), Error> { + let _: (i64, i64, i64) = conn + .query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?)) + }) + .map_err(Error::SQLError)?; + Ok(()) +} diff --git a/stackslib/src/chainstate/stacks/db/snapshot/index.rs b/stackslib/src/chainstate/stacks/db/snapshot/index.rs new file mode 100644 index 00000000000..1c5492df4ab --- /dev/null +++ b/stackslib/src/chainstate/stacks/db/snapshot/index.rs @@ -0,0 +1,695 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::time::Instant; + +use rusqlite::{params, Connection, OptionalExtension}; +use stacks_common::types::chainstate::StacksBlockId; + +use super::common::{ + checkpoint_destination_wal, clone_schemas_from_source, collect_leaf_value_hashes, + copy_canonical_fork_storage, dst_subset_of_src, execute_copy_specs, full_row_except_match, + table_exists, TableCopySpec, +}; +use crate::burnchains::PoxConstants; +use crate::chainstate::stacks::index::Error; + +/// Required table names that must be present in the squashed index DB. +const REQUIRED_TABLES: &[&str] = &[ + "db_config", + "block_headers", + "nakamoto_block_headers", + "payments", + "transactions", + "nakamoto_tenure_events", + "nakamoto_reward_sets", + "signer_stats", + "matured_rewards", + "burnchain_txids", + "epoch_transitions", + "staging_blocks", + "staging_microblocks", + "staging_microblocks_data", + // Schema fidelity: these tables exist in archival nodes but are expected + // unused in a Nakamoto-era GSS node. Included to prevent missing-table + // crashes if any code path references them. + "invalidated_microblocks_data", // Epoch 2.x block orphaning only (blocks.rs:2189) + "user_supporters", // Dead table: zero runtime references +]; + +/// Row-count statistics returned by [`copy_index_side_tables`]. 
+#[derive(Debug, Clone)] +pub struct IndexSideTableStats { + pub block_headers_rows: u64, + pub nakamoto_block_headers_rows: u64, + pub payments_rows: u64, + pub transactions_rows: u64, + pub nakamoto_tenure_events_rows: u64, + pub nakamoto_reward_sets_rows: u64, + pub signer_stats_rows: u64, + pub matured_rewards_rows: u64, + pub burnchain_txids_rows: u64, + pub epoch_transitions_rows: u64, + pub staging_blocks_rows: u64, + pub fork_storage_rows: u64, +} + +/// Validation result for index side tables in a squashed DB. +#[derive(Debug, Clone)] +pub struct IndexSideTableValidation { + pub tables_present: bool, + pub db_config_matches: bool, + pub fork_storage_match: bool, + pub block_headers_count_match: bool, + pub nakamoto_headers_count_match: bool, + pub payments_count_match: bool, + pub transactions_count_match: bool, + pub nakamoto_tenure_events_count_match: bool, + pub nakamoto_reward_sets_match: bool, + pub signer_stats_match: bool, + pub matured_rewards_match: bool, + pub burnchain_txids_match: bool, + pub epoch_transitions_match: bool, + pub staging_blocks_match: bool, + pub invalidated_microblocks_data_empty: bool, + pub transactions_no_extra_blocks: bool, + pub tenure_events_no_extra_blocks: bool, +} + +impl IndexSideTableValidation { + pub fn is_valid(&self) -> bool { + self.tables_present + && self.db_config_matches + && self.fork_storage_match + && self.block_headers_count_match + && self.nakamoto_headers_count_match + && self.payments_count_match + && self.transactions_count_match + && self.nakamoto_tenure_events_count_match + && self.nakamoto_reward_sets_match + && self.signer_stats_match + && self.matured_rewards_match + && self.burnchain_txids_match + && self.epoch_transitions_match + && self.staging_blocks_match + && self.invalidated_microblocks_data_empty + && self.transactions_no_extra_blocks + && self.tenure_events_no_extra_blocks + } +} + +/// Populate a temp table with the canonical block hashes from the squashed MARF's +/// 
`marf_squash_block_heights` metadata. +fn populate_canonical_blocks(conn: &Connection) -> Result<(), Error> { + conn.execute_batch("CREATE TEMP TABLE canonical_blocks (index_block_hash TEXT PRIMARY KEY)") + .map_err(Error::SQLError)?; + conn.execute( + "INSERT OR IGNORE INTO canonical_blocks (index_block_hash) \ + SELECT block_hash FROM marf_squash_block_heights", + [], + ) + .map_err(Error::SQLError)?; + Ok(()) +} + +/// Derive the maximum reward cycle from the canonical squashed tip's burn height. +fn derive_max_reward_cycle( + conn: &Connection, + first_burn_height: u64, + reward_cycle_len: u64, +) -> Result, Error> { + let tip_burn_height: Option = conn + .query_row( + "SELECT nh.burn_header_height \ + FROM marf_squash_block_heights mh \ + JOIN src.nakamoto_block_headers nh ON nh.index_block_hash = mh.block_hash \ + ORDER BY mh.height DESC LIMIT 1", + [], + |row| row.get::<_, i64>(0), + ) + .optional() + .map_err(Error::SQLError)? + .map(|h| h as u64); + + match tip_burn_height { + Some(tbh) => { + let cycle = PoxConstants::static_block_height_to_reward_cycle( + tbh, + first_burn_height, + reward_cycle_len, + ) + .ok_or_else(|| { + Error::CorruptionError(format!( + "cannot derive reward cycle: tip_burn_height={tbh}, \ + first_burn_height={first_burn_height}, reward_cycle_len={reward_cycle_len}" + )) + })?; + info!(" derive_max_reward_cycle: {cycle} (tip_burn_height={tbh})"); + Ok(Some(cycle)) + } + None => Ok(None), + } +} + +/// Build the copy specs for descriptor-driven index tables. +/// These are the uniform `index_block_hash IN canonical_blocks` tables. 
+fn index_copy_specs() -> Vec { + let cb = "SELECT index_block_hash FROM canonical_blocks"; + vec![ + TableCopySpec { + table: "block_headers", + source_sql: format!("SELECT * FROM src.block_headers WHERE index_block_hash IN ({cb})"), + }, + TableCopySpec { + table: "nakamoto_block_headers", + source_sql: format!( + "SELECT * FROM src.nakamoto_block_headers WHERE index_block_hash IN ({cb})" + ), + }, + TableCopySpec { + table: "payments", + source_sql: format!("SELECT * FROM src.payments WHERE index_block_hash IN ({cb})"), + }, + TableCopySpec { + table: "transactions", + source_sql: format!("SELECT * FROM src.transactions WHERE index_block_hash IN ({cb})"), + }, + TableCopySpec { + table: "nakamoto_tenure_events", + source_sql: format!( + "SELECT * FROM src.nakamoto_tenure_events WHERE block_id IN ({cb})" + ), + }, + TableCopySpec { + table: "nakamoto_reward_sets", + source_sql: format!( + "SELECT * FROM src.nakamoto_reward_sets WHERE index_block_hash IN ({cb})" + ), + }, + TableCopySpec { + table: "matured_rewards", + source_sql: format!( + "SELECT * FROM src.matured_rewards WHERE child_index_block_hash IN ({cb})" + ), + }, + TableCopySpec { + table: "burnchain_txids", + source_sql: format!( + "SELECT * FROM src.burnchain_txids WHERE index_block_hash IN ({cb})" + ), + }, + TableCopySpec { + table: "epoch_transitions", + source_sql: format!("SELECT * FROM src.epoch_transitions WHERE block_id IN ({cb})"), + }, + ] +} + +/// Copy required non-MARF tables from the source `index.sqlite` into the +/// squashed destination. Only canonical rows (determined by the squashed MARF's +/// `marf_squash_block_heights`) are included, excluding non-canonical fork data. 
+pub fn copy_index_side_tables( + src_path: &str, + dst_path: &str, + first_burn_height: u64, + reward_cycle_len: u64, +) -> Result { + let conn = Connection::open(dst_path).map_err(Error::SQLError)?; + + conn.execute("ATTACH DATABASE ?1 AS src", params![src_path]) + .map_err(Error::SQLError)?; + + conn.execute_batch("BEGIN IMMEDIATE") + .map_err(Error::SQLError)?; + + if let Err(e) = clone_schemas_from_source(&conn, REQUIRED_TABLES) { + let _ = conn.execute_batch("ROLLBACK"); + let _ = conn.execute_batch("DETACH DATABASE src"); + return Err(e); + } + + let result = copy_tables_inner(&conn, dst_path, first_burn_height, reward_cycle_len); + + match result { + Ok(stats) => { + conn.execute_batch("COMMIT").map_err(Error::SQLError)?; + conn.execute_batch("DETACH DATABASE src") + .map_err(Error::SQLError)?; + checkpoint_destination_wal(&conn)?; + Ok(stats) + } + Err(e) => { + let _ = conn.execute_batch("ROLLBACK"); + let _ = conn.execute_batch("DETACH DATABASE src"); + Err(e) + } + } +} + +fn copy_tables_inner( + conn: &Connection, + dst_path: &str, + first_burn_height: u64, + reward_cycle_len: u64, +) -> Result { + let total_start = Instant::now(); + + // Copy db_config verbatim. + let t = Instant::now(); + conn.execute( + "INSERT OR REPLACE INTO db_config SELECT * FROM src.db_config", + [], + ) + .map_err(Error::SQLError)?; + info!(" copy_side_tables: db_config done in {:?}", t.elapsed()); + + // Copy only canonical __fork_storage rows - the squashed MARF trie + // leaves reference these by value_hash. Non-canonical fork entries + // are excluded. + let fork_storage_rows = copy_canonical_fork_storage::(conn, dst_path)?; + + // Build canonical block set from squash metadata. + let t = Instant::now(); + populate_canonical_blocks(conn)?; + info!( + " copy_side_tables: canonical_blocks temp table built in {:?}", + t.elapsed() + ); + + // Execute descriptor-driven copies for uniform tables. 
+ let specs = index_copy_specs(); + let results = execute_copy_specs(conn, &specs)?; + + let get = |name: &str| -> u64 { + results + .iter() + .find(|(t, _)| *t == name) + .map(|(_, r)| *r) + .unwrap_or(0) + }; + + // Custom: signer_stats filtered by derived reward cycle. + let max_reward_cycle = derive_max_reward_cycle(conn, first_burn_height, reward_cycle_len)?; + + let t = Instant::now(); + let signer_stats_rows = match max_reward_cycle { + Some(cycle) => conn + .execute( + "INSERT INTO signer_stats SELECT * FROM src.signer_stats \ + WHERE reward_cycle <= ?1", + params![cycle as i64], + ) + .map_err(Error::SQLError)? as u64, + None => conn + .execute( + "INSERT INTO signer_stats SELECT * FROM src.signer_stats", + [], + ) + .map_err(Error::SQLError)? as u64, + }; + info!( + " copy_side_tables: signer_stats ({signer_stats_rows} rows) in {:?}", + t.elapsed() + ); + + // Custom: staging_blocks with semantic predicate. + let t = Instant::now(); + let staging_blocks_rows = conn + .execute( + "INSERT INTO staging_blocks \ + SELECT s.* FROM src.staging_blocks s \ + WHERE s.index_block_hash IN (SELECT index_block_hash FROM canonical_blocks) \ + AND s.processed = 1 \ + AND s.orphaned = 0", + [], + ) + .map_err(Error::SQLError)? 
as u64; + info!( + " copy_side_tables: staging_blocks ({staging_blocks_rows} rows) in {:?}", + t.elapsed() + ); + + conn.execute_batch("DROP TABLE IF EXISTS canonical_blocks") + .map_err(Error::SQLError)?; + + info!( + " copy_side_tables: all tables done in {:?}", + total_start.elapsed() + ); + + Ok(IndexSideTableStats { + block_headers_rows: get("block_headers"), + nakamoto_block_headers_rows: get("nakamoto_block_headers"), + payments_rows: get("payments"), + transactions_rows: get("transactions"), + nakamoto_tenure_events_rows: get("nakamoto_tenure_events"), + nakamoto_reward_sets_rows: get("nakamoto_reward_sets"), + signer_stats_rows, + matured_rewards_rows: get("matured_rewards"), + burnchain_txids_rows: get("burnchain_txids"), + epoch_transitions_rows: get("epoch_transitions"), + staging_blocks_rows, + fork_storage_rows, + }) +} + +/// Validate that the squashed index DB has the correct side tables by +/// comparing against the source. +pub fn validate_index_side_tables( + src_path: &str, + dst_path: &str, + first_burn_height: u64, + reward_cycle_len: u64, +) -> Result { + let conn = Connection::open(dst_path).map_err(Error::SQLError)?; + conn.execute("ATTACH DATABASE ?1 AS src", params![src_path]) + .map_err(Error::SQLError)?; + + // Check all required tables exist. + let tables_present = REQUIRED_TABLES.iter().all(|table| { + conn.query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1", + params![table], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(0) + > 0 + }); + + // db_config verbatim match. 
+ let db_config_matches = conn + .query_row( + "SELECT COUNT(*) FROM ( + SELECT version, mainnet, chain_id FROM db_config + EXCEPT + SELECT version, mainnet, chain_id FROM src.db_config + )", + [], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(1) + == 0 + && conn + .query_row( + "SELECT COUNT(*) FROM ( + SELECT version, mainnet, chain_id FROM src.db_config + EXCEPT + SELECT version, mainnet, chain_id FROM db_config + )", + [], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(1) + == 0; + + // __fork_storage: canonical-only copy. Validate against the canonical + // filtered source set (same leaf-hash filter used by copy_canonical_fork_storage). + let fork_storage_match = { + let dst_has = table_exists(&conn, "", "__fork_storage"); + let src_has = table_exists(&conn, "src", "__fork_storage"); + match (dst_has, src_has) { + (false, false) => true, + (true, true) => { + let has_marf_data = table_exists(&conn, "", "marf_data"); + + if has_marf_data { + let (_tip, leaf_hashes) = collect_leaf_value_hashes::(dst_path)?; + + conn.execute_batch( + "CREATE TEMP TABLE val_fork_leaf_values (value_hash TEXT PRIMARY KEY)", + ) + .map_err(Error::SQLError)?; + + { + let mut stmt = conn + .prepare( + "INSERT OR IGNORE INTO val_fork_leaf_values (value_hash) VALUES (?1)", + ) + .map_err(Error::SQLError)?; + for hash in &leaf_hashes { + stmt.execute([hash]).map_err(Error::SQLError)?; + } + } + + let ok = full_row_except_match( + &conn, + "SELECT * FROM __fork_storage", + "SELECT f.* FROM src.__fork_storage f \ + INNER JOIN val_fork_leaf_values lv ON f.value_hash = lv.value_hash", + ); + + conn.execute_batch("DROP TABLE IF EXISTS val_fork_leaf_values") + .map_err(Error::SQLError)?; + + ok + } else { + // fixture fallback, matching copy_canonical_fork_storage() + full_row_except_match( + &conn, + "SELECT * FROM __fork_storage", + "SELECT * FROM src.__fork_storage", + ) + } + } + _ => false, + } + }; + + // Build canonical block set. 
+ let _ = conn.execute_batch( + "CREATE TEMP TABLE IF NOT EXISTS val_canonical_blocks (index_block_hash TEXT PRIMARY KEY)", + ); + let _ = conn.execute( + "INSERT OR IGNORE INTO val_canonical_blocks (index_block_hash) \ + SELECT block_hash FROM marf_squash_block_heights", + [], + ); + + let cb = "SELECT index_block_hash FROM val_canonical_blocks"; + + // Count-match validations (cheaper for large tables). + let block_headers_count_match = { + let src_count: i64 = conn + .query_row( + &format!("SELECT COUNT(*) FROM src.block_headers WHERE index_block_hash IN ({cb})"), + [], + |row| row.get(0), + ) + .unwrap_or(-1); + let dst_count: i64 = conn + .query_row("SELECT COUNT(*) FROM block_headers", [], |row| row.get(0)) + .unwrap_or(-2); + src_count == dst_count + }; + + let nakamoto_headers_count_match = { + let src_count: i64 = conn + .query_row( + &format!( + "SELECT COUNT(*) FROM src.nakamoto_block_headers \ + WHERE index_block_hash IN ({cb})" + ), + [], + |row| row.get(0), + ) + .unwrap_or(-1); + let dst_count: i64 = conn + .query_row("SELECT COUNT(*) FROM nakamoto_block_headers", [], |row| { + row.get(0) + }) + .unwrap_or(-2); + src_count == dst_count + }; + + let payments_count_match = { + let src_count: i64 = conn + .query_row( + &format!("SELECT COUNT(*) FROM src.payments WHERE index_block_hash IN ({cb})"), + [], + |row| row.get(0), + ) + .unwrap_or(-1); + let dst_count: i64 = conn + .query_row("SELECT COUNT(*) FROM payments", [], |row| row.get(0)) + .unwrap_or(-2); + src_count == dst_count + }; + + let transactions_count_match = { + let src_count: i64 = conn + .query_row( + &format!("SELECT COUNT(*) FROM src.transactions WHERE index_block_hash IN ({cb})"), + [], + |row| row.get(0), + ) + .unwrap_or(-1); + let dst_count: i64 = conn + .query_row("SELECT COUNT(*) FROM transactions", [], |row| row.get(0)) + .unwrap_or(-2); + src_count == dst_count + }; + + let nakamoto_tenure_events_count_match = { + let src_count: i64 = conn + .query_row( + &format!( + "SELECT 
COUNT(*) FROM src.nakamoto_tenure_events WHERE block_id IN ({cb})" + ), + [], + |row| row.get(0), + ) + .unwrap_or(-1); + let dst_count: i64 = conn + .query_row("SELECT COUNT(*) FROM nakamoto_tenure_events", [], |row| { + row.get(0) + }) + .unwrap_or(-2); + src_count == dst_count + }; + + // No out-of-range rows leaked. + let transactions_no_extra_blocks = conn + .query_row( + &format!( + "SELECT COUNT(*) FROM transactions \ + WHERE index_block_hash NOT IN ({cb})" + ), + [], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(1) + == 0; + + let tenure_events_no_extra_blocks = conn + .query_row( + &format!( + "SELECT COUNT(*) FROM nakamoto_tenure_events \ + WHERE block_id NOT IN ({cb})" + ), + [], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(1) + == 0; + + // staging_blocks: bidirectional full-row EXCEPT against canonical source rows. + let staging_blocks_match = full_row_except_match( + &conn, + "SELECT * FROM staging_blocks", + &format!( + "SELECT s.* FROM src.staging_blocks s \ + WHERE s.index_block_hash IN ({cb}) \ + AND s.processed = 1 AND s.orphaned = 0" + ), + ); + + // Schema-fidelity tables should be empty. + let invalidated_microblocks_data_empty = conn + .query_row( + "SELECT COUNT(*) FROM invalidated_microblocks_data", + [], + |row| row.get::<_, i64>(0), + ) + .unwrap_or(1) + == 0; + + // Canonical-filtered tables: bidirectional full-row EXCEPT match. + let nakamoto_reward_sets_match = full_row_except_match( + &conn, + "SELECT * FROM nakamoto_reward_sets", + &format!("SELECT * FROM src.nakamoto_reward_sets WHERE index_block_hash IN ({cb})"), + ); + + let max_reward_cycle = derive_max_reward_cycle(&conn, first_burn_height, reward_cycle_len)?; + + // signer_stats is a non-consensus counter table whose only writer uses + // INSERT ... ON CONFLICT DO UPDATE SET blocks_signed = blocks_signed + 1. + // After the snapshot the source keeps incrementing, so we check: + // 1. every (public_key, reward_cycle) key in dst exists in filtered src + // 2. 
dst.blocks_signed <= src.blocks_signed + let signer_stats_match = { + let cycle_filter = match max_reward_cycle { + Some(cycle) => format!(" WHERE reward_cycle <= {cycle}"), + None => String::new(), + }; + // No fabricated keys. + let keys_ok = dst_subset_of_src( + &conn, + "SELECT public_key, reward_cycle FROM signer_stats", + &format!("SELECT public_key, reward_cycle FROM src.signer_stats{cycle_filter}"), + ); + // No inflated counters. + let counters_ok: i64 = conn + .query_row( + &format!( + "SELECT COUNT(*) FROM signer_stats d \ + JOIN src.signer_stats s \ + ON d.public_key = s.public_key AND d.reward_cycle = s.reward_cycle \ + WHERE d.blocks_signed > s.blocks_signed" + ), + [], + |row| row.get(0), + ) + .unwrap_or(1); + keys_ok && counters_ok == 0 + }; + + // matured_rewards is a non-consensus cache populated as new blocks + // trigger maturation of older canonical blocks' rewards. The source + // legitimately gains rows after the snapshot, so we only verify no + // fabricated rows exist in the destination. 
+ let matured_rewards_match = dst_subset_of_src( + &conn, + "SELECT * FROM matured_rewards", + &format!("SELECT * FROM src.matured_rewards WHERE child_index_block_hash IN ({cb})"), + ); + + let burnchain_txids_match = full_row_except_match( + &conn, + "SELECT * FROM burnchain_txids", + &format!("SELECT * FROM src.burnchain_txids WHERE index_block_hash IN ({cb})"), + ); + + let epoch_transitions_match = full_row_except_match( + &conn, + "SELECT * FROM epoch_transitions", + &format!("SELECT * FROM src.epoch_transitions WHERE block_id IN ({cb})"), + ); + + let _ = conn.execute_batch("DROP TABLE IF EXISTS val_canonical_blocks"); + + conn.execute_batch("DETACH DATABASE src") + .map_err(Error::SQLError)?; + + Ok(IndexSideTableValidation { + tables_present, + db_config_matches, + fork_storage_match, + block_headers_count_match, + nakamoto_headers_count_match, + payments_count_match, + transactions_count_match, + nakamoto_tenure_events_count_match, + nakamoto_reward_sets_match, + signer_stats_match, + matured_rewards_match, + burnchain_txids_match, + epoch_transitions_match, + staging_blocks_match, + invalidated_microblocks_data_empty, + transactions_no_extra_blocks, + tenure_events_no_extra_blocks, + }) +} diff --git a/stackslib/src/chainstate/stacks/db/snapshot/mod.rs b/stackslib/src/chainstate/stacks/db/snapshot/mod.rs new file mode 100644 index 00000000000..65d1af33978 --- /dev/null +++ b/stackslib/src/chainstate/stacks/db/snapshot/mod.rs @@ -0,0 +1,27 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +pub mod common; +pub mod index; +pub mod spv; + +#[cfg(test)] +mod tests; + +pub use index::{ + copy_index_side_tables, validate_index_side_tables, IndexSideTableStats, + IndexSideTableValidation, +}; +pub use spv::{copy_spv_headers, validate_spv_headers, SpvHeadersCopyStats, SpvHeadersValidation}; diff --git a/stackslib/src/chainstate/stacks/db/snapshot/spv.rs b/stackslib/src/chainstate/stacks/db/snapshot/spv.rs new file mode 100644 index 00000000000..ad82ca9f8a5 --- /dev/null +++ b/stackslib/src/chainstate/stacks/db/snapshot/spv.rs @@ -0,0 +1,222 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::fs; +use std::path::Path; + +use rusqlite::{params, Connection, OpenFlags}; + +use super::common::{ + clone_optional_schemas_from_source, clone_schemas_from_source, full_row_except_match, + table_exists, +}; +use crate::chainstate::stacks::db::snapshot::common::checkpoint_destination_wal; +use crate::chainstate::stacks::index::Error; + +/// Tables required in all headers.sqlite versions. +const REQUIRED_TABLES: &[&str] = &["headers", "db_config"]; + +/// Tables present only in SPV schema v2+ (may be absent in very old DBs). 
const OPTIONAL_TABLES: &[&str] = &[
    "chain_work", // Added in SPV_SCHEMA_2
];

/// Bitcoin difficulty chunk size (2016 blocks per difficulty interval).
const DIFFICULTY_CHUNK_SIZE: u32 = 2016;

/// Row-count statistics returned by [`copy_spv_headers`].
#[derive(Debug, Clone)]
pub struct SpvHeadersCopyStats {
    /// Number of rows copied into the destination `headers` table.
    pub headers_rows: u64,
    /// Number of rows copied into the destination `chain_work` table
    /// (0 when the source predates SPV_SCHEMA_2 and has no such table).
    pub chain_work_rows: u64,
}

/// Validation result for a copied headers.sqlite.
/// Each flag covers one table-level check; see [`validate_spv_headers`].
#[derive(Debug, Clone)]
pub struct SpvHeadersValidation {
    pub headers_match: bool,
    pub chain_work_match: bool,
    pub db_config_match: bool,
    pub no_extra_headers: bool,
}

impl SpvHeadersValidation {
    /// True only when every individual check passed.
    pub fn is_valid(&self) -> bool {
        self.headers_match && self.chain_work_match && self.db_config_match && self.no_extra_headers
    }
}

/// Copy canonical SPV headers up to `burn_height` into a new destination.
///
/// Any pre-existing destination file is deleted first, then the source is
/// ATTACHed and all row copies run inside a single IMMEDIATE transaction
/// (rolled back on failure, so a partial copy never commits).
///
/// Returns an error if the source file does not exist.
pub fn copy_spv_headers(
    src_path: &str,
    dst_path: &str,
    burn_height: u32,
) -> Result<SpvHeadersCopyStats, Error> {
    if !Path::new(src_path).exists() {
        return Err(Error::IOError(std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("SPV headers source not found: {src_path}"),
        )));
    }

    if let Some(parent) = Path::new(dst_path).parent() {
        fs::create_dir_all(parent).map_err(Error::IOError)?;
    }

    // Remove stale destination to ensure a clean copy.
    let dst = Path::new(dst_path);
    if dst.exists() {
        fs::remove_file(dst).map_err(Error::IOError)?;
    }

    let conn = Connection::open(dst_path).map_err(Error::SQLError)?;

    // Match the journal mode used by stacks-node (WAL) so the database can be
    // opened later without needing write access to switch modes.
    conn.pragma_update(None, "journal_mode", "WAL")
        .map_err(Error::SQLError)?;

    conn.execute("ATTACH DATABASE ?1 AS src", params![src_path])
        .map_err(Error::SQLError)?;

    conn.execute_batch("BEGIN IMMEDIATE")
        .map_err(Error::SQLError)?;

    let result = copy_spv_headers_inner(&conn, burn_height);

    match result {
        Ok(stats) => {
            conn.execute_batch("COMMIT").map_err(Error::SQLError)?;
            conn.execute_batch("DETACH DATABASE src")
                .map_err(Error::SQLError)?;
            checkpoint_destination_wal(&conn)?;
            Ok(stats)
        }
        Err(e) => {
            // Best-effort cleanup: the copy already failed, so a ROLLBACK or
            // DETACH error here must not mask the original error `e`.
            let _ = conn.execute_batch("ROLLBACK");
            let _ = conn.execute_batch("DETACH DATABASE src");
            Err(e)
        }
    }
}

/// Clone the schemas and copy the rows from the attached `src` database.
/// Runs inside the transaction opened by [`copy_spv_headers`]; errors here
/// trigger the caller's ROLLBACK path.
fn copy_spv_headers_inner(
    conn: &Connection,
    burn_height: u32,
) -> Result<SpvHeadersCopyStats, Error> {
    clone_schemas_from_source(conn, REQUIRED_TABLES)?;
    // chain_work may be absent in very old (pre-SPV_SCHEMA_2) source DBs.
    let optional_present = clone_optional_schemas_from_source(conn, OPTIONAL_TABLES)?;
    let has_chain_work = optional_present.contains(&"chain_work".to_string());

    conn.execute("INSERT INTO db_config SELECT * FROM src.db_config", [])
        .map_err(Error::SQLError)?;

    let headers_rows = conn
        .execute(
            "INSERT INTO headers SELECT * FROM src.headers WHERE height <= ?1",
            params![burn_height],
        )
        .map_err(Error::SQLError)? as u64;

    // Copy chain_work for complete intervals only.
    // Interval i covers heights [i*2016, (i+1)*2016 - 1]; it is complete
    // once its last height is <= burn_height.
    let chain_work_rows = if has_chain_work {
        conn.execute(
            "INSERT INTO chain_work SELECT * FROM src.chain_work \
             WHERE (interval + 1) * ?1 - 1 <= ?2",
            params![DIFFICULTY_CHUNK_SIZE, burn_height],
        )
        .map_err(Error::SQLError)? as u64
    } else {
        0
    };

    Ok(SpvHeadersCopyStats {
        headers_rows,
        chain_work_rows,
    })
}

/// Validate a copied headers.sqlite against its source.
pub fn validate_spv_headers(
    src_path: &str,
    dst_path: &str,
    burn_height: u32,
) -> Result<SpvHeadersValidation, Error> {
    if !Path::new(src_path).exists() {
        return Err(Error::IOError(std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("SPV headers source not found: {src_path}"),
        )));
    }
    if !Path::new(dst_path).exists() {
        return Err(Error::NotFoundError);
    }

    // Read-only open: validation must never mutate the destination.
    let conn = Connection::open_with_flags(dst_path, OpenFlags::SQLITE_OPEN_READ_ONLY)
        .map_err(Error::SQLError)?;

    conn.execute("ATTACH DATABASE ?1 AS src", params![src_path])
        .map_err(Error::SQLError)?;

    let db_config_match = full_row_except_match(
        &conn,
        "SELECT * FROM db_config",
        "SELECT * FROM src.db_config",
    );

    // burn_height is a u32, so interpolating it into the SQL text is safe.
    let headers_match = full_row_except_match(
        &conn,
        "SELECT * FROM headers",
        &format!("SELECT * FROM src.headers WHERE height <= {burn_height}"),
    );

    let has_src_cw = table_exists(&conn, "src", "chain_work");
    let has_dst_cw = table_exists(&conn, "", "chain_work");

    // chain_work must exist on both sides or on neither; a one-sided table
    // means the copy diverged from the source schema.
    let chain_work_match = match (has_src_cw, has_dst_cw) {
        (false, false) => true,
        (true, true) => full_row_except_match(
            &conn,
            "SELECT * FROM chain_work",
            &format!(
                "SELECT * FROM src.chain_work \
                 WHERE (interval + 1) * {DIFFICULTY_CHUNK_SIZE} - 1 <= {burn_height}"
            ),
        ),
        _ => false,
    };

    // No headers above burn_height in destination.
    // unwrap_or(1) fails closed: a failed count query reads as "extra rows".
    let extra_above: i64 = conn
        .query_row(
            &format!("SELECT COUNT(*) FROM headers WHERE height > {burn_height}"),
            [],
            |row| row.get(0),
        )
        .unwrap_or(1);
    let no_extra_headers = extra_above == 0;

    conn.execute_batch("DETACH DATABASE src")
        .map_err(Error::SQLError)?;

    Ok(SpvHeadersValidation {
        headers_match,
        chain_work_match,
        db_config_match,
        no_extra_headers,
    })
}

// ===== new file: stackslib/src/chainstate/stacks/db/snapshot/tests/mod.rs =====

use rusqlite::{params, Connection};
use tempfile::tempdir;

use super::index::{copy_index_side_tables, validate_index_side_tables};
use crate::burnchains::bitcoin::spv::{
    SPV_DB_VERSION, SPV_INITIAL_SCHEMA, SPV_SCHEMA_2, SPV_SCHEMA_3,
};
use crate::chainstate::nakamoto::{
    NAKAMOTO_CHAINSTATE_SCHEMA_1, NAKAMOTO_CHAINSTATE_SCHEMA_2, NAKAMOTO_CHAINSTATE_SCHEMA_3,
    NAKAMOTO_CHAINSTATE_SCHEMA_4, NAKAMOTO_CHAINSTATE_SCHEMA_5, NAKAMOTO_CHAINSTATE_SCHEMA_6,
    NAKAMOTO_CHAINSTATE_SCHEMA_7, NAKAMOTO_CHAINSTATE_SCHEMA_8,
};
use crate::chainstate::stacks::db::{
    CHAINSTATE_INDEXES, CHAINSTATE_INITIAL_SCHEMA, CHAINSTATE_SCHEMA_2, CHAINSTATE_SCHEMA_3,
    CHAINSTATE_SCHEMA_4, CHAINSTATE_SCHEMA_5,
};

/// Create a source `index.sqlite` with the full chainstate schema by replaying
/// the real migration pipeline. Returns the connection for inserting test data.
fn create_source_db(path: &std::path::Path) -> Connection {
    let conn = Connection::open(path).unwrap();

    for cmd in CHAINSTATE_INITIAL_SCHEMA {
        conn.execute_batch(cmd).unwrap();
    }
    conn.execute(
        "INSERT INTO db_config (version, mainnet, chain_id) VALUES (?1, ?2, ?3)",
        params!["1", 1i64, 1i64],
    )
    .unwrap();

    // Apply all migrations in order (same as StacksChainState::apply_schema_migrations).
    // NOTE: epoch-2 and Nakamoto migrations interleave; this ordering mirrors
    // the production pipeline and must not be rearranged.
    for cmd in CHAINSTATE_SCHEMA_2 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in CHAINSTATE_SCHEMA_3 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_1.iter() {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_2 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_3 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_4 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_5 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in CHAINSTATE_SCHEMA_4 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_6 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in CHAINSTATE_SCHEMA_5 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_7 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in NAKAMOTO_CHAINSTATE_SCHEMA_8 {
        conn.execute_batch(cmd).unwrap();
    }
    for cmd in CHAINSTATE_INDEXES {
        conn.execute_batch(cmd).unwrap();
    }

    conn
}

/// Create a destination DB that simulates a squashed MARF by adding the
/// `marf_squash_block_heights` table with the given canonical block hashes.
/// Heights are assigned by enumeration order (first hash gets height 0).
fn create_dest_db_with_canonical_blocks(path: &std::path::Path, canonical: &[&str]) {
    let conn = Connection::open(path).unwrap();
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS marf_squash_block_heights (block_hash TEXT NOT NULL, height INTEGER NOT NULL)",
    )
    .unwrap();
    for (h, bh) in canonical.iter().enumerate() {
        conn.execute(
            "INSERT INTO marf_squash_block_heights (block_hash, height) VALUES (?1, ?2)",
            params![bh, h as i64],
        )
        .unwrap();
    }
}

/// Insert a block_headers row at the given height.
fn insert_block_header(conn: &Connection, height: u32, suffix: &str) {
    // Only identity columns (block_hash/index_block_hash/consensus_hash) vary
    // with `suffix`; everything else is a fixed placeholder.
    conn.execute(
        "INSERT INTO block_headers (version, total_burn, total_work, proof, parent_block, \
         parent_microblock, parent_microblock_sequence, tx_merkle_root, state_index_root, \
         microblock_pubkey_hash, block_hash, index_block_hash, block_height, index_root, \
         consensus_hash, burn_header_hash, burn_header_height, burn_header_timestamp, \
         parent_block_id, cost, block_size) \
         VALUES (1,'0','0','p','par','mb',0,'mr','sr','mph',?1,?2,?3,'ir',?4,'bhh',?3,0,'pid','0','0')",
        params![
            format!("bh{suffix}"),
            format!("ibh{suffix}"),
            height,
            format!("ch{suffix}"),
        ],
    )
    .unwrap();
}

/// Insert a payment row at the given height.
fn insert_payment(conn: &Connection, height: u32, suffix: &str) {
    conn.execute(
        "INSERT INTO payments (address, block_hash, consensus_hash, parent_block_hash, \
         parent_consensus_hash, coinbase, tx_fees_anchored, tx_fees_streamed, stx_burns, \
         burnchain_commit_burn, burnchain_sortition_burn, miner, stacks_block_height, \
         index_block_hash, vtxindex, recipient, schedule_type) \
         VALUES ('addr',?1,?2,'pbh','pch','100','0','0','0',0,0,1,?3,?4,0,NULL,'Epoch2')",
        params![
            format!("bh{suffix}"),
            format!("ch{suffix}"),
            height,
            format!("ibh{suffix}"),
        ],
    )
    .unwrap();
}

/// Insert a transaction row for the given index_block_hash.
fn insert_transaction(conn: &Connection, id: i64, ibh: &str) {
    conn.execute(
        "INSERT INTO transactions (id, txid, index_block_hash, tx_hex, result) \
         VALUES (?1, ?2, ?3, '0x00', 'ok')",
        params![id, format!("tx{id}"), ibh],
    )
    .unwrap();
}

/// End-to-end copy + validate over a small chain where the canonical set is a
/// strict subset of the source blocks.
#[test]
fn test_copy_index_side_tables_round_trip() {
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    // Insert test data at heights 1, 2, 3.
    for (h, s) in [(1, "1"), (2, "2"), (3, "3")] {
        insert_block_header(&conn, h, s);
        insert_payment(&conn, h, s);
        insert_transaction(&conn, h as i64, &format!("ibh{s}"));
    }
    conn.execute(
        "INSERT INTO nakamoto_tenure_events (tenure_id_consensus_hash, prev_tenure_id_consensus_hash, \
         burn_view_consensus_hash, cause, block_hash, block_id, coinbase_height, num_blocks_confirmed) \
         VALUES ('ch1','ch0','bv1',0,'bh1','ibh1',1,0)",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO nakamoto_reward_sets (index_block_hash, reward_set) VALUES ('ibh1','{}')",
        [],
    )
    .unwrap();
    drop(conn);

    // Destination: canonical blocks are ibh1, ibh2 (height 0, 1) - ibh3 is NOT canonical.
    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1", "ibh2"]);

    // Copy: only canonical blocks ibh1 and ibh2 should be included.
    let stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert_eq!(stats.block_headers_rows, 2, "2 canonical block_headers");
    assert_eq!(stats.payments_rows, 2, "2 canonical payments");
    assert_eq!(stats.transactions_rows, 2, "2 canonical transactions");
    assert_eq!(
        stats.nakamoto_tenure_events_rows, 1,
        "1 tenure event for ibh1"
    );
    assert_eq!(stats.nakamoto_reward_sets_rows, 1);

    // Validate.
    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert!(
        validation.is_valid(),
        "validation should pass: {validation:?}"
    );
    assert!(validation.tables_present);
    assert!(validation.db_config_matches);
    assert!(validation.block_headers_count_match);
    assert!(validation.payments_count_match);
    assert!(validation.transactions_count_match);
    assert!(validation.nakamoto_tenure_events_count_match);
    assert!(validation.transactions_no_extra_blocks);
    assert!(validation.tenure_events_no_extra_blocks);
    assert!(validation.staging_blocks_match);
    assert!(validation.invalidated_microblocks_data_empty);
}

/// Fork rows (same height, non-canonical index_block_hash) must not be copied.
#[test]
fn test_copy_excludes_fork_rows() {
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    // Insert canonical block at height 1.
    insert_block_header(&conn, 1, "1_canonical");
    insert_transaction(&conn, 1, "ibh1_canonical");
    // Insert fork block at same height 1 (different consensus hash).
    insert_block_header(&conn, 1, "1_fork");
    insert_transaction(&conn, 2, "ibh1_fork");
    drop(conn);

    // Only ibh1_canonical is in the canonical set.
    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1_canonical"]);

    let stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    // Only canonical block should be copied, not the fork.
    assert_eq!(stats.block_headers_rows, 1, "only canonical block_headers");
    assert_eq!(stats.transactions_rows, 1, "only canonical transactions");

    // Validate passes - fork rows excluded.
    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();
    assert!(
        validation.is_valid(),
        "validation should pass without fork rows: {validation:?}"
    );
}

/// A row injected into the destination for a non-canonical block must trip
/// both the "no extra blocks" and the count-match checks.
#[test]
fn test_validate_index_side_tables_detects_extra_rows() {
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    // Insert one block + transaction.
    insert_block_header(&conn, 1, "1");
    insert_transaction(&conn, 1, "ibh1");
    drop(conn);

    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);

    let _stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    // Inject a transaction for a block NOT in the canonical set.
    {
        let conn = Connection::open(&dst_path).unwrap();
        conn.execute(
            "INSERT INTO transactions VALUES (99, 'tx_bad', 'ibh_UNKNOWN', '0x00', 'ok')",
            [],
        )
        .unwrap();
    }

    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert!(
        !validation.transactions_no_extra_blocks,
        "should detect extra block"
    );
    assert!(
        !validation.transactions_count_match,
        "count should mismatch"
    );
    assert!(!validation.is_valid(), "validation must fail");
}

/// Even with an empty canonical set, the copy must materialize every
/// schema-fidelity table in the destination.
#[test]
fn test_all_required_tables_exist() {
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src.sqlite");
    let _conn = create_source_db(&src_path);
    drop(_conn);

    let dst_path = dir.path().join("dst.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &[]);

    copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1).unwrap();

    let dst_conn = Connection::open(&dst_path).unwrap();

    // Verify all required tables exist including the newly added ones.
    for table in &[
        "staging_blocks",
        "staging_microblocks",
        "staging_microblocks_data",
        "invalidated_microblocks_data",
        "user_supporters",
    ] {
        let count: i64 = dst_conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
                params![table],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(count, 1, "table '{table}' should exist");
    }

    // invalidated_microblocks_data should be empty.
    let count: i64 = dst_conn
        .query_row(
            "SELECT COUNT(*) FROM invalidated_microblocks_data",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(count, 0, "invalidated_microblocks_data should be empty");
}

/// Insert a minimal nakamoto_block_headers row into the source DB.
fn insert_nakamoto_header(conn: &Connection, ibh: &str, burn_height: u32) {
    conn.execute(
        "INSERT INTO nakamoto_block_headers ( \
         block_height, index_root, burn_header_hash, burn_header_height, \
         burn_header_timestamp, block_size, version, chain_length, burn_spent, \
         consensus_hash, parent_block_id, tx_merkle_root, state_index_root, \
         miner_signature, signer_signature, signer_bitvec, header_type, \
         block_hash, index_block_hash, cost, total_tenure_cost, tenure_changed, \
         tenure_tx_fees, vrf_proof, timestamp, burn_view, height_in_tenure, \
         total_tenure_size) \
         VALUES (?1,'ir','bhh',?2,0,'0',1,?1,0,'ch','pid','mr','sr','ms','ss','bv', \
         'nakamoto','bh',?3,'0','0',0,'0',NULL,0,NULL,0,0)",
        params![burn_height, burn_height, ibh],
    )
    .unwrap();
}

#[test]
fn test_signer_stats_validates_with_source_drift() {
    // signer_stats is a non-consensus counter table. After the squash, the
    // source node continues running and increments blocks_signed for existing
    // (public_key, reward_cycle) pairs. Validation should still pass because
    // we only check that the destination keys are a subset of the source keys.
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    insert_block_header(&conn, 1, "1");
    // Nakamoto header so derive_max_reward_cycle can compute a cycle.
    insert_nakamoto_header(&conn, "ibh1", 10);
    conn.execute(
        "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
         VALUES ('pk1', 1, 5), ('pk2', 1, 3)",
        [],
    )
    .unwrap();
    drop(conn);

    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);

    // Copy with first_burn_height=0, reward_cycle_len=1 so max_cycle = 10/1 = 10,
    // which covers the test row at reward_cycle=1.
    let _stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    // Simulate source drift: increment blocks_signed counters.
    {
        let src_conn = Connection::open(&src_path).unwrap();
        src_conn
            .execute("UPDATE signer_stats SET blocks_signed = 100", [])
            .unwrap();
    }

    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert!(
        validation.signer_stats_match,
        "signer_stats should pass with drifted counter values"
    );
    assert!(
        validation.is_valid(),
        "overall validation should pass: {validation:?}"
    );
}

#[test]
fn test_signer_stats_detects_fabricated_keys() {
    // If the destination has a (public_key, reward_cycle) pair that doesn't
    // exist in the source at all, validation must fail.
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    insert_block_header(&conn, 1, "1");
    insert_nakamoto_header(&conn, "ibh1", 10);
    conn.execute(
        "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
         VALUES ('pk1', 1, 5)",
        [],
    )
    .unwrap();
    drop(conn);

    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);

    let _stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    // Inject a fabricated signer key into the destination.
    {
        let dst_conn = Connection::open(&dst_path).unwrap();
        dst_conn
            .execute(
                "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
                 VALUES ('pk_FAKE', 1, 99)",
                [],
            )
            .unwrap();
    }

    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert!(
        !validation.signer_stats_match,
        "signer_stats should fail with fabricated key"
    );
    assert!(!validation.is_valid());
}

#[test]
fn test_signer_stats_detects_inflated_counters() {
    // If the destination has blocks_signed > source for an existing key,
    // validation must fail (the counter is monotonically increasing).
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    insert_block_header(&conn, 1, "1");
    insert_nakamoto_header(&conn, "ibh1", 10);
    conn.execute(
        "INSERT INTO signer_stats (public_key, reward_cycle, blocks_signed) \
         VALUES ('pk1', 1, 5)",
        [],
    )
    .unwrap();
    drop(conn);

    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);

    let _stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    // Inflate the counter in the destination beyond the source value.
    {
        let dst_conn = Connection::open(&dst_path).unwrap();
        dst_conn
            .execute(
                "UPDATE signer_stats SET blocks_signed = 999 WHERE public_key = 'pk1'",
                [],
            )
            .unwrap();
    }

    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert!(
        !validation.signer_stats_match,
        "signer_stats should fail with inflated counter"
    );
    assert!(!validation.is_valid());
}

#[test]
fn test_matured_rewards_validates_with_source_growth() {
    // matured_rewards is a non-consensus cache. After the squash, new blocks
    // on the source trigger maturation of rewards for older canonical blocks,
    // adding rows that match the canonical filter. Validation should still
    // pass because we only check dst ⊆ filtered-src.
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    insert_block_header(&conn, 1, "1");
    insert_nakamoto_header(&conn, "ibh1", 10);
    conn.execute(
        "INSERT INTO matured_rewards (address, recipient, vtxindex, coinbase, \
         tx_fees_anchored, tx_fees_streamed_confirmed, tx_fees_streamed_produced, \
         child_index_block_hash, parent_index_block_hash) \
         VALUES ('addr1', NULL, 0, '100', '0', '0', '0', 'ibh1', 'pibh0')",
        [],
    )
    .unwrap();
    drop(conn);

    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);

    let _stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    // Simulate source growth: add a new matured_rewards row for a canonical block.
    {
        let src_conn = Connection::open(&src_path).unwrap();
        src_conn
            .execute(
                "INSERT INTO matured_rewards (address, recipient, vtxindex, coinbase, \
                 tx_fees_anchored, tx_fees_streamed_confirmed, tx_fees_streamed_produced, \
                 child_index_block_hash, parent_index_block_hash) \
                 VALUES ('addr2', NULL, 0, '0', '0', '0', '0', 'ibh1', 'pibh0')",
                [],
            )
            .unwrap();
    }

    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert!(
        validation.matured_rewards_match,
        "matured_rewards should pass when source has grown"
    );
    assert!(
        validation.is_valid(),
        "overall validation should pass: {validation:?}"
    );
}

#[test]
fn test_matured_rewards_detects_fabricated_rows() {
    // If the destination has a matured_rewards row not in the filtered source,
    // validation must fail.
    let dir = tempdir().unwrap();
    let src_path = dir.path().join("src_index.sqlite");
    let conn = create_source_db(&src_path);

    insert_block_header(&conn, 1, "1");
    insert_nakamoto_header(&conn, "ibh1", 10);
    drop(conn);

    let dst_path = dir.path().join("dst_index.sqlite");
    create_dest_db_with_canonical_blocks(&dst_path, &["ibh1"]);

    let _stats =
        copy_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    // Inject a fabricated matured_rewards row.
    {
        let dst_conn = Connection::open(&dst_path).unwrap();
        dst_conn
            .execute(
                "INSERT INTO matured_rewards (address, recipient, vtxindex, coinbase, \
                 tx_fees_anchored, tx_fees_streamed_confirmed, tx_fees_streamed_produced, \
                 child_index_block_hash, parent_index_block_hash) \
                 VALUES ('addr_FAKE', NULL, 0, '999', '0', '0', '0', 'ibh1', 'pibh0')",
                [],
            )
            .unwrap();
    }

    let validation =
        validate_index_side_tables(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0, 1)
            .unwrap();

    assert!(
        !validation.matured_rewards_match,
        "matured_rewards should fail with fabricated row"
    );
    assert!(!validation.is_valid());
}

/// Create a source headers.sqlite (SPV v3 schema with chain_work).
/// Replays the real SPV migration pipeline: INITIAL -> SCHEMA_2 -> SCHEMA_3.
+fn create_spv_headers_db(path: &std::path::Path) -> Connection { + let conn = Connection::open(path).unwrap(); + for cmd in SPV_INITIAL_SCHEMA { + conn.execute_batch(cmd).unwrap(); + } + for cmd in SPV_SCHEMA_2 { + conn.execute_batch(cmd).unwrap(); + } + for cmd in SPV_SCHEMA_3 { + conn.execute_batch(cmd).unwrap(); + } + conn.execute( + &format!("INSERT INTO db_config (version) VALUES ('{SPV_DB_VERSION}')"), + [], + ) + .unwrap(); + conn +} + +#[test] +fn test_spv_headers_copy_and_validate() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src_headers.sqlite"); + let dst_path = dir.path().join("dst_headers.sqlite"); + + let src = create_spv_headers_db(&src_path); + // Insert headers at heights 0..=5000. + for h in 0..=5000u32 { + src.execute( + "INSERT INTO headers VALUES (1, 'prev', 'merkle', 0, 0, 0, ?1, ?2)", + params![h, format!("hash_{h}")], + ) + .unwrap(); + } + // Insert chain_work for intervals 0, 1, 2. + src.execute("INSERT INTO chain_work VALUES (0, 'work_0')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (1, 'work_1')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (2, 'work_2')", []) + .unwrap(); + drop(src); + + let stats = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 4500) + .unwrap(); + + // Headers 0..=4500 = 4501 rows. 
+ assert_eq!(stats.headers_rows, 4501); + // Interval 0: (0+1)*2016-1=2015 <= 4500 ✓ + // Interval 1: (1+1)*2016-1=4031 <= 4500 ✓ + // Interval 2: (2+1)*2016-1=6047 <= 4500 ✗ + assert_eq!(stats.chain_work_rows, 2); + + let v = super::spv::validate_spv_headers( + src_path.to_str().unwrap(), + dst_path.to_str().unwrap(), + 4500, + ) + .unwrap(); + assert!(v.is_valid(), "validation failed: {v:?}"); +} + +#[test] +fn test_spv_headers_chain_work_boundary_0() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src.sqlite"); + let dst_path = dir.path().join("dst.sqlite"); + + let src = create_spv_headers_db(&src_path); + src.execute( + "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, 0, 'h0')", + [], + ) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (0, 'w0')", []) + .unwrap(); + drop(src); + + let stats = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 0) + .unwrap(); + + assert_eq!(stats.headers_rows, 1); + // (0+1)*2016-1 = 2015 > 0 -> no intervals included. + assert_eq!(stats.chain_work_rows, 0); +} + +#[test] +fn test_spv_headers_chain_work_boundary_2015() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src.sqlite"); + let dst_path = dir.path().join("dst.sqlite"); + + let src = create_spv_headers_db(&src_path); + for h in 0..=2015u32 { + src.execute( + "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)", + params![h, format!("h{h}")], + ) + .unwrap(); + } + src.execute("INSERT INTO chain_work VALUES (0, 'w0')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (1, 'w1')", []) + .unwrap(); + drop(src); + + let stats = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 2015) + .unwrap(); + + assert_eq!(stats.headers_rows, 2016); + // (0+1)*2016-1 = 2015 <= 2015 ✓ -> 1 interval. 
+ assert_eq!(stats.chain_work_rows, 1); +} + +#[test] +fn test_spv_headers_chain_work_boundary_2016() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src.sqlite"); + let dst_path = dir.path().join("dst.sqlite"); + + let src = create_spv_headers_db(&src_path); + for h in 0..=2016u32 { + src.execute( + "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)", + params![h, format!("h{h}")], + ) + .unwrap(); + } + src.execute("INSERT INTO chain_work VALUES (0, 'w0')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (1, 'w1')", []) + .unwrap(); + drop(src); + + let stats = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 2016) + .unwrap(); + + assert_eq!(stats.headers_rows, 2017); + // (0+1)*2016-1 = 2015 <= 2016 ✓ + // (1+1)*2016-1 = 4031 <= 2016 ✗ + assert_eq!(stats.chain_work_rows, 1); +} + +#[test] +fn test_spv_headers_chain_work_boundary_4031() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src.sqlite"); + let dst_path = dir.path().join("dst.sqlite"); + + let src = create_spv_headers_db(&src_path); + for h in 0..=4031u32 { + src.execute( + "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)", + params![h, format!("h{h}")], + ) + .unwrap(); + } + src.execute("INSERT INTO chain_work VALUES (0, 'w0')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (1, 'w1')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (2, 'w2')", []) + .unwrap(); + drop(src); + + let stats = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 4031) + .unwrap(); + + assert_eq!(stats.headers_rows, 4032); + // (0+1)*2016-1 = 2015 <= 4031 ✓ + // (1+1)*2016-1 = 4031 <= 4031 ✓ + // (2+1)*2016-1 = 6047 <= 4031 ✗ + assert_eq!(stats.chain_work_rows, 2); +} + +#[test] +fn test_spv_headers_chain_work_boundary_4032() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src.sqlite"); + let dst_path = 
dir.path().join("dst.sqlite"); + + let src = create_spv_headers_db(&src_path); + for h in 0..=4032u32 { + src.execute( + "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)", + params![h, format!("h{h}")], + ) + .unwrap(); + } + src.execute("INSERT INTO chain_work VALUES (0, 'w0')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (1, 'w1')", []) + .unwrap(); + src.execute("INSERT INTO chain_work VALUES (2, 'w2')", []) + .unwrap(); + drop(src); + + let stats = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 4032) + .unwrap(); + + assert_eq!(stats.headers_rows, 4033); + // (2+1)*2016-1 = 6047 <= 4032 ✗ -> still only 2 intervals. + assert_eq!(stats.chain_work_rows, 2); +} + +#[test] +fn test_spv_headers_missing_source_is_error() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("nonexistent.sqlite"); + let dst_path = dir.path().join("dst.sqlite"); + + let result = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 100); + assert!(result.is_err(), "missing source should error"); +} + +#[test] +fn test_spv_headers_validate_source_present_dest_missing_fails() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src.sqlite"); + let dst_path = dir.path().join("nonexistent.sqlite"); + + create_spv_headers_db(&src_path); + + let result = super::spv::validate_spv_headers( + src_path.to_str().unwrap(), + dst_path.to_str().unwrap(), + 100, + ); + assert!(result.is_err()); +} + +#[test] +fn test_spv_headers_validate_both_absent_is_error() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("no_src.sqlite"); + let dst_path = dir.path().join("no_dst.sqlite"); + + let result = super::spv::validate_spv_headers( + src_path.to_str().unwrap(), + dst_path.to_str().unwrap(), + 100, + ); + assert!(result.is_err(), "both absent should error"); +} + +#[test] +fn test_spv_headers_stale_destination_errors_when_source_absent() { + let dir = 
tempdir().unwrap(); + let src_path = dir.path().join("nonexistent.sqlite"); + let dst_path = dir.path().join("stale_headers.sqlite"); + + // Create a stale destination file (simulates reused output dir). + std::fs::write(&dst_path, b"stale data").unwrap(); + assert!(dst_path.exists()); + + let result = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 100); + assert!( + result.is_err(), + "missing source should error even with stale destination" + ); +} + +#[test] +fn test_spv_headers_reused_output_dir() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("src.sqlite"); + let dst_path = dir.path().join("dst.sqlite"); + + let src = create_spv_headers_db(&src_path); + for h in 0..=10u32 { + src.execute( + "INSERT INTO headers VALUES (1, 'p', 'm', 0, 0, 0, ?1, ?2)", + params![h, format!("h{h}")], + ) + .unwrap(); + } + drop(src); + + // First copy. + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 10) + .unwrap(); + + // Second copy into the same destination (reused output dir). + let stats = + super::spv::copy_spv_headers(src_path.to_str().unwrap(), dst_path.to_str().unwrap(), 10) + .unwrap(); + + assert_eq!(stats.headers_rows, 11); + + // Validate to confirm no duplicate rows. 
+    let v = super::spv::validate_spv_headers(
+        src_path.to_str().unwrap(),
+        dst_path.to_str().unwrap(),
+        10,
+    )
+    .unwrap();
+    assert!(
+        v.is_valid(),
+        "reused output dir should produce valid copy: {v:?}"
+    );
+}
diff --git a/stackslib/src/chainstate/stacks/index/bits.rs b/stackslib/src/chainstate/stacks/index/bits.rs
index 8eb4647c966..e68167fcfd0 100644
--- a/stackslib/src/chainstate/stacks/index/bits.rs
+++ b/stackslib/src/chainstate/stacks/index/bits.rs
@@ -22,7 +22,7 @@ use sha2::{Digest, Sha512_256 as TrieHasher};
 use crate::chainstate::stacks::index::node::{
     clear_compressed, clear_ctrl_bits, is_compressed, ptrs_fmt, ConsensusSerializable, TrieNode,
     TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, TrieNodePatch, TrieNodeType,
-    TriePtr, TRIEPTR_SIZE,
+    TriePtr,
 };
 use crate::chainstate::stacks::index::storage::TrieStorageConnection;
 use crate::chainstate::stacks::index::{BlockMap, Error, MarfTrieId, TrieLeaf};
@@ -101,10 +101,10 @@ fn node_id_to_ptr_count(node_id: u8) -> usize {
     }
 }
 
-/// Helper to determine the maximum number of bytes a Trie node's child pointers will take to encode.
+/// Helper to determine how many bytes a Trie node's child pointers will take to encode.
 pub fn get_ptrs_byte_len(ptrs: &[TriePtr]) -> usize {
     let node_id_len = 1;
-    node_id_len + TRIEPTR_SIZE * ptrs.len()
+    node_id_len + ptrs.iter().map(TriePtr::encoded_size).sum::<usize>()
 }
 
 /// Helper to determine a sparse TriePtr list's bitmap size, given the node ID's numeric value.
@@ -238,7 +238,9 @@ pub fn ptrs_from_bytes( ptrs_start_disk_ptr ); - let mut bytes = vec![0u8; 1 + num_ptrs * TRIEPTR_SIZE]; + let max_ptr_size = TriePtr::max_encoded_size(); + let patch_overhead = max_ptr_size + 1; + let mut bytes = vec![0u8; 1 + num_ptrs * max_ptr_size + patch_overhead]; let mut offset = 0; loop { let nr = match r.read( @@ -269,7 +271,11 @@ pub fn ptrs_from_bytes( offset = offset.checked_add(nr).ok_or_else(|| Error::OverflowError)?; } - trace!("Read bytes ({}) {}", bytes.len(), &to_hex(&bytes)); + let bytes = bytes + .get(0..offset) + .ok_or_else(|| Error::CorruptionError("Failed to trim bytes array".into()))?; + + trace!("Read bytes ({}) {}", bytes.len(), &to_hex(bytes)); // verify the id is correct let nid = bytes @@ -445,15 +451,30 @@ pub fn ptrs_from_bytes( } } else { // ptrs list is not compressed - // iterate over the read-in bytes in chunks of TRIEPTR_SIZE and store them - // to `ptrs_buf` + // iterate over the read-in bytes one pointer at a time since each encoded pointer + // can independently choose u32 or u64 storage. trace!("Node {} has uncompressed ptrs", cleared_nid); - let reading_ptrs = ptr_bytes - .chunks_exact(TRIEPTR_SIZE) - .zip(ptrs_buf.iter_mut()); - for (next_ptr_bytes, ptr_slot) in reading_ptrs { - *ptr_slot = TriePtr::from_bytes(next_ptr_bytes); + let mut cursor = 0; + for ptr_slot in ptrs_buf.iter_mut() { + let ptr_id = *ptr_bytes + .get(cursor) + .ok_or_else(|| Error::CorruptionError("ptr_bytes runs short".into()))?; + *ptr_slot = TriePtr::from_bytes( + ptr_bytes + .get(cursor..) 
+ .ok_or_else(|| Error::CorruptionError("ptr_bytes runs short".into()))?, + ); + cursor = cursor + .checked_add(TriePtr::encoded_size_for_id(ptr_id)) + .ok_or_else(|| Error::OverflowError)?; } + let seek_target = u64::try_from(cursor) + .ok() + .and_then(|c| c.checked_add(1)) + .and_then(|c| ptrs_start_disk_ptr.checked_add(c)) + .ok_or(Error::OverflowError)?; + r.seek(SeekFrom::Start(seek_target)) + .inspect_err(|e| error!("Failed to seek to the end of the uncompressed ptrs: {e:?}"))?; } Ok(clear_compressed(*nid)) @@ -568,8 +589,7 @@ pub fn read_node_hash_bytes( f: &mut F, ptr: &TriePtr, ) -> Result<[u8; TRIEHASH_ENCODED_SIZE], Error> { - f.seek(SeekFrom::Start(ptr.ptr() as u64)) - .map_err(Error::IOError)?; + f.seek(SeekFrom::Start(ptr.ptr())).map_err(Error::IOError)?; read_hash_bytes(f) } @@ -601,8 +621,7 @@ pub fn read_nodetype( f: &mut F, ptr: &TriePtr, ) -> Result<(TrieNodeType, TrieHash), Error> { - f.seek(SeekFrom::Start(ptr.ptr() as u64)) - .map_err(Error::IOError)?; + f.seek(SeekFrom::Start(ptr.ptr())).map_err(Error::IOError)?; trace!("read_nodetype at {:?}", ptr); read_nodetype_at_head(f, ptr.id()) } @@ -615,8 +634,7 @@ pub fn read_nodetype_nohash( f: &mut F, ptr: &TriePtr, ) -> Result { - f.seek(SeekFrom::Start(ptr.ptr() as u64)) - .map_err(Error::IOError)?; + f.seek(SeekFrom::Start(ptr.ptr())).map_err(Error::IOError)?; trace!("read_nodetype_nohash at {:?}", ptr); read_nodetype_at_head_nohash(f, ptr.id()) } diff --git a/stackslib/src/chainstate/stacks/index/file.rs b/stackslib/src/chainstate/stacks/index/file.rs index a23cdf9cbff..76d187fdb7b 100644 --- a/stackslib/src/chainstate/stacks/index/file.rs +++ b/stackslib/src/chainstate/stacks/index/file.rs @@ -106,6 +106,15 @@ impl TrieFile { } } + /// Durably sync blob data to disk. + /// No-op for RAM-backed TrieFiles. + pub fn sync_data(&mut self) -> Result<(), io::Error> { + if let TrieFile::Disk(ref mut data) = self { + data.fd.sync_data()?; + } + Ok(()) + } + /// Get a copy of the path to this TrieFile. 
/// If in RAM, then the path will be ":memory:" pub fn get_path(&self) -> String { @@ -326,8 +335,7 @@ impl<'a> TrieFileNodeHashReader<'a> { impl NodeHashReader for TrieFileNodeHashReader<'_> { fn read_node_hash_bytes(&mut self, ptr: &TriePtr, w: &mut W) -> Result<(), Error> { let trie_offset = self.file.get_trie_offset(self.db, self.block_id)?; - self.file - .seek(SeekFrom::Start(trie_offset + (ptr.ptr() as u64)))?; + self.file.seek(SeekFrom::Start(trie_offset + (ptr.ptr())))?; let hash_buff = read_hash_bytes(self.file)?; w.write_all(&hash_buff).map_err(|e| e.into()) } @@ -362,7 +370,7 @@ impl TrieFile { ptr: &TriePtr, ) -> Result { let offset = self.get_trie_offset(db, block_id)?; - self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?; + self.seek(SeekFrom::Start(offset + (ptr.ptr())))?; let hash_buff = read_hash_bytes(self)?; Ok(TrieHash(hash_buff)) } @@ -376,7 +384,7 @@ impl TrieFile { ptr: &TriePtr, ) -> Result<(TrieNodeType, TrieHash), Error> { let offset = self.get_trie_offset(db, block_id)?; - self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?; + self.seek(SeekFrom::Start(offset + (ptr.ptr())))?; read_nodetype_at_head(self, ptr.id()) } @@ -388,7 +396,7 @@ impl TrieFile { ptr: &TriePtr, ) -> Result { let offset = self.get_trie_offset(db, block_id)?; - self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?; + self.seek(SeekFrom::Start(offset + (ptr.ptr())))?; read_nodetype_at_head_nohash(self, ptr.id()) } @@ -401,7 +409,7 @@ impl TrieFile { ptr: &TriePtr, ) -> Result { let (offset, _length) = trie_sql::get_external_trie_offset_length_by_bhh(db, bhh)?; - self.seek(SeekFrom::Start(offset + (ptr.ptr() as u64)))?; + self.seek(SeekFrom::Start(offset + (ptr.ptr())))?; let hash_buff = read_hash_bytes(self)?; Ok(TrieHash(hash_buff)) } @@ -443,10 +451,7 @@ impl TrieFile { self.seek(SeekFrom::Start(offset))?; self.write_all(buf)?; self.flush()?; - - if let TrieFile::Disk(ref mut data) = self { - data.fd.sync_data()?; - } + self.sync_data()?; Ok(offset) } } 
diff --git a/stackslib/src/chainstate/stacks/index/marf.rs b/stackslib/src/chainstate/stacks/index/marf.rs index 9a12f23f121..a34c5674c9e 100644 --- a/stackslib/src/chainstate/stacks/index/marf.rs +++ b/stackslib/src/chainstate/stacks/index/marf.rs @@ -13,6 +13,8 @@ // // You should have received a copy of the GNU General Public License // along with this program. If not, see . +#[cfg(test)] +use std::collections::HashSet; use std::ops::DerefMut; #[cfg(any(test, feature = "testing"))] use std::sync::LazyLock; @@ -24,6 +26,9 @@ use rusqlite::{Connection, Transaction}; use stacks_common::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; use stacks_common::util::hash::Sha512Trunc256Sum; +pub use super::squash::{ + SquashStats, MARF_SQUASHED_BLOCK_ROOT_HASH_KEY, MARF_SQUASH_HEIGHT_KEY, MARF_SQUASH_ROOT_KEY, +}; use super::storage::ReopenedTrieStorageConnection; use crate::chainstate::stacks::index::bits::{get_leaf_hash, get_node_hash}; use crate::chainstate::stacks::index::node::{ @@ -31,10 +36,13 @@ use crate::chainstate::stacks::index::node::{ TrieCursor, TrieNode256, TrieNodeID, TrieNodeType, TriePtr, }; use crate::chainstate::stacks::index::storage::{ - TrieFileStorage, TrieHashCalculationMode, TrieStorageConnection, TrieStorageTransaction, + SquashInfo, TrieFileStorage, TrieHashCalculationMode, TrieStorageConnection, + TrieStorageTransaction, }; use crate::chainstate::stacks::index::trie::Trie; -use crate::chainstate::stacks::index::{Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof}; +use crate::chainstate::stacks::index::{ + trie_sql, Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof, +}; use crate::util_lib::db::Error as db_error; pub const BLOCK_HASH_TO_HEIGHT_MAPPING_KEY: &str = "__MARF_BLOCK_HASH_TO_HEIGHT"; @@ -101,7 +109,7 @@ pub struct MARF { } pub struct MarfTransaction<'a, T: MarfTrieId> { - storage: TrieStorageTransaction<'a, T>, + pub(crate) storage: TrieStorageTransaction<'a, T>, open_chain_tip: &'a mut Option>, } @@ -409,6 +417,42 @@ 
impl<'a, T: MarfTrieId> MarfTransaction<'a, T> { self.storage.sqlite_tx_mut() } + /// Commit the SQL transaction without flushing TrieRAM to disk. + /// + /// Used by `squash_to_path` which writes the blob directly, bypassing + /// the normal TrieRAM flush path. + pub(crate) fn commit_squash(mut self) -> Result<(), Error> { + if self.storage.readonly() { + return Err(Error::ReadOnlyError); + } + self.open_chain_tip.take(); + self.storage.drop_extending_trie(); + self.storage.commit_tx(); + Ok(()) + } + + /// Set squash metadata on the underlying storage connection. + /// + /// Called during `squash_to_path` so the ancestor-hash computation can + /// use stored root hashes instead of opening pruned historical blocks. + pub(crate) fn set_squash_info(&mut self, info: Option) { + self.storage.set_squash_info(info); + } + + /// Write a trie node directly to the uncommitted TrieRAM at `slot`. + /// + /// Used by `squash_to_path` to populate the TrieRAM with a + /// structure-preserving deep copy of the source trie, bypassing the + /// normal walk-cow insertion path. + pub(crate) fn write_node_direct( + &mut self, + slot: u64, + node: &TrieNodeType, + hash: TrieHash, + ) -> Result<(), Error> { + self.storage.write_nodetype(slot, node, hash) + } + /// Reopen this MARF transaction with readonly storage. /// NOTE: any pending operations in the SQLite transaction _will not_ /// have materialized in the reopened view. @@ -596,6 +640,22 @@ impl<'a, T: MarfTrieId> MarfTransaction<'a, T> { Ok(()) } + pub fn insert_raw(&mut self, path: TrieHash, marf_leaf: TrieLeaf) -> Result<(), Error> { + if self.storage.readonly() { + return Err(Error::ReadOnlyError); + } + let block_hash = match self.open_chain_tip { + None => Err(Error::WriteNotBegunError), + Some(WriteChainTip { ref block_hash, .. 
}) => Ok(block_hash.clone()), + }?; + + let (cur_block_hash, cur_block_id) = self.storage.get_cur_block_and_id(); + let result = MARF::insert_leaf(&mut self.storage, &block_hash, &path, &marf_leaf); + self.storage + .open_block_maybe_id(&cur_block_hash, cur_block_id)?; + result + } + /// Begin extending the MARF to an unconfirmed trie. The resulting trie will have a block hash /// equal to MARF::make_unconfirmed_block_hash(chain_tip) to avoid collision /// and block hash reuse. @@ -746,16 +806,17 @@ impl MARF { } } + /// Copy a node forward from an ancestor trie by converting its inline children into + /// back-pointers. Returns the node hash (leaf hash for leaves, empty hash for internal + /// nodes whose hash will be computed at commit time). fn node_copy_update(node: &mut TrieNodeType, child_block_id: u32) -> TrieHash { - let hash = match node { + match node { TrieNodeType::Leaf(leaf) => get_leaf_hash(leaf), _ => { node_copy_update_ptrs(node.ptrs_mut(), child_block_id); TrieHash::EMPTY } - }; - - hash + } } /// Given a node, and the chr of one of its children, go find the last instance of that child in @@ -1317,6 +1378,16 @@ impl MARF { } } + // In a squashed MARF, OWN_BLOCK_HEIGHT_KEY returns the squash + // height H for every block in the squashed range. Use the + // side-table when available. + if storage.squash_info().is_some() { + if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)? + { + return Ok(Some(h)); + } + } + let marf_value = if block_hash == current_block_hash { MARF::get_by_key(storage, current_block_hash, OWN_BLOCK_HEIGHT_KEY)? } else { @@ -1659,6 +1730,21 @@ impl MARF { self.storage.connection() } + /// Build the set of trusted squash trie root-node hashes from this + /// MARF's squash metadata. Returns an empty set for archival + /// (non-squashed) MARFs. 
+ #[cfg(test)] + pub fn trusted_squash_node_hashes(&self) -> HashSet { + let mut set = HashSet::new(); + if let Some(info) = self.storage.squash_info() { + let h = info.squash_root_node_hash; + if h != TrieHash::from_data(&[]) { + set.insert(h); + } + } + set + } + #[cfg(test)] pub fn borrow_storage_transaction(&mut self) -> TrieStorageTransaction<'_, T> { self.storage.transaction().unwrap() @@ -1726,7 +1812,7 @@ impl MARF { } } -// --- Leaf traversal ----------------------------------------------------------- +// Leaf traversal impl MARF { /// Walk all leaves in the trie at `block_hash`, yielding full paths and values. @@ -1736,13 +1822,13 @@ impl MARF { pub(crate) fn for_each_leaf( storage: &mut TrieStorageConnection, block_hash: &T, - handle_leaf: F, + mut handle_leaf: F, ) -> Result where - F: Fn(TrieHash, MARFValue) -> Result<(), Error>, + F: FnMut(TrieHash, MARFValue) -> Result<(), Error>, { let (original_block_hash, original_block_id) = storage.get_cur_block_and_id(); - let result = Self::inner_each_leaf(storage, block_hash, &handle_leaf); + let result = Self::inner_each_leaf(storage, block_hash, &mut handle_leaf); storage .open_block_maybe_id(&original_block_hash, original_block_id) @@ -1762,10 +1848,10 @@ impl MARF { fn inner_each_leaf( storage: &mut TrieStorageConnection, block_hash: &T, - handle_leaf: &F, + handle_leaf: &mut F, ) -> Result where - F: Fn(TrieHash, MARFValue) -> Result<(), Error>, + F: FnMut(TrieHash, MARFValue) -> Result<(), Error>, { storage.open_block(block_hash)?; let (root_node, _root_hash) = Trie::read_root(storage)?; @@ -1774,11 +1860,11 @@ impl MARF { let mut stack: Vec<(TriePtr, Vec, T, Option)> = Vec::new(); // Process a node: emit leaf or push children onto the stack. 
- let process_node = |node: TrieNodeType, - prefix: Vec, - block_hash: T, - block_id: Option, - stack: &mut Vec<(TriePtr, Vec, T, Option)>| + let mut process_node = |node: TrieNodeType, + prefix: Vec, + block_hash: T, + block_id: Option, + stack: &mut Vec<(TriePtr, Vec, T, Option)>| -> Result { let mut full_prefix = prefix; full_prefix.extend_from_slice(node.path_bytes()); diff --git a/stackslib/src/chainstate/stacks/index/mod.rs b/stackslib/src/chainstate/stacks/index/mod.rs index 5c2d023c752..88a33d3d107 100644 --- a/stackslib/src/chainstate/stacks/index/mod.rs +++ b/stackslib/src/chainstate/stacks/index/mod.rs @@ -33,6 +33,7 @@ pub mod marf; pub mod node; pub mod profile; pub mod proofs; +pub mod squash; pub mod storage; pub mod trie; pub mod trie_sql; diff --git a/stackslib/src/chainstate/stacks/index/node.rs b/stackslib/src/chainstate/stacks/index/node.rs index 2b3f44ca999..64bdc60808b 100644 --- a/stackslib/src/chainstate/stacks/index/node.rs +++ b/stackslib/src/chainstate/stacks/index/node.rs @@ -53,11 +53,11 @@ impl error::Error for CursorError { } // All numeric values of a Trie node when encoded. -// They are all 6-bit numbers -// * the 8th bit is used to indicate whether or not the value -// identifies a back-pointer to be followed. -// * the 7th bit is used to indicate whether or not the ptrs -// are compressed. This bit is cleared on read. +// They are all 4-bit numbers (values 0-6) +// * the 8th bit (0x80) indicates a back-pointer to be followed +// * the 7th bit (0x40) indicates the ptrs are compressed. Cleared on read. +// * the 6th bit (0x20) indicates the ptr offset is encoded as u64, instead of u32. Cleared on read. +// * the 5th bit (0x10) indicates a compressed inline pointer contains a back_block payload. Cleared on read. 
define_u8_enum!(TrieNodeID { Empty = 0, Leaf = 1, @@ -98,9 +98,46 @@ pub fn clear_compressed(id: u8) -> u8 { id & 0xbf } -/// Clear all control bits (backptr and compressed) +/// Is this compressed inline pointer flagged to carry `back_block` payload bytes? +/// This bit is wire-format-only metadata and is cleared after decoding. +pub fn has_inline_back_block(id: u8) -> bool { + id & 0x10 != 0 +} + +/// Set the compressed inline `back_block` payload bit. +pub fn set_inline_back_block(id: u8) -> u8 { + id | 0x10 +} + +/// Clear the compressed inline `back_block` payload bit. +pub fn clear_inline_back_block(id: u8) -> u8 { + id & 0xef +} + +/// True if a compressed pointer with this encoded id includes a back_block payload. +#[inline] +fn has_back_block_payload_bytes(id: u8) -> bool { + is_backptr(id) || has_inline_back_block(id) +} + +/// Is this pointer encoded with a u64 offset? +pub const fn is_u64_ptr(id: u8) -> bool { + id & 0x20 != 0 +} + +/// Set the u64-pointer bit +pub const fn set_u64_ptr(id: u8) -> u8 { + id | 0x20 +} + +/// Clear the u64-pointer bit +pub const fn clear_u64_ptr(id: u8) -> u8 { + id & 0xdf +} + +/// Clear all control bits (backptr, compressed, u64-pointer, annotation) pub fn clear_ctrl_bits(id: u8) -> u8 { - id & 0x3f + id & 0x0f } // Byte writing operations for pointer lists, paths. @@ -370,18 +407,30 @@ impl ConsensusSerializable for T { } } -/// Child pointer +/// Child pointer within a MARF trie node. +/// +/// `back_block` has two modes depending on the backptr flag in `id`: +/// +/// * Back-pointer (`id & 0x80 != 0`): the child lives in a different block's trie. +/// `back_block` is the `marf_data` row ID of that block, and `ptr` is the byte offset +/// within that block's trie storage. +/// +/// * Inline (`id & 0x80 == 0`): the child lives in the same trie storage. +/// `back_block` is normally 0. In a squashed MARF, a non-zero `back_block` is a +/// squash annotation: it records the original archival block ID. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct TriePtr { - pub id: u8, // ID of the child. Will have bit 0x80 set if the child is a back-pointer (in which case, back_block will be nonzero) - pub chr: u8, // Path character at which this child resides - pub ptr: u32, // Storage-specific pointer to where the child's encoded bytes can be found - pub back_block: u32, // Pointer back to the block that contains the child, if it's not in this trie + /// Node type ID of the child (see [`TrieNodeID`]). Bit 0x80 marks a back-pointer. + pub id: u8, + /// Path character at which this child resides. + pub chr: u8, + /// Byte offset of the child's encoded data within the trie storage. + pub ptr: u64, + /// Block ID of the trie containing the child. Zero for same-block inline children + /// (unless carrying a squash annotation). + pub back_block: u32, } -pub const TRIEPTR_SIZE: usize = 10; // full size of a TriePtr -pub const TRIEPTR_SIZE_COMPRESSED: usize = 6; // full size of a compressed TriePtr - pub fn ptrs_fmt(ptrs: &[TriePtr]) -> String { let mut strs = vec![]; for ptr in ptrs.iter() { @@ -409,7 +458,7 @@ impl Default for TriePtr { impl TriePtr { #[inline] - pub fn new(id: u8, chr: u8, ptr: u32) -> TriePtr { + pub fn new(id: u8, chr: u8, ptr: u64) -> TriePtr { TriePtr { id, chr, @@ -420,7 +469,7 @@ impl TriePtr { /// Create a back-pointer version of a [`TriePtr`] #[cfg(test)] - pub fn new_backptr(id: u8, chr: u8, ptr: u32, back_block: u32) -> TriePtr { + pub fn new_backptr(id: u8, chr: u8, ptr: u64, back_block: u32) -> TriePtr { TriePtr { id: set_backptr(id), chr, @@ -446,10 +495,24 @@ impl TriePtr { } #[inline] - pub fn ptr(&self) -> u32 { + pub fn ptr(&self) -> u64 { self.ptr } + /// Convert `self.ptr()` to a `u32` in-memory index, or return an error + /// if the value exceeds `u32::MAX`. 
+ #[inline] + pub fn ptr_as_u32(&self) -> Result { + u32::try_from(self.ptr).map_err(|_| Error::OverflowError) + } + + /// Convert `self.ptr()` to a `usize` in-memory index, or return an error + /// if the value exceeds `usize::MAX`. + #[inline] + pub fn ptr_as_usize(&self) -> Result { + usize::try_from(self.ptr).map_err(|_| Error::OverflowError) + } + #[inline] pub fn back_block(&self) -> u32 { self.back_block @@ -465,19 +528,75 @@ impl TriePtr { } } + /// Return the identifier byte that will be emitted on disk for this pointer. + /// + /// This preserves the logical node kind while setting or clearing the `0x20` + /// control bit to match the encoded pointer width. + #[inline] + pub fn encoded_id(&self) -> u8 { + if self.ptr() > u64::from(u32::MAX) { + set_u64_ptr(self.id()) + } else { + clear_u64_ptr(self.id()) + } + } + + /// Return the uncompressed encoded size, in bytes, for a pointer with the + /// given on-disk identifier byte. + /// + /// The `0x20` control bit determines whether the pointer payload is encoded + /// as `u32` or `u64`. + #[inline] + pub const fn encoded_size_for_id(node_id: u8) -> usize { + 1 + 1 + if is_u64_ptr(node_id) { 8 } else { 4 } + 4 + } + + /// Return the maximum possible uncompressed encoded size for any `TriePtr`. + #[inline] + pub const fn max_encoded_size() -> usize { + Self::encoded_size_for_id(set_u64_ptr(TrieNodeID::Empty as u8)) + } + + /// Return the compressed encoded size, in bytes, for a pointer with the + /// given on-disk identifier byte. + /// + /// The `0x20` control bit determines whether the pointer payload is encoded + /// as `u32` or `u64`. 
+ #[inline] + pub const fn encoded_size_compressed_for_id(node_id: u8) -> usize { + 1 + 1 + if is_u64_ptr(node_id) { 8 } else { 4 } + } + #[inline] pub fn write_bytes(&self, w: &mut W) -> Result<(), Error> { - w.write_all(&[self.id(), self.chr()])?; - w.write_all(&self.ptr().to_be_bytes())?; + let encoded_id = self.encoded_id(); + w.write_all(&[encoded_id, self.chr()])?; + if is_u64_ptr(encoded_id) { + w.write_all(&self.ptr().to_be_bytes())?; + } else { + let ptr32 = u32::try_from(self.ptr()).map_err(|_| Error::OverflowError)?; + w.write_all(&ptr32.to_be_bytes())?; + } w.write_all(&self.back_block().to_be_bytes())?; Ok(()) } #[inline] pub fn write_bytes_compressed(&self, w: &mut W) -> Result<(), Error> { - w.write_all(&[set_compressed(self.id()), self.chr()])?; - w.write_all(&self.ptr().to_be_bytes())?; - if is_backptr(self.id()) { + // Preserve squash annotation payload on disk for inline pointers that + // carry a non-zero back_block, without changing backptr semantics. + let mut encoded_id = set_compressed(self.encoded_id()); + if !is_backptr(self.id()) && self.back_block() != 0 { + encoded_id = set_inline_back_block(encoded_id); + } + w.write_all(&[encoded_id, self.chr()])?; + if is_u64_ptr(encoded_id) { + w.write_all(&self.ptr().to_be_bytes())?; + } else { + let ptr32 = u32::try_from(self.ptr()).map_err(|_| Error::OverflowError)?; + w.write_all(&ptr32.to_be_bytes())?; + } + if has_back_block_payload_bytes(encoded_id) { w.write_all(&self.back_block().to_be_bytes())?; } Ok(()) @@ -508,12 +627,24 @@ impl TriePtr { #[inline] #[allow(clippy::indexing_slicing)] + /// Deserialize a pointer from raw bytes using the encoded width bit. 
pub fn from_bytes(bytes: &[u8]) -> TriePtr { - assert!(bytes.len() >= TRIEPTR_SIZE); - let id = bytes[0]; + let encoded_id = bytes[0]; + let min_len = TriePtr::encoded_size_for_id(encoded_id); + assert!(bytes.len() >= min_len); + let id = clear_u64_ptr(encoded_id); let chr = bytes[1]; - let ptr = u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]); - let back_block = u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]); + let (ptr, back_block) = if is_u64_ptr(encoded_id) { + let ptr = u64::from_be_bytes([ + bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], + ]); + let back_block = u32::from_be_bytes([bytes[10], bytes[11], bytes[12], bytes[13]]); + (ptr, back_block) + } else { + let ptr = u64::from(u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]])); + let back_block = u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]); + (ptr, back_block) + }; TriePtr { id, @@ -524,19 +655,39 @@ impl TriePtr { } /// Load up this TriePtr from a slice of bytes, assuming that they represent a compressed - /// TriePtr. A TriePtr that is compressed will not have a stored `back_block` field if the - /// node ID does not have the backptr bit set. + /// TriePtr. + /// + /// A compressed TriePtr stores `back_block` bytes if either: + /// * it is a back-pointer (`is_backptr(id)`), or + /// * it is an inline pointer with back_block payload + /// (`has_inline_back_block(id)`). + /// + /// The annotation bit is wire metadata and is cleared on read. 
#[inline] #[allow(clippy::indexing_slicing)] pub fn from_bytes_compressed(bytes: &[u8]) -> TriePtr { - assert!(bytes.len() >= TRIEPTR_SIZE_COMPRESSED); - let id = clear_compressed(bytes[0]); + let encoded_id = clear_compressed(bytes[0]); + assert!(bytes.len() >= TriePtr::compressed_size_for_id(encoded_id)); + let id = clear_u64_ptr(clear_inline_back_block(encoded_id)); let chr = bytes[1]; - let ptr = u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]]); + let ptr = if is_u64_ptr(encoded_id) { + u64::from_be_bytes([ + bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], + ]) + } else { + u64::from(u32::from_be_bytes([bytes[2], bytes[3], bytes[4], bytes[5]])) + }; - let back_block = if is_backptr(id) { - assert!(bytes.len() >= TRIEPTR_SIZE); - u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]) + let back_block = if has_back_block_payload_bytes(encoded_id) { + // Backpointers and squash annotations append a 4-byte `back_block` after the compressed ptr payload. 
+ let back_block_offset = TriePtr::encoded_size_compressed_for_id(encoded_id); + assert!(bytes.len() >= back_block_offset + 4); + u32::from_be_bytes([ + bytes[back_block_offset], + bytes[back_block_offset + 1], + bytes[back_block_offset + 2], + bytes[back_block_offset + 3], + ]) } else { 0 }; @@ -555,11 +706,18 @@ impl TriePtr { #[inline] pub fn read_bytes_compressed(fd: &mut R) -> Result { let id_bits: u8 = read_next(fd)?; - let id = clear_compressed(id_bits); + let encoded_id = clear_compressed(id_bits); + let id = clear_u64_ptr(clear_inline_back_block(encoded_id)); let chr: u8 = read_next(fd)?; - let ptr_be_bytes: [u8; 4] = read_next(fd)?; - let ptr = u32::from_be_bytes(ptr_be_bytes); - let back_block = if is_backptr(id) { + let ptr = if is_u64_ptr(encoded_id) { + let hi: [u8; 4] = read_next(fd)?; + let lo: [u8; 4] = read_next(fd)?; + u64::from_be_bytes([hi[0], hi[1], hi[2], hi[3], lo[0], lo[1], lo[2], lo[3]]) + } else { + let ptr_be_bytes: [u8; 4] = read_next(fd)?; + u64::from(u32::from_be_bytes(ptr_be_bytes)) + }; + let back_block = if has_back_block_payload_bytes(encoded_id) { let bytes: [u8; 4] = read_next(fd)?; u32::from_be_bytes(bytes) } else { @@ -574,21 +732,32 @@ impl TriePtr { }) } + /// Size of this TriePtr on disk. + #[inline] + pub fn encoded_size(&self) -> usize { + Self::encoded_size_for_id(self.encoded_id()) + } + /// Size of this TriePtr on disk, if compression is to be used. #[inline] pub fn compressed_size(&self) -> usize { - Self::compressed_size_for_id(self.id) + let encoded_id = self.encoded_id(); + if !is_backptr(self.id) && self.back_block != 0 { + Self::encoded_size_for_id(encoded_id) + } else { + Self::compressed_size_for_id(encoded_id) + } } /// Returns the size, in bytes, that a node occupies on disk, taking compression into account. - /// In this case, non-backpointer nodes use a smaller size (`TRIEPTR_SIZE_COMPRESSED`), - /// while backpointer nodes use the full size (`TRIEPTR_SIZE`). 
+ /// Pointers without a `back_block` payload omit it, while backpointers and + /// inline-annotation pointers store it. #[inline] pub fn compressed_size_for_id(node_id: u8) -> usize { - if !is_backptr(node_id) { - TRIEPTR_SIZE_COMPRESSED + if !has_back_block_payload_bytes(node_id) { + Self::encoded_size_compressed_for_id(node_id) } else { - TRIEPTR_SIZE + Self::encoded_size_for_id(node_id) } } } @@ -982,7 +1151,7 @@ impl TrieNode16 { #[derive(Clone)] pub struct TrieNode48 { pub path: Vec, - indexes: [i8; 256], // indexes[i], if non-negative, is an index into ptrs. + pub(crate) indexes: [i8; 256], // indexes[i], if non-negative, is an index into ptrs. pub ptrs: [TriePtr; 48], /// If this node was created by copy-on-write, then this points to the node it was copied from. pub cowptr: Option, @@ -1229,17 +1398,19 @@ impl StacksMessageCodec for TrieNodePatch { } } -/// Turn each non-empty, non-backptr in `ptrs` into a backptr pointing at `child_block_id` +/// Turn each non-empty, non-backptr in `ptrs` into a backptr. +/// If `back_block` is already non-zero (squash annotation), it is preserved; +/// otherwise it is set to `child_block_id`. 
pub(crate) fn node_copy_update_ptrs(ptrs: &mut [TriePtr], child_block_id: u32) { for pointer in ptrs.iter_mut() { // if the node is empty, do nothing, if it's a back pointer, if pointer.id() == TrieNodeID::Empty as u8 || is_backptr(pointer.id()) { continue; - } else { - // make backptr + } + if pointer.back_block == 0 { pointer.back_block = child_block_id; - pointer.id = set_backptr(pointer.id()); } + pointer.id = set_backptr(pointer.id()); } } @@ -1545,15 +1716,6 @@ impl TrieNodePatch { } sz } - - /// Load a TrieNodePatch from a Read object - /// Returns Ok(Self) on success - /// Returns Err(codec_error::*) on failure to decode the bytes - /// Returns Err(IOError(..)) on disk I/O failure - pub fn from_bytes(f: &mut R) -> Result { - Self::consensus_deserialize(f) - .map_err(|e| Error::CorruptionError(format!("Codec error: {e:?}"))) - } } impl TrieNode for TrieNode4 { @@ -1679,7 +1841,6 @@ impl TrieNode for TrieNode16 { fn from_bytes(r: &mut R) -> Result { let mut ptrs_slice = [TriePtr::default(); 16]; ptrs_from_bytes(TrieNodeID::Node16 as u8, r, &mut ptrs_slice)?; - let path = path_from_bytes(r)?; Ok(TrieNode16 { diff --git a/stackslib/src/chainstate/stacks/index/squash.rs b/stackslib/src/chainstate/stacks/index/squash.rs new file mode 100644 index 00000000000..f9a01c24134 --- /dev/null +++ b/stackslib/src/chainstate/stacks/index/squash.rs @@ -0,0 +1,1502 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//! MARF squashing: offline snapshot creation and validation. +//! +//! A squashed MARF contains only the canonical state at a given +//! height H plus the metadata needed for ancestor hash lookups and +//! block-height resolution. + +use std::collections::HashMap; +use std::fs::File; +use std::io::{BufReader, BufWriter, Read as _, Seek, SeekFrom, Write}; +use std::time::{Duration, Instant}; + +use rusqlite::{params, DatabaseName}; +use sha2::Digest as _; +use stacks_common::types::chainstate::{ + StacksBlockId, TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE, TRIEHASH_ENCODED_SIZE, +}; + +use crate::chainstate::stacks::index::bits::{ + get_leaf_hash, get_node_byte_len, write_nodetype_bytes, +}; +use crate::chainstate::stacks::index::marf::{ + MARFOpenOpts, MarfConnection, BLOCK_HEIGHT_TO_HASH_MAPPING_KEY, MARF, +}; +use crate::chainstate::stacks::index::node::{ + clear_backptr, is_backptr, TrieNode16, TrieNode256, TrieNode4, TrieNode48, TrieNodeID, + TrieNodeType, TriePtr, +}; +use crate::chainstate::stacks::index::storage::{ + SquashInfo, TrieFileStorage, TrieStorageConnection, +}; +use crate::chainstate::stacks::index::trie::Trie; +use crate::chainstate::stacks::index::{ + trie_sql, BlockMap, Error, MARFValue, MarfTrieId, TrieHasher, TrieLeaf, +}; + +/// Classify a child pointer: resolve the `(block_id, byte_offset)` pair that +/// locates the child in blob storage. Backpointers carry the target block_id +/// directly; inline pointers belong to `origin_block_id`. +/// Returns `None` for empty pointers. 
+#[inline] +fn resolve_child_ptr(ptr: &TriePtr, origin_block_id: u32) -> Option<(u32, u64)> { + if ptr.id() == TrieNodeID::Empty as u8 { + return None; + } + if is_backptr(ptr.id()) { + Some((ptr.back_block(), ptr.from_backptr().ptr())) + } else { + Some((origin_block_id, ptr.ptr())) + } +} + +/// Returns `true` when a pointer is an inline child (non-empty, non-backptr) +/// — i.e. it points to a node in the same blob, not to an ancestor block. +#[inline] +fn is_inline_child_ptr(ptr: &TriePtr) -> bool { + ptr.id() != TrieNodeID::Empty as u8 && !is_backptr(ptr.id()) +} + +/// Format a `Duration` as `X.YZ secs` or `X min Y.ZW secs`. +fn fmt_duration(d: Duration) -> String { + let total_centis = d.as_millis() / 10; + let mins = total_centis / 6000; + let secs = (total_centis % 6000) as f64 / 100.0; + if mins == 0 { + format!("{secs:.2} secs") + } else { + format!("{mins} min {secs:.2} secs") + } +} + +// --------------------------------------------------------------------------- +// NodeStore: disk-backed storage for collected trie nodes. +// +// Instead of holding all 50M+ collected nodes in a giant in-memory vector, +// this stores the full node data in a temporary file and keeps only +// lightweight per-node metadata in memory (~4 GB). +// --------------------------------------------------------------------------- + +/// Tag bytes for node serialization to the temp file. +const TAG_LEAF: u8 = 0; +const TAG_NODE4: u8 = 1; +const TAG_NODE16: u8 = 2; +const TAG_NODE48: u8 = 3; +const TAG_NODE256: u8 = 4; + +/// Serialize a single `TriePtr` to the writer. +fn write_trie_ptr(w: &mut W, p: &TriePtr) -> Result<(), Error> { + w.write_all(&[p.id, p.chr])?; + w.write_all(&p.ptr.to_le_bytes())?; + w.write_all(&p.back_block.to_le_bytes())?; + Ok(()) +} + +/// Deserialize a single `TriePtr` from the reader. 
+fn read_trie_ptr(r: &mut R) -> Result { + let mut buf2 = [0u8; 2]; + r.read_exact(&mut buf2)?; + let mut buf8 = [0u8; 8]; + r.read_exact(&mut buf8)?; + let ptr = u64::from_le_bytes(buf8); + let mut buf4 = [0u8; 4]; + r.read_exact(&mut buf4)?; + let back_block = u32::from_le_bytes(buf4); + Ok(TriePtr { + id: buf2[0], + chr: buf2[1], + ptr, + back_block, + }) +} + +/// Serialize a `TrieNodeType` to the writer in a compact binary format. +/// Format: [tag: u8] [path_len: u32] [path bytes] [variant data] +pub(crate) fn serialize_node(w: &mut W, node: &TrieNodeType) -> Result<(), Error> { + match node { + TrieNodeType::Leaf(leaf) => { + w.write_all(&[TAG_LEAF])?; + w.write_all(&(leaf.path.len() as u32).to_le_bytes())?; + w.write_all(&leaf.path)?; + w.write_all(&leaf.data.0)?; + } + TrieNodeType::Node4(n) => { + w.write_all(&[TAG_NODE4])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + TrieNodeType::Node16(n) => { + w.write_all(&[TAG_NODE16])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + TrieNodeType::Node48(n) => { + w.write_all(&[TAG_NODE48])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + // Write the 256-byte indexes array + let indexes = n.indexes.map(|idx| idx as u8); + w.write_all(&indexes)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + TrieNodeType::Node256(n) => { + w.write_all(&[TAG_NODE256])?; + w.write_all(&(n.path.len() as u32).to_le_bytes())?; + w.write_all(&n.path)?; + for p in &n.ptrs { + write_trie_ptr(w, p)?; + } + } + } + Ok(()) +} + +/// Deserialize a `TrieNodeType` from the reader. 
+pub(crate) fn deserialize_node(r: &mut R) -> Result { + let mut tag = [0u8; 1]; + r.read_exact(&mut tag)?; + let mut path_len_buf = [0u8; 4]; + r.read_exact(&mut path_len_buf)?; + let path_len = u32::from_le_bytes(path_len_buf) as usize; + let mut path = vec![0u8; path_len]; + if path_len > 0 { + r.read_exact(&mut path)?; + } + + match tag[0] { + TAG_LEAF => { + let mut data = [0u8; 40]; + r.read_exact(&mut data)?; + Ok(TrieNodeType::Leaf(TrieLeaf { + path, + data: MARFValue(data), + })) + } + TAG_NODE4 => { + let mut ptrs = [TriePtr::default(); 4]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node4(TrieNode4 { + path, + ptrs, + cowptr: None, + patches: vec![], + })) + } + TAG_NODE16 => { + let mut ptrs = [TriePtr::default(); 16]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node16(TrieNode16 { + path, + ptrs, + cowptr: None, + patches: vec![], + })) + } + TAG_NODE48 => { + let mut indexes_u8 = [0u8; 256]; + r.read_exact(&mut indexes_u8)?; + let indexes = indexes_u8.map(|idx| idx as i8); + let mut ptrs = [TriePtr::default(); 48]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node48(Box::new(TrieNode48 { + path, + indexes, + ptrs, + cowptr: None, + patches: vec![], + }))) + } + TAG_NODE256 => { + let mut ptrs = [TriePtr::default(); 256]; + for p in ptrs.iter_mut() { + *p = read_trie_ptr(r)?; + } + Ok(TrieNodeType::Node256(Box::new(TrieNode256 { + path, + ptrs, + cowptr: None, + patches: vec![], + }))) + } + _ => Err(Error::CorruptionError(format!( + "NodeStore: invalid tag byte {0}", + tag[0] + ))), + } +} + +/// Disk-backed store for collected trie nodes. +/// +/// Full node data is serialized to a temporary file. Only lightweight +/// per-node metadata (hash, block_id, file offset) is kept in memory. +pub(crate) struct NodeStore { + /// Temp file holding serialized nodes (write handle). + writer: BufWriter, + /// Path to the temp file (for re-opening as reader). 
+ pub(crate) path: std::path::PathBuf, + /// Byte offset in the temp file for each node. + pub(crate) file_offsets: Vec, + /// Per-node hash. + hashes: Vec, + /// Per-node origin block ID. + block_ids: Vec, +} + +impl NodeStore { + pub(crate) fn new(dir: &str) -> Result { + let pid = std::process::id(); + // Try up to 16 times with atomic create_new to avoid collision. + for attempt in 0u32..16 { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + let path = std::path::PathBuf::from(format!( + "{}/.squash_nodes_{pid}_{nanos}_{attempt}.tmp", + dir + )); + match File::options().write(true).create_new(true).open(&path) { + Ok(file) => { + return Ok(NodeStore { + writer: BufWriter::with_capacity(1 << 20, file), + path, + file_offsets: Vec::new(), + hashes: Vec::new(), + block_ids: Vec::new(), + }); + } + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => continue, + Err(e) => return Err(Error::IOError(e)), + } + } + Err(Error::IOError(std::io::Error::new( + std::io::ErrorKind::AlreadyExists, + "failed to create unique NodeStore temp file after 16 attempts", + ))) + } + + pub(crate) fn len(&self) -> usize { + self.file_offsets.len() + } + + /// Append a node. Returns the node's index. + pub(crate) fn push( + &mut self, + node: &TrieNodeType, + hash: TrieHash, + block_id: u32, + ) -> Result { + let idx = self.file_offsets.len(); + let offset = self.writer.stream_position().map_err(Error::IOError)?; + self.file_offsets.push(offset); + self.hashes.push(hash); + self.block_ids.push(block_id); + serialize_node(&mut self.writer, node)?; + Ok(idx) + } + + /// Flush buffered writes to the temp file so a subsequent `open_reader` observes all appended nodes. + pub(crate) fn finish_writing(&mut self) -> Result<(), Error> { + self.writer.flush().map_err(Error::IOError)?; + Ok(()) + } + + /// Open a reader for random-access reads. 
+ pub(crate) fn open_reader(&self) -> Result, Error> { + let file = File::open(&self.path).map_err(Error::IOError)?; + Ok(BufReader::with_capacity(1 << 20, file)) + } + + /// Read a node from the temp file using the given reader. + pub(crate) fn read_node_with( + &self, + reader: &mut BufReader, + idx: usize, + ) -> Result { + let offset = *self.file_offsets.get(idx).ok_or_else(|| { + Error::CorruptionError(format!("NodeStore: index {idx} out of bounds")) + })?; + reader + .seek(SeekFrom::Start(offset)) + .map_err(Error::IOError)?; + deserialize_node(reader) + } + + pub(crate) fn hash(&self, idx: usize) -> TrieHash { + self.hashes.get(idx).copied().unwrap_or_else(|| { + panic!( + "NodeStore::hash: index {idx} out of bounds (len={})", + self.hashes.len() + ) + }) + } + + pub(crate) fn set_hash(&mut self, idx: usize, hash: TrieHash) { + if let Some(slot) = self.hashes.get_mut(idx) { + *slot = hash; + } else { + panic!( + "NodeStore::set_hash: index {idx} out of bounds (len={})", + self.hashes.len() + ); + } + } + + pub(crate) fn block_id(&self, idx: usize) -> u32 { + self.block_ids.get(idx).copied().unwrap_or_else(|| { + panic!( + "NodeStore::block_id: index {idx} out of bounds (len={})", + self.block_ids.len() + ) + }) + } + + /// Drop the block_ids Vec to free memory after remap. + fn drop_block_ids(&mut self) { + self.block_ids = Vec::new(); + } + + /// Clean up the temp file. + fn cleanup(&self) { + let _ = std::fs::remove_file(&self.path); + } +} + +impl Drop for NodeStore { + fn drop(&mut self) { + self.cleanup(); + } +} + +/// Remap child pointers in a `NodeStore` for the squashed trie layout. +/// +/// For each non-leaf node, reads it from the temp file, remaps its child +/// pointers from source (block_id, offset) to sequential indices, and +/// writes the modified node back. 
+/// +/// When `block_id_map` is `Some`, each child's `back_block` is set to the +/// squashed equivalent of its origin block (needed for the real squash blob +/// so that COW and hash computation preserve block identity). When `None`, +/// `back_block` is zeroed (used by `recompute_squash_root_node_hash` where +/// block identity is irrelevant). +fn remap_child_ptrs( + store: &mut NodeStore, + source_to_idx: &HashMap<(u32, u64), usize>, + block_id_map: Option<&HashMap>, + label: &str, +) -> Result<(), Error> { + let remap_start = Instant::now(); + let node_count = store.len(); + let mut reader = store.open_reader()?; + + let write_file = std::fs::OpenOptions::new() + .write(true) + .open(&store.path) + .map_err(Error::IOError)?; + let mut writer = BufWriter::with_capacity(1 << 20, write_file); + + for idx in 0..node_count { + if idx > 0 && idx % 1_000_000 == 0 { + info!( + "[{label}] Remap trie pointers: {idx}/{node_count} nodes in {}", + fmt_duration(remap_start.elapsed()) + ); + } + + let mut node = store.read_node_with(&mut reader, idx)?; + let origin_block_id = store.block_id(idx); + + if node.is_leaf() { + continue; + } + + let ptrs = node.ptrs_mut(); + let mut modified = false; + for ptr in ptrs.iter_mut() { + let Some((child_block_id, read_ptr_val)) = resolve_child_ptr(ptr, origin_block_id) + else { + continue; + }; + + let source_key = (child_block_id, read_ptr_val); + let child_idx = *source_to_idx.get(&source_key).ok_or_else(|| { + Error::CorruptionError(format!( + "remap_child_ptrs: child {source_key:?} not in source_to_idx" + )) + })?; + + ptr.ptr = child_idx as u64; + ptr.id = clear_backptr(ptr.id); + + ptr.back_block = match block_id_map { + Some(map) => *map.get(&child_block_id).ok_or_else(|| { + Error::CorruptionError(format!( + "remap_child_ptrs: block_id {child_block_id} not in block_id_map" + )) + })?, + None => 0, + }; + modified = true; + } + + if modified { + let offset = *store.file_offsets.get(idx).ok_or_else(|| { + 
Error::CorruptionError(format!("remap: file_offsets index {idx} out of bounds")) + })?; + writer + .seek(SeekFrom::Start(offset)) + .map_err(Error::IOError)?; + serialize_node(&mut writer, &node)?; + } + } + writer.flush().map_err(Error::IOError)?; + + info!( + "[{label}] Remap trie pointers complete: {node_count} nodes in {}", + fmt_duration(remap_start.elapsed()) + ); + Ok(()) +} + +/// Recompute content hashes using a `NodeStore`. +/// +/// Leaf hashes are computed by reading each leaf from the temp file. +/// Internal node hashes are computed bottom-up (reverse order) using +/// the in-memory hashes Vec for child lookups and reading the node +/// structure from the temp file. +fn recompute_content_hashes(store: &mut NodeStore) -> Result<(), Error> { + let empty_hash = TrieHash::from_data(&[]); + let node_count = store.len(); + let mut reader = store.open_reader()?; + let start = Instant::now(); + + // Pass 1: compute leaf hashes + for idx in 0..node_count { + let node = store.read_node_with(&mut reader, idx)?; + if let TrieNodeType::Leaf(ref leaf) = node { + store.set_hash(idx, get_leaf_hash(leaf)); + } + } + info!( + "Trie hash: leaf pass done in {}", + fmt_duration(start.elapsed()) + ); + + // Pass 2: internal nodes in reverse order + for idx in (0..node_count).rev() { + let node = store.read_node_with(&mut reader, idx)?; + if node.is_leaf() { + continue; + } + + // Collect child hashes + let ptrs = node.ptrs(); + let mut child_hashes = Vec::with_capacity(ptrs.len()); + for child_ptr in ptrs { + if !is_inline_child_ptr(child_ptr) { + child_hashes.push(empty_hash); + } else { + let child_idx = child_ptr.ptr() as usize; + if child_idx >= node_count { + return Err(Error::CorruptionError(format!( + "Invalid child index {child_idx} at node {idx}" + ))); + } + child_hashes.push(store.hash(child_idx)); + } + } + + let new_hash = compute_node_hash(&node, &child_hashes); + store.set_hash(idx, new_hash); + } + + info!( + "Trie hash: {node_count} nodes in {}", + 
fmt_duration(start.elapsed()) + ); + Ok(()) +} + +/// Replace array-index child pointers in `node` with the corresponding +/// blob byte offsets from `blob_offsets`. Only forward (non-back, non-empty) +/// pointers are remapped. +pub(crate) fn remap_ptrs_to_blob_offsets( + node: &mut TrieNodeType, + blob_offsets: &[u64], +) -> Result<(), Error> { + if node.is_leaf() { + return Ok(()); + } + for ptr in node.ptrs_mut() { + if is_inline_child_ptr(ptr) { + let child_idx = ptr.ptr() as usize; + ptr.ptr = *blob_offsets.get(child_idx).ok_or_else(|| { + Error::CorruptionError(format!( + "blob offset remap: child index {child_idx} out of bounds" + )) + })?; + } + } + Ok(()) +} + +/// Compute per-node byte offsets within the serialized blob. +/// +/// Returns `(blob_offsets, total_size)` where `blob_offsets[i]` is the byte +/// position where node `i` starts in the blob (after the header). +pub(crate) fn compute_blob_offsets(store: &mut NodeStore) -> Result<(Vec, u64), Error> { + compute_blob_offsets_inner(store, u32::MAX as u64) +} + +/// Inner implementation with a configurable early-exit threshold. +/// When `current_offset <= early_exit_threshold` after pass 1, the fixpoint +/// loop is skipped because no pointer will switch to u64 encoding. +pub(crate) fn compute_blob_offsets_inner( + store: &mut NodeStore, + early_exit_threshold: u64, +) -> Result<(Vec, u64), Error> { + let n = store.len(); + let mut reader = store.open_reader()?; + let header_size = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + let mut blob_offsets: Vec = Vec::with_capacity(n); + let mut current_offset = header_size; + let mut forward_ptr_count: usize = 0; + + // Per-node byte lengths cached during Pass 1. For nodes without + // forward pointers the length is constant across fixpoint passes, + // so we can skip re-reading them from disk entirely. + let mut byte_lens: Vec = Vec::with_capacity(n); + // True when a node has forward pointers (must be re-read in fixpoint). 
+ let mut has_forward_ptrs: Vec = Vec::with_capacity(n); + + // Pass 1: compute offsets using original (array-index) pointer values. + for idx in 0..n { + blob_offsets.push(current_offset); + let node = store.read_node_with(&mut reader, idx)?; + let mut has_fwd = false; + if !node.is_leaf() { + for ptr in node.ptrs() { + if is_inline_child_ptr(ptr) { + forward_ptr_count = forward_ptr_count + .checked_add(1) + .ok_or(Error::OverflowError)?; + has_fwd = true; + } + } + } + has_forward_ptrs.push(has_fwd); + let byte_len = get_node_byte_len(&node) as u64; + byte_lens.push(byte_len); + current_offset += byte_len; + } + + // If the blob fits in 4 GiB, no pointer will switch to u64 encoding. + if current_offset <= early_exit_threshold { + return Ok((blob_offsets, current_offset)); + } + + // Pass 2+: recompute with blob-offset pointer values until stable. + // Each forward pointer widens from u32 to u64 at most once, so + // `forward_ptr_count + 2` bounds convergence (same as dump_consume). + let max_passes = forward_ptr_count.saturating_add(2); + let mut converged = false; + for _ in 0..max_passes { + let prev_total = current_offset; + current_offset = header_size; + + for idx in 0..n { + // Temporary mutable borrow - released at the semicolon so + // `remap_ptrs_to_blob_offsets` can borrow `blob_offsets` immutably. + *blob_offsets.get_mut(idx).ok_or_else(|| { + Error::CorruptionError("blob offset index out of bounds".into()) + })? = current_offset; + + let has_fwd = *has_forward_ptrs.get(idx).ok_or_else(|| { + Error::CorruptionError("has_forward_ptrs index out of bounds".into()) + })?; + if has_fwd { + let mut node = store.read_node_with(&mut reader, idx)?; + remap_ptrs_to_blob_offsets(&mut node, &blob_offsets)?; + *byte_lens.get_mut(idx).ok_or_else(|| { + Error::CorruptionError("byte_lens index out of bounds".into()) + })? 
= get_node_byte_len(&node) as u64; + } + + current_offset += *byte_lens + .get(idx) + .ok_or_else(|| Error::CorruptionError("byte_lens index out of bounds".into()))?; + } + + if current_offset == prev_total { + converged = true; + break; + } + } + if !converged { + return Err(Error::CorruptionError(format!( + "compute_blob_offsets layout did not converge after {max_passes} passes" + ))); + } + + Ok((blob_offsets, current_offset)) +} + +/// Stream the squash blob into an arbitrary `Write + Seek` sink. +/// +/// Reads nodes one-at-a-time from the NodeStore temp file, converts +/// array-index child pointers to byte offsets, and serializes directly +/// into `sink`. No intermediate `Vec` is allocated for the full blob. +/// +/// The blob is written starting at the sink's current position. +/// All internal offsets (header, node pointers) are relative to the blob +/// start, not to the absolute file position, so this works correctly when +/// appending to a `.blobs` file that already contains data. +/// +/// Returns the number of bytes written. +pub(crate) fn stream_squash_blob( + store: &mut NodeStore, + parent_hash: &T, + blob_offsets: &[u64], + sink: &mut F, +) -> Result { + let n = store.len(); + let mut reader = store.open_reader()?; + + // Record the base offset so all writes are relative to blob start. 
+ let base = sink.stream_position().map_err(Error::IOError)?; + + // Write header: parent block hash + zero identifier + sink.write_all(parent_hash.as_bytes()) + .map_err(Error::IOError)?; + sink.seek(SeekFrom::Start( + base + BLOCK_HEADER_HASH_ENCODED_SIZE as u64, + )) + .map_err(Error::IOError)?; + sink.write_all(&0u32.to_le_bytes()) + .map_err(Error::IOError)?; + + for idx in 0..n { + let mut node = store.read_node_with(&mut reader, idx)?; + let hash = store.hash(idx); + + // Convert array-index pointers to byte offsets (relative to blob start) + remap_ptrs_to_blob_offsets(&mut node, blob_offsets)?; + + write_nodetype_bytes(sink, &node, hash)?; + } + + let end = sink.stream_position().map_err(Error::IOError)?; + Ok(end - base) +} + +/// Per-height block metadata: `(height, block_hash, root_hash)`. +type BlockInfo = (u32, T, TrieHash); + +/// Reads root hashes from either an external `.blobs` file or from SQLite +/// internal `marf_data.data` BLOB columns. +enum BlobReader { + External(BufReader), + Internal(rusqlite::Connection), +} + +impl BlobReader { + fn new(db_path: &str, external_blobs: bool) -> Result { + if external_blobs { + let blobs_path = format!("{db_path}.blobs"); + let file = File::open(&blobs_path).map_err(Error::IOError)?; + Ok(BlobReader::External(BufReader::with_capacity( + 64 * 1024, + file, + ))) + } else { + let conn = rusqlite::Connection::open_with_flags( + db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY, + )?; + Ok(BlobReader::Internal(conn)) + } + } + + /// Read the root hash for a block. + /// + /// For `External`, seeks to `blob_offset + root_ptr_offset` in the `.blobs` file. + /// For `Internal`, opens the SQLite blob for `block_id` and seeks within it. 
+ fn read_root_hash(&mut self, block_id: u32, blob_offset: u64) -> Result { + let root_ptr_offset = (BLOCK_HEADER_HASH_ENCODED_SIZE as u64) + 4; + let mut hash_bytes = [0u8; TRIEHASH_ENCODED_SIZE]; + match self { + BlobReader::External(reader) => { + reader.seek(SeekFrom::Start(blob_offset + root_ptr_offset))?; + reader.read_exact(&mut hash_bytes)?; + } + BlobReader::Internal(conn) => { + let mut blob = conn.blob_open( + DatabaseName::Main, + "marf_data", + "data", + block_id.into(), + true, // readonly + )?; + blob.seek(SeekFrom::Start(root_ptr_offset))?; + blob.read_exact(&mut hash_bytes)?; + } + } + Ok(TrieHash(hash_bytes)) + } +} + +/// A `BlockMap` adapter for trie nodes that have no backpointer children. +/// +/// After the remap pass all pointers in the squash blob are inline. +/// `write_consensus_bytes` writes zeroed block hashes for non-backptr +/// children and never queries the `BlockMap`, so every method here is +/// unreachable. +struct InlineOnlyBlockMap; + +impl BlockMap for InlineOnlyBlockMap { + type TrieId = StacksBlockId; + + fn get_block_hash(&self, _id: u32) -> Result { + unreachable!("InlineOnlyBlockMap: no backpointers in squash trie") + } + fn get_block_hash_caching(&mut self, _id: u32) -> Result<&Self::TrieId, Error> { + unreachable!("InlineOnlyBlockMap: no backpointers in squash trie") + } + fn is_block_hash_cached(&self, _id: u32) -> bool { + false + } + fn get_block_id(&self, _bhh: &Self::TrieId) -> Result { + unreachable!("InlineOnlyBlockMap: no backpointers in squash trie") + } + fn get_block_id_caching(&mut self, _bhh: &Self::TrieId) -> Result { + unreachable!("InlineOnlyBlockMap: no backpointers in squash trie") + } +} + +/// Compute the content hash of a `TrieNodeType` given pre-collected child hashes. +/// +/// Equivalent to `bits::get_node_hash` but works on the `TrieNodeType` enum +/// directly (which does not implement `ConsensusSerializable`). 
+fn compute_node_hash(node: &TrieNodeType, child_hashes: &[TrieHash]) -> TrieHash { + let mut hasher = TrieHasher::new(); + node.write_consensus_bytes(&mut InlineOnlyBlockMap, &mut hasher) + .expect("IO failure pushing to hasher"); + for h in child_hashes { + hasher.update(h.as_ref()); + } + TrieHash(hasher.finalize().into()) +} + +fn read_proc_status_kib(field: &str) -> Option { + let status = std::fs::read_to_string("/proc/self/status").ok()?; + let line = status.lines().find(|line| line.starts_with(field))?; + let mut parts = line.split_whitespace(); + let _ = parts.next()?; + parts.next()?.parse::().ok() +} + +fn log_memory_snapshot(stage: &str) { + let rss_kib = read_proc_status_kib("VmRSS:"); + let hwm_kib = read_proc_status_kib("VmHWM:"); + + match (rss_kib, hwm_kib) { + (Some(rss), Some(hwm)) => info!( + "Squash memory ({stage}): VmRSS={} MiB, VmHWM={} MiB", + rss / 1024, + hwm / 1024 + ), + (Some(rss), None) => info!("Squash memory ({stage}): VmRSS={} MiB", rss / 1024), + _ => info!("Squash memory ({stage}): unavailable"), + } +} + +/// Key that stores the squashed root hash at the snapshot tip. +pub const MARF_SQUASH_ROOT_KEY: &str = "__MARF_SQUASH_ROOT"; +/// Key that stores the snapshot height for a squashed MARF. +pub const MARF_SQUASH_HEIGHT_KEY: &str = "__MARF_SQUASH_HEIGHT"; +/// Prefix for per-height root hashes preserved in squashed MARFs. +/// Each key has the form `__MARF_SQUASHED_BLOCK_ROOT_HASH::`. +pub const MARF_SQUASHED_BLOCK_ROOT_HASH_KEY: &str = "__MARF_SQUASHED_BLOCK_ROOT_HASH"; + +/// Summary statistics from a squashing run. +#[derive(Debug, Clone)] +pub struct SquashStats { + /// Total number of nodes collected into the squashed MARF. + pub node_count: u64, +} + +/// Summary statistics from a validation run. +/// +/// The default validation checks: +/// - Per-height root hashes stored in `marf_squash_archival_marf_roots` match the +/// archival source (guarantees correct ancestor hash computation for the +/// skip-list at blocks > H). 
+/// - Squash metadata (`marf_squash_info`) is present and correct. +/// - All historical `marf_data` entries share the tip block's blob offset. +/// +/// When `full_leaf_scan` is enabled, the validator additionally walks every +/// leaf in both MARFs and cross-checks them, which is O(leaf_count) and much +/// slower but useful for debugging. +#[derive(Debug, Clone)] +pub struct SquashValidationStats { + // --- Fast-path (always populated) --- + /// Whether the squashed root key was found in the SQL metadata. + pub archival_root_present: bool, + /// Whether the stored archival root hash at the squash height + /// matches the source MARF's root hash at that height. + pub archival_root_matches: bool, + /// Per-height root hashes missing from the SQL table. + pub root_hash_missing: u64, + /// Per-height root hashes with mismatched values. + pub root_hash_mismatches: u64, + /// Number of historical `marf_data` entries that do NOT share the + /// tip block's blob offset (should be 0 for a correct squash). + pub blob_offset_mismatches: u64, + /// Whether the `squash_root_node_hash` was found in SQL metadata + /// (a `TrieHash::from_data(&[])` value counts as absent). + pub squash_node_hash_present: bool, + /// Whether the stored `squash_root_node_hash` matches the value + /// recomputed from the committed squash trie blob (DFS walk + bottom-up hash). + pub squash_node_hash_matches: bool, + + // --- Full leaf scan (only populated when full_leaf_scan = true) --- + /// Total keys compared from the source MARF (0 when fast-only). + pub source_keys_checked: u64, + /// Total keys compared from the squashed MARF (0 when fast-only). + pub squashed_keys_checked: u64, + /// Keys present in source but missing in squashed (0 when fast-only). + pub missing_in_squashed: u64, + /// Keys present in squashed but missing in source (0 when fast-only). + pub missing_in_source: u64, + /// Keys present in both but with different values (0 when fast-only). 
+ pub value_mismatches: u64, +} + +impl SquashValidationStats { + /// Returns `true` if all validation checks passed. + pub fn is_valid(&self) -> bool { + let fast_valid = self.archival_root_present + && self.archival_root_matches + && self.squash_node_hash_present + && self.squash_node_hash_matches + && self.root_hash_missing == 0 + && self.root_hash_mismatches == 0 + && self.blob_offset_mismatches == 0; + + // If a full leaf scan was performed (either direction checked any keys), + // also validate the leaf-level results. + let full_scan_performed = self.source_keys_checked > 0 || self.squashed_keys_checked > 0; + let leaf_valid = !full_scan_performed + || (self.missing_in_squashed == 0 + && self.missing_in_source == 0 + && self.value_mismatches == 0); + + fast_valid && leaf_valid + } +} + +/// Step 1: Build an in-memory block_map from all `marf_data` entries. +fn collect_block_map(src: &MARF) -> Result, Error> { + let all_blocks = trie_sql::bulk_read_block_entries::(src.sqlite_conn())?; + Ok(all_blocks + .into_iter() + .map(|(id, bh, offset)| (bh, (id, offset))) + .collect()) +} + +/// Step 2: For each height 0..=H, resolve (block_hash, root_hash) via trie +/// walk + direct blob seek. +fn collect_per_height_metadata( + src: &mut MARF, + source_tip: &T, + block_map: &HashMap, + blob_reader: &mut BlobReader, + height: u32, + label: &str, +) -> Result>, Error> { + let mut block_info: Vec> = Vec::with_capacity((height + 1) as usize); + let mut last_log = Instant::now(); + let start = Instant::now(); + + for h in 0..=height { + let h_key = format!("{BLOCK_HEIGHT_TO_HASH_MAPPING_KEY}::{h}"); + let val = src + .with_conn(|conn| MARF::::get_by_key(conn, source_tip, &h_key))? 
+ .ok_or_else(|| { + Error::CorruptionError(format!("Missing height mapping for height {h}")) + })?; + let bh = T::from(val); + + let &(block_id, blob_offset) = block_map.get(&bh).ok_or_else(|| { + Error::CorruptionError(format!( + "Missing block map entry for block hash at height {h}" + )) + })?; + + let rh = blob_reader.read_root_hash(block_id, blob_offset)?; + + block_info.push((h, bh, rh)); + + if last_log.elapsed().as_secs() >= 30 || (h > 0 && h % 100_000 == 0) { + info!( + "[{label}] [2/8] Build height index: {}/{} heights in {}", + h + 1, + height + 1, + fmt_duration(start.elapsed()) + ); + last_log = Instant::now(); + } + } + info!( + "[{label}] [2/8] Build height index: {} heights in {}", + height + 1, + fmt_duration(start.elapsed()) + ); + + Ok(block_info) +} + +/// Step 4: Bulk-insert `marf_data` placeholder rows for blocks 0..H-1. +/// +/// Returns a mapping from archival block_id to squashed block_id. +fn insert_placeholder_blocks( + conn: &rusqlite::Connection, + block_info: &[BlockInfo], + block_at_height: &T, + block_map: &HashMap, + label: &str, +) -> Result, Error> { + let start = Instant::now(); + let mut archival_to_squashed: HashMap = HashMap::new(); + let mut stmt = conn.prepare(PLACEHOLDER_INSERT_SQL)?; + for (h, bh, _) in block_info { + if bh == block_at_height { + continue; + } + let (archival_id, _) = block_map.get(bh).ok_or(Error::NotFoundError)?; + let empty_blob: &[u8] = &[]; + let squashed_id: u32 = stmt + .insert(params![bh.to_string(), empty_blob, 0i64, 0i64])? 
+ .try_into() + .expect("block_id overflow"); + archival_to_squashed.insert(*archival_id, squashed_id); + if *h % 100_000 == 0 && *h > 0 { + info!( + "[{label}] [4/8] Register placeholder blocks: {h} of {} in {}", + block_info.len(), + fmt_duration(start.elapsed()) + ); + } + } + info!( + "[{label}] [4/8] Register placeholder blocks: {} entries in {}", + archival_to_squashed.len(), + fmt_duration(start.elapsed()) + ); + Ok(archival_to_squashed) +} + +/// SQL used to insert an empty placeholder row into `marf_data`. +const PLACEHOLDER_INSERT_SQL: &str = + "INSERT INTO marf_data (block_hash, data, unconfirmed, external_offset, external_length) \ + VALUES (?1, ?2, 0, ?3, ?4)"; + +/// Step 6: Write all squash SQL metadata in one transaction scope. +fn persist_squash_metadata( + conn: &rusqlite::Connection, + block_info: &[BlockInfo], + source_root_hash: &TrieHash, + height: u32, +) -> Result<(), Error> { + let start = Instant::now(); + trie_sql::write_squash_info(conn, source_root_hash, height)?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO marf_squash_archival_marf_roots (height, marf_root_hash) VALUES (?1, ?2)", + )?; + let mut stmt_bh = conn.prepare( + "INSERT OR REPLACE INTO marf_squash_block_heights (block_hash, height) VALUES (?1, ?2)", + )?; + for (h, bh, rh) in block_info { + stmt.execute(params![*h as i64, rh.as_bytes().to_vec()])?; + stmt_bh.execute(params![bh.to_string(), *h as i64])?; + } + info!( + "Squash: wrote {} root hashes and block heights in {}", + block_info.len(), + fmt_duration(start.elapsed()) + ); + Ok(()) +} + +/// Post-commit: persist `squash_root_node_hash` and share blob offsets. +fn finalize_shared_blob_offsets( + dst: &mut MARF, + block_at_height: &T, + squash_root_node_hash: &TrieHash, +) -> Result { + // Persist squash_root_node_hash to SQL. 
+ { + let conn = dst.sqlite_conn(); + conn.execute_batch("BEGIN IMMEDIATE") + .map_err(|e| Error::CorruptionError(format!("BEGIN squash_root_node_hash: {e}")))?; + trie_sql::update_squash_root_node_hash(conn, squash_root_node_hash)?; + conn.execute_batch("COMMIT") + .map_err(|e| Error::CorruptionError(format!("COMMIT squash_root_node_hash: {e}")))?; + } + + // Bulk-update placeholders to share the tip block's blob offset. + let start = Instant::now(); + let conn = dst.sqlite_conn(); + let bh_id = trie_sql::get_block_identifier(conn, block_at_height)?; + let (offset, length) = trie_sql::get_external_trie_offset_length(conn, bh_id)?; + + conn.execute_batch("BEGIN IMMEDIATE") + .map_err(|e| Error::CorruptionError(format!("BEGIN: {e}")))?; + let updated = trie_sql::bulk_update_blob_offsets(conn, offset, length, block_at_height)?; + conn.execute_batch("COMMIT") + .map_err(|e| Error::CorruptionError(format!("COMMIT: {e}")))?; + info!( + "Squash: updated {} placeholder blob offsets in {}", + updated, + fmt_duration(start.elapsed()) + ); + Ok(updated) +} + +impl MARF { + /// Squash the MARF at `height` into a new database at `dst_path`. + /// + /// Produces a hash-preserving squash: the squashed MARF contains a single + /// shared trie storage with all trie nodes reachable at `height`. Each historical + /// block (0..=height) has a `marf_data` row pointing at this shared trie storage so + /// that `get_block_hash_caching(local_id)` returns the correct original + /// `StacksBlockId`. + /// + /// Backpointer identity is preserved via `TriePtr.back_block` annotations. + /// Children that were backpointers in the archival MARF are stored inline in + /// the blob but with `back_block` set to the squashed DB's local_id for the + /// original block. When the squashed MARF is extended to height H+1, + /// `node_copy_update_ptrs` preserves these annotations, ensuring + /// that `inner_write_children_hashes` uses the same `StacksBlockId` values + /// as the archival MARF. 
This guarantees identical per-block root hashes. + pub fn squash_to_path( + src_path: &str, + dst_path: &str, + open_opts: MARFOpenOpts, + height: u32, + label: &str, + ) -> Result { + if open_opts.compress { + return Err(Error::CorruptionError( + "squash_to_path does not support compress=true; \ + the direct blob write path only emits uncompressed nodes" + .to_string(), + )); + } + + let overall_start = Instant::now(); + + // Step 1: bulk SQL block map + let src_storage = TrieFileStorage::open_readonly(src_path, open_opts.clone())?; + let mut src = MARF::from_storage(src_storage); + + let tip = trie_sql::get_latest_confirmed_block_hash::(src.sqlite_conn())?; + let block_at_height = src + .get_block_at_height(height, &tip)? + .ok_or(Error::NotFoundError)?; + + let start = Instant::now(); + let block_map = collect_block_map(&src)?; + info!( + "[{label}] [1/8] Load block map: {} entries in {}", + block_map.len(), + fmt_duration(start.elapsed()) + ); + + // [2/8] Build height index + info!( + "[{label}] [2/8] Build height index: reading {} heights...", + height + 1 + ); + let mut blob_reader = BlobReader::new(src_path, open_opts.external_blobs)?; + let block_info = collect_per_height_metadata( + &mut src, + &tip, + &block_map, + &mut blob_reader, + height, + label, + )?; + + // [3/8] Collect trie nodes (DFS walk) + // + // Derive the temp directory from dst_path: use the parent directory. 
+ let tmp_dir = std::path::Path::new(dst_path) + .parent() + .filter(|p| !p.as_os_str().is_empty()) + .and_then(|p| p.to_str()) + .unwrap_or("."); + log_memory_snapshot("before trie DFS"); + info!("[{label}] [3/8] Collect trie nodes: starting DFS..."); + let start = Instant::now(); + let (mut node_store, source_to_idx) = src.with_conn(|conn| { + MARF::::collect_reachable_nodes(conn, &block_at_height, tmp_dir) + })?; + let node_count = node_store.len() as u64; + info!( + "[{label}] [3/8] Collect trie nodes: {node_count} nodes in {}", + fmt_duration(start.elapsed()) + ); + log_memory_snapshot("after trie DFS"); + + let mut dst_open_opts = open_opts.clone(); + dst_open_opts.external_blobs = true; + + // Open destination MARF and begin transaction + let mut dst = MARF::from_path(dst_path, dst_open_opts.clone())?; + let mut tx = dst.begin_tx()?; + tx.begin(&T::sentinel(), &block_at_height)?; + + // [4/8] Register placeholder blocks + let mut archival_to_squashed = insert_placeholder_blocks( + tx.sqlite_tx(), + &block_info, + &block_at_height, + &block_map, + label, + )?; + + // Build `block_id_map`: every archival `block_id` that appears + // as a node origin in the DFS must be mappable. insert_placeholder_blocks + // covers heights 0..H-1 but skips block_at_height and sentinel. + // Add them explicitly so `remap_child_ptrs` can resolve all children. + // + // Sentinel: flushed to marf_data by tx.begin() -> flush(). + let sentinel = T::sentinel(); + if let Some((archival_sentinel_id, _)) = block_map.get(&sentinel) { + let squashed_sentinel_id: u32 = tx.sqlite_tx().query_row( + "SELECT block_id FROM marf_data WHERE block_hash = ?1", + rusqlite::params![sentinel.to_string()], + |row| row.get(0), + )?; + archival_to_squashed.insert(*archival_sentinel_id, squashed_sentinel_id); + } + + // block_at_height: not yet in the destination `marf_data` (only in + // `block_extension_locks`). Insert an empty placeholder now to get a + // real `block_id`. 
Step [7/8] will UPDATE this row instead of + // inserting a new one via `update_external_trie_blob`. + let squashed_tip_placeholder_id = { + let (archival_tip_id, _) = block_map + .get(&block_at_height) + .ok_or(Error::NotFoundError)?; + let empty_blob: &[u8] = &[]; + let placeholder_id = tx + .sqlite_tx() + .prepare(PLACEHOLDER_INSERT_SQL)? + .insert(params![block_at_height.to_string(), empty_blob, 0i64, 0i64])? + .try_into() + .expect("block_id overflow"); + archival_to_squashed.insert(*archival_tip_id, placeholder_id); + placeholder_id + }; + drop(block_map); + + // [5/8] Remap trie pointers (disk-backed) + log_memory_snapshot("before pointer remap"); + info!("[{label}] [5/8] Remap trie pointers: {node_count} nodes..."); + let start = Instant::now(); + remap_child_ptrs( + &mut node_store, + &source_to_idx, + Some(&archival_to_squashed), + label, + )?; + info!( + "[{label}] [5/8] Remap trie pointers: {node_count} nodes in {}", + fmt_duration(start.elapsed()) + ); + drop(source_to_idx); + drop(archival_to_squashed); + node_store.drop_block_ids(); // free ~200 MB + log_memory_snapshot("after pointer remap"); + + // [6/8] Recompute node hashes (disk-backed) + log_memory_snapshot("before hash recompute"); + info!("[{label}] [6/8] Recompute node hashes: {node_count} nodes..."); + let start = Instant::now(); + recompute_content_hashes(&mut node_store)?; + info!( + "[{label}] [6/8] Recompute node hashes: {node_count} nodes in {}", + fmt_duration(start.elapsed()) + ); + log_memory_snapshot("after hash recompute"); + + let squash_root_node_hash = if node_store.len() > 0 { + node_store.hash(0) + } else { + return Err(Error::CorruptionError( + "No nodes in squash trie".to_string(), + )); + }; + + // [7/8] Write trie blob (compute offsets + stream to destination) + log_memory_snapshot("before blob write"); + info!("[{label}] [7/8] Write trie blob: {node_count} nodes..."); + let start = Instant::now(); + let parent_hash = T::sentinel(); + + let (blob_offsets, total_blob_size) 
= compute_blob_offsets(&mut node_store)?; + // Destination squash MARFs always use external blobs. + let block_id = tx.storage.with_trie_blobs(|db, blobs| { + let Some(trie_file) = blobs else { + return Err(Error::CorruptionError( + "squash destination requires external .blobs file but handle is unavailable" + .to_string(), + )); + }; + let offset = trie_sql::get_external_blobs_length(db)?; + trie_file + .seek(SeekFrom::Start(offset)) + .map_err(Error::IOError)?; + // buffer size is 1 MiB, completely arbitrary. + let mut buf_writer = BufWriter::with_capacity(1 << 20, trie_file); + stream_squash_blob( + &mut node_store, + &parent_hash, + &blob_offsets, + &mut buf_writer, + )?; + buf_writer.flush().map_err(Error::IOError)?; + let trie_file = buf_writer.into_inner().map_err(|e| { + Error::IOError(std::io::Error::other(format!( + "failed to flush BufWriter: {e}" + ))) + })?; + trie_file.flush().map_err(Error::IOError)?; + trie_file.sync_data().map_err(Error::IOError)?; + trie_sql::update_external_trie_blob( + db, + &block_at_height, + offset, + total_blob_size, + squashed_tip_placeholder_id, + ) + })?; + info!( + "[{label}] [7/8] Write trie blob: block_id={block_id}, {total_blob_size} bytes in {}", + fmt_duration(start.elapsed()) + ); + drop(blob_offsets); + drop(node_store); // free temp file + metadata + log_memory_snapshot("after blob write"); + + // [8/8] Persist metadata & commit + let step8_start = Instant::now(); + let source_root_hash = block_info + .iter() + .find(|(_, bh, _)| bh == &block_at_height) + .map(|(_, _, rh)| *rh) + .ok_or(Error::NotFoundError)?; + persist_squash_metadata(tx.sqlite_tx(), &block_info, &source_root_hash, height)?; + info!("[{label}] Squash root hash: {squash_root_node_hash}"); + + tx.set_squash_info(Some(SquashInfo { + archival_marf_root_hash: source_root_hash, + squash_root_node_hash, + height, + })); + + // Commit the SQL transaction without flushing TrieRAM (we already wrote the blob directly) + tx.commit_squash()?; + + // 
Post-commit: share blob offsets across placeholder blocks + finalize_shared_blob_offsets(&mut dst, &block_at_height, &squash_root_node_hash)?; + + info!( + "[{label}] [8/8] Persist metadata & commit: finished in {}", + fmt_duration(step8_start.elapsed()) + ); + + info!( + "[{label}] Squash complete: {node_count} nodes, total time {}", + fmt_duration(overall_start.elapsed()) + ); + + Ok(SquashStats { node_count }) + } + + /// DFS collection pass: gather all trie nodes reachable from `block_hash`. + /// + /// Uses a disk-backed `NodeStore` to avoid holding ~50M full node objects + /// in memory (~20 GB). Only lightweight metadata (hashes, block_ids, + /// file offsets) is kept in RAM (~4 GB). + /// + /// Uses iterative DFS instead of BFS. The DFS stack holds at most + /// `trie_height` frames (~32), each with one node's child pointer list. + /// Total stack memory is ~128 KB, compared to the BFS frontier which + /// could hold millions of entries (~GBs) for wide, hash-distributed tries. + /// + /// Nodes are pushed in DFS preorder (parent before children), which is + /// all the remap and hash-recompute passes require. + /// + /// Returns: + /// - `node_store`: disk-backed node data + in-memory metadata. + /// - `source_to_idx`: `(source_block_id, byte_offset) -> node index` map + /// needed by the remap pass. 
+ fn collect_reachable_nodes( + source: &mut TrieStorageConnection, + block_hash: &T, + tmp_dir: &str, + ) -> Result<(NodeStore, HashMap<(u32, u64), usize>), Error> { + source.open_block(block_hash)?; + let (root_node, root_hash) = Trie::read_root(source)?; + let root_block_id = source.get_cur_block_identifier()?; + + let mut store = NodeStore::new(tmp_dir)?; + let mut source_to_idx: HashMap<(u32, u64), usize> = HashMap::new(); + + let root_disk_ptr = TrieStorageConnection::::root_ptr_disk(); + source_to_idx.insert((root_block_id, root_disk_ptr), 0); + + let root_is_leaf = root_node.is_leaf(); + let root_ptrs: Vec = if root_is_leaf { + vec![] + } else { + root_node.ptrs().to_vec() + }; + store.push(&root_node, root_hash, root_block_id)?; + + // DFS stack frame: holds remaining child pointers for one node. + // Stack depth is bounded by trie height (~32), so total memory is + // ~32 * max_ptrs * sizeof(TriePtr) ≈ 128 KB - negligible. + struct DfsFrame { + origin_block_id: u32, + child_ptrs: Vec, + next_child: usize, + } + + let mut stack: Vec = Vec::new(); + if !root_is_leaf { + stack.push(DfsFrame { + origin_block_id: root_block_id, + child_ptrs: root_ptrs, + next_child: 0, + }); + } + + let dfs_start = Instant::now(); + let mut nodes_collected: u64 = 1; // root already counted + let mut last_log = Instant::now(); + + while !stack.is_empty() { + let stack_depth = stack.len(); + let frame = stack.last_mut().expect("stack is non-empty"); + // Scan this frame's remaining children for the next one to descend into. 
+ let mut descend_frame: Option = None; + + while frame.next_child < frame.child_ptrs.len() { + let ptr = *frame + .child_ptrs + .get(frame.next_child) + .expect("BUG: next_child within bounds"); + frame.next_child += 1; + + if ptr.id() == TrieNodeID::Empty as u8 { + continue; + } + + let (child_block_id, read_ptr) = if is_backptr(ptr.id()) { + (ptr.back_block(), ptr.from_backptr()) + } else { + (frame.origin_block_id, ptr) + }; + + let source_key = (child_block_id, read_ptr.ptr()); + if source_to_idx.contains_key(&source_key) { + continue; + } + + let child_bh = source.get_block_from_local_id(child_block_id)?.clone(); + source.open_block_maybe_id(&child_bh, Some(child_block_id))?; + let (child_node, child_hash) = source.read_nodetype(&read_ptr)?; + + let child_is_leaf = child_node.is_leaf(); + let child_ptrs_vec: Vec = if child_is_leaf { + vec![] + } else { + child_node.ptrs().to_vec() + }; + + source_to_idx.insert(source_key, store.len()); + store.push(&child_node, child_hash, child_block_id)?; + + nodes_collected += 1; + if last_log.elapsed().as_secs() >= 30 || nodes_collected % 1_000_000 == 0 { + info!( + "Trie DFS: {nodes_collected} nodes, stack depth {stack_depth}, {} elapsed", + fmt_duration(dfs_start.elapsed()) + ); + last_log = Instant::now(); + } + + // If internal node, descend into it (push frame and break). + // If leaf, continue scanning siblings. + if !child_is_leaf { + descend_frame = Some(DfsFrame { + origin_block_id: child_block_id, + child_ptrs: child_ptrs_vec, + next_child: 0, + }); + break; + } + } + + match descend_frame { + Some(new_frame) => stack.push(new_frame), + None => { + // All children of this frame processed, backtrack. 
+ stack.pop(); + } + } + } + + store.finish_writing()?; + + info!( + "Trie DFS: {} nodes in {}", + store.len(), + fmt_duration(dfs_start.elapsed()) + ); + + Ok((store, source_to_idx)) + } +} diff --git a/stackslib/src/chainstate/stacks/index/storage.rs b/stackslib/src/chainstate/stacks/index/storage.rs index cab8b0c1ab8..6b282cb4392 100644 --- a/stackslib/src/chainstate/stacks/index/storage.rs +++ b/stackslib/src/chainstate/stacks/index/storage.rs @@ -256,7 +256,7 @@ impl UncommittedState { /// Panics of the UncommittedState is sealed already. pub fn write_nodetype( &mut self, - node_array_ptr: u32, + node_array_ptr: u64, node: &TrieNodeType, hash: TrieHash, ) -> Result<(), Error> { @@ -284,7 +284,7 @@ impl UncommittedState { } /// Get the last pointer (i.e. last slot) of the TrieRAM - pub fn last_ptr(&mut self) -> Result { + pub fn last_ptr(&mut self) -> Result { self.trie_ram_mut().last_ptr() } @@ -379,6 +379,12 @@ impl UncommittedState { /// In-RAM trie storage. /// Used by TrieFileStorage to buffer the next trie being built. +/// +/// Pointers in `TrieRAM` are index-based, not disk-offset-based: +/// `TriePtr::ptr()` is treated as an in-memory node index into `data`, and +/// traversal/indexing paths are intentionally bounded to `u32`. +/// Large `u64` byte offsets are only materialized when serializing this trie +/// to persistent storage (see `dump_consume`/`write_trie_indirect`). 
#[derive(Clone)] pub struct TrieRAM { data: Vec<(TrieNodeType, TrieHash)>, @@ -626,7 +632,7 @@ impl TrieRAM { f: &mut F, node_data_order: &[u32], node_data: &[(TrieNodeType, TrieHash)], - offsets: &[u32], + offsets: &[u64], parent_hash: &T, ) -> Result<(), Error> { assert_eq!(node_data_order.len(), offsets.len()); @@ -652,7 +658,7 @@ impl TrieRAM { let next_offset = *offsets.get(ix).ok_or_else(|| { Error::CorruptionError("node_data_order.len() != offsets.len()".into()) })?; - f.seek(SeekFrom::Start(next_offset.into()))?; + f.seek(SeekFrom::Start(next_offset))?; } Ok(()) @@ -668,7 +674,7 @@ impl TrieRAM { f: &mut F, node_data_order: &[DumpPtr], node_data: &[(TrieNodeType, TrieHash)], - offsets: &[u32], + offsets: &[u64], parent_hash: &T, ) -> Result<(), Error> { assert_eq!(node_data_order.len(), offsets.len()); @@ -711,7 +717,7 @@ impl TrieRAM { let next_offset = *offsets.get(ix).ok_or_else(|| { Error::CorruptionError("node_data_order.len() != offsets.len()".into()) })?; - f.seek(SeekFrom::Start(u64::from(next_offset)))?; + f.seek(SeekFrom::Start(next_offset))?; } Ok(()) @@ -818,11 +824,11 @@ impl TrieRAM { fn calculate_node_hashes( &mut self, storage_tx: &mut TrieStorageTransaction, - node_ptr: u64, + node_ptr: u32, // in-memory index is always a u32 ) -> Result { let start_time = storage_tx.bench.write_children_hashes_start(); let mut start_node_time = Some(storage_tx.bench.write_children_hashes_same_block_start()); - let (node, node_hash) = self.get_nodetype(node_ptr as u32)?.to_owned(); + let (node, node_hash) = self.get_nodetype(node_ptr)?.to_owned(); if node.is_leaf() { // base case: we already have the hash of the leaf, so return it. 
Ok(node_hash) @@ -863,7 +869,7 @@ impl TrieRAM { .write_children_hashes_empty_finish(start_time); } else if !is_backptr(ptr.id()) { // hash is the hash of this node's children - let node_hash = self.calculate_node_hashes(storage_tx, ptr.ptr() as u64)?; + let node_hash = self.calculate_node_hashes(storage_tx, ptr.ptr_as_u32()?)?; // count the time taken to store the hash towards the // write_children_hashes_same_benchmark @@ -882,7 +888,7 @@ impl TrieRAM { && ptr.id() != TrieNodeID::Leaf as u8 { // need to store this hash too, since we deferred calculation - self.write_node_hash(ptr.ptr(), node_hash)?; + self.write_node_hash(ptr.ptr_as_u32()?, node_hash)?; } storage_tx @@ -930,63 +936,119 @@ impl TrieRAM { /// Walk through the buffered TrieNodes and dump them to f. /// This consumes this TrieRAM instance. - fn dump_consume(mut self, f: &mut F) -> Result { - // step 1: write out each node in breadth-first order to get their ptr offsets + pub(crate) fn dump_consume(mut self, f: &mut F) -> Result { + // step 1: determine breadth-first node order let mut frontier: VecDeque = VecDeque::new(); - let mut node_data = vec![]; - let mut offsets = vec![]; + let mut forward_ptr_count = 0usize; + // True when a node has forward pointers whose encoding may widen. 
+ let mut has_forward_ptrs = vec![]; let start = TriePtr::new(TrieNodeID::Node256 as u8, 0, 0).ptr(); - frontier.push_back(start); - - // first 32 bytes is reserved for the parent block hash - // next 4 bytes is the local block identifier - let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + frontier.push_back( + u32::try_from(start) + .map_err(|_| Error::CorruptionError("Root pointer exceeds u32::MAX".into()))?, + ); while let Some(pointer) = frontier.pop_front() { let (node, _node_hash) = self.get_nodetype(pointer)?; - // calculate size - let num_written = get_node_byte_len(node); - ptr += num_written as u64; // queue each child + let mut has_fwd = false; if !node.is_leaf() { for ptr in node.ptrs().iter() { if !ptr.is_empty() && !is_backptr(ptr.id) { - frontier.push_back(ptr.ptr()); + let idx = ptr.ptr_as_u32()?; + frontier.push_back(idx); + forward_ptr_count = forward_ptr_count + .checked_add(1) + .ok_or_else(|| Error::OverflowError)?; + has_fwd = true; } } } + has_forward_ptrs.push(has_fwd); node_data.push(pointer); - offsets.push(ptr as u32); } - assert_eq!(offsets.len(), node_data.len()); + // step 2: repeatedly lay out nodes until serialized offsets stabilize + // The first 32 bytes are reserved for the parent block hash, + // and the next 4 bytes for the local block identifier. + let mut end_offset = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + let mut offsets = Vec::with_capacity(node_data.len()); + // Cached byte lengths: nodes without forward pointers have constant + // sizes across passes, so we only recompute nodes with forward ptrs. + let mut byte_lens = node_data + .iter() + .map(|p| { + let (node, _) = self.get_nodetype(*p)?; + u64::try_from(get_node_byte_len(node)).map_err(|_| Error::OverflowError) + }) + .collect::, Error>>()?; + // The first pass replaces in-memory indices with serialized offsets. + // Afterwards, each mutable child pointer can widen from u32 to u64 at most once. 
+ // A pass that changes offsets without introducing any new wide pointers is the final + // settling pass, so `forward_ptr_count + 2` bounds convergence. + let max_layout_passes = forward_ptr_count.saturating_add(2); + let mut converged = false; + for _ in 0..max_layout_passes { + offsets.clear(); + let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + for ((&pointer, &has_fwd), blen) in node_data + .iter() + .zip(has_forward_ptrs.iter()) + .zip(byte_lens.iter_mut()) + { + if has_fwd { + let (node, _) = self.get_nodetype(pointer)?; + *blen = + u64::try_from(get_node_byte_len(node)).map_err(|_| Error::OverflowError)?; + } + ptr += *blen; + offsets.push(ptr); + } + end_offset = ptr; - // step 2: update ptrs in all nodes - let mut i = 0; - for node_data_ptr in node_data.iter() { - let next_node = &mut self - .data - .get_mut(*node_data_ptr as usize) - .ok_or_else(|| Error::CorruptionError("Miscalculated dump_consume pointer".into()))? - .0; - if !next_node.is_leaf() { + let mut changed = false; + let mut i = 0; + for (&node_data_ptr, &has_fwd) in node_data.iter().zip(has_forward_ptrs.iter()) { + if !has_fwd { + continue; + } + let next_node = &mut self + .data + .get_mut(usize::try_from(node_data_ptr).map_err(|_| Error::OverflowError)?) + .ok_or_else(|| { + Error::CorruptionError("Miscalculated dump_consume pointer".into()) + })? 
+ .0; let ptrs = next_node.ptrs_mut(); for ptr in ptrs.iter_mut() { if !ptr.is_empty() && !is_backptr(ptr.id) { - ptr.ptr = *offsets.get(i).ok_or_else(|| { + let next_offset = *offsets.get(i).ok_or_else(|| { Error::CorruptionError("Miscalculated dump_consume offsets".into()) })?; + if ptr.ptr != next_offset { + ptr.ptr = next_offset; + changed = true; + } i += 1; } } } + if !changed { + converged = true; + break; + } + } + if !converged { + return Err(Error::CorruptionError(format!( + "dump_consume layout did not converge after {max_layout_passes} passes" + ))); } - // step 3: write out each node (now that they have the write ptrs) + // step 3: write out each node (now that they have stable write ptrs) TrieRAM::write_trie_indirect( f, &node_data, @@ -995,7 +1057,7 @@ impl TrieRAM { &self.parent, )?; - Ok(ptr) + Ok(end_offset) } fn make_node_patch( @@ -1078,23 +1140,23 @@ impl TrieRAM { /// /// Returns Ok(len) to report number of bytes written /// Returns Err(..) if we fail to write - fn dump_compressed_consume( + pub(crate) fn dump_compressed_consume( mut self, storage_tx: &mut TrieStorageTransaction, f: &mut F, ) -> Result { - // step 1: write out each node in breadth-first order to get their ptr offsets + // step 1: determine breadth-first node order and any patch payloads let mut frontier: VecDeque = VecDeque::new(); let mut node_data = vec![]; - let mut offsets = vec![]; + let mut forward_ptr_count = 0usize; + let mut has_forward_ptrs = vec![]; let start = TriePtr::new(TrieNodeID::Node256 as u8, 0, 0).ptr(); - frontier.push_back(start); - - // first 32 bytes is reserved for the parent block hash - // next 4 bytes is the local block identifier - let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + frontier.push_back( + u32::try_from(start) + .map_err(|_| Error::CorruptionError("Root pointer exceeds u32::MAX".into()))?, + ); while let Some(pointer) = frontier.pop_front() { let (node, node_hash) = self.get_nodetype(pointer)?; @@ -1146,97 +1208,160 @@ impl 
TrieRAM { None }; - // calculate size if let Some((_, patch_node)) = patch_node_opt.as_ref() { - // IMPROVEMENT: don't store a copy of a node that was copied forward via - // MARF::walk_cow(). Instead, store only the new ptrs in the copied node, and store - // a pointer to the original node in the ancestral trie. - // TRIEHASH_ENCODED_SIZE accounts for the trie hash bytes written before the patch - trace!( - "Patch node {:?} for {:?} to be written at {}", - &patch_node, - &node, - ptr - ); - let num_written = TRIEHASH_ENCODED_SIZE + patch_node.size(); - ptr += num_written as u64; - - let mut num_new_nodes = 0; - if !node.is_leaf() { - for ptr in node.ptrs().iter() { - if !ptr.is_empty() && !is_backptr(ptr.id) { - num_new_nodes += 1; - } - } - } - assert_eq!(num_new_nodes, patch_node.ptr_diff.len()); - } else { - // IMPROVEMENT: don't store backptr block ID if it's 0 - trace!("Normal node {:?} to be written at {}", &node, ptr); - let num_written = get_node_byte_len_compressed(node); - ptr += num_written as u64; + // The BFS frontier and the convergence loop must visit the + // exact same forward children in the same order. Compare the + // chr() sequence of forward pointers in the full node against + // the patch diff to guarantee this. 
+ let node_forward = node + .ptrs() + .iter() + .filter(|p| !p.is_empty() && !is_backptr(p.id)) + .map(|p| p.chr()); + let diff_forward = patch_node + .ptr_diff + .iter() + .filter(|p| !p.is_empty() && !is_backptr(p.id)) + .map(|p| p.chr()); + assert!(node_forward.eq(diff_forward)); } // queue each child + let mut has_fwd = false; if !node.is_leaf() { for ptr in node.ptrs().iter() { if !ptr.is_empty() && !is_backptr(ptr.id) { - frontier.push_back(ptr.ptr()); + let idx = u32::try_from(ptr.ptr()).map_err(|_| { + Error::CorruptionError(format!( + "In-memory node index {} exceeds u32::MAX", + ptr.ptr() + )) + })?; + frontier.push_back(idx); + forward_ptr_count = forward_ptr_count + .checked_add(1) + .ok_or_else(|| Error::OverflowError)?; + has_fwd = true; } } } + // Nodes with forward ptrs need re-measurement each layout + // pass because child offsets can widen from u32 to u64. if let Some((hash_bytes, patch)) = patch_node_opt.take() { node_data.push(DumpPtr::Patch(pointer, hash_bytes, patch)); } else { node_data.push(DumpPtr::Normal(pointer)); } - offsets.push(ptr as u32); + has_forward_ptrs.push(has_fwd); } - assert_eq!(offsets.len(), node_data.len()); + // step 2: repeatedly lay out nodes until serialized offsets stabilize + // The first 32 bytes are reserved for the parent block hash, + // and the next 4 bytes for the local block identifier. + let mut end_offset = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + let mut offsets = vec![]; + // Cached byte lengths: leaf / pointer-free / patch node sizes are + // constant across passes, so we only recompute non-leaf nodes. 
+ let mut byte_lens = node_data + .iter() + .map(|dp| { + let byte_len = if let Some(patch) = dp.patch() { + TRIEHASH_ENCODED_SIZE + patch.size() + } else { + let (node, _) = self.get_nodetype(dp.ptr())?; + get_node_byte_len_compressed(node) + }; + u64::try_from(byte_len).map_err(|_| Error::OverflowError) + }) + .collect::, Error>>()?; + // The first pass replaces in-memory indices with serialized offsets. + // Afterwards, each mutable child pointer can widen from u32 to u64 at most once. + // A pass that changes offsets without introducing any new wide pointers is the final + // settling pass, so `forward_ptr_count + 2` bounds convergence. + let max_layout_passes = forward_ptr_count.saturating_add(2); + let mut converged = false; + for _pass in 0..max_layout_passes { + offsets.clear(); + let mut ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; + for (node_data_ptr, (&has_fwd, blen)) in node_data + .iter() + .zip(has_forward_ptrs.iter().zip(byte_lens.iter_mut())) + { + if has_fwd { + let new_len = if let Some(patch) = node_data_ptr.patch() { + TRIEHASH_ENCODED_SIZE + patch.size() + } else { + let (node, _) = self.get_nodetype(node_data_ptr.ptr())?; + get_node_byte_len_compressed(node) + }; + *blen = u64::try_from(new_len).map_err(|_| Error::OverflowError)?; + } + ptr += *blen; + offsets.push(ptr); + } + end_offset = ptr; - // step 2: update ptrs in all nodes - let mut i = 0; - for node_data_ptr in node_data.iter_mut() { - if let Some(patch) = node_data_ptr.patch_mut() { - for ptr in patch.ptr_diff.iter_mut() { - if !ptr.is_empty() && !is_backptr(ptr.id) { - ptr.ptr = *offsets.get(i).ok_or_else(|| { - Error::CorruptionError( - "Miscalculated dump_compressed_consume offsets".into(), - ) - })?; - i += 1; + let mut changed = false; + let mut i = 0; + for (node_data_ptr, &has_fwd) in node_data.iter_mut().zip(has_forward_ptrs.iter()) { + if let Some(patch) = node_data_ptr.patch_mut() { + for ptr in patch.ptr_diff.iter_mut() { + if !ptr.is_empty() && !is_backptr(ptr.id) { 
+ let next_offset = *offsets.get(i).ok_or_else(|| { + Error::CorruptionError( + "Miscalculated dump_compressed_consume offsets".into(), + ) + })?; + if ptr.ptr != next_offset { + ptr.ptr = next_offset; + changed = true; + } + i += 1; + } } - } - } else { - let next_node = &mut self - .data - .get_mut(node_data_ptr.ptr() as usize) - .ok_or_else(|| { - Error::CorruptionError( - "Miscalculated dump_compressed_consume pointer".into(), + } else if has_fwd { + let next_node = &mut self + .data + .get_mut( + usize::try_from(node_data_ptr.ptr()) + .map_err(|_| Error::OverflowError)?, ) - })? - .0; - if !next_node.is_leaf() { + .ok_or_else(|| { + Error::CorruptionError( + "Miscalculated dump_compressed_consume pointer".into(), + ) + })? + .0; let ptrs = next_node.ptrs_mut(); for ptr in ptrs.iter_mut() { if !ptr.is_empty() && !is_backptr(ptr.id) { - ptr.ptr = *offsets.get(i).ok_or_else(|| { + let next_offset = *offsets.get(i).ok_or_else(|| { Error::CorruptionError( "Miscalculated dump_compressed_consume offsets".into(), ) })?; + if ptr.ptr != next_offset { + ptr.ptr = next_offset; + changed = true; + } i += 1; } } } } + if !changed { + converged = true; + break; + } + } + if !converged { + return Err(Error::CorruptionError(format!( + "dump_compressed_consume layout did not converge after {max_layout_passes} passes" + ))); } - // step 3: write out each node (now that they have the write ptrs) + // step 3: write out each node (now that they have stable write ptrs) TrieRAM::write_trie_indirect_compressed( f, &node_data, @@ -1245,14 +1370,14 @@ impl TrieRAM { &self.parent, )?; - Ok(ptr) + Ok(end_offset) } /// load the trie from F. /// The trie will have the same structure as the on-disk trie, but it may have nodes in a /// different order. 
pub fn load(f: &mut F, bhh: &T) -> Result, Error> { - let mut data = vec![]; + let mut data: Vec<(TrieNodeType, TrieHash)> = vec![]; let mut frontier = VecDeque::new(); // read parent @@ -1262,7 +1387,7 @@ impl TrieRAM { let root_disk_ptr = BLOCK_HEADER_HASH_ENCODED_SIZE as u64 + 4; - let root_ptr = TriePtr::new(TrieNodeID::Node256 as u8, 0, root_disk_ptr as u32); + let root_ptr = TriePtr::new(TrieNodeID::Node256 as u8, 0, root_disk_ptr); let (mut root_node, root_hash) = read_nodetype(f, &root_ptr) .inspect_err(|e| error!("Failed to read root node info for {bhh:?}: {e:?}"))?; @@ -1343,7 +1468,8 @@ impl TrieRAM { /// Read a node's hash from the TrieRAM. ptr.ptr() is an array index. pub fn read_node_hash(&self, ptr: &TriePtr) -> Result { - let (_, node_trie_hash) = self.data.get(ptr.ptr() as usize).ok_or_else(|| { + let idx = ptr.ptr_as_usize()?; + let (_, node_trie_hash) = self.data.get(idx).ok_or_else(|| { error!( "TrieRAM: Failed to read node bytes: {} >= {}", ptr.ptr(), @@ -1388,7 +1514,8 @@ impl TrieRAM { self.read_node_count += 1; } - if let Some(node) = self.data.get(ptr.ptr() as usize) { + let idx = ptr.ptr_as_usize()?; + if let Some(node) = self.data.get(idx) { Ok(node.clone()) } else { error!( @@ -1405,7 +1532,7 @@ impl TrieRAM { /// Store a node and its hash to the TrieRAM at the given slot. pub fn write_nodetype( &mut self, - node_array_ptr: u32, + node_array_ptr: u64, node: &TrieNodeType, hash: TrieHash, ) -> Result<(), Error> { @@ -1432,10 +1559,13 @@ impl TrieRAM { } } - if let Some(existing_node) = self.data.get_mut(node_array_ptr as usize) { + let node_index = usize::try_from(node_array_ptr).map_err(|_| Error::NotFoundError)?; + if let Some(existing_node) = self.data.get_mut(node_index) { *existing_node = (node.clone(), hash); Ok(()) - } else if node_array_ptr == (self.data.len() as u32) { + } else if node_array_ptr + == u64::try_from(self.data.len()).map_err(|_| Error::OverflowError)? 
+ { self.data.push((node.clone(), hash)); self.total_bytes += get_node_byte_len(node); Ok(()) @@ -1460,7 +1590,8 @@ impl TrieRAM { ); // can only set the hash of an existing node - if let Some(existing_node) = self.data.get_mut(node_array_ptr as usize) { + let node_index = usize::try_from(node_array_ptr).map_err(|_| Error::NotFoundError)?; + if let Some(existing_node) = self.data.get_mut(node_index) { existing_node.1 = hash; Ok(()) } else { @@ -1470,8 +1601,8 @@ impl TrieRAM { } /// Get the next ptr value for a node to store. - pub fn last_ptr(&mut self) -> Result { - Ok(self.data.len() as u32) + pub fn last_ptr(&mut self) -> Result { + u64::try_from(self.data.len()).map_err(|_| Error::OverflowError) } #[cfg(test)] @@ -1489,7 +1620,8 @@ impl TrieRAM { impl NodeHashReader for TrieRAM { fn read_node_hash_bytes(&mut self, ptr: &TriePtr, w: &mut W) -> Result<(), Error> { - let (_, node_trie_hash) = self.data.get(ptr.ptr() as usize).ok_or_else(|| { + let idx = ptr.ptr_as_usize()?; + let (_, node_trie_hash) = self.data.get(idx).ok_or_else(|| { error!( "TrieRAM: Failed to read node bytes: {} >= {}", ptr.ptr(), @@ -1634,6 +1766,26 @@ pub struct TrieStorageTransientData { /// Does this trie represent unconfirmed state? unconfirmed: bool, + + /// Snapshot metadata if this MARF is squashed. + squash_info: Option, +} + +/// Snapshot metadata cached at open time for squashed MARFs. +/// +/// Contains the archival root hash, squash root node hash, height, and +/// block hash at which the MARF was squashed. This is populated once +/// when the MARF is opened and used by the ancestor-hash computation to +/// avoid opening pruned historical blocks. +#[derive(Clone, Debug)] +pub struct SquashInfo { + /// Archival MARF root hash committed to the chain at the squash height. + pub archival_marf_root_hash: TrieHash, + /// Root node hash of the squash trie. i.e. `hash(consensus_bytes(root) || children_content_hashes)` + /// `TrieHash::from_data(&[])` if not yet computed. 
+ pub squash_root_node_hash: TrieHash, + /// Height at which the MARF was squashed. + pub height: u32, } // disk-backed Trie. @@ -1679,6 +1831,10 @@ impl TrieStorageTransientData { fn clear_block_id(&mut self) { self.cur_block_id = None; } + + fn set_squash_info(&mut self, squash_info: Option) { + self.squash_info = squash_info; + } } pub struct ReopenedTrieStorageConnection<'a, T: MarfTrieId> { @@ -1728,6 +1884,33 @@ impl<'a, T: MarfTrieId> ReopenedTrieStorageConnection<'a, T> { } impl TrieFileStorage { + /// Detect whether this MARF was produced by a squash operation and, if + /// so, cache the squash metadata [`SquashInfo`]. + /// + /// The metadata is read from the `marf_squash_info` SQL table + fn init_squash_info(&mut self) -> Result<(), Error> { + let squash_info = match trie_sql::read_squash_info(&self.db)? { + Some((archival_marf_root_hash, squash_root_node_hash_opt, height)) => { + Some(SquashInfo { + archival_marf_root_hash, + // While creating a squash, this may still be empty. + squash_root_node_hash: squash_root_node_hash_opt + .unwrap_or_else(|| TrieHash::from_data(&[])), + height, + }) + } + None => None, + }; + + self.data.set_squash_info(squash_info); + Ok(()) + } + + /// Returns cached squashing metadata, if present. 
+ pub fn squash_info(&self) -> Option<&SquashInfo> { + self.data.squash_info.as_ref() + } + pub fn connection(&mut self) -> TrieStorageConnection<'_, T> { TrieStorageConnection { db: SqliteConnection::ConnRef(&self.db), @@ -1768,6 +1951,8 @@ impl TrieFileStorage { readonly: true, unconfirmed: self.unconfirmed(), + + squash_info: self.data.squash_info.clone(), }; // perf note: should we attempt to clone the cache let cache = TrieCache::default(); @@ -1881,17 +2066,21 @@ impl TrieFileStorage { None }; - let prev_schema_version = trie_sql::migrate_tables_if_needed::(&mut db)?; - if prev_schema_version != trie_sql::SQL_MARF_SCHEMA_VERSION || marf_opts.force_db_migrate { - if let Some(blobs) = blobs.as_mut() { - if TrieFile::exists(&db_path)? { - // migrate blobs out of the old DB - blobs.export_trie_blobs::(&db, &db_path)?; + let prev_schema_version = trie_sql::migrate_tables_if_needed::(&mut db, readonly)?; + if !readonly { + if prev_schema_version != trie_sql::SQL_MARF_SCHEMA_VERSION + || marf_opts.force_db_migrate + { + if let Some(blobs) = blobs.as_mut() { + if TrieFile::exists(&db_path)? { + // migrate blobs out of the old DB + blobs.export_trie_blobs::(&db, &db_path)?; + } } } - } - if trie_sql::detect_partial_migration(&db)? { - panic!("PARTIAL MIGRATION DETECTED! This is an irrecoverable error. You will need to restart your node from genesis."); + if trie_sql::detect_partial_migration(&db)? { + panic!("PARTIAL MIGRATION DETECTED! This is an irrecoverable error. 
You will need to restart your node from genesis."); + } } debug!( @@ -1902,7 +2091,7 @@ impl TrieFileStorage { let cache = TrieCache::new(&marf_opts.cache_strategy); - let ret = TrieFileStorage { + let mut ret = TrieFileStorage { db_path, db, cache, @@ -1929,6 +2118,8 @@ impl TrieFileStorage { readonly, unconfirmed, + + squash_info: None, }, // used in testing in order to short-circuit block-height lookups @@ -1937,6 +2128,7 @@ impl TrieFileStorage { test_genesis_block: None, }; + ret.init_squash_info()?; Ok(ret) } @@ -1992,7 +2184,7 @@ impl TrieFileStorage { trace!("Make read-only view of TrieFileStorage: {}", &self.db_path); // TODO: borrow self.uncommitted_writes; don't copy them - let ret = TrieFileStorage { + let mut ret = TrieFileStorage { db_path: self.db_path.clone(), db, blobs, @@ -2019,6 +2211,8 @@ impl TrieFileStorage { readonly: true, unconfirmed: self.unconfirmed(), + + squash_info: self.data.squash_info.clone(), }, // used in testing in order to short-circuit block-height lookups @@ -2027,6 +2221,7 @@ impl TrieFileStorage { test_genesis_block: self.test_genesis_block.clone(), }; + ret.init_squash_info()?; Ok(ret) } @@ -2062,7 +2257,7 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> { let cache = TrieCache::default(); // TODO: borrow self.uncommitted_writes; don't copy them - let ret = TrieFileStorage { + let mut ret = TrieFileStorage { db_path: self.db_path.to_string(), db, blobs, @@ -2089,6 +2284,8 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> { readonly: true, unconfirmed: self.unconfirmed(), + + squash_info: self.data.squash_info.clone(), }, // used in testing in order to short-circuit block-height lookups @@ -2097,11 +2294,12 @@ impl<'a, T: MarfTrieId> TrieStorageTransaction<'a, T> { test_genesis_block: self.test_genesis_block.clone(), }; + ret.init_squash_info()?; Ok(ret) } /// Run `cls` with a mutable reference to the inner trie blobs opt. 
- fn with_trie_blobs(&mut self, cls: F) -> R + pub(crate) fn with_trie_blobs(&mut self, cls: F) -> R where F: FnOnce(&Connection, &mut Option<&mut TrieFile>) -> R, { @@ -2433,6 +2631,26 @@ impl TrieStorageConnection<'_, T> { self.data.unconfirmed } + /// Returns true when this storage represents a squashed MARF. + pub fn is_squashed(&self) -> bool { + self.data.squash_info.is_some() + } + + /// Returns cached squashing metadata, if present. + pub fn squash_info(&self) -> Option<&SquashInfo> { + self.data.squash_info.as_ref() + } + + /// Set cached squashing metadata for this storage connection. + pub(crate) fn set_squash_info(&mut self, squash_info: Option) { + self.data.set_squash_info(squash_info); + } + + /// Returns a reference to the underlying SQLite connection. + pub(crate) fn sqlite_conn(&self) -> &Connection { + &self.db + } + pub fn set_cached_ancestor_hashes_bytes(&mut self, bhh: &T, bytes: Vec) { self.data.trie_ancestor_hash_bytes_cache = Some((bhh.clone(), bytes)); } @@ -2442,6 +2660,20 @@ impl TrieStorageConnection<'_, T> { } pub fn get_root_hash_at(&mut self, tip: &T) -> Result { + // In a squashed MARF, blocks within 0..=H share a single blob + // whose root hash is the squash root, not the per-height archival + // root. Use the side-table when available. + if self.data.squash_info.is_some() { + if let Some(h) = trie_sql::read_squash_block_height(self.sqlite_conn(), tip)? { + return trie_sql::read_squash_archival_marf_root_hash(self.sqlite_conn(), h)? + .ok_or_else(|| { + Error::CorruptionError(format!( + "Missing archival root hash at height {h} for block {tip}" + )) + }); + } + } + let cur_block_hash = self.get_cur_block(); self.open_block(tip)?; @@ -2527,10 +2759,46 @@ impl TrieStorageConnection<'_, T> { Ok(ret) } - /// Generate a mapping between Trie root hashes and the blocks that contain them + /// Generate a mapping between Trie root hashes and the blocks that contain them. 
+ /// + /// For squashed MARFs, blocks within the squashed range (0..=H) share a + /// single shared trie storage whose stored trie hash was computed at height H. The + /// standard blob-scanning approach would produce collisions (all blocks + /// get the same trie hash). Instead, for each squashed block at height + /// K we re-derive the trie hash by combining the squash trie's content + /// hash with the archival ancestor hashes at height K from the SQL + /// metadata. This mirrors what the proof verifier computes when it + /// processes a segment proof inside the squash trie and the subsequent + /// initial shunt. #[cfg(test)] pub fn read_root_to_block_table(&mut self) -> Result, Error> { let mut ret = self.inner_read_persisted_root_to_blocks()?; + + // Override entries for blocks in the squashed range. + // All blocks at heights 0..=H share a single squash trie, so + // `inner_read_persisted_root_to_blocks` maps them all to the same + // trie hash. Replace those entries with the per-height archival + // trie hashes stored during squashing. These are the hashes that + // the proof verifier expects (the squash shunt at idx = -1 injects + // the archival trie hash directly). + if let Some(info) = self.data.squash_info.clone() { + for h in 0..=info.height { + let bh: T = match trie_sql::read_squash_block_height_reverse(self.sqlite_conn(), h) + { + Ok(Some(bh)) => bh, + _ => continue, + }; + + let archival_trie_hash = + match trie_sql::read_squash_archival_marf_root_hash(self.sqlite_conn(), h) { + Ok(Some(h)) => h, + _ => continue, + }; + + ret.insert(archival_trie_hash, bh); + } + } + let uncommitted_writes = match self.data.uncommitted_writes.take() { Some((bhh, trie_ram)) => { let ptr = TriePtr::new(set_backptr(TrieNodeID::Node256 as u8), 0, 0); @@ -2749,7 +3017,7 @@ impl TrieStorageConnection<'_, T> { } /// Get the TriePtr::ptr() value for the root node in the currently-open block. 
- pub fn root_ptr(&self) -> u32 { + pub fn root_ptr(&self) -> u64 { if let Some((ref uncommitted_bhh, _)) = self.data.uncommitted_writes { if &self.data.cur_block == uncommitted_bhh { return 0; @@ -2765,10 +3033,10 @@ impl TrieStorageConnection<'_, T> { } /// Get the TriePtr::ptr() value for a trie's root node if the node is stored to disk. - pub fn root_ptr_disk() -> u32 { + pub fn root_ptr_disk() -> u64 { // first 32 bytes are the block parent hash // next 4 are the identifier - (BLOCK_HEADER_HASH_ENCODED_SIZE as u32) + 4 + (BLOCK_HEADER_HASH_ENCODED_SIZE as u64) + 4 } /// Read a node's children's hashes into the provided implementation. @@ -3161,7 +3429,7 @@ impl TrieStorageConnection<'_, T> { /// If the uncommitted state is not instantiated, then this panics. pub fn write_nodetype( &mut self, - disk_ptr: u32, + disk_ptr: u64, node: &TrieNodeType, hash: TrieHash, ) -> Result<(), Error> { @@ -3201,7 +3469,7 @@ impl TrieStorageConnection<'_, T> { /// Store a node and its hash to uncommitted state. pub fn write_node( &mut self, - ptr: u32, + ptr: u64, node: &N, hash: TrieHash, ) -> Result<(), Error> { @@ -3215,7 +3483,7 @@ impl TrieStorageConnection<'_, T> { /// Get the last slot into which a node will be inserted in the uncommitted state. /// Panics if there is no uncommmitted state instantiated. 
- pub fn last_ptr(&mut self) -> Result { + pub fn last_ptr(&mut self) -> Result { if let Some((_, ref mut uncommitted_trie)) = self.data.uncommitted_writes { uncommitted_trie.last_ptr() } else { diff --git a/stackslib/src/chainstate/stacks/index/test/file.rs b/stackslib/src/chainstate/stacks/index/test/file.rs index 5a2a1faffd4..0bbb6298e1a 100644 --- a/stackslib/src/chainstate/stacks/index/test/file.rs +++ b/stackslib/src/chainstate/stacks/index/test/file.rs @@ -48,7 +48,7 @@ fn setup_db(test_name: &str) -> Connection { fn test_load_store_trie_blob() { let mut db = setup_db("test_load_store_trie_blob"); let mut blobs = TrieFile::from_db_path(&db_path("test_load_store_trie_blob"), false).unwrap(); - trie_sql::migrate_tables_if_needed::(&mut db).unwrap(); + trie_sql::migrate_tables_if_needed::(&mut db, false).unwrap(); blobs .store_trie_blob::(&db, &BlockHeaderHash([0x01; 32]), &[1, 2, 3, 4, 5]) @@ -74,6 +74,38 @@ fn test_load_store_trie_blob() { assert_eq!(buf, vec![10, 20, 30, 40, 50]); } +#[test] +fn test_migrate_tables_readonly_succeeds_when_current() { + let mut db = setup_db("test_migrate_tables_readonly_ok"); + // First migrate in writable mode to bring schema to current version + trie_sql::migrate_tables_if_needed::(&mut db, false).unwrap(); + // Now a read-only migration check should succeed + let version = trie_sql::migrate_tables_if_needed::(&mut db, true).unwrap(); + assert_eq!(version, trie_sql::SQL_MARF_SCHEMA_VERSION); +} + +#[test] +fn test_migrate_tables_readonly_fails_when_outdated() { + let path = db_path("test_migrate_tables_readonly_fail"); + if fs::metadata(&path).is_ok() { + fs::remove_file(&path).unwrap(); + } + let mut db = sqlite_open( + &path, + OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_CREATE, + true, + ) + .unwrap(); + trie_sql::create_tables_if_needed(&mut db).unwrap(); + // Don't migrate - schema is at version 1. + // A read-only open should fail because the schema is outdated. 
+ let err = trie_sql::migrate_tables_if_needed::(&mut db, true).unwrap_err(); + assert!( + matches!(&err, crate::chainstate::stacks::index::Error::CorruptionError(msg) if msg.contains("not compatible with read-only")), + "instead got: {err}" + ); +} + #[test] fn test_migrate_existing_trie_blobs() { let test_file = "/tmp/test_migrate_existing_trie_blobs.sqlite"; @@ -158,3 +190,39 @@ fn test_migrate_existing_trie_blobs() { } } } + +#[test] +fn test_bulk_read_block_entries_rejects_negative_external_offset() { + let mut db = setup_db("test_bulk_read_block_entries_rejects_negative_external_offset"); + trie_sql::migrate_tables_if_needed::(&mut db, false).unwrap(); + + let block_hash = BlockHeaderHash([0x11; 32]); + db.execute( + "INSERT INTO marf_data (block_hash, data, unconfirmed, external_offset, external_length) \ + VALUES (?1, ?2, 0, ?3, ?4)", + rusqlite::params![block_hash.to_string(), Vec::::new(), -1i64, 0i64], + ) + .unwrap(); + + let err = trie_sql::bulk_read_block_entries::(&db).unwrap_err(); + assert!( + matches!(err, crate::chainstate::stacks::index::Error::OverflowError), + "instead got: {err:?}" + ); +} + +#[test] +fn test_update_squash_root_node_hash_requires_existing_row() { + let db = setup_db("test_update_squash_root_node_hash_requires_existing_row"); + let hash = TrieHash::from_data(b"squash-root"); + + let err = trie_sql::update_squash_root_node_hash(&db, &hash).unwrap_err(); + assert!( + matches!( + err, + crate::chainstate::stacks::index::Error::CorruptionError(ref msg) + if msg.contains("no marf_squash_info row exists") + ), + "instead got: {err:?}" + ); +} diff --git a/stackslib/src/chainstate/stacks/index/test/marf.rs b/stackslib/src/chainstate/stacks/index/test/marf.rs index 40fec787972..cc33af3ea75 100644 --- a/stackslib/src/chainstate/stacks/index/test/marf.rs +++ b/stackslib/src/chainstate/stacks/index/test/marf.rs @@ -2209,14 +2209,14 @@ fn assert_metadata_keys_present( } } -/// Create a configurable multi-block MARF for `for_each_leaf` 
tests. +/// Create a configurable multi-block MARF for tests. /// /// `k1` is updated at every block (exercises backpointers at every depth). /// For each block at height h > 0, inserts `keys_per_block` new keys. /// Also creates: /// - 10 common keys updated at every block /// - 10 common keys updated only on some blocks -fn setup_for_each_leaf_marf( +pub(super) fn setup_marf( path: &str, num_blocks: usize, keys_per_block: usize, @@ -2299,7 +2299,7 @@ fn setup_for_each_leaf_marf( #[test] fn test_for_each_leaf_yields_all_keys() { - let (mut marf, blocks, expected_keys) = setup_for_each_leaf_marf(":memory:", 2, 1); + let (mut marf, blocks, expected_keys) = setup_marf(":memory:", 2, 1); let b1 = blocks[0].clone(); let b2 = blocks[1].clone(); @@ -2352,7 +2352,7 @@ fn test_for_each_leaf_yields_all_keys() { #[test] fn test_for_each_leaf_large_scale_resolves_backpointers_and_values() { - let (mut marf, blocks, expected_keys) = setup_for_each_leaf_marf(":memory:", 300, 150); + let (mut marf, blocks, expected_keys) = setup_marf(":memory:", 300, 150); let block_at_tip = &blocks[299]; @@ -2450,7 +2450,7 @@ fn test_for_each_leaf_single_block() { #[test] fn test_for_each_leaf_at_intermediate_height() { - let (mut marf, blocks, _expected_keys) = setup_for_each_leaf_marf(":memory:", 300, 150); + let (mut marf, blocks, _expected_keys) = setup_marf(":memory:", 300, 150); // Walk at height 4 (blocks[4]), NOT the tip. 
let block_at_4 = &blocks[4]; @@ -2514,8 +2514,8 @@ fn test_for_each_leaf_at_intermediate_height() { #[test] fn test_for_each_leaf_callback_error_propagates() { - let (mut marf, blocks, _expected_keys) = setup_for_each_leaf_marf(":memory:", 10, 10); - let tip = &blocks[9]; + let (mut marf, blocks, _expected_keys) = setup_marf(":memory:", 2, 1); + let tip = &blocks[1]; let call_count = Cell::new(0u64); let result = marf.with_conn(|conn| { diff --git a/stackslib/src/chainstate/stacks/index/test/mod.rs b/stackslib/src/chainstate/stacks/index/test/mod.rs index 32140f9d1c6..ce5b039a9a6 100644 --- a/stackslib/src/chainstate/stacks/index/test/mod.rs +++ b/stackslib/src/chainstate/stacks/index/test/mod.rs @@ -38,6 +38,7 @@ pub mod marf_perfs; pub mod node; pub mod node_patch; pub mod proofs; +pub mod squash; pub mod storage; pub mod trie; diff --git a/stackslib/src/chainstate/stacks/index/test/node.rs b/stackslib/src/chainstate/stacks/index/test/node.rs index 7a0d15ca1be..42765d79573 100644 --- a/stackslib/src/chainstate/stacks/index/test/node.rs +++ b/stackslib/src/chainstate/stacks/index/test/node.rs @@ -41,7 +41,7 @@ fn trie_node4_to_bytes() { assert!(node4.insert(&TriePtr::new( TrieNodeID::Node16 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } let node4_bytes = vec![ @@ -128,7 +128,7 @@ fn trie_node4_to_consensus_bytes() { assert!(node4.insert(&TriePtr::new( TrieNodeID::Node16 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } let node4_bytes = vec![ @@ -309,7 +309,7 @@ fn trie_node16_to_bytes() { assert!(node16.insert(&TriePtr::new( TrieNodeID::Node48 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } let node16_bytes = vec![ @@ -516,7 +516,7 @@ fn trie_node16_to_consensus_bytes() { assert!(node16.insert(&TriePtr::new( TrieNodeID::Node48 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } let node16_bytes = vec![ @@ -1103,7 +1103,7 @@ fn trie_node48_to_bytes() { assert!(node48.insert(&TriePtr::new( TrieNodeID::Node256 
as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } @@ -1889,7 +1889,7 @@ fn trie_node48_to_consensus_bytes() { assert!(node48.insert(&TriePtr::new( TrieNodeID::Node256 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } let node48_bytes = vec![ @@ -3823,7 +3823,7 @@ fn read_write_node4() { assert!(node4.insert(&TriePtr::new( TrieNodeID::Node16 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } let marf_opts = MARFOpenOpts::default(); @@ -3852,7 +3852,7 @@ fn read_write_node16() { assert!(node16.insert(&TriePtr::new( TrieNodeID::Node48 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } @@ -3882,7 +3882,7 @@ fn read_write_node48() { assert!(node48.insert(&TriePtr::new( TrieNodeID::Node256 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } @@ -3912,7 +3912,7 @@ fn read_write_node256() { assert!(node256.insert(&TriePtr::new( TrieNodeID::Node256 as u8, (i + 1) as u8, - (i + 2) as u32 + (i + 2) as u64 ))); } @@ -5044,23 +5044,294 @@ fn trie_cursor_walk_32() { #[test] fn trie_ptr_compressed_size_for_id() { let normal_node_id = TrieNodeID::Node4 as u8; + assert_eq!(6, TriePtr::compressed_size_for_id(normal_node_id)); assert_eq!( - TRIEPTR_SIZE_COMPRESSED, - TriePtr::compressed_size_for_id(normal_node_id) + 10, + TriePtr::compressed_size_for_id(set_u64_ptr(normal_node_id)) ); let backptr_node_id = set_backptr(normal_node_id); + assert_eq!(10, TriePtr::compressed_size_for_id(backptr_node_id)); assert_eq!( - TRIEPTR_SIZE, - TriePtr::compressed_size_for_id(backptr_node_id) + 14, + TriePtr::compressed_size_for_id(set_u64_ptr(backptr_node_id)) ); } #[test] fn trie_ptr_compressed_size() { let normal_node = TriePtr::new(TrieNodeID::Node4 as u8, 0x00, 0); - assert_eq!(TRIEPTR_SIZE_COMPRESSED, normal_node.compressed_size()); + assert_eq!(6, normal_node.compressed_size()); let backptr_node = TriePtr::new_backptr(TrieNodeID::Node4 as u8, 0x00, 0, 1); - assert_eq!(TRIEPTR_SIZE, backptr_node.compressed_size()); + assert_eq!(10, 
backptr_node.compressed_size()); + + let big_node = TriePtr::new(TrieNodeID::Node4 as u8, 0x00, u64::from(u32::MAX) + 1); + assert_eq!(10, big_node.compressed_size()); + + let big_backptr = + TriePtr::new_backptr(TrieNodeID::Node4 as u8, 0x00, u64::from(u32::MAX) + 1, 1); + assert_eq!(14, big_backptr.compressed_size()); +} + +#[test] +fn trieptr_uncompressed_roundtrip_boundaries() { + let ptr_values = [ + 0u64, + u64::from(u32::MAX), + u64::from(u32::MAX) + 1, + (1u64 << 40) + 0x1234, + ]; + + for ptr_value in ptr_values { + let ptr = TriePtr::new(TrieNodeID::Node16 as u8, 0x2a, ptr_value); + let mut bytes = vec![]; + ptr.write_bytes(&mut bytes).unwrap(); + + let encoded_id = if ptr_value > u64::from(u32::MAX) { + set_u64_ptr(TrieNodeID::Node16 as u8) + } else { + TrieNodeID::Node16 as u8 + }; + let mut expected = vec![encoded_id, 0x2a]; + if ptr_value > u64::from(u32::MAX) { + expected.extend_from_slice(&ptr_value.to_be_bytes()); + } else { + expected.extend_from_slice(&(ptr_value as u32).to_be_bytes()); + } + expected.extend_from_slice(&0u32.to_be_bytes()); + + assert_eq!(expected, bytes); + assert_eq!(ptr, TriePtr::from_bytes(&bytes)); + } +} + +#[test] +fn trieptr_compressed_roundtrip_non_backptr() { + let ptr_values = [ + 0u64, + u64::from(u32::MAX), + u64::from(u32::MAX) + 1, + (1u64 << 56) - 3, + ]; + + for ptr_value in ptr_values { + let ptr = TriePtr::new(TrieNodeID::Node4 as u8, 0x42, ptr_value); + let mut bytes = vec![]; + ptr.write_bytes_compressed(&mut bytes).unwrap(); + + let encoded_id = if ptr_value > u64::from(u32::MAX) { + set_u64_ptr(TrieNodeID::Node4 as u8) + } else { + TrieNodeID::Node4 as u8 + }; + assert_eq!(TriePtr::compressed_size_for_id(encoded_id), bytes.len()); + assert_eq!(set_compressed(encoded_id), bytes[0]); + assert_eq!(ptr, TriePtr::from_bytes_compressed(&bytes)); + assert_eq!( + ptr, + TriePtr::read_bytes_compressed(&mut Cursor::new(&bytes)).unwrap() + ); + } +} + +#[test] +fn 
trieptr_compressed_roundtrip_inline_back_block_payload_u32() { + let mut ptr = TriePtr::new(TrieNodeID::Node16 as u8, 0x21, 777); + ptr.back_block = 42; + + let mut bytes = vec![]; + ptr.write_bytes_compressed(&mut bytes).unwrap(); + + assert_eq!(10, bytes.len()); + assert_eq!( + set_compressed(set_inline_back_block(TrieNodeID::Node16 as u8)), + bytes[0] + ); + assert_eq!(ptr, TriePtr::from_bytes_compressed(&bytes)); + assert_eq!( + ptr, + TriePtr::read_bytes_compressed(&mut Cursor::new(&bytes)).unwrap() + ); +} + +#[test] +fn trieptr_compressed_roundtrip_inline_back_block_payload_u64() { + let mut ptr = TriePtr::new(TrieNodeID::Node16 as u8, 0x22, u64::from(u32::MAX) + 9); + ptr.back_block = 314; + + let mut bytes = vec![]; + ptr.write_bytes_compressed(&mut bytes).unwrap(); + + assert_eq!(14, bytes.len()); + assert_eq!( + set_compressed(set_inline_back_block(set_u64_ptr(TrieNodeID::Node16 as u8))), + bytes[0] + ); + assert_eq!(ptr, TriePtr::from_bytes_compressed(&bytes)); + assert_eq!( + ptr, + TriePtr::read_bytes_compressed(&mut Cursor::new(&bytes)).unwrap() + ); +} + +#[test] +fn trieptr_compressed_roundtrip_backptr() { + let ptr = TriePtr::new_backptr( + TrieNodeID::Node48 as u8, + 0x7f, + u64::from(u32::MAX) + 123, + 0x01020304, + ); + + let mut bytes = vec![]; + ptr.write_bytes_compressed(&mut bytes).unwrap(); + + assert_eq!(14, bytes.len()); + assert_eq!( + set_compressed(set_u64_ptr(set_backptr(TrieNodeID::Node48 as u8))), + bytes[0] + ); + assert_eq!(ptr, TriePtr::from_bytes_compressed(&bytes)); + assert_eq!( + ptr, + TriePtr::read_bytes_compressed(&mut Cursor::new(&bytes)).unwrap() + ); +} + +#[test] +fn trieptr_large_offsets_set_u64_bit() { + let ptr = TriePtr::new(TrieNodeID::Node4 as u8, 0x1, u64::from(u32::MAX) + 1); + let mut bytes = vec![]; + ptr.write_bytes(&mut bytes).unwrap(); + assert_eq!(set_u64_ptr(TrieNodeID::Node4 as u8), bytes[0]); + + let mut compressed = vec![]; + ptr.write_bytes_compressed(&mut compressed).unwrap(); + assert_eq!( + 
set_compressed(set_u64_ptr(TrieNodeID::Node4 as u8)), + compressed[0] + ); +} + +fn decode_node4_ptrs_from_compressed_bytes( + ptrs: &[TriePtr], +) -> (Vec, Vec, Vec, u64, u64) { + let mut node4 = TrieNode4::new(&[]); + for ptr in ptrs.iter() { + if ptr.id() != TrieNodeID::Empty as u8 { + assert!(node4.insert(ptr)); + } + } + let expected_ptrs = node4.ptrs().to_vec(); + + let mut encoded = vec![]; + node4 + .write_bytes_compressed(&mut encoded) + .expect("node4 encode"); + + let mut decoded_ptrs = vec![TriePtr::default(); 4]; + let mut cursor = Cursor::new(encoded.clone()); + let decoded_node_id = + ptrs_from_bytes(encoded[0], &mut cursor, &mut decoded_ptrs).expect("node4 decode"); + + assert_eq!(TrieNodeID::Node4 as u8, decoded_node_id); + let expected_consumed = u64::try_from(get_ptrs_byte_len_compressed( + TrieNodeID::Node4 as u8, + &expected_ptrs, + )) + .expect("infallible"); + ( + encoded, + expected_ptrs, + decoded_ptrs, + cursor.position(), + expected_consumed, + ) +} + +#[test] +fn ptrs_from_bytes_compressed_sparse_mixed_width() { + let sparse_ptrs = [ + TriePtr::new_backptr(TrieNodeID::Node4 as u8, 0x10, u64::from(u32::MAX) + 5, 7), + TriePtr::new(TrieNodeID::Empty as u8, 0x00, 0), + TriePtr::new(TrieNodeID::Node16 as u8, 0x30, 12345), + TriePtr::new(TrieNodeID::Empty as u8, 0x00, 0), + ]; + + let (encoded, expected, decoded, cursor_pos, expected_consumed) = + decode_node4_ptrs_from_compressed_bytes(&sparse_ptrs); + assert!(is_compressed(encoded[0])); + assert_eq!( + crate::chainstate::stacks::index::bits::SPARSE_PTR_BITMAP_MARKER, + encoded[1] + ); + assert_eq!(expected, decoded); + assert_eq!(expected_consumed, cursor_pos); +} + +#[test] +fn ptrs_from_bytes_compressed_dense_mixed_width() { + let dense_ptrs = [ + TriePtr::new(TrieNodeID::Node4 as u8, 0x01, 1), + TriePtr::new_backptr(TrieNodeID::Node16 as u8, 0x02, u64::from(u32::MAX) + 2, 9), + TriePtr::new(TrieNodeID::Node48 as u8, 0x03, u64::from(u32::MAX) + 3), + 
TriePtr::new_backptr(TrieNodeID::Node256 as u8, 0x04, 4, 11), + ]; + + let (encoded, expected, decoded, cursor_pos, expected_consumed) = + decode_node4_ptrs_from_compressed_bytes(&dense_ptrs); + assert!(is_compressed(encoded[0])); + assert_ne!( + crate::chainstate::stacks::index::bits::SPARSE_PTR_BITMAP_MARKER, + encoded[1] + ); + assert_eq!(expected, decoded); + assert_eq!(expected_consumed, cursor_pos); +} + +#[test] +fn test_node_copy_update_ptrs_preserves_nonzero_back_block() { + use crate::chainstate::stacks::index::node::node_copy_update_ptrs; + + // Inline pointer with back_block = 0 (normal archival case) - should be overwritten + let mut ptrs = [TriePtr::new(TrieNodeID::Node4 as u8, 0x10, 100)]; + assert_eq!(ptrs[0].back_block, 0); + node_copy_update_ptrs(&mut ptrs, 42); + assert!(is_backptr(ptrs[0].id())); + assert_eq!(ptrs[0].back_block, 42); + assert_eq!(ptrs[0].chr(), 0x10); + assert_eq!(ptrs[0].ptr(), 100); + + // Inline pointer with back_block != 0 (squash annotation) - should be preserved + let mut ptrs = [TriePtr { + id: TrieNodeID::Node4 as u8, + chr: 0x20, + ptr: 200, + back_block: 99, + }]; + node_copy_update_ptrs(&mut ptrs, 42); + assert!(is_backptr(ptrs[0].id())); + assert_eq!( + ptrs[0].back_block, 99, + "squash annotation must be preserved" + ); + assert_eq!(ptrs[0].chr(), 0x20); + assert_eq!(ptrs[0].ptr(), 200); + + // Empty pointer - should be untouched + let mut ptrs = [TriePtr::default()]; + node_copy_update_ptrs(&mut ptrs, 42); + assert_eq!(ptrs[0], TriePtr::default()); + + // Already a backptr - should be skipped entirely + let orig = TriePtr { + id: set_backptr(TrieNodeID::Node16 as u8), + chr: 0x30, + ptr: 300, + back_block: 7, + }; + let mut ptrs = [orig]; + node_copy_update_ptrs(&mut ptrs, 42); + assert_eq!(ptrs[0], orig, "existing backptr must not be touched"); } diff --git a/stackslib/src/chainstate/stacks/index/test/node_patch.rs b/stackslib/src/chainstate/stacks/index/test/node_patch.rs index f81af57b5cb..49f06ca8983 100644 --- 
a/stackslib/src/chainstate/stacks/index/test/node_patch.rs +++ b/stackslib/src/chainstate/stacks/index/test/node_patch.rs @@ -139,6 +139,147 @@ fn trie_node_patch_deserialize_ok_with_ptr_diffs_len_1() { assert_eq!(expected, patch_node); } +#[test] +fn trie_node_patch_u64_ptr_roundtrip_ok() { + let patch_node = TrieNodePatch { + ptr: TriePtr::new(1, 10, u64::from(u32::MAX) + 7), + ptr_diff: vec![TriePtr::new(1, 20, u64::from(u32::MAX) + 11)], + }; + + let mut buffer = Cursor::new(Vec::new()); + patch_node + .consensus_serialize(&mut buffer) + .expect("u64 ptr serialization should be ok"); + + let decoded = TrieNodePatch::consensus_deserialize(&mut Cursor::new(buffer.into_inner())) + .expect("u64 ptr deserialization should be ok"); + assert_eq!(patch_node, decoded); +} + +#[test] +fn trie_node_patch_apply_node4_preserves_inline_payload_pointer_identity() { + let mut old_node = TrieNode4::new(&[]); + let mut inline_with_payload = TriePtr::new(TrieNodeID::Node16 as u8, 0x10, 1234); + inline_with_payload.back_block = 55; + assert!(old_node.insert(&inline_with_payload)); + + let patch = TrieNodePatch { + ptr: TriePtr::new_backptr(TrieNodeID::Node4 as u8, 0x00, 1, 7), + ptr_diff: vec![TriePtr::new(TrieNodeID::Node16 as u8, 0x20, 2345)], + }; + + let patched = patch + .apply_node4(old_node, 8, 99) + .expect("patch application should succeed"); + let patched_ptr = patched + .walk(0x10) + .expect("inline child with payload should still exist"); + assert!(is_backptr(patched_ptr.id())); + assert_eq!(patched_ptr.back_block(), 55); +} + +#[test] +fn trie_node_patch_u64_ptr_serialize_fails_with_ptr_diffs_len_0() { + let patch_node = TrieNodePatch { + ptr: TriePtr::new(TrieNodeID::Node4 as u8, 1, 77), + ptr_diff: vec![], + }; + let mut buffer = Cursor::new(Vec::new()); + let error = patch_node + .consensus_serialize(&mut buffer) + .expect_err("u64 ptr serialization should fail"); + assert!( + matches!(&error, codec_error::SerializeError(msg) if msg.contains("len 0")), + "instead 
got: {error}" + ); +} + +#[test] +fn trie_node_patch_u64_ptr_serialize_fails_with_ptr_diffs_len_257() { + let patch_node = TrieNodePatch { + ptr: TriePtr::new(TrieNodeID::Node4 as u8, 1, 77), + ptr_diff: vec![TriePtr::new(TrieNodeID::Node4 as u8, 2, 88); 257], + }; + let mut buffer = Cursor::new(Vec::new()); + let error = patch_node + .consensus_serialize(&mut buffer) + .expect_err("u64 ptr serialization should fail"); + assert!( + matches!(&error, codec_error::SerializeError(msg) if msg.contains("len 257")), + "instead got: {error}" + ); +} + +#[test] +fn trie_node_patch_u64_ptr_deserialize_fails_on_truncated_payload() { + let patch_node = TrieNodePatch { + ptr: TriePtr::new_backptr(TrieNodeID::Node4 as u8, 0x01, u64::from(u32::MAX) + 2, 5), + ptr_diff: vec![ + TriePtr::new(TrieNodeID::Node16 as u8, 0x02, u64::from(u32::MAX) + 3), + TriePtr::new_backptr(TrieNodeID::Node16 as u8, 0x03, u64::from(u32::MAX) + 4, 7), + ], + }; + + let mut buffer = Cursor::new(Vec::new()); + patch_node + .consensus_serialize(&mut buffer) + .expect("u64 ptr serialization should be ok"); + let mut payload = buffer.into_inner(); + payload.pop(); + + let error = TrieNodePatch::consensus_deserialize(&mut Cursor::new(payload)) + .expect_err("u64 ptr deserialization should fail on truncated payload"); + assert!( + error.to_string().contains("fill whole buffer"), + "instead got: {error}" + ); +} + +#[test] +fn trie_node_patch_u64_ptr_deserialize_fails_on_malformed_payload() { + let patch_node = TrieNodePatch { + ptr: TriePtr::new(TrieNodeID::Node4 as u8, 1, 2), + ptr_diff: vec![TriePtr::new(TrieNodeID::Node16 as u8, 2, 3)], + }; + let mut buffer = Cursor::new(Vec::new()); + patch_node + .consensus_serialize(&mut buffer) + .expect("u64 ptr serialization should be ok"); + let mut payload = buffer.into_inner(); + + // Corrupt the patch node marker. 
+ payload[0] = TrieNodeID::Leaf as u8; + + let error = TrieNodePatch::consensus_deserialize(&mut Cursor::new(payload)) + .expect_err("u64 ptr deserialization should fail on malformed payload"); + assert!( + error + .to_string() + .contains("Did not read a TrieNodeID::Patch"), + "instead got: {error}" + ); +} + +#[test] +fn trie_node_patch_u64_ptr_roundtrip_mixed_backptrs() { + let patch_node = TrieNodePatch { + ptr: TriePtr::new_backptr(TrieNodeID::Node16 as u8, 0x10, u64::from(u32::MAX) + 55, 42), + ptr_diff: vec![ + TriePtr::new(TrieNodeID::Node4 as u8, 0x11, u64::from(u32::MAX) + 56), + TriePtr::new_backptr(TrieNodeID::Node48 as u8, 0x12, u64::from(u32::MAX) + 57, 43), + TriePtr::new(TrieNodeID::Node256 as u8, 0x13, 19), + ], + }; + + let mut buffer = Cursor::new(Vec::new()); + patch_node + .consensus_serialize(&mut buffer) + .expect("u64 ptr mixed serialization should be ok"); + let decoded = TrieNodePatch::consensus_deserialize(&mut Cursor::new(buffer.into_inner())) + .expect("u64 ptr mixed deserialization should be ok"); + assert_eq!(patch_node, decoded); +} + /// [`TrieNodePatch::make_ptr_diff`] in the following scenario: /// /// ## Input diff --git a/stackslib/src/chainstate/stacks/index/test/squash.rs b/stackslib/src/chainstate/stacks/index/test/squash.rs new file mode 100644 index 00000000000..f1db2beee91 --- /dev/null +++ b/stackslib/src/chainstate/stacks/index/test/squash.rs @@ -0,0 +1,1606 @@ +// Copyright (C) 2026 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::io::{Cursor, Seek}; +use std::path::PathBuf; + +use stacks_common::types::chainstate::{StacksBlockId, TrieHash, BLOCK_HEADER_HASH_ENCODED_SIZE}; +use tempfile::tempdir; + +use super::marf::setup_marf; +use crate::chainstate::stacks::index::bits::get_node_byte_len; +use crate::chainstate::stacks::index::marf::{ + MARFOpenOpts, MarfConnection, SquashStats, MARF, OWN_BLOCK_HEIGHT_KEY, +}; +use crate::chainstate::stacks::index::node::{ + is_u64_ptr, set_backptr, TrieNode as _, TrieNode16, TrieNode256, TrieNode4, TrieNode48, + TrieNodeID, TrieNodeType, TriePtr, +}; +use crate::chainstate::stacks::index::squash::{ + compute_blob_offsets, compute_blob_offsets_inner, deserialize_node, remap_ptrs_to_blob_offsets, + serialize_node, stream_squash_blob, NodeStore, +}; +use crate::chainstate::stacks::index::storage::TrieHashCalculationMode; +use crate::chainstate::stacks::index::{trie_sql, ClarityMarfTrieId, Error, MARFValue, TrieLeaf}; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn squash_helper(src_path: &str, dst_dir: &std::path::Path, height: u32) -> (PathBuf, SquashStats) { + std::fs::create_dir_all(dst_dir).unwrap(); + let dst_db_path = dst_dir.join("index.sqlite"); + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let stats = MARF::::squash_to_path( + src_path, + dst_db_path.to_str().unwrap(), + open_opts, + height, + "test", + ) + .unwrap(); + (dst_db_path, stats) +} + +const STRESS_SQUASH_BLOCKS: usize = 128; +const STRESS_SQUASH_KEYS_PER_BLOCK: usize = 8; +const STRESS_SQUASH_HEIGHT: u32 = 96; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[test] 
+fn test_squash_to_path_outputs_data() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, stats) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + 1, + ); + + assert!(stats.node_count > 0); + assert!(dst_db_path.exists()); + assert!(PathBuf::from(format!("{}.blobs", dst_db_path.display())).exists()); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut dst = + MARF::::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + let k1 = dst.get(&blocks[1], "k1").unwrap().unwrap(); + assert_eq!(k1, MARFValue::from_value("v1_at_1")); + let own_height = dst.get(&blocks[1], OWN_BLOCK_HEIGHT_KEY).unwrap().unwrap(); + assert_eq!(own_height, MARFValue::from(1u32)); +} + +#[test] +fn test_squash_info_detected_on_open() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let _ = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, _) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + 1, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + let tip = + trie_sql::get_latest_confirmed_block_hash::(squashed.sqlite_conn()).unwrap(); + + // Verify squash metadata was detected from the SQL table on open. + let (is_squashed, info_root, info_height) = squashed + .with_conn(|conn| -> Result<(bool, TrieHash, u32), Error> { + let info = conn.squash_info().expect("missing squash info"); + Ok(( + conn.is_squashed(), + info.archival_marf_root_hash, + info.height, + )) + }) + .unwrap(); + + // Cross-check with the SQL table directly. 
+ let (sql_root, _sql_squash_root, sql_height) = + trie_sql::read_squash_info(squashed.sqlite_conn()) + .unwrap() + .expect("SQL squash info missing"); + + assert!(is_squashed); + assert_eq!(info_root, sql_root); + assert_eq!(info_height, sql_height); + assert_eq!(info_height, 1); +} + +#[test] +fn test_squash_info_absent_on_archival_open() { + let (mut marf, _blocks, _expected_keys) = setup_marf(":memory:", 2, 1); + + let (is_squashed, has_info) = marf + .with_conn(|conn| -> Result<(bool, bool), Error> { + Ok((conn.is_squashed(), conn.squash_info().is_some())) + }) + .unwrap(); + + assert!(!is_squashed); + assert!(!has_info); +} + +#[test] +fn test_squashed_marf_can_extend_past_snapshot_height() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let (_, blocks, _) = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, _) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + 1, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + + let b2 = blocks[1].clone(); + let b3 = StacksBlockId::from_bytes(&[3u8; 32]).unwrap(); + let b4 = StacksBlockId::from_bytes(&[4u8; 32]).unwrap(); + + squashed.begin(&b2, &b3).unwrap(); + squashed.insert("k3", MARFValue::from_value("v4")).unwrap(); + squashed.commit().unwrap(); + + squashed.begin(&b3, &b4).unwrap(); + squashed.insert("k4", MARFValue::from_value("v5")).unwrap(); + squashed.commit().unwrap(); + + let v4 = squashed.get(&b4, "k4").unwrap().unwrap(); + assert_eq!(v4, MARFValue::from_value("v5")); + let own_height = squashed.get(&b4, OWN_BLOCK_HEIGHT_KEY).unwrap().unwrap(); + assert_eq!(own_height, MARFValue::from(3u32)); +} + +/// Verify that `get_root_hash_at` and `get_block_height_of` return correct +/// per-height values for blocks *inside* the squashed range. 
Without the +/// squash-aware overrides these would return the shared blob's root hash +/// (wrong) and the squash height H (wrong) for every historical block. +#[test] +fn test_squashed_historical_root_hash_and_height() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _) = setup_marf(archival_path.to_str().unwrap(), 5, 1); + + // Collect archival root hashes and heights for blocks inside range. + let archival_roots: Vec = (0..=4) + .map(|i| archival.get_root_hash_at(&blocks[i]).unwrap()) + .collect(); + + // Squash at height 4 (blocks 0..=4 are in the squashed range). + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + 4, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + // (a) get_root_hash_at must return the archival per-height root, not + // the shared squash blob root. + for i in 0..=4 { + let sq_root = squashed.get_root_hash_at(&blocks[i]).unwrap(); + assert_eq!( + archival_roots[i], sq_root, + "root hash mismatch at height {i} (inside squashed range)" + ); + } + + // (b) get_block_height_of must return the correct per-block height, + // not the squash height (4) for all of them. + for i in 0..=4usize { + let h = squashed + .get_block_height_of(&blocks[i], &blocks[4]) + .unwrap() + .expect("height should be Some"); + assert_eq!( + h, i as u32, + "height mismatch for block at index {i}: expected {i}, got {h}" + ); + } + + // (c) The archival roots should not all be identical (sanity). + assert_ne!(archival_roots[0], archival_roots[4]); +} + +/// Verify that `test_squash_info_detected_on_open` also asserts the +/// squash_root_node_hash from the SQL table. 
+#[test] +fn test_squash_info_sql_squash_root_asserted() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let _ = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let (dst_db_path, _) = squash_helper( + src_db_path.to_str().unwrap(), + &dir.path().join("squashed"), + 1, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(dst_db_path.to_str().unwrap(), open_opts).unwrap(); + + let (_, sql_squash_root, _) = trie_sql::read_squash_info(squashed.sqlite_conn()) + .unwrap() + .expect("SQL squash info missing"); + + let cached_root = squashed + .with_conn(|conn| -> Result { + Ok(conn.squash_info().unwrap().squash_root_node_hash) + }) + .unwrap(); + + // sql_squash_root may be None if not yet computed (squash_to_path sets + // it after blob commit). If present, it must match the cached value. + if let Some(sql_root) = sql_squash_root { + assert_eq!(sql_root, cached_root, "cached vs SQL squash root mismatch"); + } + // Either way, the cached root must not be the zero hash (squash_to_path + // computes and stores it). + assert_ne!( + cached_root, + TrieHash::from_data(&[]), + "squash root node hash should be populated after squash" + ); +} + +#[test] +fn test_large_marf_squash_extend_root_hash_matches_archival() { + // Squash a 10-block MARF at height 8, then extend both the archival + // and squashed MARFs with the same data at heights 9 and 10. 
+ let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _expected_keys) = setup_marf(archival_path.to_str().unwrap(), 10, 1); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + 8, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let b_new_9 = StacksBlockId::from_bytes(&[101u8; 32]).unwrap(); + let b_new_10 = StacksBlockId::from_bytes(&[102u8; 32]).unwrap(); + + // --- Extend archival --- + archival.begin(&blocks[8], &b_new_9).unwrap(); + archival + .insert("k_new_9", MARFValue::from_value("val9")) + .unwrap(); + archival.commit().unwrap(); + + archival.begin(&b_new_9, &b_new_10).unwrap(); + archival + .insert("k_new_10", MARFValue::from_value("val10")) + .unwrap(); + archival.commit().unwrap(); + + // --- Extend squashed --- + squashed.begin(&blocks[8], &b_new_9).unwrap(); + squashed + .insert("k_new_9", MARFValue::from_value("val9")) + .unwrap(); + squashed.commit().unwrap(); + + squashed.begin(&b_new_9, &b_new_10).unwrap(); + squashed + .insert("k_new_10", MARFValue::from_value("val10")) + .unwrap(); + squashed.commit().unwrap(); + + // (a) Data inserted at the extended heights is readable. + assert_eq!( + squashed.get(&b_new_9, "k_new_9").unwrap().unwrap(), + MARFValue::from_value("val9") + ); + assert_eq!( + squashed.get(&b_new_10, "k_new_10").unwrap().unwrap(), + MARFValue::from_value("val10") + ); + assert_eq!( + squashed.get(&b_new_10, "k1").unwrap().unwrap(), + MARFValue::from_value("v1_at_8") + ); + + // (b) MARF root hashes at the extended heights must match. 
+ let archival_root_9 = archival.get_root_hash_at(&b_new_9).unwrap(); + let squashed_root_9 = squashed.get_root_hash_at(&b_new_9).unwrap(); + assert_eq!( + archival_root_9, squashed_root_9, + "Root hash mismatch at height 9" + ); + + let archival_root_10 = archival.get_root_hash_at(&b_new_10).unwrap(); + let squashed_root_10 = squashed.get_root_hash_at(&b_new_10).unwrap(); + assert_eq!( + archival_root_10, squashed_root_10, + "Root hash mismatch at height 10" + ); + + assert_ne!(archival_root_9, TrieHash([0u8; 32]), "root at 9 is zero"); + assert_ne!(archival_root_10, TrieHash([0u8; 32]), "root at 10 is zero"); + assert_ne!( + archival_root_9, archival_root_10, + "roots at 9 and 10 should differ" + ); + + let own_h = squashed + .get(&b_new_10, OWN_BLOCK_HEIGHT_KEY) + .unwrap() + .unwrap(); + assert_eq!(own_h, MARFValue::from(10u32)); +} + +/// Squash a larger MARF at a deep height, then extend both MARFs through 10 additional +/// heights and verify hash equality at EVERY extended height. 
+#[test] +fn test_multi_height_extension_hash_equality() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _expected_keys) = setup_marf( + archival_path.to_str().unwrap(), + STRESS_SQUASH_BLOCKS, + STRESS_SQUASH_KEYS_PER_BLOCK, + ); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + STRESS_SQUASH_HEIGHT, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let mut prev_block = blocks[STRESS_SQUASH_HEIGHT as usize].clone(); + let mut new_blocks: Vec = Vec::new(); + for i in 0..10u8 { + let new_bh = StacksBlockId::from_bytes(&[200 + i; 32]).unwrap(); + let key = format!("ext_k{i}"); + let val = format!("ext_v{i}"); + + archival.begin(&prev_block, &new_bh).unwrap(); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + archival.commit().unwrap(); + + squashed.begin(&prev_block, &new_bh).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.commit().unwrap(); + + new_blocks.push(new_bh.clone()); + prev_block = new_bh; + } + + for (i, bh) in new_blocks.iter().enumerate() { + let arch_root = archival.get_root_hash_at(bh).unwrap(); + let sq_root = squashed.get_root_hash_at(bh).unwrap(); + assert_eq!( + arch_root, + sq_root, + "Root hash mismatch at extended height {}", + i + STRESS_SQUASH_HEIGHT as usize + 1 + ); + assert_ne!( + arch_root, + TrieHash([0u8; 32]), + "root at {} is zero", + i + STRESS_SQUASH_HEIGHT as usize + 1 + ); + } + + let last = new_blocks.last().unwrap(); + assert_eq!( + squashed.get(last, "k1").unwrap().unwrap(), + MARFValue::from_value(&format!("v1_at_{STRESS_SQUASH_HEIGHT}")), + ); + assert_eq!( + squashed.get(last, "ext_k9").unwrap().unwrap(), + MARFValue::from_value("ext_v9"), + ); +} + +/// Test that extending a squashed MARF with blocks 
that write MANY keys +/// per block produces the same root hashes as the archival MARF. +#[test] +fn test_dense_writes_after_squash_hash_equality() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _expected_keys) = setup_marf( + archival_path.to_str().unwrap(), + STRESS_SQUASH_BLOCKS, + STRESS_SQUASH_KEYS_PER_BLOCK, + ); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + STRESS_SQUASH_HEIGHT, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + // Extend with blocks that write MANY keys each - simulating a block + // with many contract calls (like mainnet block 201697 with 42 txs). + let keys_per_extension_block = 200; + let extension_blocks = 20; + + let mut prev_block = blocks[STRESS_SQUASH_HEIGHT as usize].clone(); + let mut new_blocks: Vec = Vec::new(); + + for blk in 0..extension_blocks { + let new_bh = StacksBlockId::from_bytes(&[200 + blk as u8; 32]).unwrap(); + + archival.begin(&prev_block, &new_bh).unwrap(); + squashed.begin(&prev_block, &new_bh).unwrap(); + + for k in 0..keys_per_extension_block { + let key = format!("dense_blk{blk}_k{k}"); + let val = format!("dense_blk{blk}_v{k}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Also overwrite some keys from the archival history to force + // COW copies of deeper trie nodes. + for k in 0..STRESS_SQUASH_KEYS_PER_BLOCK { + let key = format!("k{k}"); + let val = format!("overwritten_blk{blk}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Simulate at-block: read a key from a historical block mid-transaction. 
+ // This exercises the open_block/restore cycle on the squashed blob. + if blk > 0 { + let historical_block = &new_blocks[blk - 1]; + let _arch_val = archival.get(historical_block, "dense_blk0_k0").unwrap(); + let _sq_val = squashed.get(historical_block, "dense_blk0_k0").unwrap(); + assert_eq!(_arch_val, _sq_val, "at-block read mismatch at blk {blk}"); + + // Also read from a pre-squash block + let old_block = &blocks[STRESS_SQUASH_HEIGHT as usize / 2]; + let _arch_val2 = archival.get(old_block, "k0").unwrap(); + let _sq_val2 = squashed.get(old_block, "k0").unwrap(); + assert_eq!( + _arch_val2, _sq_val2, + "at-block pre-squash read mismatch at blk {blk}" + ); + } + + archival.commit().unwrap(); + squashed.commit().unwrap(); + + new_blocks.push(new_bh.clone()); + prev_block = new_bh; + } + + for (i, bh) in new_blocks.iter().enumerate() { + let arch_root = archival.get_root_hash_at(bh).unwrap(); + let sq_root = squashed.get_root_hash_at(bh).unwrap(); + assert_eq!( + arch_root, sq_root, + "Root hash mismatch at dense extension block {} (wrote {} keys + {} overwrites)", + i, keys_per_extension_block, STRESS_SQUASH_KEYS_PER_BLOCK + ); + } +} + +/// Verify that reading a key at a pre-squash block on a squashed MARF +/// returns the squash-tip's value, not the value that existed at that block. +/// This documents the known limitation of the single-blob design: historical +/// reads within the squash range return tip-era values for keys that changed. 
+#[test] +fn test_squash_historical_read_returns_tip_value() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + // 64 blocks, 4 keys per block, squash at height 48 + let (mut archival, blocks, _) = setup_marf(src_path.to_str().unwrap(), 64, 4); + + let squash_height: u32 = 48; + let (squashed_path, _) = squash_helper( + src_path.to_str().unwrap(), + &dir.path().join("squashed"), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let tip_block = &blocks[squash_height as usize]; + + // `k1` is written at EVERY block with value `v1_at_{height}`. + // At block 10, the archival should return "v1_at_10". + // The squash should return "v1_at_{squash_height}" because + // all blocks share the squash-tip's blob. + let early_block = &blocks[10]; + + let arch_val = archival.get(early_block, "k1").unwrap(); + let sq_val = squashed.get(early_block, "k1").unwrap(); + let tip_val = squashed.get(tip_block, "k1").unwrap(); + + // Archival correctly returns the value at block 10 + assert_eq!( + arch_val, + Some(MARFValue::from_value("v1_at_10")), + "archival should return the historical value" + ); + + // Squash returns the TIP value, not the block-10 value. + // This is the documented limitation of the single-blob squash. + assert_eq!( + sq_val, tip_val, + "squashed historical read should return tip value, not historical value" + ); + assert_ne!( + sq_val, arch_val, + "squashed historical read should differ from archival for keys that changed" + ); +} + +/// Same as above but for `common_some_*` keys that only change on some blocks. +/// At blocks where the key was NOT updated, the archival returns the last-written +/// value before that block. The squash returns the tip value regardless. 
+#[test]
+fn test_squash_historical_read_intermittent_key() {
+    let dir = tempdir().unwrap();
+    let src_path = dir.path().join("index.sqlite");
+    let (mut archival, blocks, _) = setup_marf(src_path.to_str().unwrap(), 64, 4);
+
+    let squash_height: u32 = 48;
+    let (squashed_path, _) = squash_helper(
+        src_path.to_str().unwrap(),
+        &dir.path().join("squashed"),
+        squash_height,
+    );
+
+    let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true);
+    let mut squashed =
+        MARF::<StacksBlockId>::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap();
+
+    let tip_block = &blocks[squash_height as usize];
+
+    // common_some_0 is written at heights where (height + 0) % 3 == 0,
+    // i.e. heights 0, 3, 6, 9, 12, ... with value "common_some_0_at_{h}".
+    // Read at block 10 - last write was at height 9.
+    let early_block = &blocks[10];
+
+    let arch_val = archival.get(early_block, "common_some_0").unwrap();
+    let sq_val = squashed.get(early_block, "common_some_0").unwrap();
+    let tip_val = squashed.get(tip_block, "common_some_0").unwrap();
+
+    // Archival returns the value from the last write at/before height 10
+    assert_eq!(
+        arch_val,
+        Some(MARFValue::from_value("common_some_0_at_9")),
+        "archival should return value from height 9"
+    );
+
+    // Squash returns the tip value
+    assert_eq!(
+        sq_val, tip_val,
+        "squashed should return tip value for intermittent key"
+    );
+    assert_ne!(
+        sq_val, arch_val,
+        "squashed historical read should differ from archival"
+    );
+}
+
+/// Extend a squashed MARF through enough blocks to exercise deep backpointer
+/// chains and node promotions, then verify hash equality with the archival.
+/// Uses 256 blocks, 32 keys/block, squashed at height 192 (64 blocks of post-squash headroom, of which this test extends 20).
+#[test] +fn test_deep_extension_hash_equality() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let num_blocks: usize = 256; + let keys_per_block: usize = 32; + let squash_height: u32 = 192; + let extension_blocks: usize = 20; + + let (mut archival, blocks, _) = + setup_marf(archival_path.to_str().unwrap(), num_blocks, keys_per_block); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + squash_height, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let mut prev_block = blocks[squash_height as usize].clone(); + let mut new_blocks: Vec = Vec::new(); + + for blk in 0..extension_blocks { + let new_bh = { + let mut bytes = [0u8; 32]; + bytes[0] = 0xee; + bytes[24..28].copy_from_slice(&(blk as u32).to_be_bytes()); + StacksBlockId::from_bytes(&bytes).unwrap() + }; + + archival.begin(&prev_block, &new_bh).unwrap(); + squashed.begin(&prev_block, &new_bh).unwrap(); + + // Write many new keys (forces node promotions in the trie) + for k in 0..(keys_per_block * 4) { + let key = format!("ext_blk{blk}_k{k}"); + let val = format!("ext_blk{blk}_v{k}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Overwrite keys from across the squash range (deep COW walks) + for k in 0..keys_per_block { + let key_index = 2 + (squash_height as usize / 2) * keys_per_block + k; + let key = format!("k{key_index}"); + let val = format!("deep_overwrite_blk{blk}"); + archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + // Also overwrite common keys (causes COW of root-adjacent nodes) + for c in 0..10 { + let key = format!("common_all_{c}"); + let val = format!("common_all_{c}_ext_{blk}"); 
+ archival.insert(&key, MARFValue::from_value(&val)).unwrap(); + squashed.insert(&key, MARFValue::from_value(&val)).unwrap(); + } + + archival.commit().unwrap(); + squashed.commit().unwrap(); + + new_blocks.push(new_bh.clone()); + prev_block = new_bh; + } + + for (i, bh) in new_blocks.iter().enumerate() { + let arch_root = archival.get_root_hash_at(bh).unwrap(); + let sq_root = squashed.get_root_hash_at(bh).unwrap(); + assert_eq!( + arch_root, + sq_root, + "Root hash mismatch at deep extension block {i} (256 blocks, \ + 32 keys/block, squash at 192, {} new keys + {} overwrites per ext block)", + keys_per_block * 4, + keys_per_block + 10 + ); + } +} + +/// Verify that all historical marf_data entries share the same +/// external_offset (i.e. point to the single shared trie storage). +#[test] +fn test_marf_data_entries_share_blob_offset() { + let dir = tempdir().unwrap(); + let src_path = dir.path().join("index.sqlite"); + let (_, blocks, _expected_keys) = setup_marf(src_path.to_str().unwrap(), 10, 1); + + let (dst_path, _) = squash_helper(src_path.to_str().unwrap(), &dir.path().join("squashed"), 8); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let squashed = MARF::::from_path(dst_path.to_str().unwrap(), open_opts).unwrap(); + let conn = squashed.sqlite_conn(); + + let tip_id = trie_sql::get_block_identifier(conn, &blocks[8]).unwrap(); + let (tip_offset, tip_length) = trie_sql::get_external_trie_offset_length(conn, tip_id).unwrap(); + assert!(tip_length > 0, "blob length should be non-zero"); + + for i in 0..8 { + let blk_id = trie_sql::get_block_identifier(conn, &blocks[i]).unwrap(); + let (offset, length) = trie_sql::get_external_trie_offset_length(conn, blk_id).unwrap(); + assert_eq!(offset, tip_offset, "block {i} offset mismatch"); + assert_eq!(length, tip_length, "block {i} length mismatch"); + } +} + +/// Verify that walk_cow correctly follows annotated back_block values +/// when copying nodes from a squashed blob 
into a new block. +#[test] +fn test_walk_cow_preserves_backpointer_identity() { + let dir = tempdir().unwrap(); + let archival_path = dir.path().join("archival.sqlite"); + let (mut archival, blocks, _expected_keys) = setup_marf(archival_path.to_str().unwrap(), 10, 1); + + let (squashed_path, _) = squash_helper( + archival_path.to_str().unwrap(), + &dir.path().join("squashed"), + 8, + ); + + let open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + let mut squashed = + MARF::::from_path(squashed_path.to_str().unwrap(), open_opts).unwrap(); + + let b_new = StacksBlockId::from_bytes(&[250u8; 32]).unwrap(); + squashed.begin(&blocks[8], &b_new).unwrap(); + squashed + .insert("k1", MARFValue::from_value("v1_at_10")) + .unwrap(); + squashed + .insert("new_key", MARFValue::from_value("new_val")) + .unwrap(); + squashed.commit().unwrap(); + + for key in ["k2", "k5", "k9"] { + let result = squashed.get(&b_new, &key).unwrap(); + assert!(result.is_some(), "missing key {key} after extend"); + } + + assert_eq!( + squashed.get(&b_new, "k1").unwrap().unwrap(), + MARFValue::from_value("v1_at_10"), + ); + + assert_eq!( + squashed.get(&b_new, "new_key").unwrap().unwrap(), + MARFValue::from_value("new_val"), + ); + + archival.begin(&blocks[8], &b_new).unwrap(); + archival + .insert("k1", MARFValue::from_value("v1_at_10")) + .unwrap(); + archival + .insert("new_key", MARFValue::from_value("new_val")) + .unwrap(); + archival.commit().unwrap(); + + let arch_root = archival.get_root_hash_at(&b_new).unwrap(); + let sq_root = squashed.get_root_hash_at(&b_new).unwrap(); + assert_eq!(arch_root, sq_root, "Root hash mismatch after walk_cow"); +} + +#[test] +fn test_squash_internal_blobs_extend_with_compression() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("sort.sqlite"); + + let squash_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", false); + let mut src = + MARF::::from_path(src_db_path.to_str().unwrap(), 
squash_opts.clone()) + .unwrap(); + + let b1 = StacksBlockId::from_bytes(&[1u8; 32]).unwrap(); + let b2 = StacksBlockId::from_bytes(&[2u8; 32]).unwrap(); + let b3 = StacksBlockId::from_bytes(&[3u8; 32]).unwrap(); + + src.begin(&StacksBlockId::sentinel(), &b1).unwrap(); + for i in 0u8..32 { + src.insert( + &format!("k{i:02}"), + MARFValue::from_value(&format!("v1-{i:02}")), + ) + .unwrap(); + } + src.commit().unwrap(); + + src.begin(&b1, &b2).unwrap(); + for i in 0u8..32 { + src.insert( + &format!("k{i:02}"), + MARFValue::from_value(&format!("v2-{i:02}")), + ) + .unwrap(); + } + src.commit().unwrap(); + drop(src); + + let dst_dir = dir.path().join("squashed-compressed"); + std::fs::create_dir_all(&dst_dir).unwrap(); + let dst_db_path = dst_dir.join("sort.sqlite"); + + MARF::::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + squash_opts, + 1, + "test", + ) + .unwrap(); + + let compressed_opts = + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true).with_compression(true); + let mut squashed = + MARF::::from_path(dst_db_path.to_str().unwrap(), compressed_opts).unwrap(); + + squashed.begin(&b2, &b3).unwrap(); + squashed + .insert("k_extra", MARFValue::from_value("v3-extra")) + .unwrap(); + squashed.commit().unwrap(); + + let value = squashed.get(&b3, "k_extra").unwrap().unwrap(); + assert_eq!(value, MARFValue::from_value("v3-extra")); +} + +// --------------------------------------------------------------------------- +// Targeted unit tests for the disk-backed squash mechanisms +// --------------------------------------------------------------------------- + +/// Helper: build a leaf node for tests. +fn make_test_leaf(path: &[u8], value_byte: u8) -> TrieNodeType { + let mut data = [0u8; 40]; + data[0] = value_byte; + TrieNodeType::Leaf(TrieLeaf { + path: path.to_vec(), + data: MARFValue(data), + }) +} + +/// Helper: build a Node4 with the given child pointers. 
+fn make_test_node4(path: &[u8], ptrs: [TriePtr; 4]) -> TrieNodeType { + TrieNodeType::Node4(TrieNode4 { + path: path.to_vec(), + ptrs, + cowptr: None, + patches: vec![], + }) +} + +#[test] +fn test_node_store_roundtrip_all_variants() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + + let mut store = NodeStore::new(dir_str).unwrap(); + + // Leaf + let leaf = make_test_leaf(&[1, 2, 3], 0xAA); + let leaf_hash = TrieHash::from_data(&[1]); + store.push(&leaf, leaf_hash, 10).unwrap(); + + // Node4 + let n4 = make_test_node4( + &[4, 5], + [ + TriePtr::new(1, b'a', 100), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + let n4_hash = TrieHash::from_data(&[2]); + store.push(&n4, n4_hash, 20).unwrap(); + + // Node16 + let mut ptrs16 = [TriePtr::default(); 16]; + ptrs16[0] = TriePtr::new(2, b'b', 200); + let n16 = TrieNodeType::Node16(TrieNode16 { + path: vec![6, 7, 8], + ptrs: ptrs16, + cowptr: None, + patches: vec![], + }); + let n16_hash = TrieHash::from_data(&[3]); + store.push(&n16, n16_hash, 30).unwrap(); + + // Node48 + let mut indexes48 = [-1i8; 256]; + indexes48[b'c' as usize] = 0; + let mut ptrs48 = [TriePtr::default(); 48]; + ptrs48[0] = TriePtr::new(3, b'c', 300); + let n48 = TrieNodeType::Node48(Box::new(TrieNode48 { + path: vec![9, 10], + indexes: indexes48, + ptrs: ptrs48, + cowptr: None, + patches: vec![], + })); + let n48_hash = TrieHash::from_data(&[4]); + store.push(&n48, n48_hash, 40).unwrap(); + + // Node256 + let mut ptrs256 = [TriePtr::default(); 256]; + ptrs256[b'd' as usize] = TriePtr::new(4, b'd', 400); + let n256 = TrieNodeType::Node256(Box::new(TrieNode256 { + path: vec![11], + ptrs: ptrs256, + cowptr: None, + patches: vec![], + })); + let n256_hash = TrieHash::from_data(&[5]); + store.push(&n256, n256_hash, 50).unwrap(); + + store.finish_writing().unwrap(); + assert_eq!(store.len(), 5); + + // Read back and verify + let mut reader = store.open_reader().unwrap(); + + // Leaf round-trip + 
let rt_leaf = store.read_node_with(&mut reader, 0).unwrap(); + assert!(rt_leaf.is_leaf()); + assert_eq!(rt_leaf.path_bytes(), &[1, 2, 3]); + assert_eq!(store.hash(0), leaf_hash); + assert_eq!(store.block_id(0), 10); + + // Node4 round-trip + let rt_n4 = store.read_node_with(&mut reader, 1).unwrap(); + assert_eq!(rt_n4.ptrs()[0].chr(), b'a'); + assert_eq!(rt_n4.ptrs()[0].ptr(), 100); + + // Node16 round-trip + let rt_n16 = store.read_node_with(&mut reader, 2).unwrap(); + assert_eq!(rt_n16.ptrs()[0].chr(), b'b'); + assert_eq!(rt_n16.ptrs()[0].ptr(), 200); + + // Node48 round-trip + let rt_n48 = store.read_node_with(&mut reader, 3).unwrap(); + assert_eq!(rt_n48.ptrs()[0].chr(), b'c'); + assert_eq!(rt_n48.ptrs()[0].ptr(), 300); + + // Node256 round-trip + let rt_n256 = store.read_node_with(&mut reader, 4).unwrap(); + assert_eq!(rt_n256.ptrs()[b'd' as usize].chr(), b'd'); + assert_eq!(rt_n256.ptrs()[b'd' as usize].ptr(), 400); +} + +#[test] +fn test_node_store_spill_file_cleaned_on_drop() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + + let spill_path; + { + let mut store = NodeStore::new(dir_str).unwrap(); + spill_path = store.path.clone(); + + let leaf = make_test_leaf(&[1], 0x01); + store.push(&leaf, TrieHash::from_data(&[]), 0).unwrap(); + store.finish_writing().unwrap(); + + // File should exist while store is alive + assert!(spill_path.exists(), "spill file should exist before drop"); + } + // After drop, file should be cleaned up + assert!( + !spill_path.exists(), + "spill file should be removed after drop" + ); +} + +#[test] +fn test_node_store_unique_temp_file_names() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + + let store1 = NodeStore::new(dir_str).unwrap(); + // Ensure different nanos by adding a tiny sleep + std::thread::sleep(std::time::Duration::from_millis(1)); + let store2 = NodeStore::new(dir_str).unwrap(); + + assert_ne!( + store1.path, store2.path, + "concurrent NodeStores should 
have different temp file paths" + ); +} + +#[test] +fn test_serialize_deserialize_node_roundtrip() { + // Test the raw serialize/deserialize functions independently of NodeStore + let nodes: Vec = vec![ + make_test_leaf(&[1, 2, 3, 4], 0xFF), + make_test_node4( + &[10, 20], + [ + TriePtr::new(1, b'x', 42), + TriePtr::new(1, b'y', 99), + TriePtr::default(), + TriePtr::default(), + ], + ), + ]; + + for original in &nodes { + let mut buf = Vec::new(); + serialize_node(&mut buf, original).unwrap(); + + let mut cursor = Cursor::new(&buf); + let roundtripped = deserialize_node(&mut cursor).unwrap(); + + assert_eq!(original.path_bytes(), roundtripped.path_bytes()); + assert_eq!(original.ptrs().len(), roundtripped.ptrs().len()); + for (a, b) in original.ptrs().iter().zip(roundtripped.ptrs().iter()) { + assert_eq!(a.id(), b.id()); + assert_eq!(a.chr(), b.chr()); + assert_eq!(a.ptr(), b.ptr()); + assert_eq!(a.back_block(), b.back_block()); + } + } +} + +/// Build a branching trie with mixed node types and verify that +/// `compute_blob_offsets` + `stream_squash_blob` agree on sizes. +/// +/// Trie layout (indices 0–6): +/// 0: Node16 (root) -> children 1, 2 +/// 1: Node4 -> children 3, 4 +/// 2: Node4 -> child 5 +/// 3: Leaf +/// 4: Leaf +/// 5: Node4 -> child 6 +/// 6: Leaf +#[test] +fn test_blob_offsets_with_mixed_node_types() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + let mut store = NodeStore::new(dir_str).unwrap(); + let h = TrieHash([0; 32]); + + // Index 0: Node16 root with two forward children. + let mut root_ptrs = [TriePtr::default(); 16]; + root_ptrs[0] = TriePtr::new(TrieNodeID::Node4 as u8, b'a', 1); + root_ptrs[1] = TriePtr::new(TrieNodeID::Node4 as u8, b'b', 2); + let root = TrieNodeType::Node16(TrieNode16 { + path: vec![0], + ptrs: root_ptrs, + cowptr: None, + patches: vec![], + }); + store.push(&root, h, 0).unwrap(); + + // Index 1: Node4 with two forward children. 
+ store + .push( + &make_test_node4( + &[1], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'c', 3), + TriePtr::new(TrieNodeID::Leaf as u8, b'd', 4), + TriePtr::default(), + TriePtr::default(), + ], + ), + h, + 0, + ) + .unwrap(); + + // Index 2: Node4 with one forward child. + store + .push( + &make_test_node4( + &[2], + [ + TriePtr::new(TrieNodeID::Node4 as u8, b'e', 5), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ), + h, + 0, + ) + .unwrap(); + + // Index 3: Leaf + store.push(&make_test_leaf(&[3, 4], 0xAA), h, 0).unwrap(); + // Index 4: Leaf + store.push(&make_test_leaf(&[5, 6], 0xBB), h, 0).unwrap(); + + // Index 5: Node4 with one forward child (deeper subtree). + store + .push( + &make_test_node4( + &[7], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'f', 6), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ), + h, + 0, + ) + .unwrap(); + + // Index 6: Leaf + store.push(&make_test_leaf(&[8, 9], 0xCC), h, 0).unwrap(); + + store.finish_writing().unwrap(); + + let (blob_offsets, total_size) = compute_blob_offsets(&mut store).unwrap(); + assert_eq!(blob_offsets.len(), 7); + + // Offsets must be strictly increasing (each node has non-zero size). + for w in blob_offsets.windows(2) { + assert!(w[1] > w[0], "offsets must be strictly increasing"); + } + + // stream_squash_blob must write exactly total_size bytes. + let parent_hash = StacksBlockId::sentinel(); + let mut output = Cursor::new(Vec::new()); + let bytes_written = + stream_squash_blob(&mut store, &parent_hash, &blob_offsets, &mut output).unwrap(); + assert_eq!(bytes_written, total_size); + + // Verify blob header. + let blob = output.into_inner(); + assert_eq!(&blob[..32], parent_hash.as_bytes()); + assert_eq!( + &blob[BLOCK_HEADER_HASH_ENCODED_SIZE..BLOCK_HEADER_HASH_ENCODED_SIZE + 4], + &0u32.to_le_bytes() + ); +} + +/// Verify that writing the blob at a non-zero sink offset doesn't corrupt +/// the output. 
`bytes_written` must equal `total_size` and the prefix bytes must remain untouched.
+#[test] +fn test_remap_ptrs_to_blob_offsets() { + // Build a Node4 with a mix of pointer types: + // slot 0: forward ptr to child index 1 + // slot 1: back ptr (should be left untouched) + // slot 2: empty (should be left untouched) + // slot 3: forward ptr to child index 2 + let back_id = set_backptr(TrieNodeID::Node4 as u8); + let mut node = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'a', 1), + TriePtr { + id: back_id, + chr: b'x', + ptr: 999, + back_block: 5, + }, + TriePtr::default(), + TriePtr::new(TrieNodeID::Leaf as u8, b'b', 2), + ], + ); + + let offsets: Vec = vec![100, 200, 300]; + remap_ptrs_to_blob_offsets(&mut node, &offsets).unwrap(); + + let ptrs = node.ptrs(); + // Forward ptrs remapped to blob offsets. + assert_eq!(ptrs[0].ptr(), 200); // child_idx 1 -> offset 200 + assert_eq!(ptrs[3].ptr(), 300); // child_idx 2 -> offset 300 + // Back ptr untouched. + assert_eq!(ptrs[1].ptr(), 999); + assert_eq!(ptrs[1].back_block(), 5); + // Empty ptr untouched. + assert_eq!(ptrs[2].ptr(), 0); + + // Leaves are a no-op. + let mut leaf = make_test_leaf(&[1], 0xAA); + remap_ptrs_to_blob_offsets(&mut leaf, &offsets).unwrap(); + + // Out-of-bounds child index returns CorruptionError. + let mut bad_node = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'a', 99), // index 99 > offsets.len() + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + assert!(remap_ptrs_to_blob_offsets(&mut bad_node, &offsets).is_err()); +} + +/// Verify that `remap_ptrs_to_blob_offsets` with offsets > u32::MAX causes +/// the node's serialized size to grow (u32 -> u64 pointer encoding), which +/// is the mechanism that drives the fixpoint in `compute_blob_offsets`. 
+#[test] +fn test_remap_ptrs_u64_encoding_widens_node() { + let mut node = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'a', 0), // child_idx 0 + TriePtr::new(TrieNodeID::Leaf as u8, b'b', 1), // child_idx 1 + TriePtr::default(), + TriePtr::default(), + ], + ); + + let size_before = get_node_byte_len(&node); + + // One offset below u32::MAX, one above -> mixed encoding. + let offsets: Vec = vec![1000, u64::from(u32::MAX) + 1]; + remap_ptrs_to_blob_offsets(&mut node, &offsets).unwrap(); + + let size_after = get_node_byte_len(&node); + + // Exactly one pointer widened from u32 (4 bytes) to u64 (8 bytes) -> +4 bytes. + assert_eq!( + size_after - size_before, + 4, + "one u64 pointer should add exactly 4 bytes" + ); + + // The ptr that stayed below u32::MAX should still use u32 encoding. + assert_eq!(node.ptrs()[0].ptr(), 1000); + assert!(!is_u64_ptr(node.ptrs()[0].encoded_id())); + + // The ptr that crossed u32::MAX should use u64 encoding. + assert_eq!(node.ptrs()[1].ptr(), u64::from(u32::MAX) + 1); + assert!(is_u64_ptr(node.ptrs()[1].encoded_id())); +} + +#[test] +fn test_squash_rejects_compress_true() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("index.sqlite"); + let _ = setup_marf(src_db_path.to_str().unwrap(), 2, 1); + + let dst_dir = dir.path().join("squashed"); + std::fs::create_dir_all(&dst_dir).unwrap(); + let dst_db_path = dst_dir.join("index.sqlite"); + + let mut open_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true); + open_opts.compress = true; + + let result = MARF::::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + open_opts, + 1, + "test", + ); + assert!(result.is_err(), "compress=true should be rejected"); + let err_msg = format!("{}", result.unwrap_err()); + assert!( + err_msg.contains("compress=true"), + "error should mention compress=true: {err_msg}" + ); +} + +/// Exercise the fixpoint loop inside `compute_blob_offsets_inner` by +/// passing 
`early_exit_threshold = 0`, which forces the loop to run +/// even though the blob is small. The results must be identical to the +/// normal (early-exit) path because no pointers actually widen. +#[test] +fn test_compute_blob_offsets_fixpoint_loop() { + let dir = tempdir().unwrap(); + let dir_str = dir.path().to_str().unwrap(); + let mut store = NodeStore::new(dir_str).unwrap(); + let h = TrieHash([0; 32]); + + // Build a small trie: root (Node4) -> inner (Node4) -> leaf. + // Both interior nodes have forward pointers. + let root = make_test_node4( + &[0], + [ + TriePtr::new(TrieNodeID::Node4 as u8, b'a', 1), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + store.push(&root, h, 0).unwrap(); + + let inner = make_test_node4( + &[1], + [ + TriePtr::new(TrieNodeID::Leaf as u8, b'b', 2), + TriePtr::default(), + TriePtr::default(), + TriePtr::default(), + ], + ); + store.push(&inner, h, 0).unwrap(); + + store.push(&make_test_leaf(&[2, 3], 0xAA), h, 0).unwrap(); + store.finish_writing().unwrap(); + + // Normal call. early exit, no fixpoint loop. + let (offsets_normal, total_normal) = compute_blob_offsets(&mut store).unwrap(); + + // Forced fixpoint. threshold = 0 means the loop always runs. + let (offsets_forced, total_forced) = compute_blob_offsets_inner(&mut store, 0).unwrap(); + + // Results must be identical (no actual pointer widening for small blobs). + assert_eq!(offsets_normal, offsets_forced); + assert_eq!(total_normal, total_forced); + + // Verify stream_squash_blob agrees on total size. + let parent_hash = StacksBlockId::sentinel(); + let mut output = Cursor::new(Vec::new()); + let bytes_written = + stream_squash_blob(&mut store, &parent_hash, &offsets_forced, &mut output).unwrap(); + assert_eq!(bytes_written, total_forced); +} + +/// Build a synthetic >4 GiB squash blob so at least one real remapped child +/// pointer crosses `u32::MAX` and is emitted with the u64-width encoding bit. 
+#[test] +#[ignore = "synthetic large-offset regression"] +fn compute_blob_offsets_large_offset_sets_u64_ptr_bit() { + let dir = tempdir().expect("create temp dir"); + let dir_str = dir.path().to_str().unwrap(); + let path = dir + .path() + .join("compute_blob_offsets_large_offset_sets_u64_ptr_bit.bin"); + + let mut store = NodeStore::new(dir_str).expect("create node store"); + let template = TrieNodeType::Node256(Box::new(TrieNode256::new(&[]))); + let per_node_size = u64::try_from(get_node_byte_len(&template)).expect("infallible"); + let required_nodes = u64::from(u32::MAX) / per_node_size + 2; + let hash = TrieHash([0; 32]); + for i in 0..required_nodes { + let mut node = TrieNode256::new(&[]); + if i + 1 < required_nodes { + assert!(node.insert(&TriePtr::new(TrieNodeID::Node256 as u8, 0x00, i + 1))); + } + store + .push(&TrieNodeType::Node256(Box::new(node)), hash, 0) + .expect("push trie node"); + } + store.finish_writing().expect("finish node store"); + + let (blob_offsets, total_size) = compute_blob_offsets(&mut store).expect("compute offsets"); + assert!(total_size > u64::from(u32::MAX)); + + let parent_hash = StacksBlockId([0x55; 32]); + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(&path) + .expect("create temp squash blob"); + let bytes_written = stream_squash_blob(&mut store, &parent_hash, &blob_offsets, &mut file) + .expect("stream squash blob"); + assert_eq!(bytes_written, total_size); + let second_last_node_start = total_size + .checked_sub(per_node_size + (per_node_size + 4)) + .expect("second-last node should exist"); + file.seek(std::io::SeekFrom::Start( + second_last_node_start + + u64::try_from(stacks_common::types::chainstate::TRIEHASH_ENCODED_SIZE + 1) + .expect("infallible"), + )) + .expect("seek to second-last child ptr id"); + let mut encoded_id = [0u8; 1]; + std::io::Read::read_exact(&mut file, &mut encoded_id) + .expect("read encoded second-last child ptr id"); + 
assert!(is_u64_ptr(encoded_id[0])); +} + +/// Extending a squashed MARF must correctly serialize patch nodes even when +/// the squash tip has many inline (forward-ptr) children that become +/// backpointers in the new block. +/// +/// This test uses `insert_raw` with controlled `TrieHash` paths to build a +/// deterministic wide Node256 root (64 children in distinct chr() slots). +/// After squashing and extending with a single-key modification, the root is +/// COW'd as a patch: 1 forward child + 63 inherited backpointers. +/// +/// The test verifies: +/// 1. The `assert!(node_forward.eq(diff_forward))` in `dump_compressed_consume` +/// does not panic - the forward-ptr sequence filtering is correct. +/// 2. The root of b3's blob is actually stored as a `TrieNodeID::Patch`, +/// proving the patch path was exercised (not silently skipped). +/// 3. Both archival and squashed MARFs produce identical data when extended +/// with the same operations. +/// +/// Regression test for the `assert_eq!(num_new_nodes, patch_node.ptr_diff.len())` +/// panic that occurred when extending squashed mainnet MARFs. +#[test] +fn test_squash_extend_many_keys_patch_backptr_regression() { + let dir = tempdir().unwrap(); + let src_db_path = dir.path().join("src.sqlite"); + + // Compression MUST be enabled for the patch path in dump_compressed_consume. + let open_opts = + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", false).with_compression(true); + + // Build a controlled trie path for each first-byte value. + // 64 distinct first bytes guarantees a Node256 root (>48 children). 
+ let make_path = |first_byte: u8| -> TrieHash { + let mut bytes = [0u8; 32]; + bytes[0] = first_byte; + TrieHash(bytes) + }; + let make_leaf = |val: u8| -> TrieLeaf { + let mut data = [0u8; 40]; + data[0] = val; + TrieLeaf { + path: vec![], + data: MARFValue(data), + } + }; + let num_keys: u8 = 64; + + let b1 = StacksBlockId::from_bytes(&[1u8; 32]).unwrap(); + let b2 = StacksBlockId::from_bytes(&[2u8; 32]).unwrap(); + let b3 = StacksBlockId::from_bytes(&[3u8; 32]).unwrap(); + + // --- Build archival source MARF --- + let mut src = + MARF::::from_path(src_db_path.to_str().unwrap(), open_opts.clone()).unwrap(); + + src.begin(&StacksBlockId::sentinel(), &b1).unwrap(); + for i in 0..num_keys { + src.insert_raw(make_path(i), make_leaf(i)).unwrap(); + } + src.commit().unwrap(); + + src.begin(&b1, &b2).unwrap(); + for i in 0..num_keys { + src.insert_raw(make_path(i), make_leaf(i.wrapping_add(100))) + .unwrap(); + } + src.commit().unwrap(); + + // Extend archival to b3: modify ONE key so the root is COW'd with + // 1 changed child + (num_keys - 1) inherited backpointers. + src.begin(&b2, &b3).unwrap(); + src.insert_raw(make_path(0), make_leaf(255)).unwrap(); + src.commit().unwrap(); + + // Collect archival values at b3 for later comparison. + let archival_val_0 = src + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(0))) + .unwrap(); + let archival_val_1 = src + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(1))) + .unwrap(); + let archival_val_63 = src + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(num_keys - 1))) + .unwrap(); + drop(src); + + // --- Squash at height 1 (= b2) --- + let dst_dir = dir.path().join("squashed"); + std::fs::create_dir_all(&dst_dir).unwrap(); + let dst_db_path = dst_dir.join("dst.sqlite"); + + // squash_to_path requires compress=false; compression is for the extend step. 
+ let squash_opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", false); + MARF::::squash_to_path( + src_db_path.to_str().unwrap(), + dst_db_path.to_str().unwrap(), + squash_opts, + 1, + "test", + ) + .unwrap(); + + // --- Extend squashed MARF to b3 with compression enabled --- + // Compression enables the patch-node path in dump_compressed_consume. + let squashed_opts = + MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", true).with_compression(true); + let mut squashed = + MARF::::from_path(dst_db_path.to_str().unwrap(), squashed_opts.clone()) + .unwrap(); + + squashed.begin(&b2, &b3).unwrap(); + squashed.insert_raw(make_path(0), make_leaf(255)).unwrap(); + // The commit exercises dump_compressed_consume with a COW'd Node256 + // root where most children are backpointers. The forward-ptr sequence + // assertion must pass for this to succeed. + squashed.commit().unwrap(); + + // --- Verify patch node was actually emitted --- + // b3's blob is in the .blobs file. The root node starts at + // blob_offset + HEADER (36 bytes). Its type ID byte is at +68. + // TrieNodeID::Patch = 6 proves patch encoding was used, not Normal. 
+ let b3_block_id = trie_sql::get_block_identifier(squashed.sqlite_conn(), &b3).unwrap(); + let (b3_blob_offset, b3_blob_length) = + trie_sql::get_external_trie_offset_length(squashed.sqlite_conn(), b3_block_id).unwrap(); + assert!(b3_blob_length > 0, "b3 blob should have non-zero length"); + + let blobs_path = format!("{}.blobs", dst_db_path.display()); + let mut blobs_file = std::fs::File::open(&blobs_path).unwrap(); + // Root node type ID is at: blob_offset + 32 (parent hash) + 4 (identifier) + 32 (node hash) + let root_type_offset = b3_blob_offset + (BLOCK_HEADER_HASH_ENCODED_SIZE as u64) + 4 + 32; + blobs_file + .seek(std::io::SeekFrom::Start(root_type_offset)) + .unwrap(); + let mut type_byte = [0u8; 1]; + std::io::Read::read_exact(&mut blobs_file, &mut type_byte).unwrap(); + assert_eq!( + type_byte[0], + TrieNodeID::Patch as u8, + "Root of b3 should be stored as a Patch node (type {}), got type {}. \ + This means dump_compressed_consume fell back to Normal serialization.", + TrieNodeID::Patch as u8, + type_byte[0] + ); + + // --- Verify data matches archival MARF --- + let squashed_val_0 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(0))) + .unwrap(); + let squashed_val_1 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(1))) + .unwrap(); + let squashed_val_63 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b3, &make_path(num_keys - 1))) + .unwrap(); + + assert_eq!(archival_val_0, squashed_val_0, "modified key mismatch"); + assert_eq!(archival_val_1, squashed_val_1, "inherited key mismatch"); + assert_eq!( + archival_val_63, squashed_val_63, + "last inherited key mismatch" + ); + + // Pre-squash data still readable through the squash tip. 
+ let val_at_b2 = squashed + .with_conn(|c| MARF::get_by_hash(c, &b2, &make_path(1))) + .unwrap(); + assert!(val_at_b2.is_some(), "data at b2 should still be readable"); +} diff --git a/stackslib/src/chainstate/stacks/index/test/storage.rs b/stackslib/src/chainstate/stacks/index/test/storage.rs index cb22c0b8346..44a394658f1 100644 --- a/stackslib/src/chainstate/stacks/index/test/storage.rs +++ b/stackslib/src/chainstate/stacks/index/test/storage.rs @@ -16,6 +16,9 @@ use std::collections::VecDeque; use std::fs; +use std::io::{Seek, SeekFrom}; + +use tempfile::tempdir; use super::*; use crate::chainstate::stacks::index::*; @@ -126,18 +129,23 @@ fn trie_cmp( return true; } -fn load_store_trie_m_n_same(m: u64, n: u64, same: bool) { +fn load_store_trie_m_n_same_with_compression(m: u64, n: u64, same: bool, compress: bool) { let test_name = format!( - "/tmp/load_store_trie_{}_{}_{}", + "/tmp/load_store_trie_{}_{}_{}_{}", m, n, - if same { "same" } else { "unique" } + if same { "same" } else { "unique" }, + if compress { + "compressed" + } else { + "uncompressed" + } ); if fs::metadata(&test_name).is_ok() { fs::remove_file(&test_name).unwrap(); } - let marf_opts = MARFOpenOpts::default(); + let marf_opts = MARFOpenOpts::default().with_compression(compress); let confirmed_marf_storage = TrieFileStorage::::open(&test_name, marf_opts).unwrap(); let mut confirmed_marf = MARF::::from_storage(confirmed_marf_storage); @@ -162,7 +170,7 @@ fn load_store_trie_m_n_same(m: u64, n: u64, same: bool) { let confirmed_tip = StacksBlockId([0x01; 32]); confirmed_marf.commit_to(&confirmed_tip).unwrap(); - let marf_opts = MARFOpenOpts::default(); + let marf_opts = MARFOpenOpts::default().with_compression(compress); let marf_storage = TrieFileStorage::::open_unconfirmed(&test_name, marf_opts).unwrap(); let mut marf = MARF::from_storage(marf_storage); @@ -286,6 +294,10 @@ fn load_store_trie_m_n_same(m: u64, n: u64, same: bool) { } } +fn load_store_trie_m_n_same(m: u64, n: u64, same: bool) { + 
load_store_trie_m_n_same_with_compression(m, n, same, false); +} + #[test] fn load_store_trie_4_4_same() { load_store_trie_m_n_same(4, 4, true); @@ -325,3 +337,288 @@ fn load_store_trie_4_256_same() { fn load_store_trie_4_256_unique() { load_store_trie_m_n_same(4, 256, false); } + +#[test] +fn load_store_trie_4_16_unique_compression_enabled_unconfirmed_stable() { + load_store_trie_m_n_same_with_compression(4, 16, false, true); +} + +#[test] +fn load_store_trie_4_48_same_compression_enabled_roundtrip() { + load_store_trie_m_n_same_with_compression(4, 48, true, true); +} + +fn large_offset_required_nodes(per_node_size: u64) -> u64 { + u64::from(u32::MAX) / per_node_size + 2 +} + +fn fill_linear_node256_trie( + trie: &mut TrieRAM, + required_nodes: u64, + hash: TrieHash, +) { + for i in 0..required_nodes { + let mut node = TrieNode256::new(&[]); + if i + 1 < required_nodes { + assert!(node.insert(&TriePtr::new(TrieNodeID::Node256 as u8, 0x00, i + 1))); + } + trie.write_nodetype(i, &TrieNodeType::Node256(Box::new(node)), hash) + .expect("write trie node"); + } +} + +fn assert_second_last_ptr_id_is_u64( + file: &mut fs::File, + end_offset: u64, + last_node_size: u64, + second_last_node_size: u64, + ptr_id_offset_within_node: u64, + context: &str, +) { + let second_last_node_start = end_offset + .checked_sub(last_node_size + second_last_node_size) + .expect("second-last node should exist"); + file.seek(SeekFrom::Start( + second_last_node_start + ptr_id_offset_within_node, + )) + .expect(context); + let mut encoded_id = [0u8; 1]; + std::io::Read::read_exact(file, &mut encoded_id) + .expect("read encoded second-last child ptr id"); + assert!(is_u64_ptr(encoded_id[0])); +} + +#[test] +#[ignore = "u64-pointer support"] +fn dump_consume_large_offset_sets_u64_ptr_bit() { + let dir = tempdir().expect("create temp dir"); + let path = dir + .path() + .join("dump_consume_large_offset_sets_u64_ptr_bit.bin"); + + let block = StacksBlockId([0x11; 32]); + let parent = 
StacksBlockId([0x22; 32]); + let mut trie = TrieRAM::new(&block, 0, &parent); + + let template = TrieNodeType::Node256(Box::new(TrieNode256::new(&[]))); + let per_node_size = u64::try_from(get_node_byte_len(&template)).expect("infallible"); + let required_nodes = large_offset_required_nodes(per_node_size); + fill_linear_node256_trie(&mut trie, required_nodes, TrieHash([0; 32])); + + let mut file = fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(&path) + .expect("create temp trie dump"); + let end_offset = trie.dump_consume(&mut file).expect("dump large trie"); + assert!(end_offset > u64::from(u32::MAX)); + assert_second_last_ptr_id_is_u64( + &mut file, + end_offset, + per_node_size, + per_node_size + 4, + u64::try_from(TRIEHASH_ENCODED_SIZE + 1).expect("infallible"), + "seek to second-last child ptr id", + ); +} + +#[test] +#[ignore = "u64-pointer support"] +fn dump_compressed_consume_large_offset_sets_u64_ptr_bit() { + let dir = tempdir().expect("create temp dir"); + let path = dir + .path() + .join("dump_compressed_consume_large_offset_sets_u64_ptr_bit.bin"); + + let block = StacksBlockId([0x12; 32]); + let parent = StacksBlockId([0x23; 32]); + let mut trie = TrieRAM::new(&block, 0, &parent); + + let template = TrieNodeType::Node256(Box::new(TrieNode256::new(&[]))); + let per_node_size = u64::try_from(get_node_byte_len_compressed(&template)).expect("infallible"); + let required_nodes = large_offset_required_nodes(per_node_size); + fill_linear_node256_trie(&mut trie, required_nodes, TrieHash([0; 32])); + + let mut widened_second_last = TrieNode256::new(&[]); + assert!(widened_second_last.insert(&TriePtr::new( + TrieNodeID::Node256 as u8, + 0x00, + u64::from(u32::MAX) + 1, + ))); + let widened_second_last_size = u64::try_from(get_node_byte_len_compressed( + &TrieNodeType::Node256(Box::new(widened_second_last)), + )) + .expect("infallible"); + + let storage = TrieFileStorage::::new_memory( + 
MARFOpenOpts::default().with_compression(true), + ) + .expect("create in-memory storage"); + let mut marf = MARF::::from_storage(storage); + let mut storage_tx = marf.borrow_storage_transaction(); + + let mut file = fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open(&path) + .expect("create temp trie dump"); + let end_offset = trie + .dump_compressed_consume(&mut storage_tx, &mut file) + .expect("dump large compressed trie"); + assert!(end_offset > u64::from(u32::MAX)); + + let bitmap_size = u64::try_from( + get_sparse_ptrs_bitmap_size(TrieNodeID::Node256 as u8).expect("node256 bitmap size"), + ) + .expect("infallible"); + assert_second_last_ptr_id_is_u64( + &mut file, + end_offset, + per_node_size, + widened_second_last_size, + u64::try_from(TRIEHASH_ENCODED_SIZE + 1 + 1).expect("infallible") + bitmap_size, + "seek to second-last compressed child ptr id", + ); +} + +/// Verify that `dump_compressed_consume` exercises COW-patch and +/// amendment-patch branches when compressing multi-block tries. +/// +/// Block A: insert initial keys -> fresh trie, no patches. +/// Block B: modify some keys -> COW pointers on inherited interior nodes. +/// Block C: modify same keys again -> amendment patches on top of B's patches. 
+#[test] +fn test_dump_compressed_consume_cow_and_amendment_patches() { + use stacks_common::types::chainstate::TrieHash as TrieHashType; + + let dir = tempdir().unwrap(); + let test_path = dir.path().join("marf.sqlite"); + let test_path_str = test_path.to_str().unwrap(); + + let block_a = StacksBlockId([0x01; 32]); + let block_b = StacksBlockId([0x02; 32]); + let block_c = StacksBlockId([0x03; 32]); + + let marf_opts = MARFOpenOpts::default().with_compression(true); + + // Block A: insert 16 keys to build a trie with interior nodes + { + let storage = + TrieFileStorage::::open(test_path_str, marf_opts.clone()).unwrap(); + let mut marf = MARF::::from_storage(storage); + marf.begin(&StacksBlockId::sentinel(), &block_a).unwrap(); + for i in 0u64..16 { + let mut path_bytes = [0u8; 32]; + path_bytes[24..32].copy_from_slice(&i.to_be_bytes()); + let path = TrieHashType::from_bytes(&path_bytes).unwrap(); + let value = TrieLeaf::new(&[], &[i as u8; 40]); + marf.insert_raw(path, value).unwrap(); + } + marf.commit_to(&block_a).unwrap(); + } + + // Block B: modify 8 keys -> creates COW pointers on interior nodes + { + let storage = + TrieFileStorage::::open(test_path_str, marf_opts.clone()).unwrap(); + let mut marf = MARF::::from_storage(storage); + marf.begin(&block_a, &block_b).unwrap(); + for i in 0u64..8 { + let mut path_bytes = [0u8; 32]; + path_bytes[24..32].copy_from_slice(&i.to_be_bytes()); + let path = TrieHashType::from_bytes(&path_bytes).unwrap(); + let value = TrieLeaf::new(&[], &[(i + 100) as u8; 40]); + marf.insert_raw(path, value).unwrap(); + } + + // Assert: at least one non-leaf node has a COW pointer before commit. 
+ { + let backend = marf.borrow_storage_backend(); + let uncommitted = backend + .transient_data() + .uncommitted_writes + .as_ref() + .expect("uncommitted writes should exist"); + let trie_ram = uncommitted.1.trie_ram_ref(); + let cow_count = trie_ram + .data() + .iter() + .filter(|(node, _)| !node.is_leaf() && node.get_cow_ptr().is_some()) + .count(); + assert!( + cow_count > 0, + "expected at least one non-leaf COW node before block B commit, got 0" + ); + } + + marf.commit_to(&block_b).unwrap(); + } + + // Block C: modify same 8 keys again -> amendment patches + { + let storage = + TrieFileStorage::::open(test_path_str, marf_opts.clone()).unwrap(); + let mut marf = MARF::::from_storage(storage); + marf.begin(&block_b, &block_c).unwrap(); + for i in 0u64..8 { + let mut path_bytes = [0u8; 32]; + path_bytes[24..32].copy_from_slice(&i.to_be_bytes()); + let path = TrieHashType::from_bytes(&path_bytes).unwrap(); + let value = TrieLeaf::new(&[], &[(i + 200) as u8; 40]); + marf.insert_raw(path, value).unwrap(); + } + + // Assert: at least one non-leaf node has patches (amendment path). 
+ { + let backend = marf.borrow_storage_backend(); + let uncommitted = backend + .transient_data() + .uncommitted_writes + .as_ref() + .expect("uncommitted writes should exist"); + let trie_ram = uncommitted.1.trie_ram_ref(); + let patch_count = trie_ram + .data() + .iter() + .filter(|(node, _)| !node.is_leaf() && !node.get_patches().is_empty()) + .count(); + assert!( + patch_count > 0, + "expected at least one non-leaf patched node before block C commit, got 0" + ); + } + + marf.commit_to(&block_c).unwrap(); + } + + // Verify data integrity at block C + { + let storage = TrieFileStorage::::open(test_path_str, marf_opts).unwrap(); + let mut marf = MARF::::from_storage(storage); + for i in 0u64..16 { + let mut path_bytes = [0u8; 32]; + path_bytes[24..32].copy_from_slice(&i.to_be_bytes()); + let path = TrieHashType::from_bytes(&path_bytes).unwrap(); + let leaf = MARF::get_path(&mut marf.borrow_storage_backend(), &block_c, &path) + .unwrap() + .unwrap(); + if i < 8 { + assert_eq!( + leaf.data.to_vec()[0], + (i + 200) as u8, + "key {i} should have block C value" + ); + } else { + assert_eq!( + leaf.data.to_vec()[0], + i as u8, + "key {i} should have block A value" + ); + } + } + } +} diff --git a/stackslib/src/chainstate/stacks/index/trie.rs b/stackslib/src/chainstate/stacks/index/trie.rs index b7019e1bbff..a1632d37e2b 100644 --- a/stackslib/src/chainstate/stacks/index/trie.rs +++ b/stackslib/src/chainstate/stacks/index/trie.rs @@ -24,7 +24,7 @@ use crate::chainstate::stacks::index::node::{ TrieNode4, TrieNode48, TrieNodeID, TrieNodeType, TriePtr, }; use crate::chainstate::stacks::index::storage::{TrieHashCalculationMode, TrieStorageConnection}; -use crate::chainstate::stacks::index::{Error, MarfTrieId, TrieHasher, TrieLeaf}; +use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId, TrieHasher, TrieLeaf}; use crate::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; use crate::util::macros::is_trace; @@ -753,7 +753,46 @@ impl Trie { // here is where some 
mind-bending things begin to happen. // we want to find the block at a given _height_. but how to do so? // use the data stored already in the MARF. - let cur_block_height = + // + // In a squashed MARF, all blocks at heights 0..=H share a single + // blob whose OWN_BLOCK_HEIGHT_KEY value is H (the squash height). + // Using that value would produce the wrong number of ancestors. + // Instead, look up the actual height from the SQL side-table that + // was populated during squashing. + let cur_block_height = if storage.squash_info().is_some() { + // Try the squash side-table first. Blocks within the squashed + // range (0..=H) MUST be resolved here because the trie would + // return H for all of them. Blocks extended after squashing + // are not in the side-table and fall through to the trie path. + match trie_sql::read_squash_block_height(storage.sqlite_conn(), &cur_block_header)? { + Some(h) => h, + None => { + // Not in the side-table — must be a post-squash block. + let h = MARF::get_block_height_miner_tip( + storage, + &cur_block_header, + &cur_block_header, + )? + .ok_or_else(|| { + Error::CorruptionError(format!( + "Could not obtain block height for block {}: got None", + &cur_block_header + )) + })?; + // Sanity: a post-squash block must be above the squash height. + if let Some(info) = storage.squash_info() { + if h <= info.height { + return Err(Error::CorruptionError(format!( + "Block {cur_block_header} at height {h} is within squashed \ + range (0..={}) but missing from marf_squash_block_heights", + info.height + ))); + } + } + h + } + } + } else { MARF::get_block_height_miner_tip(storage, &cur_block_header, &cur_block_header) .map_err(|e| match e { Error::NotFoundError => Error::CorruptionError(format!( @@ -767,26 +806,44 @@ impl Trie { "Could not obtain block height for block {}: got None", &cur_block_header )) - })?; + })? 
+ }; let mut log_depth = 0; while log_depth < 32 && (1u32 << log_depth) <= cur_block_height { - let prev_block_header = MARF::get_block_at_height( - storage, - cur_block_height - (1u32 << log_depth), - &cur_block_header, - )? - .ok_or_else(|| { - Error::CorruptionError(format!( - "Could not obtain block hash at block height {}", - cur_block_height - (1u32 << log_depth) - )) - })?; - - storage.open_block(&prev_block_header)?; - - let root_ptr = storage.root_trieptr(); - let ancestor_hash = storage.read_node_hash_bytes(&root_ptr)?; + let ancestor_height = cur_block_height - (1u32 << log_depth); + let prev_block_header = + MARF::get_block_at_height(storage, ancestor_height, &cur_block_header)? + .ok_or_else(|| { + Error::CorruptionError(format!( + "Could not obtain block hash at block height {ancestor_height}" + )) + })?; + + // Use the stored root-hash key for squashed MARFs when the ancestor + // height falls within the squashed range, otherwise fall back to + // the archival path (open_block). This eliminates the duplicated + // fallback arm that previously existed for the > info.height and + // non-squashed cases. + let use_stored_root = storage + .squash_info() + .is_some_and(|info| ancestor_height <= info.height); + + let ancestor_hash = if use_stored_root { + trie_sql::read_squash_archival_marf_root_hash( + storage.sqlite_conn(), + ancestor_height, + )? + .ok_or_else(|| { + Error::CorruptionError(format!( + "Could not obtain squashed root hash at height {ancestor_height}" + )) + })? + } else { + storage.open_block(&prev_block_header)?; + let root_ptr = storage.root_trieptr(); + storage.read_node_hash_bytes(&root_ptr)? 
+ }; trace!( "Include root hash {} from block {} in ancestor #{}", diff --git a/stackslib/src/chainstate/stacks/index/trie_sql.rs b/stackslib/src/chainstate/stacks/index/trie_sql.rs index 5634fe7ea2c..3f042d8f074 100644 --- a/stackslib/src/chainstate/stacks/index/trie_sql.rs +++ b/stackslib/src/chainstate/stacks/index/trie_sql.rs @@ -28,13 +28,13 @@ use crate::chainstate::stacks::index::node::{TrieNodeType, TriePtr}; #[cfg(test)] use crate::chainstate::stacks::index::storage::TrieStorageConnection; use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId}; -use crate::types::chainstate::TrieHash; +use crate::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; use crate::types::sqlite::NO_PARAMS; -use crate::util_lib::db::{query_count, query_row, tx_begin_immediate, u64_to_sql}; +use crate::util_lib::db::{query_count, query_row, table_exists, tx_begin_immediate, u64_to_sql}; static SQL_MARF_DATA_TABLE: &str = " CREATE TABLE IF NOT EXISTS marf_data ( - block_id INTEGER PRIMARY KEY, + block_id INTEGER PRIMARY KEY, block_hash TEXT UNIQUE NOT NULL, -- the trie itself. -- if not used, then set to a zero-byte entry. @@ -47,7 +47,7 @@ CREATE INDEX IF NOT EXISTS unconfirmed_marf_data ON marf_data(unconfirmed); "; static SQL_MARF_MINED_TABLE: &str = " CREATE TABLE IF NOT EXISTS mined_blocks ( - block_id INTEGER PRIMARY KEY, + block_id INTEGER PRIMARY KEY, block_hash TEXT UNIQUE NOT NULL, data BLOB NOT NULL ); @@ -78,16 +78,242 @@ INSERT OR REPLACE INTO migrated_version (version) VALUES (1); pub static SQL_MARF_SCHEMA_VERSION: u64 = 2; +/// SQL table for squash metadata (root hash and height). +/// Stored outside the trie so it does not affect the MARF root hash. 
+static SQL_MARF_SQUASH_TABLES: &str = " +CREATE TABLE IF NOT EXISTS marf_squash_info ( + id INTEGER PRIMARY KEY CHECK (id = 1), + archival_marf_root_hash BLOB NOT NULL, + squash_root_node_hash BLOB, + squash_height INTEGER NOT NULL +); +CREATE TABLE IF NOT EXISTS marf_squash_archival_marf_roots ( + height INTEGER PRIMARY KEY, + marf_root_hash BLOB NOT NULL +); +CREATE TABLE IF NOT EXISTS marf_squash_block_heights ( + block_hash TEXT PRIMARY KEY, + height INTEGER NOT NULL +); +"; + pub fn create_tables_if_needed(conn: &mut Connection) -> Result<(), Error> { let tx = tx_begin_immediate(conn)?; tx.execute_batch(SQL_MARF_DATA_TABLE)?; tx.execute_batch(SQL_MARF_MINED_TABLE)?; tx.execute_batch(SQL_EXTENSION_LOCKS_TABLE)?; + tx.execute_batch(SQL_MARF_SQUASH_TABLES)?; tx.commit().map_err(|e| e.into()) } +/// Write squash metadata to the out-of-trie SQL table. +pub fn write_squash_info( + conn: &Connection, + archival_marf_root_hash: &TrieHash, + height: u32, +) -> Result<(), Error> { + conn.execute( + "INSERT OR REPLACE INTO marf_squash_info (id, archival_marf_root_hash, squash_height) VALUES (1, ?1, ?2)", + params![archival_marf_root_hash.as_bytes().to_vec(), height as i64], + )?; + Ok(()) +} + +/// Read squash metadata from the out-of-trie SQL table. +/// Returns `None` for archival (non-squashed) MARFs. +/// Returns `(archival_marf_root_hash, squash_root_node_hash_opt, height)`. +pub fn read_squash_info( + conn: &Connection, +) -> Result, u32)>, Error> { + if !table_exists(conn, "marf_squash_info")? 
{ + return Ok(None); + } + + let result: Option<(Vec, Option>, i64)> = conn + .query_row( + "SELECT archival_marf_root_hash, squash_root_node_hash, squash_height FROM marf_squash_info WHERE id = 1", + NO_PARAMS, + |row| { + let archival_bytes: Vec = row.get(0)?; + let squash_bytes: Option> = row.get(1)?; + let height: i64 = row.get(2)?; + Ok((archival_bytes, squash_bytes, height)) + }, + ) + .optional()?; + + match result { + Some((archival_bytes, squash_bytes, height)) => { + if archival_bytes.len() != TRIEHASH_ENCODED_SIZE { + return Err(Error::CorruptionError( + "Invalid archival root hash length".to_string(), + )); + } + let archival_marf_root_hash = + TrieHash::from_bytes(&archival_bytes).ok_or_else(|| { + Error::CorruptionError("Invalid archival root hash bytes".to_string()) + })?; + + let squash_root_node_hash = match squash_bytes { + Some(bytes) => { + if bytes.len() != TRIEHASH_ENCODED_SIZE { + return Err(Error::CorruptionError( + "Invalid squash root hash length".to_string(), + )); + } + Some(TrieHash::from_bytes(&bytes).ok_or_else(|| { + Error::CorruptionError("Invalid squash root hash bytes".to_string()) + })?) + } + None => None, + }; + + let squash_height = u32::try_from(height) + .map_err(|_| Error::CorruptionError("Invalid squash height".to_string()))?; + + Ok(Some(( + archival_marf_root_hash, + squash_root_node_hash, + squash_height, + ))) + } + None => Ok(None), + } +} + +/// Update the squash_root_node_hash in the squash info table (computed after blob commit). +/// Fails if no squash info row exists. 
+pub fn update_squash_root_node_hash(conn: &Connection, hash: &TrieHash) -> Result<(), Error> { + let updated = conn.execute( + "UPDATE marf_squash_info SET squash_root_node_hash = ?1 WHERE id = 1", + params![hash.as_bytes().to_vec()], + )?; + if updated == 0 { + return Err(Error::CorruptionError( + "update_squash_root_node_hash: no marf_squash_info row exists".to_string(), + )); + } + Ok(()) +} + +/// Read the stored root hash for a given height from the SQL table. +/// Returns `None` if the height is not present (archival MARF or height +/// outside the squashed range). +pub fn read_squash_archival_marf_root_hash( + conn: &Connection, + height: u32, +) -> Result, Error> { + let result: Option> = conn + .query_row( + "SELECT marf_root_hash FROM marf_squash_archival_marf_roots WHERE height = ?1", + params![height as i64], + |row| row.get(0), + ) + .optional()?; + + match result { + Some(bytes) => { + if bytes.len() != TRIEHASH_ENCODED_SIZE { + return Err(Error::CorruptionError( + "Invalid squash root hash length".to_string(), + )); + } + Ok(Some(TrieHash::from_bytes(&bytes).ok_or_else(|| { + Error::CorruptionError("Invalid squash root hash bytes".to_string()) + })?)) + } + None => Ok(None), + } +} + +/// Read the stored height for a block hash from the squash block-heights table. +/// Returns `None` if the block hash is not present (archival MARF or block +/// outside the squashed range). +pub fn read_squash_block_height( + conn: &Connection, + block_hash: &T, +) -> Result, Error> { + let result: Option = conn + .query_row( + "SELECT height FROM marf_squash_block_heights WHERE block_hash = ?1", + params![&block_hash.to_string()], + |row| row.get(0), + ) + .optional()?; + + result + .map(|h| { + u32::try_from(h) + .map_err(|_| Error::CorruptionError("Invalid squash block height".to_string())) + }) + .transpose() +} + +/// Read the block hash for a given height from the squash block-heights table. +/// This is the reverse lookup: height -> block_hash. 
+/// Returns `None` if the height is not in the squashed range. +#[cfg(test)] +pub fn read_squash_block_height_reverse( + conn: &Connection, + height: u32, +) -> Result, Error> { + let result: Option = conn + .query_row( + "SELECT block_hash FROM marf_squash_block_heights WHERE height = ?1", + params![height as i64], + |row| row.get(0), + ) + .optional()?; + + Ok(result) +} + +/// Bulk-read all confirmed block entries from `marf_data`. +/// +/// Returns `(block_id, block_hash, external_offset)` for every confirmed row, +/// ordered by `block_id`. Used by the squash pipeline to avoid per-row SQL +/// lookups for block IDs and blob offsets. +pub fn bulk_read_block_entries( + conn: &Connection, +) -> Result, Error> { + let mut stmt = conn.prepare( + "SELECT block_id, block_hash, external_offset FROM marf_data \ + WHERE unconfirmed = 0 ORDER BY block_id", + )?; + let rows = stmt.query_map(NO_PARAMS, |row| { + let block_id: u32 = row.get(0)?; + let block_hash: T = row.get(1)?; + let offset_i64: i64 = row.get(2)?; + Ok((block_id, block_hash, offset_i64)) + })?; + let mut result = Vec::new(); + for row in rows { + let (block_id, block_hash, offset_i64) = row?; + let offset = u64::try_from(offset_i64).map_err(|_| Error::OverflowError)?; + result.push((block_id, block_hash, offset)); + } + Ok(result) +} + +/// Bulk-update all `marf_data` entries to share the same blob offset/length, +/// except for the tip block. Used post-commit in the squash pipeline to +/// point all historical placeholder entries at the shared squash trie storage. 
+pub fn bulk_update_blob_offsets( + conn: &Connection, + offset: u64, + length: u64, + tip_block_hash: &T, +) -> Result { + conn.execute( + "UPDATE marf_data SET external_offset = ?1, external_length = ?2 \ + WHERE block_hash != ?3 AND unconfirmed = 0", + params![u64_to_sql(offset)?, u64_to_sql(length)?, tip_block_hash], + ) + .map_err(|e| e.into()) +} + fn get_schema_version(conn: &Connection) -> u64 { // if the table doesn't exist, then the version is 1. let sql = "SELECT version FROM schema_version"; @@ -115,8 +341,23 @@ fn get_migrated_version(conn: &Connection) -> u64 { /// Migrate the MARF database to the currently-supported schema. /// Returns the version of the DB prior to the migration. -pub fn migrate_tables_if_needed(conn: &mut Connection) -> Result { +/// +/// If `readonly` is `true`, this performs compatibility checks only and +/// returns an error if migration would be required. +pub fn migrate_tables_if_needed( + conn: &mut Connection, + readonly: bool, +) -> Result { let first_version = get_schema_version(conn); + if readonly { + if first_version != SQL_MARF_SCHEMA_VERSION { + return Err(Error::CorruptionError(format!( + "MARF schema version {first_version} is not compatible with read-only open (expected {SQL_MARF_SCHEMA_VERSION})" + ))); + } + return Ok(first_version); + } + loop { let version = get_schema_version(conn); match version { @@ -198,6 +439,15 @@ pub fn get_unconfirmed_block_identifier( .map_err(|e| e.into()) } +pub fn get_latest_confirmed_block_hash(conn: &Connection) -> Result { + conn.query_row( + "SELECT block_hash FROM marf_data WHERE unconfirmed = 0 ORDER BY block_id DESC LIMIT 1", + NO_PARAMS, + |row| row.get("block_hash"), + ) + .map_err(|e| e.into()) +} + pub fn get_block_hash(conn: &Connection, local_id: u32) -> Result { let result = conn .query_row(