Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
9bb657e
add support for u64 children pointers offsets
francesco-stacks Mar 3, 2026
b1462c8
Merge branch 'develop' into feat/marf-u64-offset-pointers
francesco-stacks Mar 6, 2026
155b853
use 0x20 bit for mixed u32/u64 pointers
francesco-stacks Mar 11, 2026
b08d490
Merge branch 'develop' into feat/marf-u64-offset-pointers
francesco-stacks Mar 11, 2026
e368def
remove unnecessary casts to u64
francesco-stacks Mar 11, 2026
83f06b8
add safe casts to usize
francesco-stacks Mar 11, 2026
45c98a8
cargo fmt
francesco-stacks Mar 11, 2026
a554f68
Merge branch 'develop' into feat/marf-u64-offset-pointers
francesco-stacks Mar 20, 2026
504473b
Merge branch 'develop' into feat/marf-u64-offset-pointers
francesco-stacks Mar 24, 2026
5eb3035
update changelog
francesco-stacks Mar 24, 2026
d7af514
crc: update comments
francesco-stacks Mar 24, 2026
d950f3b
crc: use u64::from instead of cast. return OverflowError on error
francesco-stacks Mar 24, 2026
673f3a9
crc: add migrate_tables tests
francesco-stacks Mar 24, 2026
8690320
crc: update tests names from v2 to u64_ptr
francesco-stacks Mar 24, 2026
a550bd9
crc: remove more castings
francesco-stacks Mar 24, 2026
208c00a
fix ptrs_from_bytes
francesco-stacks Mar 25, 2026
ba79988
simplify overflow error propagations
francesco-stacks Mar 25, 2026
5a427a0
improve dump_consume and dump_compressed_consume
francesco-stacks Mar 25, 2026
eac9816
fix write_node_hash signature
francesco-stacks Mar 25, 2026
c6009c8
fix missing +1
francesco-stacks Mar 25, 2026
a3a04db
remove unused symbol
francesco-stacks Mar 25, 2026
45489df
fix dump_compressed_consume for patch nodes
francesco-stacks Mar 26, 2026
ec3e1c3
fix typo
francesco-stacks Mar 26, 2026
ae7369c
add squash metadata tables and storage plumbing
francesco-stacks Mar 30, 2026
7f94df2
add squash logic and squash-aware trie lookups
francesco-stacks Mar 30, 2026
f1d73b8
Merge branch 'develop' into feat/marf-u64-offset-pointers
francesco-stacks Mar 31, 2026
35b9d6a
Merge branch 'feat/marf-u64-offset-pointers' into feat/marf-squash-foundation
francesco-stacks Mar 31, 2026
04f3aad
Merge branch 'feat/marf-squash-foundation' into feat/marf-squash-engine
francesco-stacks Mar 31, 2026
8c1f2e9
fix tests
francesco-stacks Apr 2, 2026
059bf79
cargo fmt
francesco-stacks Apr 2, 2026
d142bf5
Merge branch 'feat/marf-squash-foundation' into feat/marf-squash-engine
francesco-stacks Apr 2, 2026
090af2c
add side-table snapshot framework with index and SPV side-table copy
francesco-stacks Apr 2, 2026
78f2ca3
add changelog fragment
francesco-stacks Apr 2, 2026
60e19dc
Merge branch 'feat/marf-squash-foundation' into feat/marf-squash-engine
francesco-stacks Apr 2, 2026
3ac2225
add changelog fragment
francesco-stacks Apr 2, 2026
514cdcd
Merge branch 'feat/marf-squash-engine' into feat/marf-snapshot-framework
francesco-stacks Apr 2, 2026
75293cc
add changelog fragment
francesco-stacks Apr 2, 2026
a84d52b
use table_exists helper and leaf hash filtered fork_storage validation
francesco-stacks Apr 2, 2026
5c05915
make SPV source mandatory and update tests
francesco-stacks Apr 7, 2026
5492356
add missing tests
francesco-stacks Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/marf-snapshot-framework.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add snapshot framework for copying chainstate index and SPV side-tables into squashed output
1 change: 1 addition & 0 deletions changelog.d/marf-squash-engine.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add MARF squash engine (`squash_to_path`) and squash-aware trie lookups for root hashes and block heights
1 change: 1 addition & 0 deletions changelog.d/marf-squash-foundation.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add squash metadata SQL tables and storage foundation for MARF squashing support
1 change: 1 addition & 0 deletions changelog.d/marf-u64-offset-pointers.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for u64 children pointer offsets in the MARF trie, using a mixed u32/u64 encoding with a 0x20 bit flag for backward compatibility
6 changes: 3 additions & 3 deletions stackslib/src/burnchains/bitcoin/spv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ const BLOCK_DIFFICULTY_INTERVAL: u32 = 14 * 24 * 60 * 60; // two weeks, in secon

pub const SPV_DB_VERSION: &str = "3";

const SPV_INITIAL_SCHEMA: &[&str] = &[
pub(crate) const SPV_INITIAL_SCHEMA: &[&str] = &[
r#"
CREATE TABLE headers(
version INTEGER NOT NULL,
Expand All @@ -75,15 +75,15 @@ const SPV_INITIAL_SCHEMA: &[&str] = &[
// unlike the `headers` table, this table will never be deleted from, since we use it to determine
// whether or not newly-arrived headers represent a better chain than the best-known chain. The
// only way to _replace_ a row is to find a header difficulty interval with a _higher_ work score.
const SPV_SCHEMA_2: &[&str] = &[r#"
pub(crate) const SPV_SCHEMA_2: &[&str] = &[r#"
CREATE TABLE chain_work(
interval INTEGER PRIMARY KEY,
work TEXT NOT NULL -- 32-byte (256-bit) integer
);
"#];

// force the node to go and store the burnchain block header hash as well
const SPV_SCHEMA_3: &[&str] = &[
pub(crate) const SPV_SCHEMA_3: &[&str] = &[
r#"
DROP TABLE headers;
"#,
Expand Down
1 change: 1 addition & 0 deletions stackslib/src/chainstate/stacks/db/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ pub mod accounts;
pub mod blocks;
pub mod contracts;
pub mod headers;
pub mod snapshot;
pub mod transactions;
pub mod unconfirmed;

Expand Down
343 changes: 343 additions & 0 deletions stackslib/src/chainstate/stacks/db/snapshot/common.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
// Copyright (C) 2026 Stacks Open Internet Foundation
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use std::collections::HashSet;
use std::time::Instant;

use rusqlite::{params, Connection};
use stacks_common::util::hash::to_hex;

use crate::chainstate::stacks::index::marf::{MARFOpenOpts, MarfConnection, MARF};
use crate::chainstate::stacks::index::storage::{TrieFileStorage, TrieHashCalculationMode};
use crate::chainstate::stacks::index::{trie_sql, Error, MarfTrieId};

/// A spec for copying a single table from the ATTACHed `src` database.
///
/// The `source_sql` is the exact `SELECT` used to filter source rows.
/// Copy uses plain `INSERT ... SELECT` (no `OR IGNORE`) so that unexpected
/// pre-population in the destination fails loudly.
pub struct TableCopySpec {
    /// Destination table name in the main (destination) schema; also used as
    /// the label in copy progress logging.
    pub table: &'static str,
    /// The exact SELECT for the source side, e.g.
    /// `"SELECT * FROM src.snapshots WHERE sortition_id IN (SELECT sortition_id FROM canonical_sortitions)"`.
    pub source_sql: String,
}

/// Clone table and index schemas from the source DB (via `sqlite_master`) into the
/// destination connection. This avoids duplicating any CREATE TABLE / ALTER TABLE /
/// CREATE INDEX statements and is always in sync with whatever migration version the
/// source is at.
///
/// Expects the source DB to be ATTACHed as `src`.
pub fn clone_schemas_from_source(conn: &Connection, tables: &[&str]) -> Result<(), Error> {
let mut stmts: Vec<String> = Vec::new();

for table in tables {
let sql: Option<String> = conn
.query_row(
"SELECT sql FROM src.sqlite_master WHERE type='table' AND name=?1",
params![table],
|row| row.get(0),
)
.ok();

if let Some(create_sql) = sql {
let safe_sql = if create_sql.contains("IF NOT EXISTS") {
create_sql
} else {
create_sql.replacen("CREATE TABLE", "CREATE TABLE IF NOT EXISTS", 1)
};
stmts.push(safe_sql);
}

let mut idx_stmt = conn
.prepare("SELECT sql FROM src.sqlite_master WHERE type='index' AND tbl_name=?1 AND sql IS NOT NULL")
.map_err(Error::SQLError)?;
let idx_rows = idx_stmt
.query_map(params![table], |row| row.get::<_, String>(0))
.map_err(Error::SQLError)?;
for idx_sql in idx_rows {
let idx_sql = idx_sql.map_err(Error::SQLError)?;
let safe_sql = if idx_sql.contains("IF NOT EXISTS") {
idx_sql
} else {
idx_sql.replacen("CREATE INDEX", "CREATE INDEX IF NOT EXISTS", 1)
};
stmts.push(safe_sql);
}
}

for stmt in &stmts {
conn.execute_batch(stmt).map_err(Error::SQLError)?;
}

Ok(())
}

/// Clone schemas only for tables that exist in the source DB.
/// Returns the list of tables that were actually cloned.
///
/// Expects the source DB to be ATTACHed as `src`; the presence probe and
/// the actual clone both run against `src.sqlite_master`.
pub fn clone_optional_schemas_from_source(
    conn: &Connection,
    tables: &[&str],
) -> Result<Vec<String>, Error> {
    let mut cloned = Vec::new();
    for &table in tables {
        // Probe the source catalog; unlike `table_exists`, a query failure
        // here is propagated rather than treated as "absent".
        let present: bool = conn
            .query_row(
                "SELECT COUNT(*) > 0 FROM src.sqlite_master WHERE type='table' AND name=?1",
                params![table],
                |row| row.get(0),
            )
            .map_err(Error::SQLError)?;
        if !present {
            continue;
        }
        clone_schemas_from_source(conn, &[table])?;
        cloned.push(table.to_string());
    }
    Ok(cloned)
}

/// Check if a table exists in the given schema prefix (empty for main, "src" for attached).
///
/// Any query failure is treated as "does not exist" (`false`).
pub fn table_exists(conn: &Connection, schema: &str, table: &str) -> bool {
    // Build the qualified catalog name: `sqlite_master` for the main schema,
    // `<schema>.sqlite_master` for an attached one.
    let qualifier = if schema.is_empty() {
        String::new()
    } else {
        format!("{schema}.")
    };
    let sql =
        format!("SELECT COUNT(*) > 0 FROM {qualifier}sqlite_master WHERE type='table' AND name=?1");
    conn.query_row(&sql, params![table], |row| row.get(0))
        .unwrap_or(false)
}

/// Check bidirectional full-row EXCEPT equality.
/// Returns true if the two result sets are identical.
///
/// A query failure on either side is counted as a mismatch (defaults the
/// surplus count to 1).
pub fn full_row_except_match(conn: &Connection, dst_sql: &str, src_sql: &str) -> bool {
    // Count rows present in `left` but missing from `right`.
    let surplus = |left: &str, right: &str| -> i64 {
        conn.query_row(
            &format!("SELECT COUNT(*) FROM ({left} EXCEPT {right})"),
            [],
            |row| row.get(0),
        )
        .unwrap_or(1)
    };
    let extra_in_dst = surplus(dst_sql, src_sql);
    let extra_in_src = surplus(src_sql, dst_sql);
    extra_in_dst == 0 && extra_in_src == 0
}

/// One-directional subset check: every row in `dst_sql` must exist in
/// `src_sql`, but `src_sql` may contain additional rows. Use this for
/// non-consensus tables that grow after the snapshot (e.g. signer_stats,
/// matured_rewards).
///
/// A query failure is counted as a mismatch (defaults the count to 1).
pub fn dst_subset_of_src(conn: &Connection, dst_sql: &str, src_sql: &str) -> bool {
    let sql = format!("SELECT COUNT(*) FROM ({dst_sql} EXCEPT {src_sql})");
    let leftover: i64 = conn.query_row(&sql, [], |row| row.get(0)).unwrap_or(1);
    leftover == 0
}

/// Execute a slice of copy specs inside the current transaction.
/// Returns a vec of (table_name, rows_copied).
pub fn execute_copy_specs(
conn: &Connection,
specs: &[TableCopySpec],
) -> Result<Vec<(&'static str, u64)>, Error> {
let mut results = Vec::with_capacity(specs.len());
for spec in specs {
let t = Instant::now();
let sql = format!("INSERT INTO {} {}", spec.table, spec.source_sql);
let rows = conn.execute(&sql, []).map_err(Error::SQLError)? as u64;
info!(
" copy: {} ({} rows) in {:?}",
spec.table,
rows,
t.elapsed()
);
results.push((spec.table, rows));
}
Ok(results)
}

/// Check an optional table's match status.
/// Returns None if absent in both, Some(false) if present in one but not other,
/// Some(true/false) from full-row EXCEPT if present in both.
pub fn check_optional_table_match(
    conn: &Connection,
    table: &str,
    src_filter: Option<&str>,
) -> Option<bool> {
    let dst_present = table_exists(conn, "", table);
    let src_present = table_exists(conn, "src", table);

    if dst_present != src_present {
        // Present on exactly one side: definite mismatch.
        return Some(false);
    }
    if !dst_present {
        // Absent from both sides: nothing to compare.
        return None;
    }

    // Present in both: compare full rows, with an optional source-side filter.
    let mut src_sql = format!("SELECT * FROM src.{table}");
    if let Some(filter) = src_filter {
        src_sql.push(' ');
        src_sql.push_str(filter);
    }
    Some(full_row_except_match(
        conn,
        &format!("SELECT * FROM {table}"),
        &src_sql,
    ))
}

/// Collect the hex-encoded `MARFValue` of every leaf in the squashed trie.
///
/// Opens the MARF at `db_path` read-only, resolves the tip, and walks the
/// trie via `for_each_leaf`. Auto-detects external blobs.
///
/// Returns `(tip_block_hash, leaf_value_hashes)`.
pub fn collect_leaf_value_hashes<T: MarfTrieId>(
    db_path: &str,
) -> Result<(T, HashSet<String>), Error> {
    // External blobs are signalled by a sibling "<db_path>.blobs" file.
    let blobs_path = format!("{db_path}.blobs");
    let has_blobs = std::path::Path::new(&blobs_path).exists();

    let opts = MARFOpenOpts::new(TrieHashCalculationMode::Deferred, "noop", has_blobs);
    let mut marf = MARF::<T>::from_storage(TrieFileStorage::open_readonly(db_path, opts)?);
    let tip = trie_sql::get_latest_confirmed_block_hash::<T>(marf.sqlite_conn())?;

    // Walk every leaf under the tip, accumulating hex-encoded values.
    let mut leaf_values = HashSet::new();
    marf.with_conn(|conn| {
        MARF::for_each_leaf(conn, &tip, |_hash, value| {
            leaf_values.insert(to_hex(&value.to_vec()));
            Ok(())
        })
    })?;

    Ok((tip, leaf_values))
}

/// Copy only the `__fork_storage` rows that are referenced by leaf nodes
/// in the squashed MARF trie. Non-canonical entries from forks are excluded.
///
/// Opens the squashed MARF read-only and walks the trie via `for_each_leaf`
/// to collect canonical leaf value hashes, then copies only the matching
/// `__fork_storage` rows from the source.
///
/// Falls back to a full copy if `marf_data` is absent (e.g. in test
/// fixtures that don't go through `squash_to_path`).
///
/// Returns the number of rows copied.
pub fn copy_canonical_fork_storage<T: MarfTrieId>(
conn: &Connection,
dst_path: &str,
) -> Result<u64, Error> {
// Check if the source even has __fork_storage (test fixtures may not).
let src_has_table: bool = conn
.query_row(
"SELECT COUNT(*) > 0 FROM src.sqlite_master WHERE type='table' AND name='__fork_storage'",
[],
|row| row.get(0),
)
.unwrap_or(false);

if !src_has_table {
info!(" copy_canonical_fork_storage: source has no __fork_storage, skipping");
return Ok(0);
}

// Ensure the destination table exists (clone schema from source).
clone_schemas_from_source(conn, &["__fork_storage"])?;

// If marf_data doesn't exist, fall back to full copy.
let has_marf_data: bool = conn
.query_row(
"SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='marf_data'",
[],
|row| row.get(0),
)
.unwrap_or(false);

if !has_marf_data {
let rows = conn
.execute(
"INSERT OR REPLACE INTO __fork_storage SELECT * FROM src.__fork_storage",
[],
)
.map_err(Error::SQLError)? as u64;
info!(" copy_canonical_fork_storage: no marf_data table, full copy ({rows} rows)");
return Ok(rows);
}

let t = Instant::now();

let (_tip, leaf_hashes) = collect_leaf_value_hashes::<T>(dst_path)?;
let insert_count = leaf_hashes.len() as u64;

// Build a temp table of canonical leaf value hashes.
conn.execute_batch("CREATE TEMP TABLE __squash_leaf_values (value_hash TEXT PRIMARY KEY)")
.map_err(Error::SQLError)?;

{
let mut stmt = conn
.prepare("INSERT OR IGNORE INTO __squash_leaf_values (value_hash) VALUES (?1)")
.map_err(Error::SQLError)?;
for hash in &leaf_hashes {
stmt.execute(params![hash]).map_err(Error::SQLError)?;
}
}
drop(leaf_hashes);

info!(
" copy_canonical_fork_storage: extracted {insert_count} leaf hashes in {:?}",
t.elapsed()
);

// Copy only the referenced rows.
let t2 = Instant::now();
let rows = conn
.execute(
"INSERT OR REPLACE INTO __fork_storage \
SELECT f.* FROM src.__fork_storage f \
INNER JOIN __squash_leaf_values lv ON f.value_hash = lv.value_hash",
[],
)
.map_err(Error::SQLError)? as u64;

conn.execute_batch("DROP TABLE IF EXISTS __squash_leaf_values")
.map_err(Error::SQLError)?;

info!(
" copy_canonical_fork_storage: copied {rows} rows (from {insert_count} leaves) in {:?}",
t2.elapsed()
);

Ok(rows)
}

pub fn checkpoint_destination_wal(conn: &Connection) -> Result<(), Error> {
let _: (i64, i64, i64) = conn
.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
Ok((row.get(0)?, row.get(1)?, row.get(2)?))
})
.map_err(Error::SQLError)?;
Ok(())
}
Loading