-
Notifications
You must be signed in to change notification settings - Fork 756
feat: marf squash engine #7060
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
feat: marf squash engine #7060
Changes from 24 commits
7f94df2
04f3aad
d142bf5
60e19dc
3ac2225
a4007b4
eb62b95
8eb6275
5db9691
8b8832d
70eafc0
cb393af
d38f8bd
50d601b
56cba88
23ba282
ce22a2a
0d24e87
4e6dcad
c207c61
67a8ffa
6eb0265
88d8a06
06a84b3
f1736df
6e175b8
71ed7da
a642873
e820a0e
a7e5652
20739d6
70a1aa1
db15da7
35f9d01
cd0a433
13186f1
2289f6b
fcb3f3a
820e32e
6324b34
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Add MARF squash engine (`squash_to_path`) and squash-aware trie lookups for root hashes and block heights |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ use rusqlite::{Connection, Transaction}; | |
| use stacks_common::types::chainstate::{TrieHash, TRIEHASH_ENCODED_SIZE}; | ||
| use stacks_common::util::hash::Sha512Trunc256Sum; | ||
|
|
||
| pub use super::squash::SquashStats; | ||
| use super::storage::ReopenedTrieStorageConnection; | ||
| use crate::chainstate::stacks::index::bits::{get_leaf_hash, get_node_hash}; | ||
| use crate::chainstate::stacks::index::node::{ | ||
|
|
@@ -35,7 +36,9 @@ use crate::chainstate::stacks::index::storage::{ | |
| TrieStorageTransaction, | ||
| }; | ||
| use crate::chainstate::stacks::index::trie::Trie; | ||
| use crate::chainstate::stacks::index::{Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof}; | ||
| use crate::chainstate::stacks::index::{ | ||
| trie_sql, Error, MARFValue, MarfTrieId, TrieLeaf, TrieMerkleProof, | ||
| }; | ||
| use crate::util_lib::db::Error as db_error; | ||
|
|
||
| pub const BLOCK_HASH_TO_HEIGHT_MAPPING_KEY: &str = "__MARF_BLOCK_HASH_TO_HEIGHT"; | ||
|
|
@@ -208,6 +211,10 @@ pub trait MarfConnection<T: MarfTrieId> { | |
| key: &str, | ||
| ) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> { | ||
| self.with_conn(|conn| { | ||
| // Squash-aware proofs are not currently supported. | ||
| if conn.is_squashed() { | ||
| return Err(Error::UnsupportedOnSquashedMarf("get_with_proof")); | ||
| } | ||
| let marf_value = match MARF::get_by_key(conn, block_hash, key)? { | ||
| None => return Ok(None), | ||
| Some(x) => x, | ||
|
|
@@ -223,6 +230,10 @@ pub trait MarfConnection<T: MarfTrieId> { | |
| hash: &TrieHash, | ||
| ) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> { | ||
| self.with_conn(|conn| { | ||
| // Squash-aware proofs are not currently supported. | ||
| if conn.is_squashed() { | ||
| return Err(Error::UnsupportedOnSquashedMarf("get_with_proof_from_hash")); | ||
| } | ||
| let marf_value = match MARF::get_by_path(conn, block_hash, hash)? { | ||
| None => return Ok(None), | ||
| Some(x) => x, | ||
|
|
@@ -1133,6 +1144,20 @@ impl<T: MarfTrieId> MARF<T> { | |
| ) -> Result<Option<TrieLeaf>, Error> { | ||
| trace!("MARF::get_path({block_hash:?}) {path:?}"); | ||
|
|
||
| // In a squashed MARF, blocks below the squash height share the same blob, so reject | ||
| // historical reads from them. | ||
| if let Some(squash_height) = storage.squash_info().map(|info| info.height) { | ||
| if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)? | ||
| { | ||
| if h < squash_height { | ||
| return Err(Error::HistoricalReadInSquashedRange { | ||
| block_height: h, | ||
| squash_height, | ||
| }); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // a NotFoundError _here_ means that a block didn't exist | ||
| storage.open_block(block_hash).inspect_err(|_e| { | ||
| test_debug!("Failed to open block {block_hash:?}: {_e:?}"); | ||
|
|
@@ -1341,6 +1366,16 @@ impl<T: MarfTrieId> MARF<T> { | |
| } | ||
| } | ||
|
|
||
| // In a squashed MARF, OWN_BLOCK_HEIGHT_KEY returns the squash | ||
| // height H for every block in the squashed range. Use the | ||
| // side-table when available. | ||
| if storage.is_squashed() { | ||
| if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)? | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I assume that in the vast majority of cases, this function is going to be called with the chain tip or very close to it. If that's true, wouldn't it be better if we tried the MARF first, and only fall back to SQL later?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe this is a valid point. In normal operations that will be the case: we'll mostly read from the squash during Clarity lookups, block replay, and the RPC endpoints in general. Anyway, I made the change; it was a bit more involved than I expected, because doing the SQL lookup first had removed a couple of edge cases. 70a1aa1 |
||
| { | ||
| return Ok(Some(h)); | ||
| } | ||
| } | ||
|
|
||
| let marf_value = if block_hash == current_block_hash { | ||
| MARF::get_by_key(storage, current_block_hash, OWN_BLOCK_HEIGHT_KEY)? | ||
| } else { | ||
|
|
@@ -1390,6 +1425,19 @@ impl<T: MarfTrieId> MARF<T> { | |
| return Ok(Some(current_block_hash.clone())); | ||
| } | ||
|
|
||
| // Squashed MARFs keep historical height -> block mappings in | ||
| // `marf_squashed_blocks`, not in per-height trie state. When the | ||
| // caller is inside the squashed range, answer from the side table | ||
| // and preserve the usual "no future blocks" behavior. | ||
| if let Some(squash_height) = storage.squash_info().map(|info| info.height) { | ||
| if current_block_height <= squash_height { | ||
| if height > current_block_height { | ||
| return Ok(None); | ||
| } | ||
| return trie_sql::read_squash_block_hash::<T>(storage.sqlite_conn(), height); | ||
| } | ||
| } | ||
|
|
||
| let height_key = format!("{}::{}", BLOCK_HEIGHT_TO_HASH_MAPPING_KEY, height); | ||
|
|
||
| MARF::get_by_key(storage, current_block_hash, &height_key) | ||
|
|
@@ -1486,6 +1534,10 @@ impl<T: MarfTrieId> MARF<T> { | |
| key: &str, | ||
| ) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> { | ||
| let mut conn = self.storage.connection(); | ||
| // Squash-aware proofs are not currently supported. | ||
| if conn.is_squashed() { | ||
| return Err(Error::UnsupportedOnSquashedMarf("get_with_proof")); | ||
| } | ||
| let marf_value = match MARF::get_by_key(&mut conn, block_hash, key)? { | ||
| None => return Ok(None), | ||
| Some(x) => x, | ||
|
|
@@ -1500,6 +1552,10 @@ impl<T: MarfTrieId> MARF<T> { | |
| path: &TrieHash, | ||
| ) -> Result<Option<(MARFValue, TrieMerkleProof<T>)>, Error> { | ||
| let mut conn = self.storage.connection(); | ||
| // Squash-aware proofs are not currently supported. | ||
| if conn.is_squashed() { | ||
| return Err(Error::UnsupportedOnSquashedMarf("get_with_proof_from_hash")); | ||
| } | ||
| let marf_value = match MARF::get_by_path(&mut conn, block_hash, path)? { | ||
| None => return Ok(None), | ||
| Some(x) => x, | ||
|
|
@@ -1765,6 +1821,18 @@ impl<T: MarfTrieId> MARF<T> { | |
| where | ||
| F: FnMut(TrieHash, MARFValue) -> Result<(), Error>, | ||
| { | ||
| if let Some(squash_height) = storage.squash_info().map(|info| info.height) { | ||
| if let Some(h) = trie_sql::read_squash_block_height(storage.sqlite_conn(), block_hash)? | ||
| { | ||
| if h < squash_height { | ||
| return Err(Error::HistoricalReadInSquashedRange { | ||
| block_height: h, | ||
| squash_height, | ||
| }); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| let (original_block_hash, original_block_id) = storage.get_cur_block_and_id(); | ||
| let result = Self::for_each_leaf_inner(storage, block_hash, &mut handle_leaf); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not to start bikeshedding, but ... any good idea for a name for this function that makes it more obvious what's happening? "Update" is pretty generic and meaningless, especially considering how consequential the data modification it performs is.
(I know you didn't actually add this function here, you just moved it)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed in e820a0e