-
Notifications
You must be signed in to change notification settings - Fork 26
Add merkle tree interfaces for reading & writing #472
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,20 @@ | ||
| pub mod merkle_hash; | ||
| pub mod merkle_reader; | ||
| pub mod merkle_writer; | ||
| pub mod node; | ||
| pub mod node_type; | ||
|
|
||
| pub use crate::model::merkle_tree::merkle_hash::MerkleHash; | ||
| pub use crate::model::merkle_tree::merkle_reader::MerkleReader; | ||
| pub use crate::model::merkle_tree::merkle_writer::MerkleWriter; | ||
| pub use crate::model::merkle_tree::node::merkle_tree_node_cache; | ||
| pub use crate::model::merkle_tree::node_type::{ | ||
| MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode, | ||
| }; | ||
|
|
||
| /// Marker trait (it declares no items of its own) for a complete Merkle tree store: a | ||
| /// [`MerkleReader`] that is also a [`MerkleWriter`] whose `Error` is the reader's `Error` type. | ||
| pub trait MerkleStore: MerkleReader + MerkleWriter<Error = <Self as MerkleReader>::Error> {} | ||
|
|
||
| /// Blanket implementation: every type that implements both [`MerkleReader`] and [`MerkleWriter`] | ||
| /// is a [`MerkleStore`], provided the writer's `Error` is the same type as the reader's `Error`. | ||
| impl<T> MerkleStore for T where T: MerkleReader + MerkleWriter<Error = <T as MerkleReader>::Error> {} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| use crate::error::IntoOxenError; | ||
| use crate::model::{ | ||
| MerkleHash, MerkleTreeNodeType, | ||
| merkle_tree::node::{EMerkleTreeNode, MerkleTreeNode}, | ||
| }; | ||
|
|
||
| /// Interface for read-only access to Merkle tree nodes. | ||
| pub trait MerkleReader: Send + Sync { | ||
| /// The error type for the Merkle tree's underlying storage layer. | ||
| /// | ||
| /// Backends may use whichever error type is natural for their storage | ||
| /// (e.g. `MerkleDbError` for the file backend). The `IntoOxenError` bound is meant to | ||
| /// let callers that return `Result<_, OxenError>` convert and propagate these errors. | ||
| type Error: std::error::Error + IntoOxenError; | ||
|
|
||
| /// True if there is some node with the given hash. False otherwise. | ||
| /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer. | ||
| fn exists(&self, hash: &MerkleHash) -> Result<bool, Self::Error>; | ||
|
|
||
| /// Retrieve the node record for the given hash, if it exists. None means no such node exists. | ||
| /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer. | ||
| fn get_node(&self, hash: &MerkleHash) -> Result<Option<MerkleNodeRecord>, Self::Error>; | ||
|
|
||
| /// Retrieve the children of the node for the given hash, if it exists and if it is a directory node. | ||
| /// Each returned pair is a child's hash together with that child's `MerkleTreeNode`. | ||
| /// If the node represents a file, then an empty list is always returned. | ||
| /// NOTE(review): the result for a hash with no stored node is not specified here (empty list or error) — confirm. | ||
| /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer. | ||
| fn get_children( | ||
| &self, | ||
| hash: &MerkleHash, | ||
| ) -> Result<Vec<(MerkleHash, MerkleTreeNode)>, Self::Error>; | ||
| } | ||
|
|
||
| /// Metadata returned when reading a single node: its hash, node type, optional parent hash, | ||
| /// node value, and child count. All fields are private and are read through accessor methods. | ||
| pub struct MerkleNodeRecord { | ||
| hash: MerkleHash, | ||
| dtype: MerkleTreeNodeType, | ||
| parent_id: Option<MerkleHash>, | ||
| node: EMerkleTreeNode, | ||
| num_children: u64, | ||
| } | ||
|
|
||
| impl MerkleNodeRecord { | ||
| /// Build a record from its parts; each argument is stored as-is in the field of the same name. | ||
| pub fn new( | ||
| hash: MerkleHash, | ||
| dtype: MerkleTreeNodeType, | ||
| parent_id: Option<MerkleHash>, | ||
| node: EMerkleTreeNode, | ||
| num_children: u64, | ||
| ) -> Self { | ||
| Self { | ||
| hash, | ||
| dtype, | ||
| parent_id, | ||
| node, | ||
| num_children, | ||
| } | ||
| } | ||
|
|
||
| /// Borrow the hash held by this record. | ||
| pub fn hash(&self) -> &MerkleHash { | ||
| &self.hash | ||
| } | ||
|
|
||
| /// Borrow the node type held by this record. | ||
| pub fn dtype(&self) -> &MerkleTreeNodeType { | ||
| &self.dtype | ||
| } | ||
|
|
||
| /// Borrow the parent hash, or `None` if the record was built without one. | ||
| pub fn parent_id(&self) -> Option<&MerkleHash> { | ||
| self.parent_id.as_ref() | ||
| } | ||
|
|
||
| /// Borrow the `EMerkleTreeNode` held by this record. | ||
| pub fn node(&self) -> &EMerkleTreeNode { | ||
| &self.node | ||
| } | ||
|
|
||
| /// The child count that was supplied when the record was built. | ||
| pub fn num_children(&self) -> u64 { | ||
| self.num_children | ||
| } | ||
|
|
||
| /// Consume this record and return its `EMerkleTreeNode`, avoiding a clone | ||
| /// for callers that only need the owned node value. | ||
| pub fn into_node(self) -> EMerkleTreeNode { | ||
| self.node | ||
| } | ||
| } | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are there lifetimes here? The upcoming LMDB implementation will need to maintain a |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,93 @@ | ||||||||||||||||||||||
| use crate::error::IntoOxenError; | ||||||||||||||||||||||
| use crate::model::{MerkleHash, TMerkleTreeNode}; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /// Interface for writing to a Merkle tree store. | ||||||||||||||||||||||
| pub trait MerkleWriter: Send + Sync { | ||||||||||||||||||||||
| /// The error type for the Merkle tree's underlying storage layer. | ||||||||||||||||||||||
| /// | ||||||||||||||||||||||
| /// Backends may use whichever error type is natural for their storage | ||||||||||||||||||||||
| /// (e.g. `MerkleDbError` for the `FileBackend`). The `IntoOxenError` | ||||||||||||||||||||||
| /// bound on the associated type propagates as an implied bound at every | ||||||||||||||||||||||
| /// use site, so generic callers can convert errors via | ||||||||||||||||||||||
| /// `?` with no additional `where` clauses. | ||||||||||||||||||||||
| type Error: std::error::Error + IntoOxenError; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| /// The write session that manages writing multiple nodes to the store. | ||||||||||||||||||||||
| /// Its `Error` is this writer's `Error`, and it may borrow from the writer for `'a`. | ||||||||||||||||||||||
| type Session<'a>: MerkleWriteSession<Error = Self::Error> | ||||||||||||||||||||||
| where | ||||||||||||||||||||||
| Self: 'a; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| /// Create a new write session to write many changed Merkle tree nodes to the store. | ||||||||||||||||||||||
| /// | ||||||||||||||||||||||
| /// To ensure that changes are persisted, callers must call [`MerkleWriteSession::finish`] on the returned session. | ||||||||||||||||||||||
| /// Correct use of the returned session is to create a [`NodeWriteSession`] for each node to be | ||||||||||||||||||||||
| /// written, and then call [`NodeWriteSession::finish`] on that session when complete with the node. | ||||||||||||||||||||||
| fn begin(&self) -> Result<Self::Session<'_>, Self::Error>; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /// A write session for writing multiple nodes to the Merkle tree store. | ||||||||||||||||||||||
| /// | ||||||||||||||||||||||
| /// A [`MerkleWriteSession`] is used to create multiple [`NodeWriteSession`]s, each of which | ||||||||||||||||||||||
| /// represents a single node being written to the store. Typical usage is to create a single | ||||||||||||||||||||||
| /// [`MerkleWriteSession`] when committing repository changes. From this one write session, | ||||||||||||||||||||||
| /// callers will create multiple [`NodeWriteSession`]s to write the nodes they need to store. | ||||||||||||||||||||||
| /// Each [`NodeWriteSession`] must have its [`NodeWriteSession::finish`] called to finalize the | ||||||||||||||||||||||
| /// written node information. Once all nodes have been written, [`MerkleWriteSession::finish`] | ||||||||||||||||||||||
| /// must be called to persist the changes to the store. | ||||||||||||||||||||||
| /// | ||||||||||||||||||||||
| /// Persistence and eagerness of writes are implementation details. Implementations may choose | ||||||||||||||||||||||
| /// to buffer writes or write immediately to the store when [`MerkleWriteSession::create_node`] | ||||||||||||||||||||||
| /// and [`NodeWriteSession::add_child`] are called. The invariant is that | ||||||||||||||||||||||
| /// [`MerkleWriteSession::finish`] must be called to **ensure** that writes are persisted. An | ||||||||||||||||||||||
| /// implementation may choose to e.g. have a transaction mechanism to roll back changes on | ||||||||||||||||||||||
| /// `Err`. However, implementations are not required to support this. | ||||||||||||||||||||||
| pub trait MerkleWriteSession { | ||||||||||||||||||||||
| /// The error type for the Merkle tree's underlying storage layer. | ||||||||||||||||||||||
| /// Must be convertible into an [`OxenError`]. | ||||||||||||||||||||||
| type Error: std::error::Error + IntoOxenError; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /// The write session that manages writing a single node's information to the store. | ||||||||||||||||||||||
| type NodeSession<'b>: NodeWriteSession<Error = Self::Error> | ||||||||||||||||||||||
| where | ||||||||||||||||||||||
| Self: 'b; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /// Begin the process of writing the node to the Merkle tree store. | ||||||||||||||||||||||
| /// | ||||||||||||||||||||||
| /// The returned node write session is used to add children, if the node is a directory | ||||||||||||||||||||||
| /// or vnode. Callers are responsible for calling `finish` on the returned session | ||||||||||||||||||||||
| /// to ensure that their writes will be made available to the Merkle tree store. | ||||||||||||||||||||||
| /// | ||||||||||||||||||||||
| /// Note that any written nodes are not required to be persisted to the store until | ||||||||||||||||||||||
| /// _this_ write session's [`finish`] is called. | ||||||||||||||||||||||
| fn create_node<'b, N: TMerkleTreeNode>( | ||||||||||||||||||||||
| &'b self, | ||||||||||||||||||||||
| node: &N, | ||||||||||||||||||||||
| parent_id: Option<MerkleHash>, | ||||||||||||||||||||||
| ) -> Result<Self::NodeSession<'b>, Self::Error>; | ||||||||||||||||||||||
|
Comment on lines
+61
to
+65
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: Yes, write operations on RwTxn in the Rust heed crate require mutable access (&mut self). The Database::put method signature is pub fn put<'a, T>(&self, txn: &mut RwTxn<', ', T>, key: &'a KC::EItem, data: &'a DC::EItem) -> Result<>, explicitly taking &mut RwTxn for writes like put, delete, etc. All official examples use &mut wtxn for write operations, such as db.put(&mut wtxn, key, value)?. Yes, borrowing a mutable transaction into nested write-session objects is the idiomatic pattern. RwTxn supports nested transactions via RwTxn::nested, which takes a &mut RwTxn (parent) and returns a new RwTxn<'e, 'p> borrowing the parent's lifetime. Recent versions (e.g., 0.22.1-nested-rtxns) emphasize nested rtxns, and source code shows pub(crate) fn nested<'p: 'e>(env: &'e Env, parent: &'p mut RwTxn) -> Result<RwTxn<'e, 'p, T>>. This enables hierarchical transactions while maintaining LMDB's single-writer rule through borrowing. Citations:
🏁 Script executed: # Find the trait definition and understand the structure
find crates/lib/src/model/merkle_tree -type f -name "*.rs" | head -20Repository: Oxen-AI/Oxen Length of output: 913 🏁 Script executed: # Look at the merkle_writer.rs file to see the trait definition
head -100 crates/lib/src/model/merkle_tree/merkle_writer.rsRepository: Oxen-AI/Oxen Length of output: 4952 🏁 Script executed: # Search for create_node calls to understand how it's used
rg "create_node" crates/lib/src/model/merkle_tree/ -A 3 -B 1Repository: Oxen-AI/Oxen Length of output: 1189 🏁 Script executed: # Check if heed is a dependency
rg "heed" crates/lib/Cargo.tomlRepository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Search for implementations of MerkleWriteSession
rg "impl.*MerkleWriteSession" --type rustRepository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Search for heed usage anywhere in the codebase
rg "heed" --type rustRepository: Oxen-AI/Oxen Length of output: 1623 🏁 Script executed: # Look for NodeWriteSession implementations which might reveal transaction handling
rg "impl.*NodeWriteSession" --type rustRepository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Check Cargo.toml in different locations for dependencies
find . -name "Cargo.toml" -exec grep -l "heed\|lmdb\|rocksdb" {} \;Repository: Oxen-AI/Oxen Length of output: 155 🏁 Script executed: # Search for struct definitions that might implement the traits
rg "struct.*Session" crates/lib/src/model/merkle_tree/ -A 5Repository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Look at the full merkle_tree module structure
ls -la crates/lib/src/model/merkle_tree/Repository: Oxen-AI/Oxen Length of output: 537 🏁 Script executed: # Search for any create_node method calls in the codebase
rg "\.create_node\(" --type rust -B 2 -A 2Repository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Check if there are any imports or uses of MerkleWriteSession
rg "MerkleWriteSession\|MerkleWriter" --type rustRepository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Search across all crates
rg "impl MerkleWriteSession\|impl MerkleWriter" --type rustRepository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Search for Merkle in all files to understand usage
rg "Merkle" crates/lib/src/ -l | head -20Repository: Oxen-AI/Oxen Length of output: 795 🏁 Script executed: # Check if there's any git history or comments about this
rg "create_node\|NodeWriteSession\|MerkleWriter" crates/lib/src/ --type rust -B 1 -A 1Repository: Oxen-AI/Oxen Length of output: 38 🏁 Script executed: # Look at the full merkle_writer.rs file to see all trait definitions
cat crates/lib/src/model/merkle_tree/merkle_writer.rsRepository: Oxen-AI/Oxen Length of output: 4952 Change The current signature with Proposed API adjustment- fn create_node<'b, N: TMerkleTreeNode>(
- &'b self,
+ fn create_node<'b, N: TMerkleTreeNode>(
+ &'b mut self,
node: &N,
parent_id: Option<MerkleHash>,
) -> Result<Self::NodeSession<'b>, Self::Error>;📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /// Ensure that all content from all finished node write sessions have been written to the | ||||||||||||||||||||||
| /// Merkle tree store. Consumes the session: any active [`NodeWriteSession`]s borrowing from | ||||||||||||||||||||||
| /// this one must already have been finished (and thus dropped) before this is called — the | ||||||||||||||||||||||
| /// borrow checker enforces that invariant. | ||||||||||||||||||||||
| fn finish(self) -> Result<(), Self::Error>; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /// A write session for a single node being constructed. | ||||||||||||||||||||||
| /// | ||||||||||||||||||||||
| /// Implementations may buffer the `node` and `children` information in memory or choose to write | ||||||||||||||||||||||
| /// the data to the store eagerly. However, if [`NodeWriteSession::finish`] is called and returns | ||||||||||||||||||||||
| /// `Ok`, then the guarantee is that all node and child information must be persisted to the store. | ||||||||||||||||||||||
| pub trait NodeWriteSession { | ||||||||||||||||||||||
| /// The error type for the Merkle tree's underlying storage layer. | ||||||||||||||||||||||
| /// Must be convertible into an `OxenError` (this is what the `IntoOxenError` bound is for). | ||||||||||||||||||||||
| type Error: std::error::Error + IntoOxenError; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| /// The hash of the node being written in this session. | ||||||||||||||||||||||
| fn node_id(&self) -> &MerkleHash; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| /// Add `child` as a child of the node being written in this session. | ||||||||||||||||||||||
| fn add_child<N: TMerkleTreeNode>(&mut self, child: &N) -> Result<(), Self::Error>; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| /// Ensure the node and its children have been written to the Merkle tree store. Consumes | ||||||||||||||||||||||
| /// the node session; releases the borrow on the parent [`MerkleWriteSession`]. | ||||||||||||||||||||||
| fn finish(self) -> Result<(), Self::Error>; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.