diff --git a/crates/lib/src/model/merkle_tree.rs b/crates/lib/src/model/merkle_tree.rs
index 004e5db7b..0efab83e0 100644
--- a/crates/lib/src/model/merkle_tree.rs
+++ b/crates/lib/src/model/merkle_tree.rs
@@ -1,9 +1,27 @@
 pub mod merkle_hash;
+pub mod merkle_reader;
+pub mod merkle_writer;
 pub mod node;
 pub mod node_type;
 
 pub use crate::model::merkle_tree::merkle_hash::MerkleHash;
+pub use crate::model::merkle_tree::merkle_reader::MerkleReader;
+pub use crate::model::merkle_tree::merkle_writer::MerkleWriter;
 pub use crate::model::merkle_tree::node::merkle_tree_node_cache;
 pub use crate::model::merkle_tree::node_type::{
     MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode,
 };
+
+/// A complete Merkle tree store supports reading and writing.
+///
+/// Object-safe via the dyn-compatible [`MerkleReader`] and [`MerkleWriter`].
+/// Both sides return [`OxenError`] at the trait surface, so callers can use `?`
+/// anywhere they're already returning `Result<_, OxenError>`.
+///
+/// [`OxenError`]: crate::error::OxenError
+pub trait MerkleStore: MerkleReader + MerkleWriter {}
+
+/// Any type that implements both the Merkle reading and writing traits is
+/// automatically a [`MerkleStore`]. The `?Sized` bound lets the marker apply
+/// to `dyn MerkleStore` itself.
+impl<T: MerkleReader + MerkleWriter + ?Sized> MerkleStore for T {}
diff --git a/crates/lib/src/model/merkle_tree/merkle_reader.rs b/crates/lib/src/model/merkle_tree/merkle_reader.rs
new file mode 100644
index 000000000..79499269e
--- /dev/null
+++ b/crates/lib/src/model/merkle_tree/merkle_reader.rs
@@ -0,0 +1,60 @@
+use crate::error::OxenError;
+use crate::model::{
+    MerkleHash,
+    merkle_tree::node::{EMerkleTreeNode, MerkleTreeNode},
+};
+
+/// Interface for read-only access to Merkle tree nodes representing commits & directories.
+///
+/// The `exists`, `get_node`, and `get_children` methods only work on [`MerkleHash`] values
+/// that map to commits or directories (and by extension, virtual directory nodes).
+///
+/// A file node is _always_ stored in the `children` of some virtual node. To access a Merkle
+/// tree node for a file, one needs to map from the repository relative filepath and a commit
+/// hash to the right virtual node hash, call [`MerkleReader::get_children`] on it, then
+/// iterate to the file's corresponding [`MerkleTreeNode`].
+///
+/// Dyn-compatible: callers can use this as `dyn MerkleReader`.
+pub trait MerkleReader: Send + Sync {
+    /// True if there is some node with the given hash. False otherwise.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn exists(&self, hash: &MerkleHash) -> Result<bool, OxenError>;
+
+    /// Retrieve the node record for the given hash, if it exists. None means no such node exists.
+    /// Note that a file node's [`MerkleHash`] will result in None: file nodes are stored as _children_.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn get_node(&self, hash: &MerkleHash) -> Result<Option<MerkleEntry>, OxenError>;
+
+    /// Retrieve the children of the node for the given hash, if it exists and if it is a directory node.
+    /// If the node represents a file, then an empty list is always returned.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn get_children(
+        &self,
+        hash: &MerkleHash,
+    ) -> Result<Vec<(MerkleHash, MerkleTreeNode)>, OxenError>;
+
+    /// Load a [`MerkleTreeNode`] with full node info and 1-level (aka direct) children for any non-file node.
+    /// Note that this method must return `None` if [`MerkleReader::get_node`] on the same hash would return `None`.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn read_full_node(&self, hash: &MerkleHash) -> Result<Option<MerkleTreeNode>, OxenError> {
+        let Some(node) = self.get_node(hash)? else {
+            return Ok(None);
+        };
+        let children = self.get_children(hash)?;
+        Ok(Some(MerkleTreeNode {
+            hash: *hash,
+            node: node.node,
+            parent_id: node.parent_id,
+            children: children.into_iter().map(|(_, c)| c).collect(),
+        }))
+    }
+}
+
+/// Data returned when reading a single node.
+/// Always corresponds to either a commit, directory, or virtual directory Merkle tree node.
+pub struct MerkleEntry {
+    /// The node content.
+    pub node: EMerkleTreeNode,
+    /// The parent of this node. Commit nodes are the only nodes that do not have parents.
+    pub parent_id: Option<MerkleHash>,
+}
diff --git a/crates/lib/src/model/merkle_tree/merkle_writer.rs b/crates/lib/src/model/merkle_tree/merkle_writer.rs
new file mode 100644
index 000000000..e6d715cbe
--- /dev/null
+++ b/crates/lib/src/model/merkle_tree/merkle_writer.rs
@@ -0,0 +1,84 @@
+use crate::error::OxenError;
+use crate::model::{MerkleHash, TMerkleTreeNode};
+
+/// Interface for writing to a Merkle tree store.
+///
+/// Dyn-compatible: callers can store this as `Box<dyn MerkleWriter>`
+/// or `&dyn MerkleWriter`.
+pub trait MerkleWriter: Send + Sync {
+    /// Create a new write session to write many changed Merkle tree nodes to the store.
+    ///
+    /// The returned session is boxed and tied to `&self`'s lifetime. Callers must call
+    /// [`MerkleWriteSession::finish`] on it to ensure their writes have been persisted —
+    /// because `finish` consumes a `Box<Self>`, the natural shape `session.finish()?`
+    /// works directly on the box returned here.
+    ///
+    /// Correct use of the returned session is to create a [`NodeWriteSession`] for each
+    /// node to be written, and then call [`NodeWriteSession::finish`] on that session
+    /// when complete with the node.
+    fn begin<'a>(&'a self) -> Result<Box<dyn MerkleWriteSession + 'a>, OxenError>;
+}
+
+/// A write session for writing multiple nodes to the Merkle tree store.
+///
+/// A [`MerkleWriteSession`] is used to create multiple [`NodeWriteSession`]s, each of which
+/// represents a single node being written to the store. Typical usage is to create a single
+/// [`MerkleWriteSession`] when committing repository changes. From this one write session,
+/// callers will create multiple [`NodeWriteSession`]s to write the nodes they need to store.
+/// Each [`NodeWriteSession`] must have its [`NodeWriteSession::finish`] called to finalize the
+/// written node information. Once all nodes have been written, [`MerkleWriteSession::finish`]
+/// must be called to persist the changes to the store.
+///
+/// Persistence and eagerness of writes are implementation details. Implementations may choose
+/// to buffer writes or write immediately to the store when [`MerkleWriteSession::create_node`]
+/// and [`NodeWriteSession::add_child`] are called. The invariant is that
+/// [`MerkleWriteSession::finish`] must be called to **ensure** that writes are persisted. An
+/// implementation may offer e.g. transactional roll-back on `Err`, but that is not required.
+///
+/// Object-safe: lives behind `Box<dyn MerkleWriteSession>`. `finish` takes
+/// `self: Box<Self>` so the trait is dyn-callable; the natural usage
+/// `session.finish()?` on a `Box<dyn MerkleWriteSession>` value works directly.
+pub trait MerkleWriteSession {
+    /// Begin the process of writing the node to the Merkle tree store.
+    ///
+    /// The returned node write session is used to add children, if the node is a directory
+    /// or vnode. Callers are responsible for calling `finish` on the returned session
+    /// to ensure that their writes will be made available to the Merkle tree store.
+    ///
+    /// Note that any written nodes are not required to be persisted to the store until
+    /// _this_ write session's [`MerkleWriteSession::finish`] is called.
+    fn create_node<'a>(
+        &'a self,
+        node: &dyn TMerkleTreeNode,
+        parent_id: Option<MerkleHash>,
+    ) -> Result<Box<dyn NodeWriteSession + 'a>, OxenError>;
+
+    /// Ensure that all content from all finished node write sessions has been written to the
+    /// Merkle tree store.
+    ///
+    /// Consumes the boxed session via `self: Box<Self>` so the trait is object-safe.
+    /// Any active [`NodeWriteSession`]s borrowing from this one must already have been
+    /// finished before this is called — the borrow checker enforces that invariant.
+    fn finish(self: Box<Self>) -> Result<(), OxenError>;
+}
+
+/// A write session for a single node being constructed.
+///
+/// Implementations may buffer the `node` and `children` information in memory or choose to write
+/// the data to the store eagerly. However, if [`NodeWriteSession::finish`] is called and returns
+/// `Ok`, then the guarantee is that all node and child information must be persisted to the store.
+///
+/// Object-safe: lives behind `Box<dyn NodeWriteSession>`. `finish` takes
+/// `self: Box<Self>` for the same reason as [`MerkleWriteSession::finish`].
+pub trait NodeWriteSession {
+    /// The hash of the node being written in this session.
+    fn node_id(&self) -> &MerkleHash;
+
+    /// Add a child to the current node.
+    fn add_child(&mut self, child: &dyn TMerkleTreeNode) -> Result<(), OxenError>;
+
+    /// Ensure the node and its children have been written to the Merkle tree store.
+    /// Consumes the boxed session; releases the borrow on the parent
+    /// [`MerkleWriteSession`].
+    fn finish(self: Box<Self>) -> Result<(), OxenError>;
+}