diff --git a/crates/lib/src/model/merkle_tree.rs b/crates/lib/src/model/merkle_tree.rs
index 004e5db7b..cddb2efaf 100644
--- a/crates/lib/src/model/merkle_tree.rs
+++ b/crates/lib/src/model/merkle_tree.rs
@@ -1,9 +1,20 @@
 pub mod merkle_hash;
+pub mod merkle_reader;
+pub mod merkle_writer;
 pub mod node;
 pub mod node_type;
 
 pub use crate::model::merkle_tree::merkle_hash::MerkleHash;
+pub use crate::model::merkle_tree::merkle_reader::MerkleReader;
+pub use crate::model::merkle_tree::merkle_writer::MerkleWriter;
 pub use crate::model::merkle_tree::node::merkle_tree_node_cache;
 pub use crate::model::merkle_tree::node_type::{
     MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode,
 };
+
+/// A complete Merkle tree store supports reading and writing with a shared error type.
+pub trait MerkleStore: MerkleReader + MerkleWriter<Error = <Self as MerkleReader>::Error> {}
+
+/// Any type that implements the Merkle reading and writing traits is automatically an instance
+/// of a MerkleStore, provided that the error types in both the reader & writer align.
+impl<T> MerkleStore for T where T: MerkleReader + MerkleWriter<Error = <T as MerkleReader>::Error> {}
diff --git a/crates/lib/src/model/merkle_tree/merkle_reader.rs b/crates/lib/src/model/merkle_tree/merkle_reader.rs
new file mode 100644
index 000000000..e36413798
--- /dev/null
+++ b/crates/lib/src/model/merkle_tree/merkle_reader.rs
@@ -0,0 +1,84 @@
+use crate::error::IntoOxenError;
+use crate::model::{
+    MerkleHash, MerkleTreeNodeType,
+    merkle_tree::node::{EMerkleTreeNode, MerkleTreeNode},
+};
+
+/// Interface for read-only access to Merkle tree nodes.
+pub trait MerkleReader: Send + Sync {
+    /// The error type for the Merkle tree's underlying storage layer.
+    ///
+    /// Backends may use whichever error type is natural for their storage
+    /// (e.g. `MerkleDbError` for the file backend). The `IntoOxenError`
+    /// bound lets callers that return `Result<_, OxenError>` use `?` directly.
+    type Error: std::error::Error + IntoOxenError;
+
+    /// True if there is some node with the given hash. False otherwise.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn exists(&self, hash: &MerkleHash) -> Result<bool, Self::Error>;
+
+    /// Retrieve the node record for the given hash, if it exists. None means no such node exists.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn get_node(&self, hash: &MerkleHash) -> Result<Option<MerkleNodeRecord>, Self::Error>;
+
+    /// Retrieve the children of the node for the given hash, if it exists and if it is a directory node.
+    /// If the node represents a file, then an empty list is always returned.
+    /// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
+    fn get_children(
+        &self,
+        hash: &MerkleHash,
+    ) -> Result<Vec<(MerkleHash, MerkleTreeNode)>, Self::Error>;
+}
+
+/// Metadata returned when reading a single node.
+pub struct MerkleNodeRecord {
+    hash: MerkleHash,
+    dtype: MerkleTreeNodeType,
+    parent_id: Option<MerkleHash>,
+    node: EMerkleTreeNode,
+    num_children: u64,
+}
+
+impl MerkleNodeRecord {
+    pub fn new(
+        hash: MerkleHash,
+        dtype: MerkleTreeNodeType,
+        parent_id: Option<MerkleHash>,
+        node: EMerkleTreeNode,
+        num_children: u64,
+    ) -> Self {
+        Self {
+            hash,
+            dtype,
+            parent_id,
+            node,
+            num_children,
+        }
+    }
+
+    pub fn hash(&self) -> &MerkleHash {
+        &self.hash
+    }
+
+    pub fn dtype(&self) -> &MerkleTreeNodeType {
+        &self.dtype
+    }
+
+    pub fn parent_id(&self) -> Option<&MerkleHash> {
+        self.parent_id.as_ref()
+    }
+
+    pub fn node(&self) -> &EMerkleTreeNode {
+        &self.node
+    }
+
+    pub fn num_children(&self) -> u64 {
+        self.num_children
+    }
+
+    /// Consume this record and return its `EMerkleTreeNode`, avoiding a clone
+    /// for callers that only need the owned node value.
+    pub fn into_node(self) -> EMerkleTreeNode {
+        self.node
+    }
+}
diff --git a/crates/lib/src/model/merkle_tree/merkle_writer.rs b/crates/lib/src/model/merkle_tree/merkle_writer.rs
new file mode 100644
index 000000000..e733084b8
--- /dev/null
+++ b/crates/lib/src/model/merkle_tree/merkle_writer.rs
@@ -0,0 +1,93 @@
+use crate::error::IntoOxenError;
+use crate::model::{MerkleHash, TMerkleTreeNode};
+
+/// Interface for writing to a Merkle tree store.
+pub trait MerkleWriter: Send + Sync {
+    /// The error type for the Merkle tree's underlying storage layer.
+    ///
+    /// Backends may use whichever error type is natural for their storage
+    /// (e.g. [`MerkleDbError`] for the [`FileBackend`]). The `IntoOxenError`
+    /// bound on the associated type propagates as an implied bound at every
+    /// use site, so generic callers can convert errors via
+    /// `?` with no additional `where` clauses.
+    type Error: std::error::Error + IntoOxenError;
+
+    /// The write session that manages writing multiple nodes to the store.
+    type Session<'a>: MerkleWriteSession
+    where
+        Self: 'a;
+
+    /// Create a new write session to write many changed Merkle tree nodes to the store.
+    ///
+    /// To ensure that changes are persisted, callers must call [`MerkleWriteSession::finish`] on the returned session.
+    /// Correct use of the returned session is to create a [`NodeWriteSession`] for each node to be
+    /// written, and then call [`NodeWriteSession::finish`] on that session when complete with the node.
+    fn begin(&self) -> Result<Self::Session<'_>, Self::Error>;
+}
+
+/// A write session for writing multiple nodes to the Merkle tree store.
+///
+/// A [`MerkleWriteSession`] is used to create multiple [`NodeWriteSession`]s, each of which
+/// represents a single node being written to the store. Typical usage is to create a single
+/// [`MerkleWriteSession`] when committing repository changes. From this one write session,
+/// callers will create multiple [`NodeWriteSession`]s to write the nodes they need to store.
+/// Each [`NodeWriteSession`] must have its [`finish`] called to finalize the written node
+/// information. Once all nodes have been written, the [`finish`] method of the [`MerkleWriteSession`]
+/// must be called to persist the changes to the store.
+///
+/// Persistence and eagerness of writes are implementation details. Implementations may choose
+/// to buffer writes or write immediately to the store when [`create_node`] and [`add_child`]
+/// are called. The invariant is that [`finish`] must be called to **ensure** that writes are
+/// persisted. An implementation may choose to e.g. have a transaction mechanism to roll-back
+/// changes on `Err`. However, implementations are not required to support this.
+pub trait MerkleWriteSession {
+    /// The error type for the Merkle tree's underlying storage layer.
+    /// Must be convertible into an [`OxenError`].
+    type Error: std::error::Error + IntoOxenError;
+
+    /// The write session that manages writing a single node's information to the store.
+    type NodeSession<'b>: NodeWriteSession
+    where
+        Self: 'b;
+
+    /// Begin the process of writing the node to the Merkle tree store.
+    ///
+    /// The returned node write session is used to add children, if the node is a directory
+    /// or vnode. Callers are responsible for calling `finish` on the returned session
+    /// to ensure that their writes will be made available to the Merkle tree store.
+    ///
+    /// Note that any written nodes are not required to be persisted to the store until
+    /// _this_ write session's [`finish`] is called.
+    fn create_node<'b, N: TMerkleTreeNode>(
+        &'b self,
+        node: &N,
+        parent_id: Option<MerkleHash>,
+    ) -> Result<Self::NodeSession<'b>, Self::Error>;
+
+    /// Ensure that all content from all finished node write sessions have been written to the
+    /// Merkle tree store. Consumes the session: any active [`NodeWriteSession`]s borrowing from
+    /// this one must already have been finished (and thus dropped) before this is called — the
+    /// borrow checker enforces that invariant.
+    fn finish(self) -> Result<(), Self::Error>;
+}
+
+/// A write session for a single node being constructed.
+///
+/// Implementations may buffer the `node` and `children` information in memory or choose to write
+/// the data to the store eagerly. However, if [`finish`] is called and returns `Ok`, then the
+/// guarantee is that all node and child information must be persisted to the store.
+pub trait NodeWriteSession {
+    /// The error type for the Merkle tree's underlying storage layer.
+    /// Must be convertible into an [`OxenError`].
+    type Error: std::error::Error + IntoOxenError;
+
+    /// The hash of the node being written in this session.
+    fn node_id(&self) -> &MerkleHash;
+
+    /// Add a child to the current node.
+    fn add_child<N: TMerkleTreeNode>(&mut self, child: &N) -> Result<(), Self::Error>;
+
+    /// Ensure the node and its children have been written to the Merkle tree store. Consumes
+    /// the node session; releases the borrow on the parent [`MerkleWriteSession`].
+    fn finish(self) -> Result<(), Self::Error>;
+}