Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions crates/lib/src/model/merkle_tree.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,27 @@
pub mod merkle_hash;
pub mod merkle_reader;
pub mod merkle_writer;
pub mod node;
pub mod node_type;

pub use crate::model::merkle_tree::merkle_hash::MerkleHash;
pub use crate::model::merkle_tree::merkle_reader::MerkleReader;
pub use crate::model::merkle_tree::merkle_writer::MerkleWriter;
pub use crate::model::merkle_tree::node::merkle_tree_node_cache;
pub use crate::model::merkle_tree::node_type::{
MerkleTreeNodeIdType, MerkleTreeNodeType, TMerkleTreeNode,
};

/// Marker trait for a Merkle tree store that can be both read from and written to.
///
/// It declares no items of its own: everything a `MerkleStore` can do comes from its two
/// supertraits, [`MerkleReader`] and [`MerkleWriter`]. Since both of those are
/// dyn-compatible, this trait can be used as `dyn MerkleStore` as well.
///
/// Both supertraits report failures as [`OxenError`], so `?` works in any caller that
/// already returns `Result<_, OxenError>`.
///
/// [`OxenError`]: crate::error::OxenError
pub trait MerkleStore
where
    Self: MerkleReader + MerkleWriter,
{
}

/// Blanket implementation: every type that is both a [`MerkleReader`] and a
/// [`MerkleWriter`] is a [`MerkleStore`] with no further work from the implementor.
///
/// `T` is explicitly allowed to be unsized (`?Sized`), so the impl is not limited to
/// `Sized` types and also applies to `dyn MerkleStore` itself.
impl<T> MerkleStore for T where T: MerkleReader + MerkleWriter + ?Sized {}
60 changes: 60 additions & 0 deletions crates/lib/src/model/merkle_tree/merkle_reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use crate::error::OxenError;
use crate::model::{
MerkleHash,
merkle_tree::node::{EMerkleTreeNode, MerkleTreeNode},
};

/// Interface for read-only access to Merkle tree nodes representing commits & directories.
///
/// The `exists`, `get_node`, and `get_children` methods only work on [`MerkleHash`] values
/// that map to commits or directories (and by extension, virtual directory nodes).
///
/// A file node is _always_ stored in the `children` of some virtual node. To access a Merkle
/// tree node for a file, one needs to map from the repository relative filepath and a commit
/// hash to the right virtual node hash, call [`get_children`] on it, then iterate to the
/// file's corresponding [`MerkleTreeNode`].
///
/// Dyn-compatible: callers can store use this as `dyn MerkleReader`.
pub trait MerkleReader: Send + Sync {
/// True if there is some node with the given hash. False otherwise.
/// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
fn exists(&self, hash: &MerkleHash) -> Result<bool, OxenError>;

/// Retrieve the node record for the given hash, if it exists. None means no such node exists.
/// Note that a file node's [`MerkleHash`] will result in None: file nodes are stored as _children_.
/// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
fn get_node(&self, hash: &MerkleHash) -> Result<Option<MerkleEntry>, OxenError>;

/// Retrieve the children of the node for the given hash, if it exists and if it is a directory node.
/// If the node represents a file, then an empty list is always returned.
/// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
fn get_children(
Comment thread
malcolmgreaves marked this conversation as resolved.
&self,
hash: &MerkleHash,
) -> Result<Vec<(MerkleHash, MerkleTreeNode)>, OxenError>;
Comment thread
malcolmgreaves marked this conversation as resolved.

/// Load a [`MerkleTreeNode`] with full node info and 1-level (aka direct) children for any non-file node.
/// Note that this method must return `None` if [`get_node`] on the same hash would return `None`.
/// An error is returned if there is some other failure in the Merkle tree's underlying storage layer.
fn read_full_node(&self, hash: &MerkleHash) -> Result<Option<MerkleTreeNode>, OxenError> {
let Some(node) = self.get_node(hash)? else {
return Ok(None);
};
let children = self.get_children(hash)?;
Ok(Some(MerkleTreeNode {
hash: *hash,
node: node.node,
parent_id: node.parent_id,
children: children.into_iter().map(|(_, c)| c).collect(),
}))
}
}

/// Data returned when reading a single node.
///
/// Always corresponds to either a commit, directory, or virtual directory Merkle tree node.
/// Only the node's own record is carried here; its children are not part of this struct.
pub struct MerkleEntry {
    /// The node content.
    pub node: EMerkleTreeNode,
    /// The parent of this node. Commit nodes are the only nodes that do not have parents,
    /// so this is `None` for a commit node.
    pub parent_id: Option<MerkleHash>,
}
84 changes: 84 additions & 0 deletions crates/lib/src/model/merkle_tree/merkle_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use crate::error::OxenError;
use crate::model::{MerkleHash, TMerkleTreeNode};

/// Write-side interface of a Merkle tree store.
///
/// Dyn-compatible: it can be held as `Box<dyn MerkleWriter + '_>` or borrowed as
/// `&dyn MerkleWriter`.
pub trait MerkleWriter: Send + Sync {
    /// Open a new write session through which many changed Merkle tree nodes can be written
    /// to the store.
    ///
    /// The session comes back boxed and borrows from `&self`, so it cannot outlive the
    /// writer. Writes are only guaranteed to be persisted once
    /// [`MerkleWriteSession::finish`] has been called on it; since `finish` consumes a
    /// `Box<Self>`, `session.finish()?` can be called directly on the returned box.
    ///
    /// Intended usage: for every node to be written, create a [`NodeWriteSession`] from the
    /// returned session, and call [`NodeWriteSession::finish`] on it once that node is done.
    fn begin(&self) -> Result<Box<dyn MerkleWriteSession + '_>, OxenError>;
}

/// A session for writing a batch of nodes to the Merkle tree store.
///
/// One [`MerkleWriteSession`] hands out many [`NodeWriteSession`]s, each standing for a single
/// node being written. The typical flow is to open one [`MerkleWriteSession`] when committing
/// repository changes, create a [`NodeWriteSession`] from it for every node that needs to be
/// stored, and call [`NodeWriteSession::finish`] on each of them to finalize that node's
/// information. After all nodes have been written, [`MerkleWriteSession::finish`] must be
/// called to persist the changes to the store.
///
/// Whether writes are buffered or performed eagerly is an implementation detail:
/// implementations may write to the store as soon as [`MerkleWriteSession::create_node`] and
/// [`NodeWriteSession::add_child`] are called, or hold the data back. The one invariant is
/// that [`MerkleWriteSession::finish`] must be called to **ensure** that writes are
/// persisted. An implementation may, for example, use a transaction mechanism to roll back
/// changes on `Err`, but implementations are not required to support this.
///
/// Object-safe: used behind `Box<dyn MerkleWriteSession + '_>`. `finish` takes
/// `self: Box<Self>` so that it can be called through the trait object, which makes
/// `session.finish()?` work directly on a `Box<dyn ...>` value.
pub trait MerkleWriteSession {
    /// Start writing `node` to the Merkle tree store, recording `parent_id` as its parent.
    ///
    /// The returned node write session borrows from this session and is used to add
    /// children when the node is a directory or vnode. Callers are responsible for calling
    /// `finish` on the returned session so that their writes will be made available to the
    /// Merkle tree store.
    ///
    /// Nodes written this way are not required to be persisted to the store until _this_
    /// write session's [`finish`](MerkleWriteSession::finish) is called.
    fn create_node(
        &self,
        node: &dyn TMerkleTreeNode,
        parent_id: Option<MerkleHash>,
    ) -> Result<Box<dyn NodeWriteSession + '_>, OxenError>;

    /// Make sure that all content from all finished node write sessions has been written to
    /// the Merkle tree store.
    ///
    /// Takes the boxed session by value (`self: Box<Self>`), which keeps the trait
    /// object-safe. Every [`NodeWriteSession`] borrowing from this session must already be
    /// finished by the time this is called; the borrow checker enforces that invariant.
    fn finish(self: Box<Self>) -> Result<(), OxenError>;
}

/// A write session for a single node being constructed.
///
/// Implementations may buffer the `node` and `children` information in memory or choose to
/// write the data to the store eagerly. However, if [`finish`](NodeWriteSession::finish) is
/// called and returns `Ok`, then the guarantee is that all node and child information must be
/// persisted to the store.
///
/// Object-safe: lives behind `Box<dyn NodeWriteSession + '_>`. `finish` takes
/// `self: Box<Self>` for the same reason as [`MerkleWriteSession::finish`].
pub trait NodeWriteSession {
    /// The hash of the node being written in this session.
    fn node_id(&self) -> &MerkleHash;

    /// Add a child to the current node.
    ///
    /// # Errors
    ///
    /// Returns an [`OxenError`] if the implementation fails to record the child.
    fn add_child(&mut self, child: &dyn TMerkleTreeNode) -> Result<(), OxenError>;

    /// Ensure the node and its children have been written to the Merkle tree store.
    ///
    /// Consumes the boxed session; releases the borrow on the parent
    /// [`MerkleWriteSession`].
    ///
    /// # Errors
    ///
    /// Returns an [`OxenError`] if the implementation fails to write the node or its
    /// children.
    fn finish(self: Box<Self>) -> Result<(), OxenError>;
}
Loading