diff --git a/crates/lib/src/api/client/tree.rs b/crates/lib/src/api/client/tree.rs index 3e036cafb..238ef4cbc 100644 --- a/crates/lib/src/api/client/tree.rs +++ b/crates/lib/src/api/client/tree.rs @@ -11,7 +11,6 @@ use tempfile::TempDir; use crate::api::client; use crate::constants::{NODES_DIR, OXEN_HIDDEN_DIR, TREE_DIR}; -use crate::core::db::merkle_node::merkle_node_db::node_db_prefix; use crate::core::progress::push_progress::PushProgress; use crate::core::v_latest::index::CommitMerkleTree; use crate::error::OxenError; @@ -72,7 +71,7 @@ pub async fn create_nodes( .join(NODES_DIR); for (i, node_hash) in nodes.iter().enumerate() { - let dir_prefix = node_db_prefix(node_hash); + let dir_prefix = node_hash.to_hex_hash().node_db_prefix(); let node_dir = node_path.join(&dir_prefix); // log::debug!( // "create_nodes appending objects dir {:?} to tar at path {:?}", diff --git a/crates/lib/src/core/commit_sync_status.rs b/crates/lib/src/core/commit_sync_status.rs index 4c196389b..7a1eb05a1 100644 --- a/crates/lib/src/core/commit_sync_status.rs +++ b/crates/lib/src/core/commit_sync_status.rs @@ -1,5 +1,4 @@ use crate::constants; -use crate::core::db::merkle_node::merkle_node_db::node_db_prefix; use crate::error::OxenError; use crate::model::LocalRepository; use crate::model::MerkleHash; @@ -45,7 +44,7 @@ pub fn mark_commit_as_synced( } fn commit_is_synced_file_path(repo: &LocalRepository, commit_hash: &MerkleHash) -> PathBuf { - let dir_prefix = node_db_prefix(commit_hash); + let dir_prefix = commit_hash.to_hex_hash().node_db_prefix(); repo.path .join(constants::OXEN_HIDDEN_DIR) .join(constants::TREE_DIR) diff --git a/crates/lib/src/core/db/merkle_node/merkle_node_db.rs b/crates/lib/src/core/db/merkle_node/merkle_node_db.rs index 4dd002371..14305e37d 100644 --- a/crates/lib/src/core/db/merkle_node/merkle_node_db.rs +++ b/crates/lib/src/core/db/merkle_node/merkle_node_db.rs @@ -57,6 +57,7 @@ use crate::constants; use crate::error::OxenError; use 
crate::model::LocalRepository; use crate::model::MerkleHash; +use crate::model::merkle_tree::node_type::InvalidMerkleTreeNodeType; use crate::util; use crate::model::merkle_tree::node::{ @@ -67,16 +68,9 @@ use crate::model::merkle_tree::node::{ const NODE_FILE: &str = "node"; const CHILDREN_FILE: &str = "children"; -pub fn node_db_prefix(hash: &MerkleHash) -> PathBuf { - let hash_str = hash.to_string(); - let dir_prefix_len = 3; - let dir_prefix = hash_str.chars().take(dir_prefix_len).collect::(); - let dir_suffix = hash_str.chars().skip(dir_prefix_len).collect::(); - Path::new(&dir_prefix).join(&dir_suffix) -} - +/// An absolute path to the directory for the Merkle node's `node` and `children` files. pub fn node_db_path(repo: &LocalRepository, hash: &MerkleHash) -> PathBuf { - let dir_prefix = node_db_prefix(hash); + let dir_prefix = hash.to_hex_hash().node_db_prefix(); repo.path .join(constants::OXEN_HIDDEN_DIR) .join(constants::TREE_DIR) @@ -84,6 +78,45 @@ pub fn node_db_path(repo: &LocalRepository, hash: &MerkleHash) -> PathBuf { .join(dir_prefix) } +/// Errors that the Merkle node database can encounter when reading and writing nodes. +#[derive(Debug, thiserror::Error)] +pub enum MerkleDbError { + // Errors encountered in the operation of the custom file format based Merkle tree store. 
+ #[error("Must call open before closing")] + CloseBeforeOpen, + #[error("Cannot write to read-only db")] + ReadOnly, + #[error("Cannot write size after writing data")] + IllegalOperationWriteSizeFirst, + #[error("Must call open before writing")] + WriteBeforeOpen, + #[error("Must call open before reading")] + ReadBeforeOpen, + // wrappers + #[error("I/O error on a node or children file: {0}")] + Io(#[from] std::io::Error), + #[error("Cannot encode a Merkle node: {0}")] + Encode(#[from] rmp_serde::encode::Error), + #[error("Cannot decode a Merkle node: {0}")] + Decode(#[from] rmp_serde::decode::Error), + #[error("{0}")] + TypeMismatch(#[from] InvalidMerkleTreeNodeType), + #[error("Failed to create directory: {0}")] + DirCreate(Box<OxenError>), // TODO: replace with FsError from upcoming refactoring PR + #[error("Failed to open file: {0}")] + Open(Box<OxenError>), // TODO: replace with FsError from upcoming refactoring PR +} + +impl MerkleDbError { + fn dir_create(err: OxenError) -> Self { + Self::DirCreate(Box::new(err)) + } + + fn open(err: OxenError) -> Self { + Self::Open(Box::new(err)) + } +} + pub struct MerkleNodeLookup { pub data_type: u8, pub parent_id: u128, @@ -94,7 +127,7 @@ pub struct MerkleNodeLookup { } impl MerkleNodeLookup { - pub fn load(node_table_file: &mut File) -> Result { + pub fn load(node_table_file: &mut File) -> Result { // log::debug!("MerkleNodeLookup.load() {:?}", node_table_file); // Read the whole node into memory let mut file_data = Vec::new(); @@ -218,7 +251,7 @@ impl MerkleNodeDB { self.data.to_owned() } - pub fn node(&self) -> Result { + pub fn node(&self) -> Result { let node = Self::to_node(self.dtype, &self.data())?; Ok(node) } @@ -249,7 +282,10 @@ impl MerkleNodeDB { db_path.join(NODE_FILE).exists() && db_path.join(CHILDREN_FILE).exists() } - pub fn open_read_only(repo: &LocalRepository, hash: &MerkleHash) -> Result { + pub fn open_read_only( + repo: &LocalRepository, + hash: &MerkleHash, + ) -> Result { let path = node_db_path(repo, hash); 
Self::open(path, true) } @@ -258,10 +294,10 @@ impl MerkleNodeDB { repo: &LocalRepository, node: &N, parent_id: Option, - ) -> Result { + ) -> Result { let path = node_db_path(repo, &node.hash()); if !path.exists() { - util::fs::create_dir_all(&path)?; + util::fs::create_dir_all(&path).map_err(MerkleDbError::dir_create)?; } log::debug!("open_read_write merkle node db at {}", path.display()); let mut db = Self::open(path, false)?; @@ -269,12 +305,12 @@ impl MerkleNodeDB { Ok(db) } - pub fn open(path: impl AsRef, read_only: bool) -> Result { + pub fn open(path: impl AsRef, read_only: bool) -> Result { let path = path.as_ref(); // mkdir if not exists if !path.exists() { - util::fs::create_dir_all(path)?; + util::fs::create_dir_all(path).map_err(MerkleDbError::dir_create)?; } let node_path = path.join(NODE_FILE); @@ -290,8 +326,8 @@ impl MerkleNodeDB { Option, Option, ) = if read_only { - let mut node_file = util::fs::open_file(node_path)?; - let children_file = util::fs::open_file(children_path)?; + let mut node_file = util::fs::open_file(node_path).map_err(MerkleDbError::open)?; + let children_file = util::fs::open_file(children_path).map_err(MerkleDbError::open)?; // log::debug!("Opened merkle node db read_only at {}", path.display()); ( Some(MerkleNodeLookup::load(&mut node_file)?), @@ -326,19 +362,21 @@ impl MerkleNodeDB { }) } - pub fn close(&mut self) -> Result<(), OxenError> { + /// Closes the open node and children file handles. + /// WARNING: Sets the internal node_file, children_file, and lookup to None. 
+ pub fn close(&mut self) -> Result<(), MerkleDbError> { if let Some(node_file) = &mut self.node_file { node_file.flush()?; node_file.sync_data()?; } else { - return Err(OxenError::basic_str("Must call open before closing")); + return Err(MerkleDbError::CloseBeforeOpen); } if let Some(children_file) = &mut self.children_file { children_file.flush()?; children_file.sync_data()?; } else { - return Err(OxenError::basic_str("Must call open before closing")); + return Err(MerkleDbError::CloseBeforeOpen); } self.node_file = None; @@ -348,21 +386,22 @@ impl MerkleNodeDB { } /// Write the base node info. + /// WARNING: Sets the internal dtype, node_id, parent_id of `self` to the values from `node`. fn write_node( &mut self, node: &N, parent_id: Option, - ) -> Result<(), OxenError> { + ) -> Result<(), MerkleDbError> { if self.read_only { - return Err(OxenError::basic_str("Cannot write to read-only db")); + return Err(MerkleDbError::ReadOnly); } if self.data_offset > 0 { - return Err(OxenError::basic_str("Cannot write size after writing data")); + return Err(MerkleDbError::IllegalOperationWriteSizeFirst); } let Some(node_file) = self.node_file.as_mut() else { - return Err(OxenError::basic_str("Must call open before writing")); + return Err(MerkleDbError::WriteBeforeOpen); }; // log::debug!("write_node node: {}", node); @@ -396,16 +435,16 @@ impl MerkleNodeDB { Ok(()) } - pub fn add_child(&mut self, item: &N) -> Result<(), OxenError> { + pub fn add_child(&mut self, item: &N) -> Result<(), MerkleDbError> { if self.read_only { - return Err(OxenError::basic_str("Cannot write to read-only db")); + return Err(MerkleDbError::ReadOnly); } let Some(node_file) = self.node_file.as_mut() else { - return Err(OxenError::basic_str("Must call open() before writing")); + return Err(MerkleDbError::WriteBeforeOpen); }; let Some(children_file) = self.children_file.as_mut() else { - return Err(OxenError::basic_str("Must call open() before writing")); + return Err(MerkleDbError::WriteBeforeOpen); 
}; let buf = item.to_msgpack_bytes()?; @@ -436,11 +475,11 @@ impl MerkleNodeDB { D: TMerkleTreeNode + de::DeserializeOwned, { let Some(lookup) = self.lookup.as_ref() else { - return Err(OxenError::basic_str("Must call open before reading")); + return Err(MerkleDbError::ReadBeforeOpen); }; let Some(mut children_file) = self.children_file.as_ref() else { - return Err(OxenError::basic_str("Must call open before writing")); + return Err(MerkleDbError::ReadBeforeOpen); }; // Find the offset and length of the data @@ -469,13 +508,13 @@ impl MerkleNodeDB { } */ - pub fn map(&mut self) -> Result, OxenError> { + pub fn map(&mut self) -> Result, MerkleDbError> { // log::debug!("Loading merkle node db map"); let Some(lookup) = self.lookup.as_ref() else { - return Err(OxenError::basic_str("Must call open before reading")); + return Err(MerkleDbError::ReadBeforeOpen); }; let Some(children_file) = self.children_file.as_mut() else { - return Err(OxenError::basic_str("Must call open before writing")); + return Err(MerkleDbError::ReadBeforeOpen); }; // Parse the node parent id diff --git a/crates/lib/src/core/node_sync_status.rs b/crates/lib/src/core/node_sync_status.rs index 796046541..778b5c1ac 100644 --- a/crates/lib/src/core/node_sync_status.rs +++ b/crates/lib/src/core/node_sync_status.rs @@ -1,5 +1,4 @@ use crate::constants; -use crate::core::db::merkle_node::merkle_node_db::node_db_prefix; use crate::error::OxenError; use crate::model::LocalRepository; use crate::model::MerkleHash; @@ -45,7 +44,7 @@ pub fn mark_node_as_synced( } fn node_is_synced_file_path(repo: &LocalRepository, node_hash: &MerkleHash) -> PathBuf { - let dir_prefix = node_db_prefix(node_hash); + let dir_prefix = node_hash.to_hex_hash().node_db_prefix(); repo.path .join(constants::OXEN_HIDDEN_DIR) .join(constants::TREE_DIR) diff --git a/crates/lib/src/error.rs b/crates/lib/src/error.rs index 4a528a9ba..d0449b03d 100644 --- a/crates/lib/src/error.rs +++ b/crates/lib/src/error.rs @@ -13,6 +13,7 @@ use 
std::path::Path; use std::path::PathBuf; use tokio::task::JoinError; +use crate::core::db::merkle_node::merkle_node_db::MerkleDbError; use crate::model::ParsedResource; use crate::model::RepoNew; use crate::model::Schema; @@ -176,6 +177,9 @@ pub enum OxenError { #[error("{0}")] MerkleTreeError(#[from] InvalidMerkleTreeNodeType), + #[error("{0}")] + MerkleDbError(#[from] MerkleDbError), + // // Schema (dataframes) // diff --git a/crates/lib/src/model/merkle_tree/merkle_hash.rs b/crates/lib/src/model/merkle_tree/merkle_hash.rs index 58430d95b..4c634d324 100644 --- a/crates/lib/src/model/merkle_tree/merkle_hash.rs +++ b/crates/lib/src/model/merkle_tree/merkle_hash.rs @@ -1,6 +1,7 @@ use serde::{Deserialize, Serialize}; use std::fmt; use std::hash::{Hash, Hasher}; +use std::path::{Path, PathBuf}; use std::str::FromStr; use utoipa::ToSchema; @@ -15,14 +16,17 @@ use crate::error::OxenError; pub struct MerkleHash(u128); impl MerkleHash { + #[inline] pub fn new(hash: u128) -> Self { Self(hash) } + #[inline] pub fn to_le_bytes(&self) -> [u8; 16] { self.0.to_le_bytes() } + #[inline] pub fn to_u128(&self) -> u128 { self.0 } @@ -37,8 +41,16 @@ impl MerkleHash { str } } + + /// Encode the hash value as a hexadecimal string and wrap it in a [`HexHash`] newtype. + #[allow(clippy::wrong_self_convention)] + #[inline] + pub(crate) fn to_hex_hash(&self) -> HexHash { + HexHash::new(self) + } } +/// Parses a hexadecimal string into a `MerkleHash`. impl FromStr for MerkleHash { type Err = OxenError; @@ -48,6 +60,7 @@ impl FromStr for MerkleHash { } } +/// Parses a hexadecimal string into a `MerkleHash`. impl TryFrom for MerkleHash { type Error = OxenError; @@ -56,6 +69,7 @@ impl TryFrom for MerkleHash { } } +/// Writes the hash value in hexadecimal format. 
impl fmt::Display for MerkleHash { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:x}", self.0) @@ -74,6 +88,13 @@ impl Hash for MerkleHash { } } +impl From<HexHash> for MerkleHash { + fn from(value: HexHash) -> Self { + Self::from_str(&value.0) + .expect("Invariant violation: HexHash was not constructed from a valid MerkleHash!") + } +} + // This builds a custom serializer for MerkleHash that serializes to a string. // We use this format in the API responses. // The serializer it creates is compatible with serde's "with" attribute and @@ -85,3 +106,85 @@ serde_with::serde_conv!( |hash: &MerkleHash| hash.to_string(), |s: String| MerkleHash::try_from(s) ); + +/// A hexadecimal representation of a 128-bit [`MerkleHash`] value. +/// +/// Is a newtype wrapper around an owned `String`. Can only be created from a [`MerkleHash`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct HexHash(String); + +impl HexHash { + #[inline(always)] + pub fn new(hash: &MerkleHash) -> Self { + Self(hash.to_string()) + } + + /// Produces a relative path for the 2-level directory structure used to store Merkle nodes. + /// The first directory name is the first 3 characters of the hex-encoded hash (or the whole + /// string when it is shorter, since the hex is not zero-padded). The second is the remainder. + pub fn node_db_prefix(&self) -> PathBuf { + let hash_str = &self.0; + const DIR_PREFIX_LEN: usize = 3; + let prefix_len = hash_str.len().min(DIR_PREFIX_LEN); + let (dir_prefix, dir_suffix) = hash_str.split_at(prefix_len); + Path::new(dir_prefix).join(dir_suffix) + } +} + +/// Writes the hexadecimal representation of the hash only. +impl std::fmt::Display for HexHash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +/// Converts a `MerkleHash` into a `HexHash`. +impl From<MerkleHash> for HexHash { + fn from(value: MerkleHash) -> Self { + Self::new(&value) + } +} + +/// Convert a reference to a `MerkleHash` into a `HexHash`. 
+impl<'a> From<&'a MerkleHash> for HexHash { + fn from(value: &'a MerkleHash) -> Self { + Self::new(value) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hex_hash_conversions_and_node_db_prefix() { + for _ in 0..1000 { + let random_value: u128 = rand::random(); + let hash = MerkleHash::new(random_value); + + let hex = hash.to_hex_hash(); + assert_eq!(hex.to_string(), hash.to_string()); + + let convert_back_to_hash: MerkleHash = hex.clone().into(); + assert_eq!(convert_back_to_hash, hash); + + let convert_back_to_hex: HexHash = hash.into(); + assert_eq!(convert_back_to_hex, hex); + + let dir = hex.node_db_prefix(); + let prefix = dir + .parent() + .expect("dir should have a parent") + .to_str() + .expect("should have utf-8 name"); + let suffix = dir + .file_name() + .expect("dir should have a file name") + .to_str() + .expect("should have utf-8 name"); + assert_eq!(suffix.len(), hex.0.len() - 3); + assert_eq!(prefix.len(), 3); + assert_eq!(format!("{prefix}{suffix}"), hex.to_string()); + } + } +} diff --git a/crates/lib/src/model/merkle_tree/node/merkle_tree_node.rs b/crates/lib/src/model/merkle_tree/node/merkle_tree_node.rs index 5a4f62165..36de0ba88 100644 --- a/crates/lib/src/model/merkle_tree/node/merkle_tree_node.rs +++ b/crates/lib/src/model/merkle_tree/node/merkle_tree_node.rs @@ -80,7 +80,7 @@ impl MerkleTreeNode { log::warn!("no child node db: {hash:?}"); return Ok(Vec::new()); }; - node_db.map() + Ok(node_db.map()?) } /// Check if the node is a leaf node (i.e. 
it has no children) diff --git a/crates/lib/src/repositories/commits/commit_writer.rs b/crates/lib/src/repositories/commits/commit_writer.rs index 158e4b50a..4429349d4 100644 --- a/crates/lib/src/repositories/commits/commit_writer.rs +++ b/crates/lib/src/repositories/commits/commit_writer.rs @@ -893,7 +893,7 @@ fn r_create_dir_node( match &entry.node.node { EMerkleTreeNode::Directory(node) => { // If the dir has updates, we need a new dir db - let dir_path = entry.node.maybe_path().unwrap(); + let dir_path = entry.node.maybe_path()?; // log::debug!("Processing dir node {:?}", dir_path); let dir_node = if entries.contains_key(&dir_path) { let dir_node = diff --git a/crates/lib/src/repositories/tree.rs b/crates/lib/src/repositories/tree.rs index d78ecbf52..4e6334022 100644 --- a/crates/lib/src/repositories/tree.rs +++ b/crates/lib/src/repositories/tree.rs @@ -10,7 +10,7 @@ use tar::Archive; use crate::constants::{NODES_DIR, OXEN_HIDDEN_DIR, TREE_DIR}; use crate::core::commit_sync_status; use crate::core::db::merkle_node::MerkleNodeDB; -use crate::core::db::merkle_node::merkle_node_db::{node_db_path, node_db_prefix}; +use crate::core::db::merkle_node::merkle_node_db::node_db_path; use crate::core::node_sync_status; use crate::core::v_latest::index::CommitMerkleTree as CommitMerkleTreeLatest; use crate::core::v_latest::index::CommitMerkleTree; @@ -896,7 +896,7 @@ pub fn compress_nodes( // This will be the subdir within the tarball // so when we untar it, all the subdirs will be extracted to // tree/nodes/... - let dir_prefix = node_db_prefix(hash); + let dir_prefix = hash.to_hex_hash().node_db_prefix(); let tar_subdir = Path::new(TREE_DIR).join(NODES_DIR).join(dir_prefix); let node_dir = node_db_path(repository, hash); @@ -918,7 +918,7 @@ pub fn compress_node( // This will be the subdir within the tarball // so when we untar it, all the subdirs will be extracted to // tree/nodes/... 
- let dir_prefix = node_db_prefix(hash); + let dir_prefix = hash.to_hex_hash().node_db_prefix(); let tar_subdir = Path::new(TREE_DIR).join(NODES_DIR).join(dir_prefix); // zip up the node directory @@ -956,7 +956,7 @@ pub fn compress_commits( // This will be the subdir within the tarball // so when we untar it, all the subdirs will be extracted to // tree/nodes/... - let dir_prefix = node_db_prefix(&hash); + let dir_prefix = hash.to_hex_hash().node_db_prefix(); let tar_subdir = Path::new(TREE_DIR).join(NODES_DIR).join(dir_prefix); let node_dir = node_db_path(repository, &hash);