From e667ce0b90f1abb2a109523427a7a816ae468997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Fri, 12 Jun 2026 13:31:18 +0200 Subject: [PATCH 01/19] feat: added annotation config --- Cargo.lock | 84 +++++++++++++ Cargo.toml | 2 + src/annotation/config.rs | 249 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 335 insertions(+) create mode 100644 src/annotation/config.rs diff --git a/Cargo.lock b/Cargo.lock index b07f0c9..526f920 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,18 +2,52 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + [[package]] name = "libc" version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libyaml-rs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e126dda6f34391ab7b444f9922055facc83c07a910da3eb16f1e4d9c45dc777" + [[package]] name = "once_cell" version = "1.21.4" @@ -25,7 +59,9 @@ name = "openvariant-core" version = "2.0.0" dependencies = [ "pyo3", + "serde", "thiserror", + "yaml_serde", ] [[package]] @@ -110,6 +146,41 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "syn" version = "2.0.117" @@ -152,3 +223,16 @@ name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "yaml_serde" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c7c1b1a6a7c8a6b2741a6c21a4f8918e51899b111cfa08d1288202656e3975" +dependencies = [ + "indexmap", + "itoa", + "libyaml-rs", + "ryu", + "serde", +] diff --git a/Cargo.toml b/Cargo.toml index d503dfd..578a934 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,4 +9,6 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.28.3", features = ["extension-module"] } +serde = { version = "1.0.228", features = ["derive"] } thiserror = "2" +yaml_serde = "0.10.4" diff --git a/src/annotation/config.rs b/src/annotation/config.rs new file mode 100644 index 0000000..4c1b635 --- /dev/null +++ b/src/annotation/config.rs @@ -0,0 +1,249 @@ +//! Core configuration types for OpenVariant annotation. +//! +//! This module provides serde-compatible Rust types that mirror the Python +//! enabling round-trip serialisation through YAML annotation definition files. + +use serde::{Deserialize, Serialize}; +use std::fmt; + +/// Constants +pub const ANNOTATION_EXTENSION: &str = "yaml" +pub const DEFAULT_COLUMNS: &[&str] = [] +pub const DEFAULT_RECURSIVE: bool = false + +/// AnnotatioType + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum AnnotationType { + /// Assign a fixed literal value to every output record. + /// + /// ```yaml + /// - type: static + /// field: GENOME_BUILD + /// value: GRCh38 + /// ``` + Static, + + /// Copy the value from another column already present in the same record. + /// + /// ```yaml + /// - type: internal + /// field: ALT_ALLELE + /// fieldSource: ALT + /// ``` + Internal, + + /// Set the field to the **directory name** of the source file being processed. + /// + /// ```yaml + /// - type: dirname + /// field: STUDY_DIR + /// ``` + Dirname, + + /// Set the field to the **filename** (no path) of the source file being processed. + /// + /// ```yaml + /// - type: filename + /// field: SOURCE_FILE + /// ``` + Filename, + + /// Delegate value computation to an external Python plugin function. + /// + /// ```yaml + /// - type: plugin + /// field: COMPUTED_SCORE + /// plugin: my_package.scoring + /// function: compute_score + /// ``` + Plugin, + + /// Look up the value in an external delimited mapping file. + /// + /// ```yaml + /// - type: mapping + /// field: GENE_NAME + /// fileMapping: /data/gene_map.tsv + /// fieldMapping: ENSEMBL_ID + /// fieldValue: GENE_SYMBOL + /// ``` + Mapping, +} + +impl fmt::Display for AnnotationType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + AnnotationType::Static => "static", + AnnotationType::Internal => "internal", + AnnotationType::Dirname => "dirname", + AnnotationType::Filename => "filename", + AnnotationType::Plugin => "plugin", + AnnotationType::Mapping => "mapping", + }; + write!(f, "{s}") + } +} + +/// AnnotationDelimiter + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum AnnotationDelimiter { + /// Tab character (`\t`). + T, + /// Comma character (`,`). + C, +} + +impl AnnotationDelimiter { + /// Returns the delimiter as a `char`. + pub fn as_char(&self) -> char { + match self { + AnnotationDelimiter::T => '\t', + AnnotationDelimiter::C => ',', + } + } +} + +impl Default for AnnotationDelimiter { + fn default() -> Self { + AnnotationDelimiter::T + } +} + +impl fmt::Display for AnnotationDelimiter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_char()) + } +} + +/// AnnotationFormat + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum AnnotationFormat { + /// Tab-Separated Values — field separator is `\t`. + #[serde(rename = "TSV")] + Tsv, + /// Comma-Separated Values — field separator is `,`. + #[serde(rename = "CSV")] + Csv, +} + +impl AnnotationFormat { + /// Returns the field separator character for this format. + pub fn separator(&self) -> char { + match self { + AnnotationFormat::Tsv => '\t', + AnnotationFormat::Csv => ',', + } + } +} + +impl Default for AnnotationFormat { + fn default() -> Self { + AnnotationFormat::Tsv + } +} + +impl fmt::Display for AnnotationFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + AnnotationFormat::Tsv => "TSV", + AnnotationFormat::Csv => "CSV", + }; + write!(f, "{s}") + } +} + +/// AnnotationEntry + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct AnnotationEntry { + /// The annotation strategy to apply (required). + #[serde(rename = "type")] + pub annotation_type: AnnotationType, + + /// Output column name that will receive the derived value (required, non-blank). + pub field: String, + + /// *(static, internal)* Literal value to assign. Accepts any YAML scalar or + /// structure (`string`, `int`, `float`, `bool`, `null`). + #[serde(skip_serializing_if = "Option::is_none")] + pub value: Option, + + /// *(internal, mapping)* Name of the source column to copy from. + #[serde(rename = "fieldSource", skip_serializing_if = "Option::is_none")] + pub field_source: Option, + + /// *(internal, filename, dirname)* Dotted Python module path of the plugin (e.g. `my_pkg.scoring`). + #[serde(skip_serializing_if = "Option::is_none")] + pub plugin: Option, + + /// *(plugin)* Lambda function that will be executed. + #[serde(skip_serializing_if = "Option::is_none")] + pub function: Option, + + /// Optional regular-expression applied to the derived value before storage. + /// The first capture group is used when present. + #[serde(skip_serializing_if = "Option::is_none")] + pub regex: Option, + + /// *(mapping)* Path to the external lookup file. + #[serde(rename = "fileMapping", skip_serializing_if = "Option::is_none")] + pub file_mapping: Option, + + /// *(mapping)* Column in the mapping file used as the lookup key. + #[serde(rename = "fieldMapping", skip_serializing_if = "Option::is_none")] + pub field_mapping: Option, + + /// *(mapping)* Column in the mapping file whose value is returned. + #[serde(rename = "fieldValue", skip_serializing_if = "Option::is_none")] + pub field_value: Option, +} + +/// ExcludeEntry + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct ExcludeEntry { + /// The column name to exclude values from. + pub field: String, + /// The value that triggers exclusion (any YAML scalar). + pub value: serde_yaml::Value, +} + + +/// AnnotationConfig + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct AnnotationConfig { + /// Glob pattern used to discover source files (e.g. `"**/*.vcf.gz"`). + #[serde(skip_serializing_if = "Option::is_none")] + pub pattern: Option, + + /// Recurse into sub-directories when scanning for source files. + /// Default: `false`. + #[serde(default)] + pub recursive: bool, + + /// Output file format. Default: `TSV`. + #[serde(default)] + pub format: AnnotationFormat, + + /// Field delimiter. Default: `T` (tab). + #[serde(default)] + pub delimiter: AnnotationDelimiter, + + /// Ordered list of columns to include in the output. + /// Empty list means "all columns". + #[serde(default)] + pub columns: Vec, + + /// The annotation rules to apply, in order. + #[serde(default)] + pub annotation: Vec, + + /// Records matching **any** exclude predicate are dropped. + #[serde(default)] + pub exclude: Vec, +} \ No newline at end of file From 66bec7a9625e7eb68748cf5072dc37407429f63f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Fri, 12 Jun 2026 15:49:15 +0200 Subject: [PATCH 02/19] feat: added validator --- src/annotation/config.rs | 7 +- src/annotation/mod.rs | 14 +++ src/annotation/validator.rs | 228 ++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 +- 4 files changed, 249 insertions(+), 3 deletions(-) create mode 100644 src/annotation/mod.rs create mode 100644 src/annotation/validator.rs diff --git a/src/annotation/config.rs b/src/annotation/config.rs index 4c1b635..bf3968a 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -1,7 +1,10 @@ //! Core configuration types for OpenVariant annotation. //! -//! This module provides serde-compatible Rust types that mirror the Python -//! enabling round-trip serialisation through YAML annotation definition files. +//! This file defines the core configuration types for OpenVariant annotation, including +//! `AnnotationType`, `AnnotationDelimiter`, `AnnotationFormat`, `AnnotationEntry`, +//! `ExcludeEntry`, and `AnnotationConfig`. These types are designed to be serde-compatible, +//! allowing for easy serialisation and deserialisation to and from YAML annotation definition files. The structures defined here mirror the +//! expected structure of the YAML files, enabling seamless integration with the OpenVariant annotation system. use serde::{Deserialize, Serialize}; use std::fmt; diff --git a/src/annotation/mod.rs b/src/annotation/mod.rs new file mode 100644 index 0000000..6624816 --- /dev/null +++ b/src/annotation/mod.rs @@ -0,0 +1,14 @@ +pub mod config; + +pub use config::{ + AnnotationConfig, + AnnotationDelimiter, + AnnotationEntry, + AnnotationFormat, + AnnotationType, + ExcludeEntry, + Severity, + ValidationError, + parse_and_validate, + validate_config, +}; \ No newline at end of file diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs new file mode 100644 index 0000000..c92cf96 --- /dev/null +++ b/src/annotation/validator.rs @@ -0,0 +1,228 @@ + +//! Annotation validators for OpenVariant annotation. +//! +//! This module provides validation logic for ensuring that annotation configurations are valid and consistent. +//! It includes functions to validate annotation types, delimiters, formats, and overall configuration structures. +//! The validators are designed to be used during the deserialization process of YAML annotation definition files, +//! providing immediate feedback on any issues with the configuration. This helps maintain the integrity of the annotation system and +//! prevents runtime errors due to misconfigurations. + + +use std::fmt; + +#[derive(Debug, Clone, PartialEq)] +pub struct ValidationError { + /// Human-readable problem description. + pub message: String, + /// The YAML path that contains the problem, e.g. `"annotation[2].fieldSource"`. + pub path: String, + /// Severity: `Error` = invalid config; `Warning` = suspicious but not fatal. + pub severity: Severity, +} + +/// Severity of a [`ValidationError`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Severity { + /// The annotation file is invalid and cannot be used. + Error, + /// A field is present but will be silently ignored by the runtime. + Warning, +} + +impl fmt::Display for ValidationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let tag = match self.severity { + Severity::Error => "ERROR", + Severity::Warning => "WARN ", + }; + write!(f, "[{tag}] {}: {}", self.path, self.message) + } +} + +/// Parse YAML text and validate the resulting [`AnnotationConfig`]. +/// +/// Validation runs in three passes: +/// +/// 1. **Syntax** — `serde_yaml` rejects malformed YAML and unknown enum +/// variants, reporting the exact line/column of the problem. +/// 2. **Structural** — missing or blank required fields (`field`, type-specific +/// keys) and an empty `annotation` list are flagged as errors. +/// 3. **Semantic** — keys that exist but are irrelevant for the chosen +/// annotation type are reported as warnings so users can clean up their +/// YAML without being blocked. +/// +/// # Returns +/// +/// - `Ok(config)` — the config is structurally and semantically valid. +/// Warnings are *not* returned; use [`validate_config`] directly to +/// inspect them. +/// - `Err(errors)` — one or more `Severity::Error` diagnostics (warnings +/// included for context). +pub fn parse_and_validate(yaml: &str) -> Result> { + // Pass 1 — syntax + structural (serde) + let config: AnnotationConfig = serde_yaml::from_str(yaml).map_err(|e| { + // serde_yaml errors include line/column information in their Display. + vec![ValidationError { + message: format!("YAML parse error — {e}"), + path: "".into(), + severity: Severity::Error, + }] + })?; + + // Pass 2 + 3 — structural + semantic + let diagnostics = validate_config(&config); + let has_errors = diagnostics.iter().any(|d| d.severity == Severity::Error); + + if has_errors { + Err(diagnostics) + } else { + Ok(config) + } +} + +/// Validate an already-deserialised [`AnnotationConfig`]. +/// +/// Returns *all* diagnostics (errors and warnings) found. +/// An empty `Vec` means the configuration is fully valid, no errors found. +pub fn validate_config(config: &AnnotationConfig) -> Vec { + let mut diags: Vec = Vec::new(); + + // Empty file check — this is technically valid YAML but not a valid annotation config. + if config.annotation.is_empty() { + diags.push(err( + "", + "`annotation` list is empty — at least one entry is required", + )); + // Remaining checks are per-entry; bail early. + return diags; + } + + // Validate Annotation entries + for (i, entry) in config.annotation.iter().enumerate() { + let base = format!("annotation[{i}]"); + + // `field` must always be present and non-blank (serde already + // ensures it exists; we check for an accidental empty string). + if entry.field.trim().is_empty() { + diags.push(err(&base, "`field` must not be blank")); + } + + match &entry.annotation_type { + // Static + AnnotationType::Static => { + // Check that `value` is present (can be any YAML scalar or structure, so we don't require a specific type). + if entry.value.is_none() { + diags.push(err(&base, "`value` is required but missing")); + } + } + + // Internal + AnnotationType::Internal => { + check_required_str(&base, "fieldSource", entry.field_source.as_deref(), &mut diags); + //diags.extend(unused_warnings(&base, entry, &[ + // ("value", entry.value.is_some()), + // ("function", entry.function.is_some()), + //])); + } + + // Dirname / Filenanme + AnnotationType::Dirname | AnnotationType::Filename => { + //diags.extend(unused_warnings(&base, entry, &[ + // ("function", entry.function.is_some()), + // ("regex", entry.regex.is_some()), + //])); + } + + // Plugin + AnnotationType::Plugin => { + check_required_str(&base, "plugin", entry.plugin.as_deref(), &mut diags); + check_required_str(&base, "function", entry.function.as_deref(), &mut diags); + } + + // Mapping + AnnotationType::Mapping => { + check_required_str(&base, "fieldSource", entry.field_source.as_deref(), &mut diags); + check_required_str(&base, "fileMapping", entry.file_mapping.as_deref(), &mut diags); + check_required_str(&base, "fieldMapping", entry.field_mapping.as_deref(), &mut diags); + check_required_str(&base, "fieldValue", entry.field_value.as_deref(), &mut diags); + } + } + } + + // Validate Exclude entries + for (i, excl) in config.exclude.iter().enumerate() { + if excl.field.trim().is_empty() { + diags.push(err( + &format!("exclude[{i}]"), + "`field` must not be blank", + )); + } + } + + // Validate Column name uniqueness + let mut seen_fields: std::collections::HashSet<&str> = std::collections::HashSet::new(); + for entry in &config.annotation { + if !seen_fields.insert(entry.field.as_str()) { + diags.push(ValidationError { + message: format!( + "duplicate `field` name `{}` — each annotation field must be unique", + entry.field + ), + path: ".annotation".into(), + severity: Severity::Error, + }); + } + } + + diags +} + +fn err(path: &str, message: &str) -> ValidationError { + ValidationError { + message: message.into(), + path: path.into(), + severity: Severity::Error, + } +} + +fn warn(path: &str, message: &str) -> ValidationError { + ValidationError { + message: message.into(), + path: path.into(), + severity: Severity::Warning, + } +} + +/// Emit an error if `value` is `None` or blank. +fn check_required_str( + base: &str, + key: &str, + value: Option<&str>, + diags: &mut Vec, +) { + match value { + None => diags.push(err(base, &format!("`{key}` is required but missing"))), + Some(v) if v.trim().is_empty() => diags.push(err(base, &format!("`{key}` must not be blank"))), + _ => {} + } +} + +/// Emit a warning for every key in `fields` whose boolean flag is `true`. +fn unused_warnings( + base: &str, + _entry: &AnnotationEntry, + fields: &[(&str, bool)], +) -> Vec { + fields + .iter() + .filter(|(_, present)| *present) + .map(|(key, _)| { + warn( + &format!("{base}.{key}"), + &format!( + "`{key}` is not used by this annotation type and will be ignored" + ), + ) + }) + .collect() +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index b4bce7b..5c6b7c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ use pyo3::prelude::*; -//mod annotation; +pub mod annotation; + //mod filter; //mod reader; //mod walker; From 90a70b40efd47535ac2259b5e4264498618f70d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Fri, 12 Jun 2026 15:56:53 +0200 Subject: [PATCH 03/19] fix: compilation errors --- Cargo.lock | 1 + src/annotation/config.rs | 39 ++++++---------- src/annotation/mod.rs | 14 ++---- src/annotation/validator.rs | 88 +++++++++++++++++++++++-------------- 4 files changed, 74 insertions(+), 68 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 526f920..05465b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -159,6 +159,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", + "serde_derive", ] [[package]] diff --git a/src/annotation/config.rs b/src/annotation/config.rs index bf3968a..25ffe46 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -10,9 +10,9 @@ use serde::{Deserialize, Serialize}; use std::fmt; /// Constants -pub const ANNOTATION_EXTENSION: &str = "yaml" -pub const DEFAULT_COLUMNS: &[&str] = [] -pub const DEFAULT_RECURSIVE: bool = false +pub const ANNOTATION_EXTENSION: &str = "yaml"; +pub const DEFAULT_COLUMNS: &[&str] = &[]; +pub const DEFAULT_RECURSIVE: bool = false; /// AnnotatioType @@ -78,12 +78,12 @@ pub enum AnnotationType { impl fmt::Display for AnnotationType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self { - AnnotationType::Static => "static", + AnnotationType::Static => "static", AnnotationType::Internal => "internal", - AnnotationType::Dirname => "dirname", + AnnotationType::Dirname => "dirname", AnnotationType::Filename => "filename", - AnnotationType::Plugin => "plugin", - AnnotationType::Mapping => "mapping", + AnnotationType::Plugin => "plugin", + AnnotationType::Mapping => "mapping", }; write!(f, "{s}") } @@ -91,9 +91,10 @@ impl fmt::Display for AnnotationType { /// AnnotationDelimiter -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] pub enum AnnotationDelimiter { /// Tab character (`\t`). + #[default] T, /// Comma character (`,`). C, @@ -109,12 +110,6 @@ impl AnnotationDelimiter { } } -impl Default for AnnotationDelimiter { - fn default() -> Self { - AnnotationDelimiter::T - } -} - impl fmt::Display for AnnotationDelimiter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.as_char()) @@ -123,10 +118,11 @@ impl fmt::Display for AnnotationDelimiter { /// AnnotationFormat -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] pub enum AnnotationFormat { /// Tab-Separated Values — field separator is `\t`. #[serde(rename = "TSV")] + #[default] Tsv, /// Comma-Separated Values — field separator is `,`. #[serde(rename = "CSV")] @@ -143,12 +139,6 @@ impl AnnotationFormat { } } -impl Default for AnnotationFormat { - fn default() -> Self { - AnnotationFormat::Tsv - } -} - impl fmt::Display for AnnotationFormat { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = match self { @@ -173,7 +163,7 @@ pub struct AnnotationEntry { /// *(static, internal)* Literal value to assign. Accepts any YAML scalar or /// structure (`string`, `int`, `float`, `bool`, `null`). #[serde(skip_serializing_if = "Option::is_none")] - pub value: Option, + pub value: Option, /// *(internal, mapping)* Name of the source column to copy from. #[serde(rename = "fieldSource", skip_serializing_if = "Option::is_none")] @@ -212,10 +202,9 @@ pub struct ExcludeEntry { /// The column name to exclude values from. pub field: String, /// The value that triggers exclusion (any YAML scalar). - pub value: serde_yaml::Value, + pub value: yaml_serde::Value, } - /// AnnotationConfig #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -249,4 +238,4 @@ pub struct AnnotationConfig { /// Records matching **any** exclude predicate are dropped. #[serde(default)] pub exclude: Vec, -} \ No newline at end of file +} diff --git a/src/annotation/mod.rs b/src/annotation/mod.rs index 6624816..2f6c314 100644 --- a/src/annotation/mod.rs +++ b/src/annotation/mod.rs @@ -1,14 +1,8 @@ pub mod config; +pub mod validator; pub use config::{ - AnnotationConfig, - AnnotationDelimiter, - AnnotationEntry, - AnnotationFormat, - AnnotationType, + AnnotationConfig, AnnotationDelimiter, AnnotationEntry, AnnotationFormat, AnnotationType, ExcludeEntry, - Severity, - ValidationError, - parse_and_validate, - validate_config, -}; \ No newline at end of file +}; +pub use validator::{Severity, ValidationError, parse_and_validate, validate_config}; diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index c92cf96..29628da 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -1,4 +1,3 @@ - //! Annotation validators for OpenVariant annotation. //! //! This module provides validation logic for ensuring that annotation configurations are valid and consistent. @@ -7,9 +6,10 @@ //! providing immediate feedback on any issues with the configuration. This helps maintain the integrity of the annotation system and //! prevents runtime errors due to misconfigurations. - use std::fmt; +use super::config::{AnnotationConfig, AnnotationEntry, AnnotationType}; + #[derive(Debug, Clone, PartialEq)] pub struct ValidationError { /// Human-readable problem description. @@ -32,7 +32,7 @@ pub enum Severity { impl fmt::Display for ValidationError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let tag = match self.severity { - Severity::Error => "ERROR", + Severity::Error => "ERROR", Severity::Warning => "WARN ", }; write!(f, "[{tag}] {}: {}", self.path, self.message) @@ -60,11 +60,11 @@ impl fmt::Display for ValidationError { /// included for context). pub fn parse_and_validate(yaml: &str) -> Result> { // Pass 1 — syntax + structural (serde) - let config: AnnotationConfig = serde_yaml::from_str(yaml).map_err(|e| { + let config: AnnotationConfig = yaml_serde::from_str(yaml).map_err(|e| { // serde_yaml errors include line/column information in their Display. vec![ValidationError { - message: format!("YAML parse error — {e}"), - path: "".into(), + message: format!("YAML parse error — {e}"), + path: "".into(), severity: Severity::Error, }] })?; @@ -118,7 +118,12 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { // Internal AnnotationType::Internal => { - check_required_str(&base, "fieldSource", entry.field_source.as_deref(), &mut diags); + check_required_str( + &base, + "fieldSource", + entry.field_source.as_deref(), + &mut diags, + ); //diags.extend(unused_warnings(&base, entry, &[ // ("value", entry.value.is_some()), // ("function", entry.function.is_some()), @@ -135,16 +140,36 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { // Plugin AnnotationType::Plugin => { - check_required_str(&base, "plugin", entry.plugin.as_deref(), &mut diags); + check_required_str(&base, "plugin", entry.plugin.as_deref(), &mut diags); check_required_str(&base, "function", entry.function.as_deref(), &mut diags); } // Mapping AnnotationType::Mapping => { - check_required_str(&base, "fieldSource", entry.field_source.as_deref(), &mut diags); - check_required_str(&base, "fileMapping", entry.file_mapping.as_deref(), &mut diags); - check_required_str(&base, "fieldMapping", entry.field_mapping.as_deref(), &mut diags); - check_required_str(&base, "fieldValue", entry.field_value.as_deref(), &mut diags); + check_required_str( + &base, + "fieldSource", + entry.field_source.as_deref(), + &mut diags, + ); + check_required_str( + &base, + "fileMapping", + entry.file_mapping.as_deref(), + &mut diags, + ); + check_required_str( + &base, + "fieldMapping", + entry.field_mapping.as_deref(), + &mut diags, + ); + check_required_str( + &base, + "fieldValue", + entry.field_value.as_deref(), + &mut diags, + ); } } } @@ -152,10 +177,7 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { // Validate Exclude entries for (i, excl) in config.exclude.iter().enumerate() { if excl.field.trim().is_empty() { - diags.push(err( - &format!("exclude[{i}]"), - "`field` must not be blank", - )); + diags.push(err(&format!("exclude[{i}]"), "`field` must not be blank")); } } @@ -164,11 +186,11 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { for entry in &config.annotation { if !seen_fields.insert(entry.field.as_str()) { diags.push(ValidationError { - message: format!( + message: format!( "duplicate `field` name `{}` — each annotation field must be unique", entry.field ), - path: ".annotation".into(), + path: ".annotation".into(), severity: Severity::Error, }); } @@ -179,37 +201,39 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { fn err(path: &str, message: &str) -> ValidationError { ValidationError { - message: message.into(), - path: path.into(), + message: message.into(), + path: path.into(), severity: Severity::Error, } } fn warn(path: &str, message: &str) -> ValidationError { ValidationError { - message: message.into(), - path: path.into(), + message: message.into(), + path: path.into(), severity: Severity::Warning, } } /// Emit an error if `value` is `None` or blank. fn check_required_str( - base: &str, - key: &str, - value: Option<&str>, - diags: &mut Vec, + base: &str, + key: &str, + value: Option<&str>, + diags: &mut Vec, ) { match value { - None => diags.push(err(base, &format!("`{key}` is required but missing"))), - Some(v) if v.trim().is_empty() => diags.push(err(base, &format!("`{key}` must not be blank"))), + None => diags.push(err(base, &format!("`{key}` is required but missing"))), + Some(v) if v.trim().is_empty() => { + diags.push(err(base, &format!("`{key}` must not be blank"))) + } _ => {} } } /// Emit a warning for every key in `fields` whose boolean flag is `true`. fn unused_warnings( - base: &str, + base: &str, _entry: &AnnotationEntry, fields: &[(&str, bool)], ) -> Vec { @@ -219,10 +243,8 @@ fn unused_warnings( .map(|(key, _)| { warn( &format!("{base}.{key}"), - &format!( - "`{key}` is not used by this annotation type and will be ignored" - ), + &format!("`{key}` is not used by this annotation type and will be ignored"), ) }) .collect() -} \ No newline at end of file +} From 0532e945b8b1acbb4d9f9d6c226aa8e3c01a7c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Fri, 12 Jun 2026 16:09:21 +0200 Subject: [PATCH 04/19] feat: added first Rust test --- .github/workflows/openvariant_tester.yml | 10 ++++++--- src/lib.rs | 4 ++++ src/tests/mod.rs | 26 ++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 src/tests/mod.rs diff --git a/.github/workflows/openvariant_tester.yml b/.github/workflows/openvariant_tester.yml index 59b197f..da4bc61 100644 --- a/.github/workflows/openvariant_tester.yml +++ b/.github/workflows/openvariant_tester.yml @@ -40,16 +40,20 @@ jobs: uses: actions-rust-lang/setup-rust-toolchain@v1 - uses: astral-sh/setup-uv@v4 - + - name: Lint with Ruff run: | uv sync --extra linting --frozen --no-install-project uv run ruff check openvariant - - name: Test + - name: Test Python-based code run: | uv sync --extra tests --frozen - uv run pytest --cov . + uv run pytest --cov . + + - name: Test Rust-based code + run: | + make test - name: Upload coverage reports to Codecov with GitHub Action uses: codecov/codecov-action@v3 diff --git a/src/lib.rs b/src/lib.rs index 5c6b7c4..a98de6f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,10 @@ pub mod annotation; //mod py_bindings; pub mod error; + +#[cfg(test)] +mod tests; + #[pyfunction] fn hello() { println!("Hello, OpenVariant!"); diff --git a/src/tests/mod.rs b/src/tests/mod.rs new file mode 100644 index 0000000..ab16344 --- /dev/null +++ b/src/tests/mod.rs @@ -0,0 +1,26 @@ +mod tests { + use crate::annotation::AnnotationType; + use serde::Serialize; + use std::fmt; + + + /// Deserialise from YAML, re-serialise, then deserialise again. + /// Asserts the three values are equal and returns the first parsed value. + fn round_trip(yaml: &str) -> T + where + T: serde::de::DeserializeOwned + Serialize + PartialEq + fmt::Debug, + { + let first: T = yaml_serde::from_str(yaml).expect("initial parse failed"); + let reserialized = yaml_serde::to_string(&first).expect("serialise failed"); + let second: T = yaml_serde::from_str(&reserialized).expect("re-parse failed"); + assert_eq!(first, second, "round-trip mismatch:\noriginal = {first:?}\nre-parsed = {second:?}"); + first + } + + // ── AnnotationType ──────────────────────────────────────────────────────── + + #[test] + fn annotation_type_static_round_trip() { + assert_eq!(round_trip::("static"), AnnotationType::Static); + } +} From 04892abf7507236e731313a18b25b47050a1280c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Fri, 12 Jun 2026 16:12:16 +0200 Subject: [PATCH 05/19] =?UTF-8?q?fix:=20commented=20code=C3=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/annotation/validator.rs | 46 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 29628da..0c6acbb 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -207,13 +207,13 @@ fn err(path: &str, message: &str) -> ValidationError { } } -fn warn(path: &str, message: &str) -> ValidationError { - ValidationError { - message: message.into(), - path: path.into(), - severity: Severity::Warning, - } -} +//fn warn(path: &str, message: &str) -> ValidationError { + //ValidationError { + //message: message.into(), + //path: path.into(), + //severity: Severity::Warning, + //} +//} /// Emit an error if `value` is `None` or blank. fn check_required_str( @@ -232,19 +232,19 @@ fn check_required_str( } /// Emit a warning for every key in `fields` whose boolean flag is `true`. -fn unused_warnings( - base: &str, - _entry: &AnnotationEntry, - fields: &[(&str, bool)], -) -> Vec { - fields - .iter() - .filter(|(_, present)| *present) - .map(|(key, _)| { - warn( - &format!("{base}.{key}"), - &format!("`{key}` is not used by this annotation type and will be ignored"), - ) - }) - .collect() -} +//fn unused_warnings( + //base: &str, + //_entry: &AnnotationEntry, + //fields: &[(&str, bool)], +//) -> Vec { + //fields + //.iter() + //.filter(|(_, present)| *present) + //.map(|(key, _)| { + //warn( + //&format!("{base}.{key}"), + //&format!("`{key}` is not used by this annotation type and will be ignored"), + //) + //}) + //.collect() +//} From 0cf2f615385e7b2fb6f12fdb797ac307489b951b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Fri, 12 Jun 2026 16:14:45 +0200 Subject: [PATCH 06/19] fix: comment and import --- src/annotation/validator.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 0c6acbb..156069e 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -8,7 +8,7 @@ use std::fmt; -use super::config::{AnnotationConfig, AnnotationEntry, AnnotationType}; +use super::config::{AnnotationConfig, AnnotationType}; #[derive(Debug, Clone, PartialEq)] pub struct ValidationError { @@ -231,7 +231,7 @@ fn check_required_str( } } -/// Emit a warning for every key in `fields` whose boolean flag is `true`. +// Emit a warning for every key in `fields` whose boolean flag is `true`. //fn unused_warnings( //base: &str, //_entry: &AnnotationEntry, From 6b6b87c275844166da486352b5e30dfe4b52ba6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Tue, 16 Jun 2026 12:57:56 +0200 Subject: [PATCH 07/19] feat: added test for annotation --- src/tests/mod.rs | 27 +-- src/tests/test_annotation/mod.rs | 2 + src/tests/test_annotation/test_config.rs | 176 ++++++++++++++++++++ src/tests/test_annotation/test_validator.rs | 129 ++++++++++++++ 4 files changed, 308 insertions(+), 26 deletions(-) create mode 100644 src/tests/test_annotation/mod.rs create mode 100644 src/tests/test_annotation/test_config.rs create mode 100644 src/tests/test_annotation/test_validator.rs diff --git a/src/tests/mod.rs b/src/tests/mod.rs index ab16344..99a7d0c 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,26 +1 @@ -mod tests { - use crate::annotation::AnnotationType; - use serde::Serialize; - use std::fmt; - - - /// Deserialise from YAML, re-serialise, then deserialise again. - /// Asserts the three values are equal and returns the first parsed value. - fn round_trip(yaml: &str) -> T - where - T: serde::de::DeserializeOwned + Serialize + PartialEq + fmt::Debug, - { - let first: T = yaml_serde::from_str(yaml).expect("initial parse failed"); - let reserialized = yaml_serde::to_string(&first).expect("serialise failed"); - let second: T = yaml_serde::from_str(&reserialized).expect("re-parse failed"); - assert_eq!(first, second, "round-trip mismatch:\noriginal = {first:?}\nre-parsed = {second:?}"); - first - } - - // ── AnnotationType ──────────────────────────────────────────────────────── - - #[test] - fn annotation_type_static_round_trip() { - assert_eq!(round_trip::("static"), AnnotationType::Static); - } -} +pub mod test_annotation; \ No newline at end of file diff --git a/src/tests/test_annotation/mod.rs b/src/tests/test_annotation/mod.rs new file mode 100644 index 0000000..050849a --- /dev/null +++ b/src/tests/test_annotation/mod.rs @@ -0,0 +1,2 @@ +pub mod test_config; +pub mod test_validator; \ No newline at end of file diff --git a/src/tests/test_annotation/test_config.rs b/src/tests/test_annotation/test_config.rs new file mode 100644 index 0000000..15e680e --- /dev/null +++ b/src/tests/test_annotation/test_config.rs @@ -0,0 +1,176 @@ + +use crate::annotation::{AnnotationType, AnnotationDelimiter, AnnotationFormat, AnnotationConfig}; +use serde::Serialize; +use std::fmt; + + +/// Deserialise from YAML, re-serialise, then deserialise again. +/// Asserts the three values are equal and returns the first parsed value. +fn round_trip(yaml: &str) -> T +where + T: serde::de::DeserializeOwned + Serialize + PartialEq + fmt::Debug, +{ + let first: T = yaml_serde::from_str(yaml).expect("initial parse failed"); + let reserialized = yaml_serde::to_string(&first).expect("serialise failed"); + let second: T = yaml_serde::from_str(&reserialized).expect("re-parse failed"); + assert_eq!(first, second, "round-trip mismatch:\noriginal = {first:?}\nre-parsed = {second:?}"); + first +} + +///ENUMS + +// AnnotationType, round-trip tests +#[test] +fn annotation_type_static_round_trip() { + assert_eq!(round_trip::("static"), AnnotationType::Static); +} + +#[test] +fn annotation_type_internal_round_trip() { + assert_eq!(round_trip::("internal"), AnnotationType::Internal); +} + +#[test] +fn annotation_type_dirname_round_trip() { + assert_eq!(round_trip::("dirname"), AnnotationType::Dirname); +} + +#[test] +fn annotation_type_filename_round_trip() { + assert_eq!(round_trip::("filename"), AnnotationType::Filename); +} + +#[test] +fn annotation_type_plugin_round_trip() { + assert_eq!(round_trip::("plugin"), AnnotationType::Plugin); +} + +#[test] +fn annotation_type_mapping_round_trip() { + assert_eq!(round_trip::("mapping"), AnnotationType::Mapping); +} + +#[test] +fn annotation_type_unknown_variant_is_error() { + let result = yaml_serde::from_str::("INVALID_TYPE"); + assert!(result.is_err(), "unknown variant must fail to parse"); +} + +#[test] +fn annotation_type_display_matches_yaml_key() { + // Display must produce the exact lowercase YAML key serde expects. + let cases = [ + (AnnotationType::Static, "static"), + (AnnotationType::Internal, "internal"), + (AnnotationType::Dirname, "dirname"), + (AnnotationType::Filename, "filename"), + (AnnotationType::Plugin, "plugin"), + (AnnotationType::Mapping, "mapping"), + ]; + for (variant, expected) in cases { + assert_eq!(variant.to_string(), expected); + } +} + +// AnnotationDelimiter, round-trip tests +#[test] +fn delimiter_t_round_trip() { + let d = round_trip::("T"); + assert_eq!(d, AnnotationDelimiter::T); + assert_eq!(d.as_char(), '\t'); +} + +#[test] +fn delimiter_c_round_trip() { + let d = round_trip::("C"); + assert_eq!(d, AnnotationDelimiter::C); + assert_eq!(d.as_char(), ','); +} + +#[test] +fn delimiter_unknown_variant_is_error() { + assert!(yaml_serde::from_str::("X").is_err()); +} + +// AnnotationFormat, round-trip tests +#[test] +fn format_tsv_round_trip() { + let f = round_trip::("TSV"); + assert_eq!(f, AnnotationFormat::Tsv); + assert_eq!(f.separator(), '\t'); + assert_eq!(f.to_string(), "TSV"); +} + +#[test] +fn format_csv_round_trip() { + let f = round_trip::("CSV"); + assert_eq!(f, AnnotationFormat::Csv); + assert_eq!(f.separator(), ','); + assert_eq!(f.to_string(), "CSV"); +} + +#[test] +fn format_lowercase_is_rejected() { + // YAML keys for format are uppercase; lowercase must be rejected. + assert!(yaml_serde::from_str::("tsv").is_err()); +} + +// Annotation Defaults +#[test] +fn config_defaults_are_applied_when_absent() { + let yaml = "annotation:\n - type: dirname\n field: DIR\n"; + let cfg: AnnotationConfig = yaml_serde::from_str(yaml).unwrap(); + assert_eq!(cfg.format, AnnotationFormat::Tsv); + assert_eq!(cfg.delimiter, AnnotationDelimiter::T); + assert!(!cfg.recursive); + assert!(cfg.columns.is_empty()); + assert!(cfg.exclude.is_empty()); + assert!(cfg.pattern.is_none()); +} + + +// Full config round-trip test +#[test] + fn full_config_round_trip() { + let yaml = r#" +pattern: "**/*.vcf.gz" +recursive: true +format: CSV +delimiter: C +columns: + - CHROM + - POS + - REF + - ALT +annotation: + - type: static + field: GENOME + value: GRCh38 + - type: internal + field: ALLELE + fieldSource: ALT + - type: dirname + field: STUDY_DIR + - type: filename + field: SAMPLE_FILE + - type: plugin + field: SCORE + plugin: scoring.module + function: compute + - type: mapping + field: GENE + fileMapping: /data/genes.tsv + fieldMapping: ENSEMBL + fieldValue: SYMBOL +exclude: + - field: FILTER + value: FAIL +"#; + let cfg = round_trip::(yaml); + assert_eq!(cfg.annotation.len(), 6); + assert_eq!(cfg.format, AnnotationFormat::Csv); + assert_eq!(cfg.delimiter, AnnotationDelimiter::C); + assert_eq!(cfg.columns, vec!["CHROM", "POS", "REF", "ALT"]); + assert!(cfg.recursive); + assert_eq!(cfg.exclude.len(), 1); + } \ No newline at end of file diff --git a/src/tests/test_annotation/test_validator.rs b/src/tests/test_annotation/test_validator.rs new file mode 100644 index 0000000..eedcf01 --- /dev/null +++ b/src/tests/test_annotation/test_validator.rs @@ -0,0 +1,129 @@ +use crate::annotation::validator::{parse_and_validate, Severity}; + +// Test parse_and_validate, non-error cases + +#[test] +fn validate_static_entry_ok() { + let yaml = "annotation:\n - type: static\n field: BUILD\n value: GRCh38\n"; + assert!(parse_and_validate(yaml).is_ok()); +} + +#[test] +fn validate_internal_entry_ok() { + let yaml = "annotation:\n - type: internal\n field: COPY\n fieldSource: REF\n"; + assert!(parse_and_validate(yaml).is_ok()); +} + +#[test] +fn validate_dirname_entry_ok() { + let yaml = "annotation:\n - type: dirname\n field: DIR\n"; + assert!(parse_and_validate(yaml).is_ok()); +} + +#[test] +fn validate_filename_entry_ok() { + let yaml = "annotation:\n - type: filename\n field: FILE\n"; + assert!(parse_and_validate(yaml).is_ok()); +} + +#[test] +fn validate_plugin_entry_ok() { + let yaml = "annotation:\n - type: plugin\n field: S\n plugin: pkg.mod\n function: fn_name\n"; + assert!(parse_and_validate(yaml).is_ok()); +} + +#[test] +fn validate_mapping_entry_ok() { + let yaml = r#" +annotation: + - type: mapping + field: GENE + fieldSource: ENSEMBL_ID + fileMapping: /data/map.tsv + fieldMapping: KEY + fieldValue: VAL +"#; + assert!(parse_and_validate(yaml).is_ok()); +} + + +// Test parse_and_validate, error cases +#[test] +fn validate_empty_annotation_list_is_error() { + let yaml = "annotation: []\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.severity == Severity::Error)); +} + +#[test] +fn validate_static_missing_value_is_error() { + let yaml = "annotation:\n - type: static\n field: BUILD\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.message.contains("`value` is required"))); } + +#[test] +#[allow(non_snake_case)] +fn validate_internal_missing_fieldSource_is_error() { + let yaml = "annotation:\n - type: internal\n field: X\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.message.contains("`fieldSource` is required"))); +} + +#[test] +fn validate_plugin_missing_function_is_error() { + let yaml = "annotation:\n - type: plugin\n field: S\n plugin: mod\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.message.contains("`function` is required"))); +} + +#[test] +fn validate_mapping_missing_keys_are_errors() { + let yaml = "annotation:\n - type: mapping\n field: G\n fileMapping: /f.tsv\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + let messages: Vec<&str> = errs.iter().map(|e| e.message.as_str()).collect(); + assert!(messages.iter().any(|m| m.contains("fieldMapping"))); + assert!(messages.iter().any(|m| m.contains("fieldValue"))); +} + +#[test] +fn validate_blank_field_is_error() { + let yaml = "annotation:\n - type: dirname\n field: ''\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.message.contains("`field` must not be blank"))); +} + +#[test] +fn validate_duplicate_field_names_are_errors() { + let yaml = r#" +annotation: + - type: dirname + field: DIR + - type: filename + field: DIR +"#; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.message.contains("duplicate `field` name"))); +} + +#[test] +fn validate_syntax_error_returned_as_parse_error() { + let yaml = "annotation:\n - type: [\nbad yaml"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs[0].message.contains("YAML parse error")); +} + +#[test] +fn validate_unused_fields_not_errors() { + // `fieldSource` is irrelevant for `static` + // must not block a config that is otherwise valid. + let yaml = r#" +annotation: + - type: static + field: BUILD + value: GRCh38 + fieldSource: SPURIOUS +"#; + // The config has an error because warnings from unused fields come back. + // Let's check severity explicitly. + assert!(parse_and_validate(yaml).is_ok()); +} \ No newline at end of file From 1c380215583cc07ebf10ac5a1b4e9d11ec33614b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Tue, 16 Jun 2026 14:00:38 +0200 Subject: [PATCH 08/19] feat: fix annotation type and add formatting test --- src/annotation/config.rs | 4 +- src/annotation/validator.rs | 37 +++-------- src/tests/test_annotation/test_config.rs | 9 ++- src/tests/test_annotation/test_validator.rs | 71 +++++++++++++++++---- 4 files changed, 77 insertions(+), 44 deletions(-) diff --git a/src/annotation/config.rs b/src/annotation/config.rs index 25ffe46..8d13d50 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -210,8 +210,8 @@ pub struct ExcludeEntry { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct AnnotationConfig { /// Glob pattern used to discover source files (e.g. `"**/*.vcf.gz"`). - #[serde(skip_serializing_if = "Option::is_none")] - pub pattern: Option, + #[serde(default)] + pub pattern: Vec, /// Recurse into sub-directories when scanning for source files. /// Default: `false`. diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 156069e..3d5beb5 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -87,6 +87,15 @@ pub fn parse_and_validate(yaml: &str) -> Result Vec { let mut diags: Vec = Vec::new(); + if config.pattern.is_empty() { + diags.push(err( + "", + "`pattern` is empty — at least one entry is required", + )); + // Remaining checks are per-entry; bail early. + return diags; + } + // Empty file check — this is technically valid YAML but not a valid annotation config. if config.annotation.is_empty() { diags.push(err( @@ -207,14 +216,6 @@ fn err(path: &str, message: &str) -> ValidationError { } } -//fn warn(path: &str, message: &str) -> ValidationError { - //ValidationError { - //message: message.into(), - //path: path.into(), - //severity: Severity::Warning, - //} -//} - /// Emit an error if `value` is `None` or blank. fn check_required_str( base: &str, @@ -229,22 +230,4 @@ fn check_required_str( } _ => {} } -} - -// Emit a warning for every key in `fields` whose boolean flag is `true`. -//fn unused_warnings( - //base: &str, - //_entry: &AnnotationEntry, - //fields: &[(&str, bool)], -//) -> Vec { - //fields - //.iter() - //.filter(|(_, present)| *present) - //.map(|(key, _)| { - //warn( - //&format!("{base}.{key}"), - //&format!("`{key}` is not used by this annotation type and will be ignored"), - //) - //}) - //.collect() -//} +} \ No newline at end of file diff --git a/src/tests/test_annotation/test_config.rs b/src/tests/test_annotation/test_config.rs index 15e680e..43041a7 100644 --- a/src/tests/test_annotation/test_config.rs +++ b/src/tests/test_annotation/test_config.rs @@ -118,14 +118,14 @@ fn format_lowercase_is_rejected() { // Annotation Defaults #[test] fn config_defaults_are_applied_when_absent() { - let yaml = "annotation:\n - type: dirname\n field: DIR\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: dirname\n field: DIR\n"; let cfg: AnnotationConfig = yaml_serde::from_str(yaml).unwrap(); assert_eq!(cfg.format, AnnotationFormat::Tsv); assert_eq!(cfg.delimiter, AnnotationDelimiter::T); assert!(!cfg.recursive); assert!(cfg.columns.is_empty()); assert!(cfg.exclude.is_empty()); - assert!(cfg.pattern.is_none()); + assert!(!cfg.pattern.is_empty()); } @@ -133,7 +133,8 @@ fn config_defaults_are_applied_when_absent() { #[test] fn full_config_round_trip() { let yaml = r#" -pattern: "**/*.vcf.gz" +pattern: + - "**/*.vcf.gz" recursive: true format: CSV delimiter: C @@ -167,6 +168,8 @@ exclude: value: FAIL "#; let cfg = round_trip::(yaml); + assert_eq!(cfg.pattern.len(), 1); + assert_eq!(cfg.pattern[0], "**/*.vcf.gz"); assert_eq!(cfg.annotation.len(), 6); assert_eq!(cfg.format, AnnotationFormat::Csv); assert_eq!(cfg.delimiter, AnnotationDelimiter::C); diff --git a/src/tests/test_annotation/test_validator.rs b/src/tests/test_annotation/test_validator.rs index eedcf01..c620a4f 100644 --- a/src/tests/test_annotation/test_validator.rs +++ b/src/tests/test_annotation/test_validator.rs @@ -4,37 +4,39 @@ use crate::annotation::validator::{parse_and_validate, Severity}; #[test] fn validate_static_entry_ok() { - let yaml = "annotation:\n - type: static\n field: BUILD\n value: GRCh38\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: static\n field: BUILD\n value: GRCh38\n"; assert!(parse_and_validate(yaml).is_ok()); } #[test] fn validate_internal_entry_ok() { - let yaml = "annotation:\n - type: internal\n field: COPY\n fieldSource: REF\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: internal\n field: COPY\n fieldSource: REF\n"; assert!(parse_and_validate(yaml).is_ok()); } #[test] fn validate_dirname_entry_ok() { - let yaml = "annotation:\n - type: dirname\n field: DIR\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: dirname\n field: DIR\n"; assert!(parse_and_validate(yaml).is_ok()); } #[test] fn validate_filename_entry_ok() { - let yaml = "annotation:\n - type: filename\n field: FILE\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: filename\n field: FILE\n"; assert!(parse_and_validate(yaml).is_ok()); } #[test] fn validate_plugin_entry_ok() { - let yaml = "annotation:\n - type: plugin\n field: S\n plugin: pkg.mod\n function: fn_name\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: plugin\n field: S\n plugin: pkg.mod\n function: fn_name\n"; assert!(parse_and_validate(yaml).is_ok()); } #[test] fn validate_mapping_entry_ok() { let yaml = r#" +pattern: + - "**/*.vcf.gz" annotation: - type: mapping field: GENE @@ -48,37 +50,51 @@ annotation: // Test parse_and_validate, error cases +#[test] +fn validate_empty_pattern_list_is_error() { + let yaml = "pattern: []\nannotation: []\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.severity == Severity::Error)); +} + +#[test] +fn validate_wrong_type_pattern_list_is_error() { + let yaml = "pattern: \"*.vcf\"\nannotation: []\n"; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs.iter().any(|e| e.severity == Severity::Error)); +} + #[test] fn validate_empty_annotation_list_is_error() { - let yaml = "annotation: []\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation: []\n"; let errs = parse_and_validate(yaml).unwrap_err(); assert!(errs.iter().any(|e| e.severity == Severity::Error)); } #[test] fn validate_static_missing_value_is_error() { - let yaml = "annotation:\n - type: static\n field: BUILD\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: static\n field: BUILD\n"; let errs = parse_and_validate(yaml).unwrap_err(); assert!(errs.iter().any(|e| e.message.contains("`value` is required"))); } #[test] #[allow(non_snake_case)] fn validate_internal_missing_fieldSource_is_error() { - let yaml = "annotation:\n - type: internal\n field: X\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: internal\n field: X\n"; let errs = parse_and_validate(yaml).unwrap_err(); assert!(errs.iter().any(|e| e.message.contains("`fieldSource` is required"))); } #[test] fn validate_plugin_missing_function_is_error() { - let yaml = "annotation:\n - type: plugin\n field: S\n plugin: mod\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: plugin\n field: S\n plugin: mod\n"; let errs = parse_and_validate(yaml).unwrap_err(); assert!(errs.iter().any(|e| e.message.contains("`function` is required"))); } #[test] fn validate_mapping_missing_keys_are_errors() { - let yaml = "annotation:\n - type: mapping\n field: G\n fileMapping: /f.tsv\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: mapping\n field: G\n fileMapping: /f.tsv\n"; let errs = parse_and_validate(yaml).unwrap_err(); let messages: Vec<&str> = errs.iter().map(|e| e.message.as_str()).collect(); assert!(messages.iter().any(|m| m.contains("fieldMapping"))); @@ -87,7 +103,7 @@ fn validate_mapping_missing_keys_are_errors() { #[test] fn validate_blank_field_is_error() { - let yaml = "annotation:\n - type: dirname\n field: ''\n"; + let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: dirname\n field: ''\n"; let errs = parse_and_validate(yaml).unwrap_err(); assert!(errs.iter().any(|e| e.message.contains("`field` must not be blank"))); } @@ -95,6 +111,8 @@ fn validate_blank_field_is_error() { #[test] fn validate_duplicate_field_names_are_errors() { let yaml = r#" +pattern: + - "**/*.vcf.gz" annotation: - type: dirname field: DIR @@ -115,8 +133,10 @@ fn validate_syntax_error_returned_as_parse_error() { #[test] fn validate_unused_fields_not_errors() { // `fieldSource` is irrelevant for `static` - // must not block a config that is otherwise valid. + // must not blo //]));ck a config that is otherwise valid. let yaml = r#" +pattern: + - "**/*.vcf.gz" annotation: - type: static field: BUILD @@ -126,4 +146,31 @@ annotation: // The config has an error because warnings from unused fields come back. // Let's check severity explicitly. assert!(parse_and_validate(yaml).is_ok()); +} + + +#[test] +fn validate_formatting_errors_returned_as_parse_errors() { + let yaml = r#" + pattern: + - "**/*.vcf.gz" +annotation: + - type: dirname + field: DIR +"#; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs[0].message.contains("YAML parse error")); +} + +#[test] +fn validate_formatting_errors_returned_as_parse_errors_annotation_entry() { + let yaml = r#" +pattern: + - "**/*.vcf.gz" +annotation: + - type: dirname + field: DIR +"#; + let errs = parse_and_validate(yaml).unwrap_err(); + assert!(errs[1].message.contains("YAML parse error")); } \ No newline at end of file From a5d8e408e3f2759b214cfc250ca966204b12df05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Tue, 16 Jun 2026 14:17:21 +0200 Subject: [PATCH 09/19] fix: test for fromatting --- src/annotation/validator.rs | 11 +---------- src/tests/test_annotation/test_validator.rs | 4 ++-- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 3d5beb5..06b978d 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -133,19 +133,10 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { entry.field_source.as_deref(), &mut diags, ); - //diags.extend(unused_warnings(&base, entry, &[ - // ("value", entry.value.is_some()), - // ("function", entry.function.is_some()), - //])); } // Dirname / Filenanme - AnnotationType::Dirname | AnnotationType::Filename => { - //diags.extend(unused_warnings(&base, entry, &[ - // ("function", entry.function.is_some()), - // ("regex", entry.regex.is_some()), - //])); - } + AnnotationType::Dirname | AnnotationType::Filename => { } // Plugin AnnotationType::Plugin => { diff --git a/src/tests/test_annotation/test_validator.rs b/src/tests/test_annotation/test_validator.rs index c620a4f..5b594d9 100644 --- a/src/tests/test_annotation/test_validator.rs +++ b/src/tests/test_annotation/test_validator.rs @@ -168,9 +168,9 @@ fn validate_formatting_errors_returned_as_parse_errors_annotation_entry() { pattern: - "**/*.vcf.gz" annotation: - - type: dirname + - type: dirname field: DIR "#; let errs = parse_and_validate(yaml).unwrap_err(); - assert!(errs[1].message.contains("YAML parse error")); + assert!(errs[0].message.contains("YAML parse error")); } \ No newline at end of file From aa4919f1d046d8d8480961f5858cc9d04f27c1fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Tue, 16 Jun 2026 14:23:08 +0200 Subject: [PATCH 10/19] fix: makefile for fmt --- Makefile | 14 ++-- src/annotation/validator.rs | 4 +- src/lib.rs | 1 - src/tests/mod.rs | 2 +- src/tests/test_annotation/mod.rs | 2 +- src/tests/test_annotation/test_config.rs | 74 +++++++++++++-------- src/tests/test_annotation/test_validator.rs | 32 ++++++--- 7 files changed, 83 insertions(+), 46 deletions(-) diff --git a/Makefile b/Makefile index 675906c..09dff5e 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,16 @@ release: test: PYO3_PYTHON=$$(uv python find) cargo test +fmt: + PYO3_PYTHON=$$(uv python find) cargo fmt + lint: - PYO3_PYTHON=$$(uv python find) cargo clippy + PYO3_PYTHON=$$(uv python find) cargo clippy -- -D warnings -format: - PYO3_PYTHON=$$(uv python find) cargo fmt --check +fix: + PYO3_PYTHON=$$(uv python find) cargo clippy --fix --allow-dirty --allow-staged + PYO3_PYTHON=$$(uv python find) cargo fmt -check: lint format \ No newline at end of file +check: + PYO3_PYTHON=$$(uv python find) cargo fmt --check + PYO3_PYTHON=$$(uv python find) cargo clippy -- -D warnings \ No newline at end of file diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 06b978d..82a9964 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -136,7 +136,7 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { } // Dirname / Filenanme - AnnotationType::Dirname | AnnotationType::Filename => { } + AnnotationType::Dirname | AnnotationType::Filename => {} // Plugin AnnotationType::Plugin => { @@ -221,4 +221,4 @@ fn check_required_str( } _ => {} } -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index a98de6f..20da8a1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,6 @@ pub mod annotation; //mod py_bindings; pub mod error; - #[cfg(test)] mod tests; diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 99a7d0c..21e31c2 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1 +1 @@ -pub mod test_annotation; \ No newline at end of file +pub mod test_annotation; diff --git a/src/tests/test_annotation/mod.rs b/src/tests/test_annotation/mod.rs index 050849a..c73b855 100644 --- a/src/tests/test_annotation/mod.rs +++ b/src/tests/test_annotation/mod.rs @@ -1,2 +1,2 @@ pub mod test_config; -pub mod test_validator; \ No newline at end of file +pub mod test_validator; diff --git a/src/tests/test_annotation/test_config.rs b/src/tests/test_annotation/test_config.rs index 43041a7..0f88e8d 100644 --- a/src/tests/test_annotation/test_config.rs +++ b/src/tests/test_annotation/test_config.rs @@ -1,9 +1,7 @@ - -use crate::annotation::{AnnotationType, AnnotationDelimiter, AnnotationFormat, AnnotationConfig}; +use crate::annotation::{AnnotationConfig, AnnotationDelimiter, AnnotationFormat, AnnotationType}; use serde::Serialize; use std::fmt; - /// Deserialise from YAML, re-serialise, then deserialise again. /// Asserts the three values are equal and returns the first parsed value. fn round_trip(yaml: &str) -> T @@ -13,7 +11,10 @@ where let first: T = yaml_serde::from_str(yaml).expect("initial parse failed"); let reserialized = yaml_serde::to_string(&first).expect("serialise failed"); let second: T = yaml_serde::from_str(&reserialized).expect("re-parse failed"); - assert_eq!(first, second, "round-trip mismatch:\noriginal = {first:?}\nre-parsed = {second:?}"); + assert_eq!( + first, second, + "round-trip mismatch:\noriginal = {first:?}\nre-parsed = {second:?}" + ); first } @@ -22,32 +23,50 @@ where // AnnotationType, round-trip tests #[test] fn annotation_type_static_round_trip() { - assert_eq!(round_trip::("static"), AnnotationType::Static); + assert_eq!( + round_trip::("static"), + AnnotationType::Static + ); } #[test] fn annotation_type_internal_round_trip() { - assert_eq!(round_trip::("internal"), AnnotationType::Internal); + assert_eq!( + round_trip::("internal"), + AnnotationType::Internal + ); } #[test] fn annotation_type_dirname_round_trip() { - assert_eq!(round_trip::("dirname"), AnnotationType::Dirname); + assert_eq!( + round_trip::("dirname"), + AnnotationType::Dirname + ); } #[test] fn annotation_type_filename_round_trip() { - assert_eq!(round_trip::("filename"), AnnotationType::Filename); + assert_eq!( + round_trip::("filename"), + AnnotationType::Filename + ); } #[test] fn annotation_type_plugin_round_trip() { - assert_eq!(round_trip::("plugin"), AnnotationType::Plugin); + assert_eq!( + round_trip::("plugin"), + AnnotationType::Plugin + ); } #[test] fn annotation_type_mapping_round_trip() { - assert_eq!(round_trip::("mapping"), AnnotationType::Mapping); + assert_eq!( + round_trip::("mapping"), + AnnotationType::Mapping + ); } #[test] @@ -60,12 +79,12 @@ fn annotation_type_unknown_variant_is_error() { fn annotation_type_display_matches_yaml_key() { // Display must produce the exact lowercase YAML key serde expects. let cases = [ - (AnnotationType::Static, "static"), + (AnnotationType::Static, "static"), (AnnotationType::Internal, "internal"), - (AnnotationType::Dirname, "dirname"), + (AnnotationType::Dirname, "dirname"), (AnnotationType::Filename, "filename"), - (AnnotationType::Plugin, "plugin"), - (AnnotationType::Mapping, "mapping"), + (AnnotationType::Plugin, "plugin"), + (AnnotationType::Mapping, "mapping"), ]; for (variant, expected) in cases { assert_eq!(variant.to_string(), expected); @@ -120,7 +139,7 @@ fn format_lowercase_is_rejected() { fn config_defaults_are_applied_when_absent() { let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: dirname\n field: DIR\n"; let cfg: AnnotationConfig = yaml_serde::from_str(yaml).unwrap(); - assert_eq!(cfg.format, AnnotationFormat::Tsv); + assert_eq!(cfg.format, AnnotationFormat::Tsv); assert_eq!(cfg.delimiter, AnnotationDelimiter::T); assert!(!cfg.recursive); assert!(cfg.columns.is_empty()); @@ -128,11 +147,10 @@ fn config_defaults_are_applied_when_absent() { assert!(!cfg.pattern.is_empty()); } - // Full config round-trip test #[test] - fn full_config_round_trip() { - let yaml = r#" +fn full_config_round_trip() { + let yaml = r#" pattern: - "**/*.vcf.gz" recursive: true @@ -167,13 +185,13 @@ exclude: - field: FILTER value: FAIL "#; - let cfg = round_trip::(yaml); - assert_eq!(cfg.pattern.len(), 1); - assert_eq!(cfg.pattern[0], "**/*.vcf.gz"); - assert_eq!(cfg.annotation.len(), 6); - assert_eq!(cfg.format, AnnotationFormat::Csv); - assert_eq!(cfg.delimiter, AnnotationDelimiter::C); - assert_eq!(cfg.columns, vec!["CHROM", "POS", "REF", "ALT"]); - assert!(cfg.recursive); - assert_eq!(cfg.exclude.len(), 1); - } \ No newline at end of file + let cfg = round_trip::(yaml); + assert_eq!(cfg.pattern.len(), 1); + assert_eq!(cfg.pattern[0], "**/*.vcf.gz"); + assert_eq!(cfg.annotation.len(), 6); + assert_eq!(cfg.format, AnnotationFormat::Csv); + assert_eq!(cfg.delimiter, AnnotationDelimiter::C); + assert_eq!(cfg.columns, vec!["CHROM", "POS", "REF", "ALT"]); + assert!(cfg.recursive); + assert_eq!(cfg.exclude.len(), 1); +} diff --git a/src/tests/test_annotation/test_validator.rs b/src/tests/test_annotation/test_validator.rs index 5b594d9..996675c 100644 --- a/src/tests/test_annotation/test_validator.rs +++ b/src/tests/test_annotation/test_validator.rs @@ -1,4 +1,4 @@ -use crate::annotation::validator::{parse_and_validate, Severity}; +use crate::annotation::validator::{Severity, parse_and_validate}; // Test parse_and_validate, non-error cases @@ -48,7 +48,6 @@ annotation: assert!(parse_and_validate(yaml).is_ok()); } - // Test parse_and_validate, error cases #[test] fn validate_empty_pattern_list_is_error() { @@ -75,21 +74,31 @@ fn validate_empty_annotation_list_is_error() { fn validate_static_missing_value_is_error() { let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: static\n field: BUILD\n"; let errs = parse_and_validate(yaml).unwrap_err(); - assert!(errs.iter().any(|e| e.message.contains("`value` is required"))); } + assert!( + errs.iter() + .any(|e| e.message.contains("`value` is required")) + ); +} #[test] #[allow(non_snake_case)] fn validate_internal_missing_fieldSource_is_error() { let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: internal\n field: X\n"; let errs = parse_and_validate(yaml).unwrap_err(); - assert!(errs.iter().any(|e| e.message.contains("`fieldSource` is required"))); + assert!( + errs.iter() + .any(|e| e.message.contains("`fieldSource` is required")) + ); } #[test] fn validate_plugin_missing_function_is_error() { let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: plugin\n field: S\n plugin: mod\n"; let errs = parse_and_validate(yaml).unwrap_err(); - assert!(errs.iter().any(|e| e.message.contains("`function` is required"))); + assert!( + errs.iter() + .any(|e| e.message.contains("`function` is required")) + ); } #[test] @@ -105,7 +114,10 @@ fn validate_mapping_missing_keys_are_errors() { fn validate_blank_field_is_error() { let yaml = "pattern:\n - \"**/*.vcf.gz\"\nannotation:\n - type: dirname\n field: ''\n"; let errs = parse_and_validate(yaml).unwrap_err(); - assert!(errs.iter().any(|e| e.message.contains("`field` must not be blank"))); + assert!( + errs.iter() + .any(|e| e.message.contains("`field` must not be blank")) + ); } #[test] @@ -120,7 +132,10 @@ annotation: field: DIR "#; let errs = parse_and_validate(yaml).unwrap_err(); - assert!(errs.iter().any(|e| e.message.contains("duplicate `field` name"))); + assert!( + errs.iter() + .any(|e| e.message.contains("duplicate `field` name")) + ); } #[test] @@ -148,7 +163,6 @@ annotation: assert!(parse_and_validate(yaml).is_ok()); } - #[test] fn validate_formatting_errors_returned_as_parse_errors() { let yaml = r#" @@ -173,4 +187,4 @@ annotation: "#; let errs = parse_and_validate(yaml).unwrap_err(); assert!(errs[0].message.contains("YAML parse error")); -} \ No newline at end of file +} From 68eefa7f308d4e4a227c7e4f64163f050ea8d657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:35:04 +0200 Subject: [PATCH 11/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/tests/test_annotation/test_validator.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tests/test_annotation/test_validator.rs b/src/tests/test_annotation/test_validator.rs index 996675c..0abc2fb 100644 --- a/src/tests/test_annotation/test_validator.rs +++ b/src/tests/test_annotation/test_validator.rs @@ -147,8 +147,8 @@ fn validate_syntax_error_returned_as_parse_error() { #[test] fn validate_unused_fields_not_errors() { - // `fieldSource` is irrelevant for `static` - // must not blo //]));ck a config that is otherwise valid. + // `fieldSource` is irrelevant for `static` entries, but it should not make an + // otherwise-valid config fail validation. let yaml = r#" pattern: - "**/*.vcf.gz" @@ -158,8 +158,6 @@ annotation: value: GRCh38 fieldSource: SPURIOUS "#; - // The config has an error because warnings from unused fields come back. - // Let's check severity explicitly. assert!(parse_and_validate(yaml).is_ok()); } From abff3e7d6aa8f1ed38ec74e631e89e1ecdb218d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:35:32 +0200 Subject: [PATCH 12/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/annotation/validator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 82a9964..d18487a 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -43,7 +43,7 @@ impl fmt::Display for ValidationError { /// /// Validation runs in three passes: /// -/// 1. **Syntax** — `serde_yaml` rejects malformed YAML and unknown enum +/// 1. **Syntax** — `yaml_serde` rejects malformed YAML and unknown enum /// variants, reporting the exact line/column of the problem. /// 2. **Structural** — missing or blank required fields (`field`, type-specific /// keys) and an empty `annotation` list are flagged as errors. From b9bee498c19201265275749416f09fc268fb046f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:36:57 +0200 Subject: [PATCH 13/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/annotation/validator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index d18487a..75bff80 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -61,7 +61,7 @@ impl fmt::Display for ValidationError { pub fn parse_and_validate(yaml: &str) -> Result> { // Pass 1 — syntax + structural (serde) let config: AnnotationConfig = yaml_serde::from_str(yaml).map_err(|e| { - // serde_yaml errors include line/column information in their Display. + // yaml_serde errors include line/column information in their Display. vec![ValidationError { message: format!("YAML parse error — {e}"), path: "".into(), From a195273e1b497d917c1b70f86d461e213ac57b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:38:59 +0200 Subject: [PATCH 14/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/annotation/validator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 75bff80..55631b9 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -135,7 +135,7 @@ pub fn validate_config(config: &AnnotationConfig) -> Vec { ); } - // Dirname / Filenanme + // Dirname / Filename AnnotationType::Dirname | AnnotationType::Filename => {} // Plugin From 73d0695cf1b5afea2c0d0e1436798d42754b8450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:39:24 +0200 Subject: [PATCH 15/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/annotation/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/annotation/config.rs b/src/annotation/config.rs index 8d13d50..2cabf1a 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -14,7 +14,7 @@ pub const ANNOTATION_EXTENSION: &str = "yaml"; pub const DEFAULT_COLUMNS: &[&str] = &[]; pub const DEFAULT_RECURSIVE: bool = false; -/// AnnotatioType +/// AnnotationType #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] From 006f649e5e649981a1e3875050bfdf2ee610c5d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:50:15 +0200 Subject: [PATCH 16/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/annotation/config.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/annotation/config.rs b/src/annotation/config.rs index 2cabf1a..60a593a 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -68,6 +68,7 @@ pub enum AnnotationType { /// ```yaml /// - type: mapping /// field: GENE_NAME + /// fieldSource: ENSEMBL_ID /// fileMapping: /data/gene_map.tsv /// fieldMapping: ENSEMBL_ID /// fieldValue: GENE_SYMBOL From ee1fb3cde16bbb00b80f9d1669eecb5204919a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:50:43 +0200 Subject: [PATCH 17/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/annotation/config.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/annotation/config.rs b/src/annotation/config.rs index 60a593a..13c9b20 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -113,7 +113,11 @@ impl AnnotationDelimiter { impl fmt::Display for AnnotationDelimiter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_char()) + let s = match self { + AnnotationDelimiter::T => "T", + AnnotationDelimiter::C => "C", + }; + write!(f, "{s}") } } From 20da218a709d19563ecde88220f40cdb5cb558f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= <10314744+dmartmillan@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:59:17 +0200 Subject: [PATCH 18/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/annotation/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/annotation/config.rs b/src/annotation/config.rs index 13c9b20..c9c92d1 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -174,7 +174,7 @@ pub struct AnnotationEntry { #[serde(rename = "fieldSource", skip_serializing_if = "Option::is_none")] pub field_source: Option, - /// *(internal, filename, dirname)* Dotted Python module path of the plugin (e.g. `my_pkg.scoring`). + /// *(plugin)* Dotted Python module path of the plugin (e.g. `my_pkg.scoring`). #[serde(skip_serializing_if = "Option::is_none")] pub plugin: Option, From 163083f7a53a2e1d0de7c4de2bea49e8b2b3ac83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mart=C3=ADnez=20Mill=C3=A1n?= Date: Tue, 16 Jun 2026 15:04:33 +0200 Subject: [PATCH 19/19] fix: doc fixes --- src/annotation/config.rs | 2 +- src/annotation/validator.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/annotation/config.rs b/src/annotation/config.rs index 8d13d50..864c654 100644 --- a/src/annotation/config.rs +++ b/src/annotation/config.rs @@ -160,7 +160,7 @@ pub struct AnnotationEntry { /// Output column name that will receive the derived value (required, non-blank). pub field: String, - /// *(static, internal)* Literal value to assign. Accepts any YAML scalar or + /// *(static, optional - internal)* Literal value to assign. Accepts any YAML scalar or /// structure (`string`, `int`, `float`, `bool`, `null`). #[serde(skip_serializing_if = "Option::is_none")] pub value: Option, diff --git a/src/annotation/validator.rs b/src/annotation/validator.rs index 82a9964..3fedea3 100644 --- a/src/annotation/validator.rs +++ b/src/annotation/validator.rs @@ -48,8 +48,7 @@ impl fmt::Display for ValidationError { /// 2. **Structural** — missing or blank required fields (`field`, type-specific /// keys) and an empty `annotation` list are flagged as errors. /// 3. **Semantic** — keys that exist but are irrelevant for the chosen -/// annotation type are reported as warnings so users can clean up their -/// YAML without being blocked. +/// annotation type are not reported, and don't cause and error. /// /// # Returns ///