nf-core · camlloyd · Apr 15, 2026 · Mar 30, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/modules/nf-core/bio2zarr/vcfpartition/environment.yml b/modules/nf-core/bio2zarr/vcfpartition/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::bio2zarr=0.1.8"  # exact pin; same 0.1.8 release as the `bio2zarr:0.1.8` container tag in main.nf
diff --git a/modules/nf-core/bio2zarr/vcfpartition/main.nf b/modules/nf-core/bio2zarr/vcfpartition/main.nf
@@ -0,0 +1,35 @@
+process BIO2ZARR_VCFPARTITION { // Runs `vcfpartition` on one VCF/BCF and redirects its stdout to <prefix>.tsv
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/77/7713d869c8c8259c10701c95fc105bad8bcfbd6735de6941a47c9e6e26e9bb2f/data':
+        'community.wave.seqera.io/library/bio2zarr:0.1.8--c2c92dd3f64fb0f9' }" // https blob only for singularity without ext.singularity_pull_docker_container; otherwise the Wave image tag
+
+    input:
+    tuple val(meta), path(vcf), path(index) // index is declared as an input but never named in the command below
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: partitions // picks up the <prefix>.tsv written by the script or stub
+    tuple val("${task.process}"), val('vcfpartition'), eval("vcfpartition --version | sed 's/.* //'"), topic: versions, emit: versions_vcfpartition // sed keeps only the text after the last space of the --version output
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line options from ext.args; empty string when unset
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to meta.id
+    """
+    vcfpartition \\
+        ${args} \\
+        ${vcf} \\
+        > ${prefix}.tsv
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}" // same basename rule as the script block; ext.args is not read here
+    """
+    touch ${prefix}.tsv
+    """
+}
diff --git a/modules/nf-core/bio2zarr/vcfpartition/meta.yml b/modules/nf-core/bio2zarr/vcfpartition/meta.yml
@@ -0,0 +1,78 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "bio2zarr_vcfpartition"
+description: Outputs a set of region strings that partition indexed VCF/BCF files
+  for parallel processing.
+keywords:
+  - vcf
+  - bcf
+  - partition
+  - regions
+  - parallel
+  - genomics
+tools:
+  - "bio2zarr":
+      description: "Convert bioinformatics data to Zarr"
+      homepage: "https://sgkit-dev.github.io/bio2zarr/"
+      documentation: "https://sgkit-dev.github.io/bio2zarr"
+      tool_dev_url: "https://github.com/sgkit-dev/bio2zarr"
+      doi: "10.1101/2024.06.11.598241"
+      licence:
+        - "Apache-2.0"
+      identifier: biotools:bio2zarr
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - vcf:
+        type: file
+        description: Indexed VCF/BCF file to partition
+        pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" # NOTE(review): "bcf.gz" is an unusual extension - confirm it is intended
+        ontologies:
+          - edam: "http://edamontology.org/format_3016" # VCF
+          - edam: "http://edamontology.org/format_3020" # BCF
+    - index: # corresponds to path(index) in main.nf
+        type: file
+        description: Index for the VCF/BCF file
+        pattern: "*.{tbi,csi}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3700" # Tabix index file format
+output:
+  partitions: # emit name used in main.nf
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.tsv": # same glob as path("*.tsv") in main.nf
+          type: file
+          description: The output tab-delimited region strings and the file path
+          pattern: "*.{tsv}"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475" # TSV
+  versions_vcfpartition: # emit name of the version tuple in main.nf
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - vcfpartition:
+          type: string
+          description: The name of the tool
+      - vcfpartition --version | sed 's/.* //':
+          type: eval # the key above is the same command as the eval(...) output in main.nf
+          description: The expression to obtain the version of the tool
+topics:
+  versions: # topic the version tuple is sent to (`topic: versions` in main.nf)
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - vcfpartition:
+          type: string
+          description: The name of the tool
+      - vcfpartition --version | sed 's/.* //':
+          type: eval # the key above is the same command as the eval(...) output in main.nf
+          description: The expression to obtain the version of the tool
+authors:
+  - "@camlloyd"
+maintainers:
+  - "@camlloyd"
diff --git a/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test b/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test
@@ -0,0 +1,139 @@
+nextflow_process { // nf-test suite for BIO2ZARR_VCFPARTITION: two real runs and a -stub run for each
+
+    name "Test Process BIO2ZARR_VCFPARTITION"
+    script "../main.nf"
+    process "BIO2ZARR_VCFPARTITION"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "bio2zarr"
+    tag "bio2zarr/vcfpartition"
+
+    test("homo_sapiens - vcf - partitions") {
+
+        config "./nextflow.config" // NOTE(review): this config is not visible here; presumably it forwards params.module_args to the process - confirm
+
+        when {
+            params {
+                module_args = '--num-partitions 2' // This is a target. Do not assume that the number of partitions you ask for is what you get!
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(), // number of lines in the emitted TSV
+                    process.out.findAll { key, val -> key.startsWith("versions") } // only the outputs whose name starts with "versions"
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - vcf - partitions - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--num-partitions 2' // NOTE(review): no config is loaded in this test and the stub in main.nf does not read ext.args, so this appears to have no effect - confirm
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(), // the stub only runs `touch`, so this is expected to be 0
+                    process.out.findAll { key, val -> key.startsWith("versions") } // only the outputs whose name starts with "versions"
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - vcf - size") {
+
+        config "./nextflow.config" // NOTE(review): this config is not visible here; presumably it forwards params.module_args to the process - confirm
+
+        when {
+            params {
+                module_args = '--partition-size 10KB' // This is a target. Do not assume that the partition size you ask for is what you get!
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(), // number of lines in the emitted TSV
+                    process.out.findAll { key, val -> key.startsWith("versions") } // only the outputs whose name starts with "versions"
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - vcf - size - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--partition-size 10KB' // NOTE(review): no config is loaded in this test and the stub in main.nf does not read ext.args, so this appears to have no effect - confirm
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(), // the stub only runs `touch`, so this is expected to be 0
+                    process.out.findAll { key, val -> key.startsWith("versions") } // only the outputs whose name starts with "versions"
+                ).match() }
+            )
+        }
+
+    }
+}