diff --git a/modules/nf-core/bio2zarr/vcfpartition/environment.yml b/modules/nf-core/bio2zarr/vcfpartition/environment.yml
new file mode 100644
index 00000000000..843d05308a3
--- /dev/null
+++ b/modules/nf-core/bio2zarr/vcfpartition/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::bio2zarr=0.1.8"
diff --git a/modules/nf-core/bio2zarr/vcfpartition/main.nf b/modules/nf-core/bio2zarr/vcfpartition/main.nf
new file mode 100644
index 00000000000..e18d582c0fc
--- /dev/null
+++ b/modules/nf-core/bio2zarr/vcfpartition/main.nf
@@ -0,0 +1,36 @@
+process BIO2ZARR_VCFPARTITION {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/77/7713d869c8c8259c10701c95fc105bad8bcfbd6735de6941a47c9e6e26e9bb2f/data':
+        'community.wave.seqera.io/library/bio2zarr:0.1.8--c2c92dd3f64fb0f9' }"
+
+    input:
+    tuple val(meta), path(vcf), path(index)
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: partitions
+    tuple val("${task.process}"), val('vcfpartition'), eval("vcfpartition --version | sed 's/.* //'"), topic: versions, emit: versions_vcfpartition
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // `index` is not referenced in the command below; as a declared input it is still staged into the task directory together with `vcf`
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    vcfpartition \\
+        ${args} \\
+        ${vcf} \\
+        > ${prefix}.tsv
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.tsv
+    """
+}
diff --git a/modules/nf-core/bio2zarr/vcfpartition/meta.yml b/modules/nf-core/bio2zarr/vcfpartition/meta.yml
new file mode 100644
index 00000000000..4a052602458
--- /dev/null
+++ b/modules/nf-core/bio2zarr/vcfpartition/meta.yml
@@ -0,0 +1,78 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "bio2zarr_vcfpartition"
+description: Outputs a set of region strings that partition indexed VCF/BCF files
+  for parallel processing.
+keywords:
+  - vcf
+  - bcf
+  - partition
+  - regions
+  - parallel
+  - genomics
+tools:
+  - "bio2zarr":
+      description: "Convert bioinformatics data to Zarr"
+      homepage: "https://sgkit-dev.github.io/bio2zarr/"
+      documentation: "https://sgkit-dev.github.io/bio2zarr"
+      tool_dev_url: "https://github.com/sgkit-dev/bio2zarr"
+      doi: "10.1101/2024.06.11.598241"
+      licence:
+        - "Apache-2.0"
+      identifier: biotools:bio2zarr
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - vcf:
+        type: file
+        description: Indexed VCF/BCF file to partition
+        pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016" # VCF
+          - edam: "http://edamontology.org/format_3020" # BCF
+    - index:
+        type: file
+        description: Index for the VCF/BCF file
+        pattern: "*.{tbi,csi}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3700" # Tabix index file format
+output:
+  partitions:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.tsv":
+          type: file
+          description: The output tab-delimited region strings and the file path
+          pattern: "*.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475" # TSV
+  versions_vcfpartition:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - vcfpartition:
+          type: string
+          description: The name of the tool
+      - vcfpartition --version | sed 's/.* //':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - vcfpartition:
+          type: string
+          description: The name of the tool
+      - vcfpartition --version | sed 's/.* //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@camlloyd"
+maintainers:
+  - "@camlloyd"
diff --git a/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test b/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test
new file mode 100644
index 00000000000..c0bd391113b
--- /dev/null
+++ b/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test
@@ -0,0 +1,139 @@
+nextflow_process {
+
+    name "Test Process BIO2ZARR_VCFPARTITION"
+    script "../main.nf"
+    process "BIO2ZARR_VCFPARTITION"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "bio2zarr"
+    tag "bio2zarr/vcfpartition"
+
+    test("homo_sapiens - vcf - partitions") {
+
+        config "./nextflow.config"
+
+        when {
+            params {
+                module_args = '--num-partitions 2' // This is a target. Do not assume that the number of partitions you ask for is what you get!
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(),
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - vcf - partitions - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--num-partitions 2'
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(),
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - vcf - size") {
+
+        config "./nextflow.config"
+
+        when {
+            params {
+                module_args = '--partition-size 10KB' // This is a target size. Do not assume that the partitions you get match the size you ask for!
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(),
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                ).match() }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - vcf - size - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--partition-size 10KB'
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { snapshot(
+                    sanitizeOutput(process.out),
+                    file(process.out.partitions[0][1]).readLines().size(),
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                ).match() }
+            )
+        }
+
+    }
+}
diff --git a/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test.snap b/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test.snap
new file mode 100644
index 00000000000..422e8698fb4
--- /dev/null
+++ b/modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test.snap
@@ -0,0 +1,146 @@
+{
+    "homo_sapiens - vcf - partitions": {
+        "content": [
+            {
+                "partitions": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,4ea22aa1d2390ca6487dbabdc298898f"
+                    ]
+                ],
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            },
+            2,
+            {
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-15T10:10:36.18376351",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.10.4"
+        }
+    },
+    "homo_sapiens - vcf - size": {
+        "content": [
+            {
+                "partitions": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,0af30dd3982f3666e2298a76eaa5aae8"
+                    ]
+                ],
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            },
+            3,
+            {
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-15T10:10:52.744098158",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.10.4"
+        }
+    },
+    "homo_sapiens - vcf - size - stub": {
+        "content": [
+            {
+                "partitions": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            },
+            0,
+            {
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-15T09:45:36.377674305",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.10.4"
+        }
+    },
+    "homo_sapiens - vcf - partitions - stub": {
+        "content": [
+            {
+                "partitions": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            },
+            0,
+            {
+                "versions_vcfpartition": [
+                    [
+                        "BIO2ZARR_VCFPARTITION",
+                        "vcfpartition",
+                        "0.1.8"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-15T09:45:21.273573681",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bio2zarr/vcfpartition/tests/nextflow.config b/modules/nf-core/bio2zarr/vcfpartition/tests/nextflow.config
new file mode 100644
index 00000000000..1347b9c4d13
--- /dev/null
+++ b/modules/nf-core/bio2zarr/vcfpartition/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'BIO2ZARR_VCFPARTITION' {
+        ext.args = params.module_args
+    }
+}