Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/bio2zarr/vcfpartition/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the bio2zarr/vcfpartition module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Exact version pin, resolved explicitly from the bioconda channel.
  - "bioconda::bio2zarr=0.1.8"
35 changes: 35 additions & 0 deletions modules/nf-core/bio2zarr/vcfpartition/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Runs `vcfpartition` on a single VCF/BCF file and stores whatever the tool
// prints on stdout in `<prefix>.tsv`.
//
// Inputs : [ meta, vcf, index ] - `index` is staged into the work directory
//          but is not passed on the command line.
//          NOTE(review): presumably the tool locates the index next to the
//          VCF by file name - confirm.
// Outputs: partitions           - [ meta, <prefix>.tsv ]
//          versions_vcfpartition - [ process name, 'vcfpartition', version ],
//                                  also published on the `versions` topic.
process BIO2ZARR_VCFPARTITION {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    // Singularity uses the HTTPS image blob unless
    // `ext.singularity_pull_docker_container` is set; every other container
    // engine uses the registry image name.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/77/7713d869c8c8259c10701c95fc105bad8bcfbd6735de6941a47c9e6e26e9bb2f/data':
        'community.wave.seqera.io/library/bio2zarr:0.1.8--c2c92dd3f64fb0f9' }"

    input:
    tuple val(meta), path(vcf), path(index)

    output:
    tuple val(meta), path("*.tsv"), emit: partitions
    // The version is evaluated inside the task: the `sed` expression keeps only
    // the text after the last space of the `--version` output.
    tuple val("${task.process}"), val('vcfpartition'), eval("vcfpartition --version | sed 's/.* //'"), topic: versions, emit: versions_vcfpartition

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra CLI options and the output basename are both configurable via `ext`.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    vcfpartition \\
        ${args} \\
        ${vcf} \\
        > ${prefix}.tsv
    """

    stub:
    // Creates an empty placeholder with the same name the real run would produce.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.tsv
    """
}
78 changes: 78 additions & 0 deletions modules/nf-core/bio2zarr/vcfpartition/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for BIO2ZARR_VCFPARTITION. The input/output entries below
# mirror the channel structure declared in main.nf: one inner list per tuple,
# one item per tuple element.
name: "bio2zarr_vcfpartition"
description: >-
  Outputs a set of region strings that partition indexed VCF/BCF files
  for parallel processing.
keywords:
  - vcf
  - bcf
  - partition
  - regions
  - parallel
  - genomics
tools:
  - "bio2zarr":
      description: "Convert bioinformatics data to Zarr"
      homepage: "https://sgkit-dev.github.io/bio2zarr/"
      documentation: "https://sgkit-dev.github.io/bio2zarr"
      tool_dev_url: "https://github.com/sgkit-dev/bio2zarr"
      doi: "10.1101/2024.06.11.598241"
      licence:
        - "Apache-2.0"
      identifier: biotools:bio2zarr
input:
  # Single input tuple: [ meta, vcf, index ]
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - vcf:
        type: file
        description: Indexed VCF/BCF file to partition
        pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_3016" # VCF
          - edam: "http://edamontology.org/format_3020" # BCF
    - index:
        type: file
        description: Index for the VCF/BCF file
        pattern: "*.{tbi,csi}"
        ontologies:
          - edam: "http://edamontology.org/format_3700" # Tabix index file format
output:
  partitions:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "*.tsv":
          type: file
          description: The output tab-delimited region strings and the file path
          # Same glob as the output key above and `path("*.tsv")` in main.nf.
          pattern: "*.tsv"
          ontologies:
            - edam: "http://edamontology.org/format_3475" # TSV
  versions_vcfpartition:
    - - ${task.process}:
          type: string
          description: The name of the process
      - vcfpartition:
          type: string
          description: The name of the tool
      - vcfpartition --version | sed 's/.* //':
          type: eval
          description: The expression to obtain the version of the tool
topics:
  # Same tuple as `versions_vcfpartition`, as published on the `versions` topic.
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
      - vcfpartition:
          type: string
          description: The name of the tool
      - vcfpartition --version | sed 's/.* //':
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@camlloyd"
maintainers:
  - "@camlloyd"
139 changes: 139 additions & 0 deletions modules/nf-core/bio2zarr/vcfpartition/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// nf-test suite for BIO2ZARR_VCFPARTITION.
//
// Two scenarios (`--num-partitions` and `--partition-size`) are each run for
// real and in `-stub` mode. Every test snapshots the sanitized process outputs,
// the number of lines in the emitted partitions file, and all output channels
// whose name starts with "versions".
nextflow_process {

    name "Test Process BIO2ZARR_VCFPARTITION"
    script "../main.nf"
    process "BIO2ZARR_VCFPARTITION"

    tag "modules"
    tag "modules_nfcore"
    tag "bio2zarr"
    tag "bio2zarr/vcfpartition"

    test("homo_sapiens - vcf - partitions") {

        // NOTE(review): nextflow.config is not visible here; presumably it maps
        // params.module_args onto the process ext.args - confirm.
        config "./nextflow.config"

        when {
            params {
                // This is a target. Do not assume that the number of partitions you ask for is what you get!
                module_args = '--num-partitions 2'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { snapshot(
                    sanitizeOutput(process.out),
                    // Line count of the emitted partitions file.
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                ).match() }
            )
        }

    }

    test("homo_sapiens - vcf - partitions - stub") {

        options "-stub"

        // NOTE(review): no config is loaded for this stub run, so module_args is
        // presumably never forwarded to the process - confirm this is intended.
        when {
            params {
                module_args = '--num-partitions 2'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { snapshot(
                    sanitizeOutput(process.out),
                    // Line count of the emitted partitions file.
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                ).match() }
            )
        }

    }

    test("homo_sapiens - vcf - size") {

        // NOTE(review): nextflow.config is not visible here; presumably it maps
        // params.module_args onto the process ext.args - confirm.
        config "./nextflow.config"

        when {
            params {
                // This is a target size. Do not assume that the partition size you ask for is what you get!
                module_args = '--partition-size 10KB'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { snapshot(
                    sanitizeOutput(process.out),
                    // Line count of the emitted partitions file.
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                ).match() }
            )
        }

    }

    test("homo_sapiens - vcf - size - stub") {

        options "-stub"

        // NOTE(review): no config is loaded for this stub run, so module_args is
        // presumably never forwarded to the process - confirm this is intended.
        when {
            params {
                module_args = '--partition-size 10KB'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { snapshot(
                    sanitizeOutput(process.out),
                    // Line count of the emitted partitions file.
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                ).match() }
            )
        }

    }
}
Loading
Loading