Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/vcfpartition/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the vcfpartition module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # vcfpartition is included in the bio2zarr package; the version is pinned exactly.
  - "bioconda::bio2zarr=0.1.8"
35 changes: 35 additions & 0 deletions modules/nf-core/vcfpartition/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Runs `vcfpartition` on a single indexed VCF/BCF file and stores what the
// command writes to stdout in `<prefix>.tsv`.
process VCFPARTITION {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    // Singularity uses the HTTPS blob URL unless
    // `task.ext.singularity_pull_docker_container` is set; every other
    // container engine uses the community.wave.seqera.io image.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/77/7713d869c8c8259c10701c95fc105bad8bcfbd6735de6941a47c9e6e26e9bb2f/data':
        'community.wave.seqera.io/library/bio2zarr:0.1.8--c2c92dd3f64fb0f9' }"

    input:
    // `index` is never referenced on the command line; it is declared so that
    // it is staged into the task directory next to `vcf`.
    // NOTE(review): presumably vcfpartition locates the index by file name - confirm.
    tuple val(meta), path(vcf), path(index)

    output:
    tuple val(meta), path("*.tsv"), emit: partitions
    // [ process name, tool name, version ] published on the `versions` topic.
    // The version is the `--version` output with the leading
    // "vcfpartition, version " text removed by sed.
    tuple val("${task.process}"), val('vcfpartition'), eval("vcfpartition --version | sed 's/vcfpartition, version //'"), topic: versions, emit: versions_vcfpartition

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output file stem can be overridden
    // through `task.ext.args` / `task.ext.prefix`.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    vcfpartition \\
        ${args} \\
        ${vcf} \\
        > ${prefix}.tsv
    """

    stub:
    // Creates an empty placeholder with the same name as the real output.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.tsv
    """
}
79 changes: 79 additions & 0 deletions modules/nf-core/vcfpartition/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for VCFPARTITION. The `input` and `output` sections mirror
# the channel structure declared in the module's main.nf: one input tuple
# (meta, vcf, index) and two output channels (partitions, versions_vcfpartition).
name: "vcfpartition"
description: Outputs a set of region strings that partition indexed VCF/BCF files
  for parallel processing.
keywords:
  - vcf
  - bcf
  - partition
  - regions
  - parallel
  - genomics
tools:
  - "vcfpartition":
      description: "Utility to partition indexed VCF/BCF files into regions for parallel
        processing, included in the bio2zarr package."
      homepage: "https://sgkit-dev.github.io/bio2zarr/"
      documentation: "https://sgkit-dev.github.io/bio2zarr/vcfpartition/overview.html"
      tool_dev_url: "https://github.com/sgkit-dev/bio2zarr"
      doi: "10.1101/2024.06.11.598241"
      licence:
        - "Apache-2.0"
      identifier: biotools:bio2zarr
input:
  # Single input channel: tuple val(meta), path(vcf), path(index)
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - vcf:
        type: file
        description: Indexed VCF/BCF file to partition
        pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_3016" # VCF
          - edam: "http://edamontology.org/format_3020" # BCF
    - index:
        type: file
        description: Index for the VCF/BCF file
        pattern: "*.{tbi,csi}"
        ontologies:
          - edam: "http://edamontology.org/format_3700" # Tabix index file format
output:
  partitions:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "*.tsv":
          type: file
          description: The output tab-delimited region strings and the file path
          # Plain glob, matching the element key above and path("*.tsv") in main.nf.
          pattern: "*.tsv"
          ontologies:
            - edam: "http://edamontology.org/format_3475" # TSV
  versions_vcfpartition:
    - - ${task.process}:
          type: string
          description: The name of the process
      - vcfpartition:
          type: string
          description: The name of the tool
      - vcfpartition --version | sed 's/vcfpartition, version //':
          type: eval
          description: The expression to obtain the version of the tool
topics:
  # Same three-element tuple as `versions_vcfpartition`, as published on the
  # `versions` topic.
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
      - vcfpartition:
          type: string
          description: The name of the tool
      - vcfpartition --version | sed 's/vcfpartition, version //':
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@camlloyd"
maintainers:
  - "@camlloyd"
138 changes: 138 additions & 0 deletions modules/nf-core/vcfpartition/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// nf-test suite for the VCFPARTITION process.
//
// Each scenario feeds the same bgzipped dbSNP VCF and its .tbi index, then
// snapshots three things: the sanitized process output, the number of lines
// in the emitted partitions file, and every output channel whose name starts
// with "versions".
//
// The non-stub tests load ./nextflow.config.
// NOTE(review): that config is not visible here; presumably it forwards
// `params.module_args` to the process as `ext.args` - confirm.
nextflow_process {

    name "Test Process VCFPARTITION"
    script "../main.nf"
    process "VCFPARTITION"

    tag "modules"
    tag "modules_nfcore"
    tag "vcfpartition"

    test("homo_sapiens - vcf - partitions") {

        config "./nextflow.config"

        when {
            params {
                module_args = '--num-partitions 2' // This is a target. Do not assume that the number of partitions you ask for is what you get!
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // `assert` makes the closure fail if match() returns false,
                // instead of discarding the result.
                { assert snapshot(
                    sanitizeOutput(process.out),
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                    ).match() }
            )
        }

    }

    test("homo_sapiens - vcf - partitions - stub") {

        // Stub run: no config is loaded here, and the stub block of the
        // process only touches the output file, so the line count is that of
        // an empty file.
        options "-stub"

        when {
            params {
                module_args = '--num-partitions 2'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    sanitizeOutput(process.out),
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                    ).match() }
            )
        }

    }

    test("homo_sapiens - vcf - size") {

        config "./nextflow.config"

        when {
            params {
                module_args = '--partition-size 10KB' // This is a target. Do not assume that the partition size you ask for is what you get!
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    sanitizeOutput(process.out),
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                    ).match() }
            )
        }

    }

    test("homo_sapiens - vcf - size - stub") {

        // Stub run: see the note on the "partitions - stub" test above.
        options "-stub"

        when {
            params {
                module_args = '--partition-size 10KB'
            }
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true),
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    sanitizeOutput(process.out),
                    file(process.out.partitions[0][1]).readLines().size(),
                    process.out.findAll { key, val -> key.startsWith("versions") }
                    ).match() }
            )
        }

    }
}
Loading
Loading