Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .gitignore
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't modify this. It shouldn't be necessary.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remember to revert this.

Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,18 @@ test_output/
tests/data/
work/
.github/CODEOWNERS-tmp

# Local sandbox directories (dev/test artifacts, not for PR)
**/sandbox/

# Dorado pre-downloaded models — too large for git (use tests/data/models/ locally)
modules/nf-core/dorado/basecaller/tests/data/models/
8 changes: 8 additions & 0 deletions modules/nf-core/dorado/basecaller/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Thin wrapper around the upstream dorado image, referenced by its "sha…" tag.
# Nothing is installed or copied here; the file only attaches labels.
FROM nanoporetech/dorado:shac8f356489fa8b44b31beba841b84d2879de2088e

# Image metadata: free-form labels (version, maintainer, description) followed by
# the matching org.opencontainers.image.* annotations.
LABEL version="1.4.0" \
maintainer="@sahuno" \
description="Oxford Nanopore dorado basecaller v1.4.0 — for nf-core/modules" \
org.opencontainers.image.version="1.4.0" \
org.opencontainers.image.source="https://github.com/nanoporetech/dorado" \
org.opencontainers.image.licenses="Oxford Nanopore Technologies PLC. Public License Version 1.0"
11 changes: 11 additions & 0 deletions modules/nf-core/dorado/basecaller/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# NOTE: dorado is not available on bioconda or conda-forge (ONTPL licence),
# so this file is a placeholder and does not install dorado itself.
# The module runs from the Docker/Singularity container specified in main.nf
# (nanoporetech/dorado:shac8f..., v1.4.0), where the conda directive is set to null.
# TODO: submit dorado to bioconda to enable conda-based deployment.
channels:
- conda-forge
- bioconda
dependencies:
# The only package listed; dorado is intentionally absent (see the note above).
- samtools=1.21
54 changes: 54 additions & 0 deletions modules/nf-core/dorado/basecaller/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
process DORADO_BASECALLER {
tag "$meta.id"
label 'process_gpu'
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add the process_low and process_long label here too, because process_gpu only sets the GPU but nothing more.


// dorado is not on bioconda (ONTPL licence). Using
// Docker Hub image directly. SHA tag pins to v1.4.0; a semver tag is tracked in
// nanoporetech/dorado#1584. Same pattern as nf-core/parabricks modules.
Comment on lines +6 to +7
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Docker Hub image directly. SHA tag pins to v1.4.0; a semver tag is tracked in
// nanoporetech/dorado#1584. Same pattern as nf-core/parabricks modules.
// Docker Hub image directly. SHA tag pins to v1.4.0

// Conda is explicitly disabled; the process always runs from the pinned container below.
conda null
container "docker.io/nanoporetech/dorado:shac8f356489fa8b44b31beba841b84d2879de2088e"

// Four input channels: the sample, the model string, an optional models
// directory and an optional reference with its index.
input:
tuple val(meta), path(pod5) // pod5 file or directory of pod5 files
val(model) // combined model string e.g. "sup,5mCG_5hmCG@latest", "hac@v5.0.0"
tuple val(meta2), path(models_dir) // optional pre-downloaded models directory; pass [[],[]] to auto-download
tuple val(meta3), path(reference), path(fai) // optional reference FASTA for alignment; pass [[],[],[]] to skip

// The BAM is mandatory; the summary TSV is optional. The tool version is captured
// by evaluating a shell command and published on the `versions` topic.
// NOTE(review): the trailing `sed 's/^//'` replaces an empty match with nothing, so it
// looks like a no-op. meta.yml repeats this eval string verbatim, so change both together.
output:
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*_summary.tsv"), emit: summary , optional: true
tuple val("${task.process}"), val('dorado'), eval("dorado --version 2>&1 | head -1 | sed 's/^//'"), emit: versions_dorado, topic: versions

// Run unless task.ext.when is set to a falsy value.
when:
task.ext.when == null || task.ext.when

script:
// Extra CLI flags and the output file prefix come from task.ext, with defaults.
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// Without a staged models directory, --models-directory falls back to "." (the current directory).
def models_arg = models_dir ? "--models-directory ${models_dir}" : "--models-directory ."
// --reference is only added when a reference was supplied.
// NOTE(review): `fai` is staged as an input but never referenced in the command below.
def ref_arg = reference ? "--reference ${reference}" : ""

// The command's stdout is redirected into <prefix>.bam. `--device cuda:all` is
// hard-coded and placed after the user-supplied args.
"""
dorado \\
basecaller \\
${args} \\
--device cuda:all \\
${models_arg} \\
${ref_arg} \\
${model} \\
${pod5} \\
> ${prefix}.bam
"""

stub:
// Stub run: only create empty placeholders for the declared file outputs.
// No versions.yml is written: the process declares no such output, and the
// tool version is reported through the eval-based `versions` topic output.
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bam
touch ${prefix}_summary.tsv
Comment on lines +48 to +52
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dorado: 1.4.0
END_VERSIONS

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remember to delete this.

"""
}
133 changes: 133 additions & 0 deletions modules/nf-core/dorado/basecaller/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
name: dorado_basecaller
description: |
Basecall Oxford Nanopore pod5 files with automatic model selection, optional
modified base calling (e.g. 5mCG_5hmCG, 5mC, m6A), and optional alignment
to a reference genome using the dorado basecaller.
keywords:
- basecalling
- ont
- long-read
- methylation
- modified-bases
- pod5
- nanopore
tools:
- "dorado":
description: Oxford Nanopore's basecaller supporting automatic model selection,
modified base calling, and integrated alignment.
homepage: https://github.com/nanoporetech/dorado
documentation: https://software-docs.nanoporetech.com/dorado/latest/
tool_dev_url: https://github.com/nanoporetech/dorado
licence:
- "Oxford Nanopore Technologies PLC. Public License Version 1.0"
identifier: ""
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- pod5:
type: file
description: |
A single pod5 file or a directory of pod5 files to basecall.
Use --recursive in task.ext.args to scan directories recursively.
pattern: "*.pod5"
ontologies: []
- model:
type: string
description: |
Combined model string for automatic model resolution. Use the format
"{speed},{mod}@{version}" to basecall with modification calling in a
single argument, e.g. "sup,5mCG_5hmCG@latest", "hac,5mCG_5hmCG@v5.0.0".
For basecalling only (no mod calls) use "{speed}@{version}" e.g.
"sup@latest", "hac@v5.0.0". Can also be a path to an existing model
directory. Models are auto-downloaded if not found in models_dir.
- - meta2:
type: map
description: |
Groovy Map containing models directory information
e.g. `[ id:'dorado_models' ]`
- models_dir:
type: directory
description: |
Optional directory containing pre-downloaded dorado models.
If not provided (pass [[],[]]), models are downloaded automatically
into the task work directory. Pre-downloading is strongly recommended
for HPC environments without internet access on compute nodes.
pattern: "*/"
ontologies: []
- - meta3:
type: map
description: |
Groovy Map containing reference genome information
e.g. `[ id:'hg38' ]`
- reference:
type: file
description: |
Optional reference FASTA for integrated alignment. If provided, dorado
aligns basecalled reads and outputs a mapped BAM. Pass [[],[],[]] to
produce an unmapped BAM.
pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
ontologies: []
- fai:
type: file
description: |
FASTA index (.fai) for the reference. Required when reference is provided.
pattern: "*.fai"
ontologies: []
output:
bam:
  # Channel layout mirrors the process output: [ meta, "*.bam" ].
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'test', single_end:false ]`
    - "*.bam":
        type: file
        description: |
          BAM file containing basecalled reads with base quality scores and
          modification tags (MM/ML) when modified base calling is enabled.
          Output sort order is SO:unknown (dorado does not sort).
          Pipe to SAMTOOLS_SORT + SAMTOOLS_INDEX for coordinate-sorted, indexed BAMs.
          Unmapped if no reference provided; mapped (unsorted) if reference provided.
        pattern: "*.bam"
        ontologies:
          - edam: http://edamontology.org/format_2572 # BAM
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fill missing ontologies

summary:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- "*_summary.tsv":
type: file
description: Per-read summary TSV with alignment statistics. Present when
--emit-summary is set in task.ext.args.
pattern: "*_summary.tsv"
ontologies:
- edam: http://edamontology.org/format_3475
versions_dorado:
- - ${task.process}:
type: string
description: The name of the process
- dorado:
type: string
description: The name of the tool
- dorado --version 2>&1 | head -1 | sed 's/^//':
type: eval
description: The expression to obtain the version of the tool
topics:
versions:
- - ${task.process}:
type: string
description: The name of the process
- dorado:
type: string
description: The name of the tool
- dorado --version 2>&1 | head -1 | sed 's/^//':
type: eval
description: The expression to obtain the version of the tool
authors:
- "@sahuno"
maintainers:
- "@sahuno"
152 changes: 152 additions & 0 deletions modules/nf-core/dorado/basecaller/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
nextflow_process {

name "Test Process DORADO_BASECALLER"
script "../main.nf"
process "DORADO_BASECALLER"

tag "modules"
tag "modules_nfcore"
tag "dorado"
tag "dorado/basecaller"

// -------------------------------------------------------------------------
// Stub tests — run in CI without GPU or real basecalling
// -------------------------------------------------------------------------

test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, no reference - stub") {

options "-stub"

when {
params {
module_args = ''
}
process {
"""
input[0] = [
[ id: 'HG002' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5',
checkIfExists: true)
]
input[1] = "sup,5mCG_5hmCG@latest"
input[2] = [[], []]
input[3] = [[], [], []]
"""
}
}

then {
assertAll(
{ assert process.success },
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use

{ assert snapshot(sanitizeOutput(process.out)).match() }

To make the snapshots cleaner.

{ assert snapshot(process.out).match() }
)
}
}

// Stub-mode test (-stub): supplies a reference FASTA and its .fai, leaves the
// models directory empty, and snapshots the whole of process.out.
// NOTE(review): params.module_args is set below, but this file has no `config`
// directive, so it is not visible here how (or whether) it reaches task.ext.args.
test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, with reference - stub") {

options "-stub"

when {
params {
module_args = '--mm2-opts "-Y"'
}
process {
"""
input[0] = [
[ id: 'HG002_aligned' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5',
checkIfExists: true)
]
input[1] = "sup,5mCG_5hmCG@latest"
input[2] = [[], []]
input[3] = [
[ id: 'genome' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta',
checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai',
checkIfExists: true)
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

// -------------------------------------------------------------------------
// Real GPU tests — require --tag "gpu" and a GPU node
// Run via SLURM: nf-test test ... --profile singularity,gpu --tag gpu
// -------------------------------------------------------------------------

// Non-stub test tagged "gpu": runs the real script with the model string only,
// with no staged models directory and no reference.
// NOTE(review): the assertion snapshots the whole of process.out, including the
// BAM produced by the run; confirm that file's checksum is reproducible across runs.
test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, no reference") {

tag "gpu"

when {
params {
module_args = ''
}
process {
"""
input[0] = [
[ id: 'HG002' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5',
checkIfExists: true)
]
input[1] = "sup,5mCG_5hmCG@latest"
input[2] = [[], []]
input[3] = [[], [], []]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

// Non-stub test tagged "gpu": runs the real script with a reference FASTA and
// its .fai supplied, and no staged models directory.
// NOTE(review): params.module_args is set below, but this file has no `config`
// directive, so it is not visible here how (or whether) it reaches task.ext.args.
// NOTE(review): the snapshot covers the raw BAM; confirm its checksum is reproducible.
test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, with reference") {

tag "gpu"

when {
params {
module_args = '--mm2-opts "-Y"'
}
process {
"""
input[0] = [
[ id: 'HG002_aligned' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5',
checkIfExists: true)
]
input[1] = "sup,5mCG_5hmCG@latest"
input[2] = [[], []]
input[3] = [
[ id: 'genome' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta',
checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai',
checkIfExists: true)
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
}
Loading
Loading