Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,9 @@ test_output/
tests/data/
work/
.github/CODEOWNERS-tmp

# Local sandbox directories (dev/test artifacts, not for PR)
**/sandbox/

# Dorado pre-downloaded models — too large for git (use tests/data/models/ locally)
modules/nf-core/dorado/basecaller/tests/data/models/
8 changes: 8 additions & 0 deletions modules/nf-core/dorado/basecaller/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
FROM nanoporetech/dorado:shac8f356489fa8b44b31beba841b84d2879de2088e

LABEL version="1.4.0" \
maintainer="@sahuno" \
description="Oxford Nanopore dorado basecaller v1.4.0 — for nf-core/modules" \
org.opencontainers.image.version="1.4.0" \
org.opencontainers.image.source="https://github.com/nanoporetech/dorado" \
org.opencontainers.image.licenses="Oxford Nanopore Technologies PLC. Public License Version 1.0"
11 changes: 11 additions & 0 deletions modules/nf-core/dorado/basecaller/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# NOTE: dorado is not available on bioconda or conda-forge (ONTPL licence).
# This environment.yml is a placeholder. The module uses the Docker/Singularity
# container specified in main.nf (nanoporetech/dorado:shac8f..., v1.4.0).
# Track 2 TODO: submit dorado to bioconda to enable conda-based deployment.
channels:
- conda-forge
- bioconda
dependencies:
- samtools=1.21
57 changes: 57 additions & 0 deletions modules/nf-core/dorado/basecaller/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
process DORADO_BASECALLER {
tag "$meta.id"
label 'process_gpu'

// dorado is not on bioconda (ONTPL licence). Using Docker Hub image directly —
// same pattern as nf-core/parabricks modules (nvcr.io/nvidia/...).
// sahuno/dorado:1.4.0 wraps nanoporetech/dorado v1.4.0 + samtools.
// Tracking ONT semantic version tags: nanoporetech/dorado#1584.
conda null
container "sahuno/dorado:1.4.0"

input:
tuple val(meta), path(pod5) // pod5 file or directory of pod5 files
val(model) // combined model string e.g. "sup,5mCG_5hmCG@latest", "hac@v5.0.0"
tuple val(meta2), path(models_dir) // optional pre-downloaded models directory; pass [[],[]] to auto-download
tuple val(meta3), path(reference), path(fai) // optional reference FASTA for alignment; pass [[],[],[]] to skip

output:
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*_summary.tsv"), emit: summary , optional: true
tuple val(meta), path("*.log") , emit: log , optional: true
tuple val("${task.process}"), val('dorado'), eval("dorado --version 2>&1 | head -1 | sed 's/^//'"), emit: versions_dorado, topic: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def models_arg = models_dir ? "--models-directory ${models_dir}" : "--models-directory ."
def ref_arg = reference ? "--reference ${reference}" : ""

"""
dorado \\
basecaller \\
${args} \\
--device ${task.ext.device ?: 'cuda:all'} \\
${models_arg} \\
${ref_arg} \\
${model} \\
${pod5} \\
> ${prefix}.bam
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bam
touch ${prefix}_summary.tsv
touch ${prefix}.log

cat <<-END_VERSIONS > versions.yml
"${task.process}":
dorado: 1.4.0
END_VERSIONS
"""
}
130 changes: 130 additions & 0 deletions modules/nf-core/dorado/basecaller/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
name: dorado_basecaller
description: |
Basecall Oxford Nanopore pod5 files with automatic model selection, optional
modified base calling (e.g. 5mCG_5hmCG, 5mC, m6A), and optional alignment
to a reference genome using the dorado basecaller.
keywords:
- basecalling
- ont
- long-read
- methylation
- modified-bases
- pod5
- nanopore
tools:
- "dorado":
description: Oxford Nanopore's basecaller supporting automatic model selection,
modified base calling, and integrated alignment.
homepage: https://github.com/nanoporetech/dorado
documentation: https://software-docs.nanoporetech.com/dorado/latest/
tool_dev_url: https://github.com/nanoporetech/dorado
licence: ["Oxford Nanopore Technologies PLC. Public License Version 1.0"]
identifier: ""
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- pod5:
type: file
description: |
A single pod5 file or a directory of pod5 files to basecall.
Use --recursive in task.ext.args to scan directories recursively.
pattern: "*.pod5"
ontologies: []
- - model:
type: string
description: |
Combined model string for automatic model resolution. Use the format
"{speed},{mod}@{version}" to basecall with modification calling in a
single argument, e.g. "sup,5mCG_5hmCG@latest", "hac,5mCG_5hmCG@v5.0.0".
For basecalling only (no mod calls) use "{speed}@{version}" e.g.
"sup@latest", "hac@v5.0.0". Can also be a path to an existing model
directory. Models are auto-downloaded if not found in models_dir.
- - meta2:
type: map
description: |
Groovy Map containing models directory information
e.g. `[ id:'dorado_models' ]`
- models_dir:
type: directory
description: |
Optional directory containing pre-downloaded dorado models.
If not provided (pass [[],[]]), models are downloaded automatically
into the task work directory. Pre-downloading is strongly recommended
for HPC environments without internet access on compute nodes.
pattern: "*/"
ontologies: []
- - meta3:
type: map
description: |
Groovy Map containing reference genome information
e.g. `[ id:'hg38' ]`
- reference:
type: file
description: |
Optional reference FASTA for integrated alignment. If provided, dorado
aligns basecalled reads and outputs a mapped BAM. Pass [[],[],[]] to
produce an unmapped BAM.
pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
ontologies: []
- fai:
type: file
description: |
FASTA index (.fai) for the reference. Required when reference is provided.
pattern: "*.fai"
ontologies: []
output:
bam:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- "*.bam":
type: file
description: |
BAM file containing basecalled reads with base quality scores and
modification tags (MM/ML) when modified base calling is enabled.
Output sort order is SO:unknown (dorado does not sort).
Pipe to SAMTOOLS_SORT + SAMTOOLS_INDEX for coordinate-sorted, indexed BAMs.
Unmapped if no reference provided; mapped (unsorted) if reference provided.
pattern: "*.bam"
ontologies: []
summary:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- "*_summary.tsv":
type: file
description: Per-read summary TSV with alignment statistics. Present when --emit-summary is set in task.ext.args.
pattern: "*_summary.tsv"
ontologies: []
log:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- "*.log":
type: file
description: Debug log file.
pattern: "*.log"
ontologies: []
versions_dorado:
- - ${task.process}:
type: string
description: The name of the process
- dorado:
type: string
description: The name of the tool
- "dorado --version 2>&1 | head -1 | sed 's/^//'":
type: eval
description: The expression to obtain the version of the tool
authors:
- "@sahuno"
maintainers:
- "@sahuno"
Binary file not shown.
Binary file not shown.
Loading
Loading