-
Notifications
You must be signed in to change notification settings - Fork 1k
Add module: dorado/basecaller #11122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
70ccae1
444a388
dddb168
d3bb8bd
da780b7
b3066a0
c07b4b8
c78a18c
67312f2
d95189f
2f99d98
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| # Base image: the upstream dorado image, referenced by its commit-SHA tag rather than a version tag. | ||
| FROM nanoporetech/dorado:shac8f356489fa8b44b31beba841b84d2879de2088e | ||
|
|
||
| # Image metadata: tool version, maintainer, description, upstream source repository and licence name. | ||
| LABEL version="1.4.0" \ | ||
| maintainer="@sahuno" \ | ||
| description="Oxford Nanopore dorado basecaller v1.4.0 — for nf-core/modules" \ | ||
| org.opencontainers.image.version="1.4.0" \ | ||
| org.opencontainers.image.source="https://github.com/nanoporetech/dorado" \ | ||
| org.opencontainers.image.licenses="Oxford Nanopore Technologies PLC. Public License Version 1.0" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| --- | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
| # NOTE: dorado is not available on bioconda or conda-forge (ONTPL licence). | ||
| # This environment.yml is a placeholder. The module uses the Docker/Singularity | ||
| # container specified in main.nf (nanoporetech/dorado:shac8f..., v1.4.0). | ||
| # Track 2 TODO: submit dorado to bioconda to enable conda-based deployment. | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - samtools=1.21 |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,54 @@ | ||||||||||||
| process DORADO_BASECALLER { | ||||||||||||
| tag "$meta.id" | ||||||||||||
| label 'process_gpu' | ||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would add the |
||||||||||||
|
|
||||||||||||
| // dorado is not on bioconda (ONTPL licence). Using | ||||||||||||
| // Docker Hub image directly. SHA tag pins to v1.4.0; a semver tag is tracked in | ||||||||||||
| // nanoporetech/dorado#1584. Same pattern as nf-core/parabricks modules. | ||||||||||||
|
Comment on lines
+6
to
+7
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| conda null | ||||||||||||
| container "docker.io/nanoporetech/dorado:shac8f356489fa8b44b31beba841b84d2879de2088e" | ||||||||||||
|
|
||||||||||||
| input: | ||||||||||||
| tuple val(meta), path(pod5) // pod5 file or directory of pod5 files | ||||||||||||
| val(model) // combined model string e.g. "sup,5mCG_5hmCG@latest", "hac@v5.0.0" | ||||||||||||
| tuple val(meta2), path(models_dir) // optional pre-downloaded models directory; pass [[],[]] to auto-download | ||||||||||||
| tuple val(meta3), path(reference), path(fai) // optional reference FASTA for alignment; pass [[],[],[]] to skip | ||||||||||||
|
|
||||||||||||
| output: | ||||||||||||
| tuple val(meta), path("*.bam") , emit: bam | ||||||||||||
| tuple val(meta), path("*_summary.tsv"), emit: summary , optional: true | ||||||||||||
| tuple val("${task.process}"), val('dorado'), eval("dorado --version 2>&1 | head -1 | sed 's/^//'"), emit: versions_dorado, topic: versions | ||||||||||||
|
|
||||||||||||
| when: | ||||||||||||
| task.ext.when == null || task.ext.when | ||||||||||||
|
|
||||||||||||
| script: | ||||||||||||
| // Extra command-line options supplied by the pipeline via task.ext.args (empty string when unset). | ||||||||||||
| def args = task.ext.args ?: '' | ||||||||||||
| // Output file prefix; falls back to the sample id from the meta map. | ||||||||||||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||||||||||||
| // When no models directory is staged (empty input), point --models-directory at the task work directory. | ||||||||||||
| def models_arg = models_dir ? "--models-directory ${models_dir}" : "--models-directory ." | ||||||||||||
| // --reference is only added when a reference FASTA is supplied; otherwise the option is omitted. | ||||||||||||
| def ref_arg = reference ? "--reference ${reference}" : "" | ||||||||||||
|
|
||||||||||||||
| // dorado's standard output is redirected to ${prefix}.bam. The device is fixed to cuda:all. | ||||||||||||
| """ | ||||||||||||
| dorado \\ | ||||||||||||
| basecaller \\ | ||||||||||||
| ${args} \\ | ||||||||||||
| --device cuda:all \\ | ||||||||||||
| ${models_arg} \\ | ||||||||||||
| ${ref_arg} \\ | ||||||||||||
| ${model} \\ | ||||||||||||
| ${pod5} \\ | ||||||||||||
| > ${prefix}.bam | ||||||||||||
|
|
||||||||||||
| stub: | ||||||||||||
| // Stub run: create empty placeholder files matching the declared outputs (BAM and optional summary TSV). | ||||||||||||
| // No versions.yml is written: it is not a declared output of this process, and the tool version is | ||||||||||||
| // already emitted through the 'versions' topic output, so a hard-coded version here would only drift. | ||||||||||||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||||||||||||
| """ | ||||||||||||
| touch ${prefix}.bam | ||||||||||||
| touch ${prefix}_summary.tsv | ||||||||||||
|
Comment on lines
+48
to
+52
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remember to delete this. |
||||||||||||
| """ | ||||||||||||
| } | ||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| name: dorado_basecaller | ||
| description: | | ||
| Basecall Oxford Nanopore pod5 files with automatic model selection, optional | ||
| modified base calling (e.g. 5mCG_5hmCG, 5mC, m6A), and optional alignment | ||
| to a reference genome using the dorado basecaller. | ||
| keywords: | ||
| - basecalling | ||
| - ont | ||
| - long-read | ||
| - methylation | ||
| - modified-bases | ||
| - pod5 | ||
| - nanopore | ||
| tools: | ||
| - "dorado": | ||
| description: Oxford Nanopore's basecaller supporting automatic model selection, | ||
| modified base calling, and integrated alignment. | ||
| homepage: https://github.com/nanoporetech/dorado | ||
| documentation: https://software-docs.nanoporetech.com/dorado/latest/ | ||
| tool_dev_url: https://github.com/nanoporetech/dorado | ||
| licence: | ||
| - "Oxford Nanopore Technologies PLC. Public License Version 1.0" | ||
| identifier: "" | ||
| input: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. `[ id:'test', single_end:false ]` | ||
| - pod5: | ||
| type: file | ||
| description: | | ||
| A single pod5 file or a directory of pod5 files to basecall. | ||
| Use --recursive in task.ext.args to scan directories recursively. | ||
| pattern: "*.pod5" | ||
| ontologies: [] | ||
| - model: | ||
| type: string | ||
| description: | | ||
| Combined model string for automatic model resolution. Use the format | ||
| "{speed},{mod}@{version}" to basecall with modification calling in a | ||
| single argument, e.g. "sup,5mCG_5hmCG@latest", "hac,5mCG_5hmCG@v5.0.0". | ||
| For basecalling only (no mod calls) use "{speed}@{version}" e.g. | ||
| "sup@latest", "hac@v5.0.0". Can also be a path to an existing model | ||
| directory. Models are auto-downloaded if not found in models_dir. | ||
| - - meta2: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing models directory information | ||
| e.g. `[ id:'dorado_models' ]` | ||
| - models_dir: | ||
| type: directory | ||
| description: | | ||
| Optional directory containing pre-downloaded dorado models. | ||
| If not provided (pass [[],[]]), models are downloaded automatically | ||
| into the task work directory. Pre-downloading is strongly recommended | ||
| for HPC environments without internet access on compute nodes. | ||
| pattern: "*/" | ||
| ontologies: [] | ||
| - - meta3: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing reference genome information | ||
| e.g. `[ id:'hg38' ]` | ||
| - reference: | ||
| type: file | ||
| description: | | ||
| Optional reference FASTA for integrated alignment. If provided, dorado | ||
| aligns basecalled reads and outputs a mapped BAM. Pass [[],[],[]] to | ||
| produce an unmapped BAM. | ||
| pattern: "*.{fa,fasta,fa.gz,fasta.gz}" | ||
| ontologies: [] | ||
| - fai: | ||
| type: file | ||
| description: | | ||
| FASTA index (.fai) for the reference. Required when reference is provided. | ||
| pattern: "*.fai" | ||
| ontologies: [] | ||
| output: | ||
| bam: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. `[ id:'test', single_end:false ]` | ||
| - "*.bam": | ||
| type: file | ||
| description: | | ||
| BAM file containing basecalled reads with base quality scores and | ||
| modification tags (MM/ML) when modified base calling is enabled. | ||
| Output sort order is SO:unknown (dorado does not sort). | ||
| Pipe to SAMTOOLS_SORT + SAMTOOLS_INDEX for coordinate-sorted, indexed BAMs. | ||
| Unmapped if no reference provided; mapped (unsorted) if reference provided. | ||
| pattern: "*.bam" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_2572 # BAM | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fill in the missing ontologies. |
||
| summary: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. `[ id:'test', single_end:false ]` | ||
| - "*_summary.tsv": | ||
| type: file | ||
| description: Per-read summary TSV with alignment statistics. Present when | ||
| --emit-summary is set in task.ext.args. | ||
| pattern: "*_summary.tsv" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_3475 | ||
| versions_dorado: | ||
| - - ${task.process}: | ||
| type: string | ||
| description: The name of the process | ||
| - dorado: | ||
| type: string | ||
| description: The name of the tool | ||
| - dorado --version 2>&1 | head -1 | sed 's/^//': | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
| topics: | ||
| versions: | ||
| - - ${task.process}: | ||
| type: string | ||
| description: The name of the process | ||
| - dorado: | ||
| type: string | ||
| description: The name of the tool | ||
| - dorado --version 2>&1 | head -1 | sed 's/^//': | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
| authors: | ||
| - "@sahuno" | ||
| maintainers: | ||
| - "@sahuno" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,152 @@ | ||
| nextflow_process { | ||
|
|
||
| name "Test Process DORADO_BASECALLER" | ||
| script "../main.nf" | ||
| process "DORADO_BASECALLER" | ||
|
|
||
| tag "modules" | ||
| tag "modules_nfcore" | ||
| tag "dorado" | ||
| tag "dorado/basecaller" | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Stub tests — run in CI without GPU or real basecalling | ||
| // ------------------------------------------------------------------------- | ||
|
|
||
| test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, no reference - stub") { | ||
|
|
||
| options "-stub" | ||
|
|
||
| when { | ||
| params { | ||
| module_args = '' | ||
| } | ||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id: 'HG002' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5', | ||
| checkIfExists: true) | ||
| ] | ||
| input[1] = "sup,5mCG_5hmCG@latest" | ||
| input[2] = [[], []] | ||
| input[3] = [[], [], []] | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assertAll( | ||
| { assert process.success }, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use To make the snapshots cleaner. |
||
| { assert snapshot(process.out).match() } | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, with reference - stub") { | ||
|
|
||
| options "-stub" | ||
|
|
||
| when { | ||
| params { | ||
| module_args = '--mm2-opts "-Y"' | ||
| } | ||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id: 'HG002_aligned' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5', | ||
| checkIfExists: true) | ||
| ] | ||
| input[1] = "sup,5mCG_5hmCG@latest" | ||
| input[2] = [[], []] | ||
| input[3] = [ | ||
| [ id: 'genome' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', | ||
| checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', | ||
| checkIfExists: true) | ||
| ] | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assertAll( | ||
| { assert process.success }, | ||
| { assert snapshot(process.out).match() } | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Real GPU tests — require --tag "gpu" and a GPU node | ||
| // Run via SLURM: nf-test test ... --profile singularity,gpu --tag gpu | ||
| // ------------------------------------------------------------------------- | ||
|
|
||
| test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, no reference") { | ||
|
|
||
| tag "gpu" | ||
|
|
||
| when { | ||
| params { | ||
| module_args = '' | ||
| } | ||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id: 'HG002' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5', | ||
| checkIfExists: true) | ||
| ] | ||
| input[1] = "sup,5mCG_5hmCG@latest" | ||
| input[2] = [[], []] | ||
| input[3] = [[], [], []] | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assertAll( | ||
| { assert process.success }, | ||
| { assert snapshot(process.out).match() } | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, with reference") { | ||
|
|
||
| tag "gpu" | ||
|
|
||
| when { | ||
| params { | ||
| module_args = '--mm2-opts "-Y"' | ||
| } | ||
| process { | ||
| """ | ||
| input[0] = [ | ||
| [ id: 'HG002_aligned' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5', | ||
| checkIfExists: true) | ||
| ] | ||
| input[1] = "sup,5mCG_5hmCG@latest" | ||
| input[2] = [[], []] | ||
| input[3] = [ | ||
| [ id: 'genome' ], | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', | ||
| checkIfExists: true), | ||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', | ||
| checkIfExists: true) | ||
| ] | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| then { | ||
| assertAll( | ||
| { assert process.success }, | ||
| { assert snapshot(process.out).match() } | ||
| ) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't modify this. It shouldn't be necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remember to revert this.