diff --git a/.gitignore b/.gitignore index df5aafd3cc74..e795f5c44198 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,9 @@ test_output/ tests/data/ work/ .github/CODEOWNERS-tmp + +# Local sandbox directories (dev/test artifacts, not for PR) +**/sandbox/ + +# Dorado pre-downloaded models — too large for git (use tests/data/models/ locally) +modules/nf-core/dorado/basecaller/tests/data/models/ diff --git a/modules/nf-core/dorado/basecaller/Dockerfile b/modules/nf-core/dorado/basecaller/Dockerfile new file mode 100644 index 000000000000..34120a69fa04 --- /dev/null +++ b/modules/nf-core/dorado/basecaller/Dockerfile @@ -0,0 +1,8 @@ +FROM nanoporetech/dorado:shac8f356489fa8b44b31beba841b84d2879de2088e + +LABEL version="1.4.0" \ + maintainer="@sahuno" \ + description="Oxford Nanopore dorado basecaller v1.4.0 — for nf-core/modules" \ + org.opencontainers.image.version="1.4.0" \ + org.opencontainers.image.source="https://github.com/nanoporetech/dorado" \ + org.opencontainers.image.licenses="Oxford Nanopore Technologies PLC. Public License Version 1.0" diff --git a/modules/nf-core/dorado/basecaller/environment.yml b/modules/nf-core/dorado/basecaller/environment.yml new file mode 100644 index 000000000000..c0b60c86d3f7 --- /dev/null +++ b/modules/nf-core/dorado/basecaller/environment.yml @@ -0,0 +1,11 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# NOTE: dorado is not available on bioconda or conda-forge (ONTPL licence). +# This environment.yml is a placeholder. The module uses the Docker/Singularity +# container specified in main.nf (nanoporetech/dorado:shac8f..., v1.4.0). +# Track 2 TODO: submit dorado to bioconda to enable conda-based deployment. 
+channels: + - conda-forge + - bioconda +dependencies: + - samtools=1.21
diff --git a/modules/nf-core/dorado/basecaller/main.nf b/modules/nf-core/dorado/basecaller/main.nf
new file mode 100644
index 000000000000..0c38dee21180
--- /dev/null
+++ b/modules/nf-core/dorado/basecaller/main.nf
@@ -0,0 +1,63 @@
+process DORADO_BASECALLER {
+    tag "$meta.id"
+    label 'process_gpu'
+
+    // Basecalls ONT pod5 data with `dorado basecaller`; stdout of the command is
+    // redirected into ${prefix}.bam.
+    //
+    // dorado is not on bioconda (ONTPL licence). Using Docker Hub image directly —
+    // same pattern as nf-core/parabricks modules (nvcr.io/nvidia/...).
+    // NOTE(review): the Dockerfile shipped with this module only adds LABELs on top
+    // of nanoporetech/dorado and installs no samtools — confirm that
+    // sahuno/dorado:1.4.0 was built from it.
+    // Tracking ONT semantic version tags: nanoporetech/dorado#1584.
+    conda null
+    container "sahuno/dorado:1.4.0"
+
+    input:
+    tuple val(meta), path(pod5)                  // pod5 file or directory of pod5 files
+    val(model)                                   // combined model string e.g. "sup,5mCG_5hmCG@latest", "hac@v5.0.0"
+    tuple val(meta2), path(models_dir)           // optional pre-downloaded models directory; pass [[],[]] to auto-download
+    tuple val(meta3), path(reference), path(fai) // optional reference FASTA for alignment; pass [[],[],[]] to skip (fai is staged but not referenced by the command)
+
+    output:
+    tuple val(meta), path("*.bam")        , emit: bam
+    tuple val(meta), path("*_summary.tsv"), emit: summary , optional: true
+    tuple val(meta), path("*.log")        , emit: log     , optional: true
+    // `sed 's/^//'` changes nothing, but the expression is left as-is because the
+    // module's meta.yml records this exact eval string.
+    tuple val("${task.process}"), val('dorado'), eval("dorado --version 2>&1 | head -1 | sed 's/^//'"), emit: versions_dorado, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Empty models_dir ([]) is falsy, so "." (the task directory) is passed instead.
+    def models_arg = models_dir ? "--models-directory ${models_dir}" : "--models-directory ."
+    // Empty reference ([]) is falsy, so --reference is left out entirely.
+    def ref_arg = reference ? "--reference ${reference}" : ""
+
+    """
+    dorado \\
+        basecaller \\
+        ${args} \\
+        --device ${task.ext.device ?: 'cuda:all'} \\
+        ${models_arg} \\
+        ${ref_arg} \\
+        ${model} \\
+        ${pod5} \\
+        > ${prefix}.bam
+    """
+
+    stub:
+    // Only placeholder outputs are created here: the version is reported through
+    // the `versions` topic output above, and no output declares a versions.yml.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam
+    touch ${prefix}_summary.tsv
+    touch ${prefix}.log
+    """
+}
diff --git a/modules/nf-core/dorado/basecaller/meta.yml b/modules/nf-core/dorado/basecaller/meta.yml new file mode 100644 index 000000000000..57746e05976e --- /dev/null +++ b/modules/nf-core/dorado/basecaller/meta.yml @@ -0,0 +1,130 @@ +name: dorado_basecaller +description: | + Basecall Oxford Nanopore pod5 files with automatic model selection, optional + modified base calling (e.g. 5mCG_5hmCG, 5mC, m6A), and optional alignment + to a reference genome using the dorado basecaller. +keywords: + - basecalling + - ont + - long-read + - methylation + - modified-bases + - pod5 + - nanopore +tools: + - "dorado": + description: Oxford Nanopore's basecaller supporting automatic model selection, + modified base calling, and integrated alignment. + homepage: https://github.com/nanoporetech/dorado + documentation: https://software-docs.nanoporetech.com/dorado/latest/ + tool_dev_url: https://github.com/nanoporetech/dorado + licence: ["Oxford Nanopore Technologies PLC. Public License Version 1.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - pod5: + type: file + description: | + A single pod5 file or a directory of pod5 files to basecall. + Use --recursive in task.ext.args to scan directories recursively. + pattern: "*.pod5" + ontologies: [] + - - model: + type: string + description: | + Combined model string for automatic model resolution.
Use the format + "{speed},{mod}@{version}" to basecall with modification calling in a + single argument, e.g. "sup,5mCG_5hmCG@latest", "hac,5mCG_5hmCG@v5.0.0". + For basecalling only (no mod calls) use "{speed}@{version}" e.g. + "sup@latest", "hac@v5.0.0". Can also be a path to an existing model + directory. Models are auto-downloaded if not found in models_dir. + - - meta2: + type: map + description: | + Groovy Map containing models directory information + e.g. `[ id:'dorado_models' ]` + - models_dir: + type: directory + description: | + Optional directory containing pre-downloaded dorado models. + If not provided (pass [[],[]]), models are downloaded automatically + into the task work directory. Pre-downloading is strongly recommended + for HPC environments without internet access on compute nodes. + pattern: "*/" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference genome information + e.g. `[ id:'hg38' ]` + - reference: + type: file + description: | + Optional reference FASTA for integrated alignment. If provided, dorado + aligns basecalled reads and outputs a mapped BAM. Pass [[],[],[]] to + produce an unmapped BAM. + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + ontologies: [] + - fai: + type: file + description: | + FASTA index (.fai) for the reference. Required when reference is provided. + pattern: "*.fai" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.bam": + type: file + description: | + BAM file containing basecalled reads with base quality scores and + modification tags (MM/ML) when modified base calling is enabled. + Output sort order is SO:unknown (dorado does not sort). + Pipe to SAMTOOLS_SORT + SAMTOOLS_INDEX for coordinate-sorted, indexed BAMs. + Unmapped if no reference provided; mapped (unsorted) if reference provided. 
+ pattern: "*.bam" + ontologies: [] + summary: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*_summary.tsv": + type: file + description: Per-read summary TSV with alignment statistics. Present when --emit-summary is set in task.ext.args. + pattern: "*_summary.tsv" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.log": + type: file + description: Debug log file. + pattern: "*.log" + ontologies: [] + versions_dorado: + - - ${task.process}: + type: string + description: The name of the process + - dorado: + type: string + description: The name of the tool + - "dorado --version 2>&1 | head -1 | sed 's/^//'": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sahuno" +maintainers: + - "@sahuno" diff --git a/modules/nf-core/dorado/basecaller/tests/data/HG002_PAW70337_giab_10reads.pod5 b/modules/nf-core/dorado/basecaller/tests/data/HG002_PAW70337_giab_10reads.pod5 new file mode 100644 index 000000000000..462ac3d41ce9 Binary files /dev/null and b/modules/nf-core/dorado/basecaller/tests/data/HG002_PAW70337_giab_10reads.pod5 differ diff --git a/modules/nf-core/dorado/basecaller/tests/data/test.pod5 b/modules/nf-core/dorado/basecaller/tests/data/test.pod5 new file mode 100755 index 000000000000..b39744dd5bcc Binary files /dev/null and b/modules/nf-core/dorado/basecaller/tests/data/test.pod5 differ diff --git a/modules/nf-core/dorado/basecaller/tests/main.nf.test b/modules/nf-core/dorado/basecaller/tests/main.nf.test new file mode 100644 index 000000000000..2768e6009e8c --- /dev/null +++ b/modules/nf-core/dorado/basecaller/tests/main.nf.test @@ -0,0 +1,191 @@ +nextflow_process { + + name "Test Process DORADO_BASECALLER" + script "../main.nf" + process "DORADO_BASECALLER" + + tag "modules" + tag "modules_nfcore" + tag "dorado" + tag 
"dorado/basecaller"
+
+    // -------------------------------------------------------------------------
+    // Stub tests — run in CI without GPU or real basecalling
+    // -------------------------------------------------------------------------
+
+    test("homo sapiens - pod5, sup,5mCG_5hmCG@latest, no models_dir, no reference - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = ''
+            }
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ],
+                    file("${projectDir}/modules/nf-core/dorado/basecaller/tests/data/test.pod5", checkIfExists: true)
+                ]
+                input[1] = "sup,5mCG_5hmCG@latest"
+                input[2] = [[], []]
+                input[3] = [[], [], []]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("homo sapiens - pod5, sup,5mCG_5hmCG@latest, no models_dir, with reference - stub") {
+
+        options "-stub"
+
+        when {
+            params {
+                module_args = '--mm2-opts "-Y"'
+                // The reference is taken from nf-core/test-datasets (see input[3]) instead of a
+                // site-local hg38 path, so checkIfExists succeeds on any machine and in CI.
+            }
+            process {
+                """
+                input[0] = [
+                    [ id: 'test_aligned' ],
+                    file("${projectDir}/modules/nf-core/dorado/basecaller/tests/data/test.pod5", checkIfExists: true)
+                ]
+                input[1] = "sup,5mCG_5hmCG@latest"
+                input[2] = [[], []]
+                input[3] = [
+                    [ id: 'hg38' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    // -------------------------------------------------------------------------
+    // Track 2 stub test — uses public GIAB HG002 pod5 from nf-core/test-datasets
+    // Pending: nf-core/test-datasets#1968 merge before this URL resolves
+    // -------------------------------------------------------------------------
+
+    test("homo sapiens - GIAB HG002 pod5, sup,5mCG_5hmCG@latest, no models_dir, no reference - stub") {
+ options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id: 'HG002' ], + file("${params.modules_testdata_base_path}genomics/homo_sapiens/nanopore/pod5/HG002_PAW70337_giab_10reads.pod5", + checkIfExists: true) + ] + input[1] = "sup,5mCG_5hmCG@latest" + input[2] = [[], []] + input[3] = [[], [], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + // ------------------------------------------------------------------------- + // Real GPU tests — require --tag "gpu" and componc_gpu_batch partition + // Run via SLURM: nf-test test ... --profile singularity,gpu --tag gpu + // ------------------------------------------------------------------------- + + test("homo sapiens - pod5, sup,5mCG_5hmCG@latest, with models_dir, no reference") { + + tag "gpu" + + when { + params { + module_args = '' + models_dir = "${projectDir}/modules/nf-core/dorado/basecaller/tests/data/models" + } + process { + """ + input[0] = [ + [ id: 'test' ], + file("${projectDir}/modules/nf-core/dorado/basecaller/tests/data/test.pod5", checkIfExists: true) + ] + input[1] = "sup,5mCG_5hmCG@latest" + input[2] = [ + [ id: 'dorado_models' ], + file(params.models_dir, checkIfExists: true) + ] + input[3] = [[], [], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo sapiens - pod5, sup,5mCG_5hmCG@latest, with models_dir, with reference") { + + tag "gpu" + + when { + params { + module_args = '--mm2-opts "-Y"' + models_dir = "${projectDir}/modules/nf-core/dorado/basecaller/tests/data/models" + test_reference = '/data1/greenbab/database/hg38/v0/Homo_sapiens_assembly38.fasta' + test_fai = '/data1/greenbab/database/hg38/v0/Homo_sapiens_assembly38.fasta.fai' + } + process { + """ + input[0] = [ + [ id: 'test_aligned' ], + file("${projectDir}/modules/nf-core/dorado/basecaller/tests/data/test.pod5", 
checkIfExists: true) + ] + input[1] = "sup,5mCG_5hmCG@latest" + input[2] = [ + [ id: 'dorado_models' ], + file(params.models_dir, checkIfExists: true) + ] + input[3] = [ + [ id: 'hg38' ], + file(params.test_reference, checkIfExists: true), + file(params.test_fai, checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/dorado/basecaller/tests/main.nf.test.snap b/modules/nf-core/dorado/basecaller/tests/main.nf.test.snap new file mode 100644 index 000000000000..b8a8d4705bab --- /dev/null +++ b/modules/nf-core/dorado/basecaller/tests/main.nf.test.snap @@ -0,0 +1,432 @@ +{ + "homo sapiens - pod5, sup,5mCG_5hmCG@latest, no models_dir, no reference - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary": [ + [ + { + "id": "test" + }, + "test_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_dorado": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ] + } + ], + "timestamp": "2026-04-05T14:17:51.703252465", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.04.6" + } + }, + "homo sapiens - pod5, sup@latest, no models_dir, no reference - stub - with mm2-opts": { + "content": [ + { + "0": [ + [ + { + "id": "test_mm2" + }, + "test_mm2.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_mm2" + }, + 
"test_mm2.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test_mm2" + }, + "test_mm2_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_mm2" + }, + "test_mm2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ], + "bai": [ + [ + { + "id": "test_mm2" + }, + "test_mm2.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test_mm2" + }, + "test_mm2.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test_mm2" + }, + "test_mm2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary": [ + [ + { + "id": "test_mm2" + }, + "test_mm2_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_dorado": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ] + } + ], + "timestamp": "2026-04-04T23:27:16.661656309", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "homo sapiens - pod5, sup,5mCG_5hmCG@latest, with models_dir, no reference": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,711b6b3a32fe2a61516fef2d368c8714" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,711b6b3a32fe2a61516fef2d368c8714" + ] + ], + "log": [ + + ], + "summary": [ + + ], + "versions_dorado": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ] + } + ], + "timestamp": "2026-04-05T14:19:15.17694766", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.04.6" + } + }, + "homo sapiens - pod5, sup,5mCG_5hmCG@latest, no models_dir, with reference - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_aligned" + }, + "test_aligned.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_aligned" + }, + "test_aligned_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], 
+ "2": [ + [ + { + "id": "test_aligned" + }, + "test_aligned.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ], + "bam": [ + [ + { + "id": "test_aligned" + }, + "test_aligned.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test_aligned" + }, + "test_aligned.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary": [ + [ + { + "id": "test_aligned" + }, + "test_aligned_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_dorado": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ] + } + ], + "timestamp": "2026-04-05T14:17:58.184322514", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.04.6" + } + }, + "homo sapiens - pod5, sup,5mCG_5hmCG@latest, with models_dir, with reference": { + "content": [ + { + "0": [ + [ + { + "id": "test_aligned" + }, + "test_aligned.bam:md5,dee8d367b18e48bbf5b5f3d70648f758" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ], + "bam": [ + [ + { + "id": "test_aligned" + }, + "test_aligned.bam:md5,dee8d367b18e48bbf5b5f3d70648f758" + ] + ], + "log": [ + + ], + "summary": [ + + ], + "versions_dorado": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ] + } + ], + "timestamp": "2026-04-05T14:20:25.970992947", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.04.6" + } + }, + "homo sapiens - pod5, sup@latest, no models_dir, no reference - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] 
+ ], + "bai": [ + [ + { + "id": "test" + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary": [ + [ + { + "id": "test" + }, + "test_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_dorado": [ + [ + "DORADO_BASECALLER", + "dorado", + "1.4.0+ba44a01" + ] + ] + } + ], + "timestamp": "2026-04-04T23:27:08.448808645", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/dorado/basecaller/tests/nextflow.config b/modules/nf-core/dorado/basecaller/tests/nextflow.config new file mode 100644 index 000000000000..76d9247b6741 --- /dev/null +++ b/modules/nf-core/dorado/basecaller/tests/nextflow.config @@ -0,0 +1,26 @@ +process { + withName: 'DORADO_BASECALLER' { + ext.args = params.module_args ?: '' + ext.device = 'cuda:all' + // For local MSKCC HPC testing: override with pre-built SIF (faster than pulling Docker Hub). + // Remove this line to test with sahuno/dorado:1.4.0 pulled from Docker Hub via Singularity. 
+ container = '/data1/greenbab/software/images/dorado_compiled_v1.4.0.sif' + } +} + +params { + // Pre-downloaded dorado models — avoids internet access on compute nodes + // Contains: dna_r10.4.1_e8.2_400bps_sup@v5.2.0 + 5mCG_5hmCG@v1 + 5mCG_5hmCG@v2 + models_dir = "${projectDir}/modules/nf-core/dorado/basecaller/tests/data/models" + + // hg38 reference for aligned BAM tests (Test 4) + test_reference = '/data1/greenbab/database/hg38/v0/Homo_sapiens_assembly38.fasta' + test_fai = '/data1/greenbab/database/hg38/v0/Homo_sapiens_assembly38.fasta.fai' +} + +// Prevent quay.io/ prefix — local SIF path is absolute, no registry needed +singularity.registry = '' +docker.registry = '' + +// Expose GPU to Singularity container (required for dorado CUDA access) +singularity.runOptions = '--nv' diff --git a/modules/nf-core/modkit/localize/environment.yml b/modules/nf-core/modkit/localize/environment.yml new file mode 100644 index 000000000000..62b9786305ae --- /dev/null +++ b/modules/nf-core/modkit/localize/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - ont-modkit=0.6.1
diff --git a/modules/nf-core/modkit/localize/main.nf b/modules/nf-core/modkit/localize/main.nf
new file mode 100644
index 000000000000..08075195e596
--- /dev/null
+++ b/modules/nf-core/modkit/localize/main.nf
@@ -0,0 +1,54 @@
+process MODKIT_LOCALIZE {
+    tag "$meta.id"
+    label 'process_high'
+
+    // Runs `modkit localize` on one bedMethyl file, producing ${prefix}.localize.tsv
+    // and a ${prefix}.localize.html chart. Every file output is declared optional.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ont-modkit:0.6.1--hcdda2d0_0':
+        'biocontainers/ont-modkit:0.6.1--hcdda2d0_0' }"
+
+    input:
+    tuple val(meta), path(bedmethyl), path(tbi) // tbi is staged alongside bedmethyl; it is not named on the command line
+    tuple val(meta2), path(sizes)               // handed to --genome-sizes
+    tuple val(meta3), path(regions)             // handed to --regions
+
+    output:
+    tuple val(meta), path("*.tsv")  , emit: tsv  , optional: true
+    tuple val(meta), path("*.html") , emit: chart, optional: true
+    tuple val(meta), path("*.log")  , emit: log  , optional: true
+    tuple val("${task.process}"), val('modkit'), eval("modkit --version | sed 's/modkit //'"), emit: versions_modkit, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix     = task.ext.prefix ?: "${meta.id}"
+    def args       = task.ext.args ?: ''
+    // The chart path and the --name value are both derived from the prefix.
+    def chart_opts = "--chart ${prefix}.localize.html --name ${prefix}"
+
+    """
+    modkit \\
+        localize \\
+        ${args} \\
+        --threads ${task.cpus} \\
+        --genome-sizes ${sizes} \\
+        --regions ${regions} \\
+        ${chart_opts} \\
+        --force \\
+        -o ${prefix}.localize.tsv \\
+        ${bedmethyl}
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def args   = task.ext.args ?: ''
+    // Echoes the args, then creates one empty placeholder per declared file output.
+    """
+    echo ${args}
+
+    touch ${prefix}.localize.tsv ${prefix}.localize.html ${prefix}.log
+    """
+}
diff --git a/modules/nf-core/modkit/localize/meta.yml b/modules/nf-core/modkit/localize/meta.yml new file mode 100644 index 000000000000..9aaf756f66e6 --- /dev/null +++ b/modules/nf-core/modkit/localize/meta.yml @@ -0,0 +1,117 @@ +name: modkit_localize +description: Investigate patterns of base modifications by aggregating pileup counts localized around genomic features of interest +keywords: + - methylation + - ont + - long-read + - localize + - enrichment +tools: + - "modkit": + description: A bioinformatics tool for working with modified bases in Oxford + Nanopore sequencing data + homepage: https://github.com/nanoporetech/modkit + documentation: https://github.com/nanoporetech/modkit + tool_dev_url: https://github.com/nanoporetech/modkit + licence: ["Oxford Nanopore Technologies PLC.
Public License Version 1.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bedmethyl: + type: file + description: bgzip-compressed bedMethyl file from modkit pileup + pattern: "*.bed.gz" + ontologies: [] + - tbi: + type: file + description: Tabix index for the bedMethyl file + pattern: "*.bed.gz.tbi" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing genome sizes information + e.g. `[ id:'hg38' ]` + - sizes: + type: file + description: TSV of genome sizes with columns chrom and size_in_bp + pattern: "*.{sizes,tsv,txt,genome}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing regions BED file information + e.g. `[ id:'CpG_islands' ]` + - regions: + type: file + description: | + BED file of regions to calculate enrichment around. These serve + as the points from which the window number of bases is centered + (e.g. CpG islands, gene bodies, repeat elements). + pattern: "*.bed" + ontologies: [] +output: + tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.tsv": + type: file + description: TSV output with percent-modification vs. offset from feature midpoints + pattern: "*.tsv" + ontologies: [] + chart: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.html": + type: file + description: HTML document with interactive chart showing percent-modification + vs. offset + pattern: "*.html" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.log": + type: file + description: File for debug logs to be written to + pattern: "*.log" + ontologies: [] + versions_modkit: + - - ${task.process}: + type: string + description: The name of the process + - modkit: + type: string + description: The name of the tool + - modkit --version | sed 's/modkit //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - modkit: + type: string + description: The name of the tool + - modkit --version | sed 's/modkit //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sahuno" +maintainers: + - "@sahuno" diff --git a/modules/nf-core/modkit/localize/plot/environment.yml b/modules/nf-core/modkit/localize/plot/environment.yml new file mode 100644 index 000000000000..46e866cde105 --- /dev/null +++ b/modules/nf-core/modkit/localize/plot/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::r-base=4.4.3 + - conda-forge::r-data.table=1.16.4 + - conda-forge::r-ggplot2=3.5.1 + - conda-forge::r-zoo=1.8_12
diff --git a/modules/nf-core/modkit/localize/plot/main.nf b/modules/nf-core/modkit/localize/plot/main.nf
new file mode 100644
index 000000000000..397b88fed758
--- /dev/null
+++ b/modules/nf-core/modkit/localize/plot/main.nf
@@ -0,0 +1,54 @@
+process MODKIT_LOCALIZE_PLOT {
+    tag "$meta.id"
+    label 'process_single'
+
+    // Container: R image (r-base, r-data.table, r-ggplot2, r-zoo) built by Seqera Wave
+    // from environment.yml. The URI is not frozen yet (pending Docker Hub credentials
+    // setup); Track 2 is to swap in a stable community.wave.seqera.io URI once frozen.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://wave.seqera.io/wt/ea44ebb9e07a/wave/build:r-base-4.4.3_r-data.table-1.16.4_r-ggplot2-3.5.1_r-zoo-1.8_12--1b699ccd4840da3d' :
+        'wave.seqera.io/wt/ea44ebb9e07a/wave/build:r-base-4.4.3_r-data.table-1.16.4_r-ggplot2-3.5.1_r-zoo-1.8_12--1b699ccd4840da3d' }"
+
+    input:
+    tuple val(meta), path(tsvs)
+    tuple val(meta2), path(samplesheet)
+
+    output:
+    tuple val(meta), path("figures/png/*.png"), emit: png         , optional: true
+    tuple val(meta), path("figures/pdf/*.pdf"), emit: pdf         , optional: true
+    tuple val(meta), path("figures/svg/*.svg"), emit: svg         , optional: true
+    tuple val(meta), path("*_combined.tsv")   , emit: combined_tsv, optional: true
+    tuple val(meta), path("*_summary.tsv")    , emit: summary_tsv , optional: true
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // The task script is the R template named below.
+    template 'plot_localize_composite.R'
+
+    stub:
+    // Creates empty stand-ins for every declared file output plus a versions.yml holding the
+    // R version; print() reproduces the `[1] TRUE` lines that bare file.create() calls echo.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    for (out_dir in c('figures/png', 'figures/pdf', 'figures/svg')) {
+        dir.create(out_dir, recursive = TRUE)
+    }
+    for (placeholder in c(
+        'figures/png/${prefix}_composite_lines_loess.png',
+        'figures/pdf/${prefix}_composite_lines_loess.pdf',
+        'figures/svg/${prefix}_composite_lines_loess.svg',
+        '${prefix}_combined.tsv',
+        '${prefix}_summary.tsv'
+    )) {
+        print(file.create(placeholder))
+    }
+
+    r_ver <- strsplit(version[['version.string']], ' ')[[1]][3]
+    writeLines(c('"${task.process}":', paste(' r-base:', r_ver)), 'versions.yml')
+    """
+}
diff --git a/modules/nf-core/modkit/localize/plot/meta.yml b/modules/nf-core/modkit/localize/plot/meta.yml new file mode 100644 index 000000000000..fb091da3ad7a --- /dev/null +++ b/modules/nf-core/modkit/localize/plot/meta.yml @@ -0,0 +1,100 @@ +name: modkit_localize_plot +description: Composite
visualization of modkit localize results overlaying multiple + samples colored by condition with configurable smoothing +keywords: + - methylation + - ont + - long-read + - localize + - visualization + - plot +tools: + - "r-ggplot2": + description: An implementation of the Grammar of Graphics in R + homepage: https://ggplot2.tidyverse.org + documentation: https://ggplot2.tidyverse.org/reference + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing analysis information + e.g. `[ id:'CpG_islands_5mC' ]` + - tsvs: + type: file + description: One or more TSV files from modkit localize, each containing + columns mod_code, offset, n_valid, n_mod, percent_modified + pattern: "*.tsv" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing samplesheet information + e.g. `[ id:'samplesheet' ]` + - samplesheet: + type: file + description: CSV or TSV with columns sample_id and condition. sample_id must + match substrings in the TSV filenames to assign conditions + pattern: "*.{csv,tsv}" + ontologies: [] +output: + png: + - - meta: + type: map + description: | + Groovy Map containing analysis information + - "figures/png/*.png": + type: file + description: PNG format composite plots + pattern: "*.png" + ontologies: [] + pdf: + - - meta: + type: map + description: | + Groovy Map containing analysis information + - "figures/pdf/*.pdf": + type: file + description: PDF format composite plots + pattern: "*.pdf" + ontologies: [] + svg: + - - meta: + type: map + description: | + Groovy Map containing analysis information + - "figures/svg/*.svg": + type: file + description: SVG format composite plots + pattern: "*.svg" + ontologies: [] + combined_tsv: + - - meta: + type: map + description: | + Groovy Map containing analysis information + - "*_combined.tsv": + type: file + description: Combined TSV with all samples annotated with condition + pattern: "*_combined.tsv" + ontologies: [] + summary_tsv: + - - 
meta: + type: map + description: | + Groovy Map containing analysis information + - "*_summary.tsv": + type: file + description: Per-condition summary TSV with mean and SE at each offset + pattern: "*_summary.tsv" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sahuno" +maintainers: + - "@sahuno"
diff --git a/modules/nf-core/modkit/localize/plot/templates/plot_localize_composite.R b/modules/nf-core/modkit/localize/plot/templates/plot_localize_composite.R
new file mode 100644
index 000000000000..51f59ce85c83
--- /dev/null
+++ b/modules/nf-core/modkit/localize/plot/templates/plot_localize_composite.R
@@ -0,0 +1,334 @@
+#!/usr/bin/env Rscript
+
+# Author: Samuel Ahuno
+# Date: 2026-04-04
+# Purpose: Composite plot of modkit localize results — overlay samples colored by condition
+# with configurable smoothing (loess default, rolling_mean, binned)
+
+################################################
+## Functions ##
+################################################
+
+#' Turn a "--name value" option string into a named list (no optparse needed)
+#'
+#' @param x Single string such as "--opt1 val1 --opt2 val2"
+#' @return Named list mapping each option name to its first value (as character);
+#'   options whose value is missing, '' or 'null' are left out
+
+parse_args <- function(x){
+    # Everything before the first "--" is discarded by the [-1].
+    flag_chunks <- unlist(strsplit(x, ' ?--')[[1]])[-1]
+    # Tokenise each chunk and force it to exactly two slots: name, value (NA when absent).
+    name_value <- lapply(flag_chunks, function(chunk) {
+        tokens <- scan(text=chunk, what='character', quiet = TRUE)
+        length(tokens) <- 2
+        tokens
+    })
+    opt_names  <- lapply(name_value, function(pair) pair[1])
+    opt_values <- lapply(name_value, function(pair) pair[2])
+    parsed <- structure(opt_values, names = opt_names)
+    usable <- !(parsed %in% c('', 'null')) & !is.na(parsed)
+    parsed[usable]
+}
+
+################################################
+## Libraries ##
+################################################
+
+suppressPackageStartupMessages({
+    library(data.table)
+    library(ggplot2)
+    library(zoo)
+})
+
+################################################
+## Parse options from ext.args ##
+################################################
+
+# Defaults; prefix falls back to the sample id when ext.prefix renders as 'null'.
+opt <- list(
+    prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'),
+    samplesheet = '$samplesheet',
+    smooth = 'loess',
+    loess_span = 0.1,
+    rolling_window = 50,
+    bin_size = 50
+)
+
+# Override defaults with ext.args
+args_opt <- parse_args('$task.ext.args')
+
+# Each recognised option and the conversion applied to its (character) value.
+.arg_casts <- list(
+    smooth         = identity,
+    loess_span     = as.numeric,
+    rolling_window = as.integer,
+    bin_size       = as.integer
+)
+for (.arg_name in intersect(names(.arg_casts), names(args_opt))) {
+    opt[[.arg_name]] <- .arg_casts[[.arg_name]](args_opt[[.arg_name]])
+}
+
+cat("Options:\\n")
+cat(paste0(" prefix: ", opt\$prefix, "\\n"))
+cat(paste0(" samplesheet: ", opt\$samplesheet, "\\n"))
+cat(paste0(" smooth: ", opt\$smooth, "\\n"))
+cat(paste0(" loess_span: ", opt\$loess_span, "\\n"))
+cat(paste0(" rolling_window: ", opt\$rolling_window, "\\n"))
+cat(paste0(" bin_size: ", opt\$bin_size, "\\n"))
+
+################################################
+## Create output directories ##
+################################################
+
+fig_dir <- "figures"
+for (.fmt_dir in file.path(fig_dir, c("png", "pdf", "svg"))) {
+    dir.create(.fmt_dir, showWarnings = FALSE, recursive = TRUE)
+}
+
+################################################
+## Load samplesheet and TSV files ##
+################################################
+
+# Samplesheet must have columns: sample_id, condition
+# Optionally: tsv_path (if not provided, TSVs are
#' Smooth the per-condition mean and SE profiles along `offset`
#'
#' When a condition has no usable standard error at all (every `se_pct` is NA,
#' which is what `sd()` yields with a single sample per condition), the SE is
#' set to zero for that condition in every method, so the ribbon collapses onto
#' the mean line instead of turning into NA/NaN. Previously only the loess
#' branch did this.
#'
#' @param dt data.table with columns condition, offset, mean_pct and se_pct
#' @param method one of "loess", "rolling_mean" or "binned"
#' @param roll_w window width (number of rows) passed to zoo::rollmean as `k`
#' @param bin_sz width of the offset bins used by the "binned" method
#' @param l_span span passed to loess()
#' @return data.table with columns condition, offset, smooth_pct and smooth_se;
#'   rows whose smooth_pct is NA are removed. Stops on an unknown method.

apply_smoothing <- function(dt, method, roll_w, bin_sz, l_span) {
  # TRUE when a condition carries no standard error at all
  no_se_available <- function(se) all(is.na(se))

  if (method == "loess") {
    cat(paste0("Applying LOESS smoothing (span = ", l_span, ")\\n"))
    out <- dt[, {
      fit_mean <- loess(mean_pct ~ offset, data = .SD, span = l_span)
      # se_pct is NA when only 1 sample per condition; fall back to zero ribbon
      if (no_se_available(se_pct)) {
        smooth_se_vals <- rep(0, nrow(.SD))
      } else {
        # Fit on the rows that have an SE, then predict at every offset
        sd_sub <- .SD[!is.na(se_pct)]
        fit_se <- loess(se_pct ~ offset, data = sd_sub, span = l_span)
        smooth_se_vals <- predict(fit_se, newdata = .SD)
      }
      .(offset = offset,
        smooth_pct = predict(fit_mean),
        smooth_se = smooth_se_vals)
    }, by = condition]

  } else if (method == "rolling_mean") {
    cat(paste0("Applying rolling mean (window = ", roll_w, " bp)\\n"))
    out <- dt[, {
      # Same single-sample fallback as the loess branch
      if (no_se_available(se_pct)) {
        smooth_se_vals <- rep(0, nrow(.SD))
      } else {
        smooth_se_vals <- rollmean(se_pct, k = roll_w, fill = NA, align = "center")
      }
      .(offset = offset,
        smooth_pct = rollmean(mean_pct, k = roll_w, fill = NA, align = "center"),
        smooth_se = smooth_se_vals)
    }, by = condition]

  } else if (method == "binned") {
    cat(paste0("Applying offset binning (bin size = ", bin_sz, " bp)\\n"))
    out <- dt[, {
      # Label each row with the midpoint of the bin its offset falls into
      bin_mid <- floor(offset / bin_sz) * bin_sz + bin_sz / 2
      # Decided once per condition so every bin of a condition is treated alike
      zero_se <- no_se_available(se_pct)
      .SD[, .(smooth_pct = mean(mean_pct, na.rm = TRUE),
              smooth_se = if (zero_se) 0 else mean(se_pct, na.rm = TRUE)),
          by = .(offset = bin_mid)]
    }, by = condition]

  } else {
    stop(paste0("Unknown smoothing method: ", method))
  }

  return(out[!is.na(smooth_pct)])
}
sort(unique(dat\$condition)) +okabe_ito <- c("#D55E00", "#0072B2", "#009E73", "#E69F00", "#56B4E9", "#CC79A7", "#F0E442", "#999999") +condition_colors <- setNames(okabe_ito[seq_along(conditions)], conditions) + +n_per_cond <- dat[, .(n = uniqueN(sample_id)), by = condition] +cond_label <- paste(n_per_cond\$n, n_per_cond\$condition, collapse = " + ") + +################################################ +## Plot 1: Per-sample lines (smoothed) ## +################################################ + +cat("Generating composite line plot\\n") + +if (opt\$smooth == "loess") { + p1 <- ggplot(dat, aes(x = offset, y = percent_modified, group = sample_id, color = condition)) + + geom_smooth(method = "loess", span = opt\$loess_span, se = FALSE, linewidth = 0.8, alpha = 0.6) + + scale_color_manual(values = condition_colors, name = "Condition") + + labs( + title = paste0("Methylation Around Features (", opt\$prefix, ")"), + subtitle = paste0("Per-sample ", method_label, " | ", cond_label), + x = "Offset from feature midpoint (bp)", + y = "Percent modified (%)" + ) + + theme_bw(base_size = 25, base_family = "Arial") + + theme(plot.title = element_text(face = "bold"), + legend.position = "top", + panel.grid.minor = element_blank()) +} else { + dat_per_sample <- dat[, { + setorder(.SD, offset) + if (opt\$smooth == "rolling_mean") { + .(offset = offset, + smooth_pct = rollmean(percent_modified, k = opt\$rolling_window, fill = NA, align = "center")) + } else { + bin_mid <- floor(offset / opt\$bin_size) * opt\$bin_size + opt\$bin_size / 2 + .SD[, .(smooth_pct = mean(percent_modified, na.rm = TRUE)), by = .(offset = bin_mid)] + } + }, by = .(sample_id, condition)] + dat_per_sample <- dat_per_sample[!is.na(smooth_pct)] + + p1 <- ggplot(dat_per_sample, aes(x = offset, y = smooth_pct, group = sample_id, color = condition)) + + geom_line(alpha = 0.6, linewidth = 0.8) + + scale_color_manual(values = condition_colors, name = "Condition") + + labs( + title = paste0("Methylation Around Features 
#' Write one plot to figures/{png,pdf,svg}/ under a common file stem
#'
#' @param p plot object handed to ggsave()
#' @param prefix file name without extension
#' @param w,h width and height passed to ggsave()
save_fig <- function(p, prefix, w = 12, h = 8) {
  # One target path per format, each in its own sub-directory of fig_dir
  png_path <- file.path(fig_dir, "png", paste0(prefix, ".png"))
  pdf_path <- file.path(fig_dir, "pdf", paste0(prefix, ".pdf"))
  svg_path <- file.path(fig_dir, "svg", paste0(prefix, ".svg"))

  ggsave(png_path, p, width = w, height = h, dpi = 300)
  # PDF goes through the cairo device
  ggsave(pdf_path, p, width = w, height = h, device = cairo_pdf)
  ggsave(svg_path, p, width = w, height = h)

  cat(paste0(" Saved: ", prefix, " (png, pdf, svg)\\n"))
}
+ +################################################ +## Save combined data ## +################################################ + +fwrite(dat[, .(sample_id, condition, mod_code, offset, n_valid, n_mod, percent_modified)], + paste0(opt\$prefix, "_combined.tsv"), sep = "\t") +cat(paste0("Saved combined TSV: ", nrow(dat), " rows\\n")) + +fwrite(dat_summary, paste0(opt\$prefix, "_summary.tsv"), sep = "\t") +cat(paste0("Saved summary TSV: ", nrow(dat_summary), " rows\\n")) + +################################################ +## Versions ## +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +dt.version <- as.character(packageVersion('data.table')) +gg.version <- as.character(packageVersion('ggplot2')) +zoo.version <- as.character(packageVersion('zoo')) + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version), + paste(' r-data.table:', dt.version), + paste(' r-ggplot2:', gg.version), + paste(' r-zoo:', zoo.version) + ), + 'versions.yml' +) + +cat("Done\\n") diff --git a/modules/nf-core/modkit/localize/plot/tests/data/samplesheet.csv b/modules/nf-core/modkit/localize/plot/tests/data/samplesheet.csv new file mode 100644 index 000000000000..2973c34f1f84 --- /dev/null +++ b/modules/nf-core/modkit/localize/plot/tests/data/samplesheet.csv @@ -0,0 +1,3 @@ +sample_id,condition +tumor_sample1,Tumor +wholeblood_sample2,WholeBlood diff --git a/modules/nf-core/modkit/localize/plot/tests/data/tumor_sample1.localize.tsv b/modules/nf-core/modkit/localize/plot/tests/data/tumor_sample1.localize.tsv new file mode 100644 index 000000000000..6b934c872fed --- /dev/null +++ b/modules/nf-core/modkit/localize/plot/tests/data/tumor_sample1.localize.tsv @@ -0,0 +1,102 @@ +mod_code offset n_valid n_mod percent_modified +m -500 55 21 39.36 +m -490 51 20 39.23 +m -480 89 36 41.49 +m -470 31 14 48.20 +m -460 31 12 39.11 +m -450 47 18 39.54 +m -440 91 37 41.01 +m -430 45 21 47.08 +m -420 73 28 38.75 +m -410 48 15 32.88 
+m -400 20 7 36.74 +m -390 40 16 41.95 +m -380 39 15 39.35 +m -370 47 17 36.63 +m -360 31 14 45.49 +m -350 68 27 40.11 +m -340 97 48 49.49 +m -330 53 25 47.32 +m -320 88 39 44.90 +m -310 35 12 34.79 +m -300 90 42 47.51 +m -290 57 24 42.27 +m -280 66 30 46.82 +m -270 93 35 37.86 +m -260 49 22 45.13 +m -250 57 26 46.76 +m -240 32 17 55.35 +m -230 68 30 45.26 +m -220 66 30 45.58 +m -210 40 21 54.50 +m -200 54 24 45.87 +m -190 29 14 51.46 +m -180 51 24 47.36 +m -170 40 19 48.58 +m -160 91 43 47.47 +m -150 48 25 53.06 +m -140 27 13 49.10 +m -130 49 22 44.92 +m -120 71 41 58.47 +m -110 54 25 47.15 +m -100 92 60 66.13 +m -90 60 36 61.17 +m -80 78 45 58.72 +m -70 38 24 63.66 +m -60 91 52 58.11 +m -50 88 55 62.67 +m -40 94 55 58.77 +m -30 71 47 66.21 +m -20 37 18 48.69 +m -10 85 62 73.08 +m 0 34 17 51.61 +m 10 39 23 60.30 +m 20 74 39 53.65 +m 30 96 51 53.24 +m 40 79 50 63.82 +m 50 87 52 60.82 +m 60 21 12 58.41 +m 70 34 21 64.28 +m 80 54 29 54.77 +m 90 63 32 51.06 +m 100 78 46 60.10 +m 110 20 11 58.73 +m 120 53 34 64.31 +m 130 84 42 50.82 +m 140 33 17 53.39 +m 150 100 46 46.19 +m 160 97 47 49.18 +m 170 45 25 57.33 +m 180 89 48 54.60 +m 190 87 48 55.91 +m 200 82 44 54.11 +m 210 22 9 44.19 +m 220 59 29 50.45 +m 230 50 24 49.24 +m 240 30 16 55.15 +m 250 30 14 48.64 +m 260 88 38 43.41 +m 270 36 12 34.82 +m 280 90 42 47.43 +m 290 41 19 47.19 +m 300 74 30 41.75 +m 310 47 24 52.46 +m 320 45 22 49.62 +m 330 59 22 38.15 +m 340 67 23 35.52 +m 350 76 34 45.74 +m 360 51 23 45.55 +m 370 48 18 37.72 +m 380 90 37 41.76 +m 390 49 20 41.08 +m 400 100 40 40.06 +m 410 27 10 40.16 +m 420 62 25 41.83 +m 430 29 14 51.09 +m 440 82 29 36.25 +m 450 47 18 39.86 +m 460 93 30 32.41 +m 470 93 35 38.19 +m 480 72 22 31.49 +m 490 44 18 41.59 +m 500 65 30 46.16 diff --git a/modules/nf-core/modkit/localize/plot/tests/data/wholeblood_sample2.localize.tsv b/modules/nf-core/modkit/localize/plot/tests/data/wholeblood_sample2.localize.tsv new file mode 100644 index 000000000000..62121162c2c3 --- /dev/null +++ 
b/modules/nf-core/modkit/localize/plot/tests/data/wholeblood_sample2.localize.tsv @@ -0,0 +1,102 @@ +mod_code offset n_valid n_mod percent_modified +m -500 74 17 24.17 +m -490 26 3 11.63 +m -480 32 8 25.41 +m -470 33 9 27.64 +m -460 51 11 23.18 +m -450 37 8 22.45 +m -440 74 19 26.06 +m -430 29 6 22.59 +m -420 76 19 25.49 +m -410 32 7 23.99 +m -400 26 2 11.38 +m -390 21 3 17.08 +m -380 31 4 15.71 +m -370 41 12 29.65 +m -360 72 12 16.79 +m -350 71 12 17.86 +m -340 27 5 21.84 +m -330 69 15 21.95 +m -320 53 11 22.34 +m -310 56 16 30.21 +m -300 74 13 18.80 +m -290 91 18 20.42 +m -280 82 13 15.86 +m -270 27 7 26.32 +m -260 94 26 27.92 +m -250 60 14 24.83 +m -240 27 6 23.79 +m -230 87 27 31.58 +m -220 40 11 28.59 +m -210 43 14 33.08 +m -200 28 8 30.30 +m -190 50 11 22.77 +m -180 71 18 25.52 +m -170 51 19 38.19 +m -160 94 36 38.51 +m -150 73 19 26.37 +m -140 94 27 29.05 +m -130 53 15 29.74 +m -120 46 15 32.79 +m -110 53 17 33.19 +m -100 70 22 32.21 +m -90 78 32 41.61 +m -80 60 25 42.62 +m -70 21 10 48.51 +m -60 78 26 33.41 +m -50 32 10 34.26 +m -40 29 10 34.86 +m -30 36 12 33.83 +m -20 64 24 38.43 +m -10 67 31 47.49 +m 0 56 18 33.00 +m 10 58 26 45.14 +m 20 98 46 47.75 +m 30 87 42 48.63 +m 40 21 8 38.38 +m 50 33 11 35.78 +m 60 37 12 32.93 +m 70 90 33 36.98 +m 80 39 18 47.80 +m 90 63 22 35.75 +m 100 46 19 42.76 +m 110 84 26 31.54 +m 120 82 20 25.48 +m 130 26 8 33.66 +m 140 31 13 43.86 +m 150 25 6 25.88 +m 160 20 4 24.30 +m 170 53 15 29.20 +m 180 40 12 32.03 +m 190 74 21 28.62 +m 200 91 19 21.90 +m 210 39 11 29.48 +m 220 89 23 26.94 +m 230 90 27 30.85 +m 240 38 10 26.64 +m 250 66 15 23.67 +m 260 25 6 25.08 +m 270 51 13 26.72 +m 280 33 7 21.47 +m 290 72 13 19.20 +m 300 99 27 27.80 +m 310 50 11 22.33 +m 320 40 4 10.65 +m 330 72 22 30.80 +m 340 23 4 20.17 +m 350 72 18 26.20 +m 360 51 16 31.39 +m 370 33 6 20.03 +m 380 68 20 29.56 +m 390 48 13 27.55 +m 400 45 6 13.45 +m 410 59 13 22.73 +m 420 49 7 15.32 +m 430 71 15 21.57 +m 440 62 17 27.51 +m 450 55 10 19.88 +m 460 64 14 22.05 +m 
nextflow_process {

    name "Test Process MODKIT_LOCALIZE_PLOT"
    script "../main.nf"
    process "MODKIT_LOCALIZE_PLOT"

    tag "modules"
    tag "modules_nfcore"
    tag "modkit"
    tag "modkit/localize"
    tag "modkit/localize/plot"

    // -------------------------------------------------------------------------
    // Stub test — runs without container, validates output file structure
    // -------------------------------------------------------------------------

    test("homo sapiens - 2 localize TSVs, Tumor vs WholeBlood, loess - stub") {

        options "-stub"

        when {
            params {
                module_args = ''
            }
            process {
                """
                input[0] = [
                    [ id: 'CpG_islands_5mC' ],
                    [
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/tumor_sample1.localize.tsv", checkIfExists: true),
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/wholeblood_sample2.localize.tsv", checkIfExists: true)
                    ]
                ]
                input[1] = [
                    [ id: 'samplesheet' ],
                    file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/samplesheet.csv", checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // -------------------------------------------------------------------------
    // Real tests — require R container (r-base, r-ggplot2, r-data.table, r-zoo)
    // Every smoothing method is held to the same checks: all three figure
    // formats, both TSV tables and the versions file must be emitted.
    // -------------------------------------------------------------------------

    test("homo sapiens - 2 localize TSVs, Tumor vs WholeBlood, loess smoothing") {

        when {
            params {
                module_args = '--smooth loess --loess_span 0.1'
            }
            process {
                """
                input[0] = [
                    [ id: 'CpG_islands_5mC' ],
                    [
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/tumor_sample1.localize.tsv", checkIfExists: true),
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/wholeblood_sample2.localize.tsv", checkIfExists: true)
                    ]
                ]
                input[1] = [
                    [ id: 'samplesheet' ],
                    file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/samplesheet.csv", checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert process.out.png },
                { assert process.out.pdf },
                { assert process.out.svg },
                { assert process.out.combined_tsv },
                { assert process.out.summary_tsv },
                { assert process.out.versions }
            )
        }
    }

    test("homo sapiens - 2 localize TSVs, Tumor vs WholeBlood, rolling_mean smoothing") {

        when {
            params {
                module_args = '--smooth rolling_mean --rolling_window 30'
            }
            process {
                """
                input[0] = [
                    [ id: 'CpG_islands_5mC' ],
                    [
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/tumor_sample1.localize.tsv", checkIfExists: true),
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/wholeblood_sample2.localize.tsv", checkIfExists: true)
                    ]
                ]
                input[1] = [
                    [ id: 'samplesheet' ],
                    file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/samplesheet.csv", checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert process.out.png },
                { assert process.out.pdf },
                { assert process.out.svg },
                { assert process.out.combined_tsv },
                { assert process.out.summary_tsv },
                { assert process.out.versions }
            )
        }
    }

    test("homo sapiens - 2 localize TSVs, Tumor vs WholeBlood, binned smoothing") {

        when {
            params {
                module_args = '--smooth binned --bin_size 50'
            }
            process {
                """
                input[0] = [
                    [ id: 'CpG_islands_5mC' ],
                    [
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/tumor_sample1.localize.tsv", checkIfExists: true),
                        file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/wholeblood_sample2.localize.tsv", checkIfExists: true)
                    ]
                ]
                input[1] = [
                    [ id: 'samplesheet' ],
                    file("${projectDir}/modules/nf-core/modkit/localize/plot/tests/data/samplesheet.csv", checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert process.out.png },
                { assert process.out.pdf },
                { assert process.out.svg },
                { assert process.out.combined_tsv },
                { assert process.out.summary_tsv },
                { assert process.out.versions }
            )
        }
    }
}
// nf-test spec for MODKIT_LOCALIZE. The bedMethyl input is not a static test
// file: it is produced on the fly by running MODKIT_PILEUP on a test BAM and
// indexing the result with TABIX_TABIX in the shared `setup` block.
nextflow_process {

    name "Test Process MODKIT_LOCALIZE"
    script "../main.nf"
    process "MODKIT_LOCALIZE"

    tag "modules"
    tag "modules_nfcore"
    tag "modkit"
    tag "modkit/localize"
    tag "modkit/pileup"
    tag "tabix/tabix"

    // Runs before every test: pileup -> bgzipped bedMethyl -> tabix index
    setup {
        run("MODKIT_PILEUP") {
            script "../../pileup/main.nf"
            process {
                """
                input[0] = [
                    [ id: 'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true)
                ]
                input[1] = [[],[],[]]
                input[2] = [[],[]]
                """
            }
        }

        run("TABIX_TABIX") {
            script "../../../tabix/tabix/main.nf"
            process {
                """
                input[0] = MODKIT_PILEUP.out.bedgz
                """
            }
        }
    }

    // Real run: snapshot of all outputs
    test("homo sapiens - bedmethyl, fai as sizes, with regions bed") {

        when {
            params {
                // Use low min-coverage and small window to work with sparse test data
                module_args = '--min-coverage 1 --window 1000'
            }
            process {
                """
                // Pair each bedMethyl with its index (joined on meta)
                input[0] = MODKIT_PILEUP.out.bedgz
                    .join(TABIX_TABIX.out.index)
                    .map { meta, bed, tbi -> [ meta, bed, tbi ] }
                input[1] = [
                    [ id: 'hg38' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                // Regions BED is generated inline: a single chr22:0-1000 interval
                input[2] = Channel.of('chr22\\t0\\t1000')
                    .collectFile(name: 'chr22.bed', newLine: true)
                    .map { file -> [ [ id:'chr22' ], file ] }
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Stub run: same inputs, no module args; snapshot of the stub outputs
    test("homo sapiens - bedmethyl, fai as sizes, with regions bed - stub") {

        options "-stub"

        when {
            params {
                module_args = ''
            }
            process {
                """
                // Pair each bedMethyl with its index (joined on meta)
                input[0] = MODKIT_PILEUP.out.bedgz
                    .join(TABIX_TABIX.out.index)
                    .map { meta, bed, tbi -> [ meta, bed, tbi ] }
                input[1] = [
                    [ id: 'hg38' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                // Regions BED is generated inline: a single chr22:0-1000 interval
                input[2] = Channel.of('chr22\\t0\\t1000')
                    .collectFile(name: 'chr22.bed', newLine: true)
                    .map { file -> [ [ id:'chr22' ], file ] }
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
// Test-only configuration for the MODKIT_LOCALIZE nf-test spec.
process {
    // Module arguments come from the per-test `params.module_args` value
    // (empty string when a test sets none)
    withName: 'MODKIT_LOCALIZE' {
        ext.args = params.module_args ?: ''
    }
    // The setup step indexes the bedMethyl output using tabix's BED preset
    withName: 'TABIX_TABIX' {
        ext.args = '-p bed'
    }
}