diff --git a/assets/software_references.yml b/assets/software_references.yml index f80aa0e84..c8786b640 100644 --- a/assets/software_references.yml +++ b/assets/software_references.yml @@ -195,3 +195,6 @@ tool: perl-math-cdf: citation: "perl-math-cdf" bibliography: "" + mitorsaw: + citation: "Mitorsaw" + bibliography: "" diff --git a/conf/modules/call_mitochondrial_variants.config b/conf/modules/call_mitochondrial_variants.config new file mode 100644 index 000000000..609f8e1a7 --- /dev/null +++ b/conf/modules/call_mitochondrial_variants.config @@ -0,0 +1,33 @@ + /* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + + +process { + + withName: '.*:CALL_MITOCHONDRIAL_VARIANTS:.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*:CALL_MITOCHONDRIAL_VARIANTS:DEEPVARIANT_RUNDEEPVARIANT' { + tag = {"${meta.id}_${meta.region.baseName}"} + ext.prefix = { intervals ? "${meta.id}_${intervals}_deepvariant" : "${meta.id}_deepvariant" } + ext.args = { + [ + "--sample_name=${meta.id}", + "--model_type=${params.deepvariant_model_type}", + meta.sex == 1 ? '--haploid_contigs="chrX,chrY"' : '', + ].join(' ') + } + } +} diff --git a/conf/modules/gvcf_glnexus_norm_variants.config b/conf/modules/gvcf_glnexus_norm_variants.config index b51dad7b2..0b25854bc 100644 --- a/conf/modules/gvcf_glnexus_norm_variants.config +++ b/conf/modules/gvcf_glnexus_norm_variants.config @@ -19,9 +19,9 @@ process { } withName: '.*:GVCF_GLNEXUS_NORM_VARIANTS:VCFEXPRESS' { - ext.prefix = { "${meta.id}_${meta.sv_caller}_found_in" } + ext.prefix = { "${meta.id}_${meta.caller}_found_in" } ext.suffix = { "bcf.gz" } - ext.args = { "-s \'FOUND_IN=return \"${meta.sv_caller}\"\' -e \"return true\"" } + ext.args = { "-s \'FOUND_IN=return \"${meta.caller}\"\' -e \"return true\"" } } withName: '.*:GVCF_GLNEXUS_NORM_VARIANTS:GLNEXUS' { @@ -51,4 +51,11 @@ process { ext.prefix = { intervals ? "${meta.id}_${intervals}_sentieon" : "${meta.id}_sentieon" } } + withName: '.*:GVCF_GLNEXUS_NORM_VARIANTS:BCFTOOLS_MERGE' { + ext.args = [ + '--force-single' + ].join(' ') + ext.prefix = { "${meta.id}_merged" } + } + } diff --git a/main.nf b/main.nf index 75d74a74c..6cd624e5f 100644 --- a/main.nf +++ b/main.nf @@ -94,6 +94,7 @@ workflow GENOMICMEDICINESWEDEN_NALLO { val_phaser val_plot_chromograph_autozygosity val_plot_chromograph_coverage + val_preset val_pre_vep_snv_filter_expression val_run_methbat val_run_modkit @@ -197,6 +198,7 @@ workflow GENOMICMEDICINESWEDEN_NALLO { val_phaser, val_plot_chromograph_autozygosity, val_plot_chromograph_coverage, + val_preset, val_pre_vep_snv_filter_expression, val_run_methbat, val_run_modkit, @@ -324,6 +326,7 @@ workflow { params.phaser, params.plot_chromograph_autozygosity, params.plot_chromograph_coverage, + params.preset, params.pre_vep_snv_filter_expression, params.run_methbat, params.run_modkit, diff --git a/modules.json b/modules.json index 650bd643b..17f489f79 100644 --- a/modules.json +++ b/modules.json @@ -236,6 +236,11 @@ "git_sha": "14980f759266eec42dac401fcafeb83d6c957b41", "installed_by": ["modules"] }, + "mitorsaw/haplotype": { + "branch": "master", + "git_sha": "ae0ea82513abe5818d3c0b1026e78ca15c3bbf4f", + "installed_by": ["modules"] + }, "modkit/bedmethyltobigwig": { "branch": "master", "git_sha": "3d81317a30d1016b533982d6b84df07713ae520a", diff --git a/modules/nf-core/mitorsaw/haplotype/environment.yml b/modules/nf-core/mitorsaw/haplotype/environment.yml new file mode 100644 index 000000000..cd5933e2c --- /dev/null +++ b/modules/nf-core/mitorsaw/haplotype/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::mitorsaw=0.2.7" diff --git a/modules/nf-core/mitorsaw/haplotype/main.nf b/modules/nf-core/mitorsaw/haplotype/main.nf new file mode 100644 index 000000000..db143b760 --- /dev/null +++ b/modules/nf-core/mitorsaw/haplotype/main.nf @@ -0,0 +1,78 @@ +process MITORSAW_HAPLOTYPE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mitorsaw:0.2.7--h9ee0642_0': + 'biocontainers/mitorsaw:0.2.7--h9ee0642_0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta), path(fai) + val(include_hap_stats) + val(include_debug_output) + + output: + tuple val(meta), path("*${prefix}.vcf.gz"), emit: vcf + tuple val(meta), path("*${prefix}.vcf.gz.tbi"), emit: tbi + tuple val(meta), path("${prefix}.json"), emit: stats, optional: true + tuple val(meta), path("${prefix}_debug/coverage_stats.json"), emit: coverage_stats, optional: true + tuple val(meta), path("${prefix}_debug/sequences_chrM.fa"), emit: sequences_chrM, optional: true + tuple val(meta), path("${prefix}_debug/mito_igv_custom/custom_alignments.bam"), emit: custom_alignments_bam, optional: true + tuple val(meta), path("${prefix}_debug/mito_igv_custom/custom_alignments.bam.bai"), emit: custom_alignments_bai, optional: true + tuple val(meta), path("${prefix}_debug/mito_igv_custom/custom_igv_session.xml"), emit: igv_session, optional: true + tuple val(meta), path("${prefix}_debug/mito_igv_custom/custom_reference.fa"), emit: custom_ref_fa, optional: true + tuple val(meta), path("${prefix}_debug/mito_igv_custom/custom_reference.fa.fai"), emit: custom_ref_fai, optional: true + tuple val(meta), path("${prefix}_debug/mito_igv_custom/custom_regions.bed"), emit: custom_regions, optional: true + + tuple val("${task.process}"), val('mitorsaw'), eval("mitorsaw --version | sed 's/mitorsaw //'"), topic: versions, emit: versions_mitorsaw + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def output_hap_stats = include_hap_stats ? "--output-hap-stats ${prefix}.json" : '' + def debug_output = include_debug_output ? "--output-debug ${prefix}_debug" : '' + + """ + mitorsaw haplotype \\ + $args \\ + --reference ${fasta} \\ + --bam ${bam} \\ + --output-vcf ${prefix}.vcf.gz \\ + ${output_hap_stats} \\ + ${debug_output} + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def touch_hap_stats = include_hap_stats ? "touch ${prefix}.json" : '' + def debug_dir = "${prefix}_debug" + def igv_dir = "${debug_dir}/mito_igv_custom" + def mkdir_debug = include_debug_output ? "mkdir -p ${igv_dir}" : '' + def debug_files = [ + "${debug_dir}/coverage_stats.json", + "${debug_dir}/sequences_chrM.fa", + "${igv_dir}/custom_alignments.bam", + "${igv_dir}/custom_alignments.bam.bai", + "${igv_dir}/custom_igv_session.xml", + "${igv_dir}/custom_reference.fa", + "${igv_dir}/custom_reference.fa.fai", + "${igv_dir}/custom_regions.bed" + ] + def touch_debug_files = include_debug_output ? "touch ${debug_files.join(' ')}" : '' + """ + echo $args + + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + ${touch_hap_stats} + ${mkdir_debug} + ${touch_debug_files} + """ +} diff --git a/modules/nf-core/mitorsaw/haplotype/meta.yml b/modules/nf-core/mitorsaw/haplotype/meta.yml new file mode 100644 index 000000000..f1dc893e7 --- /dev/null +++ b/modules/nf-core/mitorsaw/haplotype/meta.yml @@ -0,0 +1,210 @@ +name: "mitorsaw_haplotype" +description: Mitorsaw analyses mitochondrial variants and identifies + heteroplasmy and homoplasmy +keywords: + - heteroplasmy + - homoplasmy + - mitochondrial + - mitorsaw + - haplotype +tools: + - mitorsaw: + description: "A tool for mitochondrial analysis for HiFi sequencing data" + homepage: "https://github.com/PacificBiosciences/mitorsaw" + documentation: "https://github.com/PacificBiosciences/mitorsaw/tree/main/docs" + tool_dev_url: "https://github.com/PacificBiosciences/mitorsaw" + licence: ["Pacific Biosciences Software License Agreement"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - bam: + type: file + description: Sorted BAM file + pattern: "*.bam" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - bai: + type: file + description: BAM index file + pattern: "*.bai" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'genome' ]` + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta}" + ontologies: [] + - fai: + type: file + description: Reference genome index file + pattern: "*.fai" + ontologies: [] + - include_hap_stats: + type: boolean + description: Whether to include haplotype statistics output file + default: false + - include_debug_output: + type: boolean + description: Whether to generate debug output files + default: false + +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - "*${prefix}.vcf.gz": + type: file + description: Compressed VCF file containing haplotype information + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - "*${prefix}.vcf.gz.tbi": + type: file + description: Index file for the compressed VCF file + pattern: "*.vcf.gz.tbi" + ontologies: [] + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}.json: + type: file + description: Mitorsaw statistics in JSON format + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + coverage_stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/coverage_stats.json: + type: file + description: Coverage statistics in JSON format + pattern: "coverage_stats.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + sequences_chrM: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/sequences_chrM.fa: + type: file + description: Haplotype sequences in FASTA format + pattern: "sequences_chrM.fa" + ontologies: [] + custom_alignments_bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/mito_igv_custom/custom_alignments.bam: + type: file + description: Custom alignments in BAM format + pattern: "custom_alignments.bam" + ontologies: [] + custom_alignments_bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/mito_igv_custom/custom_alignments.bam.bai: + type: file + description: Custom alignments index file + pattern: "custom_alignments.bam.bai" + ontologies: [] + igv_session: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/mito_igv_custom/custom_igv_session.xml: + type: file + description: IGV session file in XML format + pattern: "custom_igv_session.xml" + ontologies: + - edam: http://edamontology.org/format_2332 # XML + custom_ref_fa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/mito_igv_custom/custom_reference.fa: + type: file + description: Custom reference genome in FASTA format + pattern: "custom_reference.fa" + ontologies: [] + custom_ref_fai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/mito_igv_custom/custom_reference.fa.fai: + type: file + description: Custom reference genome index file + pattern: "custom_reference.fa.fai" + ontologies: [] + custom_regions: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}_debug/mito_igv_custom/custom_regions.bed: + type: file + description: Custom regions in BED format + pattern: "custom_regions.bed" + ontologies: [] + versions_mitorsaw: + - - ${task.process}: + type: string + description: The name of the process + - mitorsaw: + type: string + description: The name of the tool + - mitorsaw --version | sed 's/mitorsaw //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - mitorsaw: + type: string + description: The name of the tool + - mitorsaw --version | sed 's/mitorsaw //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@eliottBo" +maintainers: + - "@eliottBo" diff --git a/modules/nf-core/mitorsaw/haplotype/tests/main.nf.test b/modules/nf-core/mitorsaw/haplotype/tests/main.nf.test new file mode 100644 index 000000000..b92aca23c --- /dev/null +++ b/modules/nf-core/mitorsaw/haplotype/tests/main.nf.test @@ -0,0 +1,199 @@ +nextflow_process { + + name "Test Process MITORSAW_HAPLOTYPE" + script "../main.nf" + process "MITORSAW_HAPLOTYPE" + + tag "modules" + tag "modules_nfcore" + tag "mitorsaw" + tag "mitorsaw/haplotype" + + test("homo_sapiens - bam") { + + when { + process { + """ + input[0] = [ + [ id:'sample_1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam.bai', checkIfExists: true), + ] + input[1] = [ + [ id:'sample_1'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta.fai', checkIfExists: true), + ] + input[2] = false + input[3] = false + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'sample_1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam.bai', checkIfExists: true), + ] + input[1] = [ + [ id:'sample_1'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta.fai', checkIfExists: true), + ] + input[2] = false + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam - hap_stats") { + + when { + process { + """ + input[0] = [ + [ id:'sample_1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam.bai', checkIfExists: true), + ] + input[1] = [ + [ id:'sample_1'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta.fai', checkIfExists: true), + ] + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam - hap_stats - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'sample_1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam.bai', checkIfExists: true), + ] + input[1] = [ + [ id:'sample_1'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta.fai', checkIfExists: true), + ] + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam - debug_files") { + + when { + process { + """ + input[0] = [ + [ id:'sample_1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam.bai', checkIfExists: true), + ] + input[1] = [ + [ id:'sample_1'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta.fai', checkIfExists: true), + ] + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam - debug_files - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'sample_1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/200080.GRCh38.haplotagged.chrM.downsampled.bam.bai', checkIfExists: true), + ] + input[1] = [ + [ id:'sample_1'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/raredisease/reference/Homo_sapiens_assembly38_chr20_chrM.fasta.fai', checkIfExists: true), + ] + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mitorsaw/haplotype/tests/main.nf.test.snap b/modules/nf-core/mitorsaw/haplotype/tests/main.nf.test.snap new file mode 100644 index 000000000..24feba93c --- /dev/null +++ b/modules/nf-core/mitorsaw/haplotype/tests/main.nf.test.snap @@ -0,0 +1,848 @@ +{ + "homo_sapiens - bam - hap_stats - stub": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + + ], + "11": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ], + "2": [ + [ + { + "id": "sample_1" + }, + "sample_1.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "coverage_stats": [ + + ], + "custom_alignments_bai": [ + + ], + "custom_alignments_bam": [ + + ], + "custom_ref_fa": [ + + ], + "custom_ref_fai": [ + + ], + "custom_regions": [ + + ], + "igv_session": [ + + ], + "sequences_chrM": [ + + ], + "stats": [ + [ + { + "id": "sample_1" + }, + "sample_1.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_mitorsaw": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T14:46:15.780146311" + }, + "homo_sapiens - bam - debug_files - stub": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "sample_1" + }, + "custom_regions.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "sample_1" + }, + "coverage_stats.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "sample_1" + }, + "sequences_chrM.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "sample_1" + }, + "custom_igv_session.xml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "coverage_stats": [ + [ + { + "id": "sample_1" + }, + "coverage_stats.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "custom_alignments_bai": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "custom_alignments_bam": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "custom_ref_fa": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "custom_ref_fai": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "custom_regions": [ + [ + { + "id": "sample_1" + }, + "custom_regions.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_session": [ + [ + { + "id": "sample_1" + }, + "custom_igv_session.xml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sequences_chrM": [ + [ + { + "id": "sample_1" + }, + "sequences_chrM.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + + ], + "tbi": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_mitorsaw": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T14:59:36.603127961" + }, + "homo_sapiens - bam - hap_stats": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,928072f78d56a38a28ed9f0965be93a5" + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,bf5f131aec0c14a8273e15a1f84efa76" + ] + ], + "10": [ + + ], + "11": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ], + "2": [ + [ + { + "id": "sample_1" + }, + "sample_1.json:md5,40ce87c755aa6af2e1b1c5395ab50152" + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "coverage_stats": [ + + ], + "custom_alignments_bai": [ + + ], + "custom_alignments_bam": [ + + ], + "custom_ref_fa": [ + + ], + "custom_ref_fai": [ + + ], + "custom_regions": [ + + ], + "igv_session": [ + + ], + "sequences_chrM": [ + + ], + "stats": [ + [ + { + "id": "sample_1" + }, + "sample_1.json:md5,40ce87c755aa6af2e1b1c5395ab50152" + ] + ], + "tbi": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,bf5f131aec0c14a8273e15a1f84efa76" + ] + ], + "vcf": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,928072f78d56a38a28ed9f0965be93a5" + ] + ], + "versions_mitorsaw": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T14:46:10.302018786" + }, + "homo_sapiens - bam - debug_files": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,f5d7621e9f51bfbbc96c5b25f6b0e334" + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,6539aa978661ff9501d33c7dadc4f4fa" + ] + ], + "10": [ + [ + { + "id": "sample_1" + }, + "custom_regions.bed:md5,18a863068ed755ad17917adf1f606d86" + ] + ], + "11": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "sample_1" + }, + "coverage_stats.json:md5,69873c0b7c918cf5b499532f4d6f4187" + ] + ], + "4": [ + [ + { + "id": "sample_1" + }, + "sequences_chrM.fa:md5,ab16890c0b716b322092030915994924" + ] + ], + "5": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam:md5,f6c7c3e5b7d47ee622c60d0c2617b226" + ] + ], + "6": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam.bai:md5,c4367b2552730a60394ec82d0cfcdb1a" + ] + ], + "7": [ + [ + { + "id": "sample_1" + }, + "custom_igv_session.xml:md5,6f716172581d49ead45212ac2829e5fe" + ] + ], + "8": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa:md5,702da0ffb3e70b72b769b7a74dca62de" + ] + ], + "9": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa.fai:md5,03acb7f948e5cfee5604cb591bfa02c2" + ] + ], + "coverage_stats": [ + [ + { + "id": "sample_1" + }, + "coverage_stats.json:md5,69873c0b7c918cf5b499532f4d6f4187" + ] + ], + "custom_alignments_bai": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam.bai:md5,c4367b2552730a60394ec82d0cfcdb1a" + ] + ], + "custom_alignments_bam": [ + [ + { + "id": "sample_1" + }, + "custom_alignments.bam:md5,f6c7c3e5b7d47ee622c60d0c2617b226" + ] + ], + "custom_ref_fa": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa:md5,702da0ffb3e70b72b769b7a74dca62de" + ] + ], + "custom_ref_fai": [ + [ + { + "id": "sample_1" + }, + "custom_reference.fa.fai:md5,03acb7f948e5cfee5604cb591bfa02c2" + ] + ], + "custom_regions": [ + [ + { + "id": "sample_1" + }, + "custom_regions.bed:md5,18a863068ed755ad17917adf1f606d86" + ] + ], + "igv_session": [ + [ + { + "id": "sample_1" + }, + "custom_igv_session.xml:md5,6f716172581d49ead45212ac2829e5fe" + ] + ], + "sequences_chrM": [ + [ + { + "id": "sample_1" + }, + "sequences_chrM.fa:md5,ab16890c0b716b322092030915994924" + ] + ], + "stats": [ + + ], + "tbi": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,6539aa978661ff9501d33c7dadc4f4fa" + ] + ], + "vcf": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,f5d7621e9f51bfbbc96c5b25f6b0e334" + ] + ], + "versions_mitorsaw": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T14:46:24.871861003" + }, + "homo_sapiens - bam": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,33489879283c66421422713049b690fa" + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,5c81312b7ae36b80af6b0d03f2bc8ccb" + ] + ], + "10": [ + + ], + "11": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "coverage_stats": [ + + ], + "custom_alignments_bai": [ + + ], + "custom_alignments_bam": [ + + ], + "custom_ref_fa": [ + + ], + "custom_ref_fai": [ + + ], + "custom_regions": [ + + ], + "igv_session": [ + + ], + "sequences_chrM": [ + + ], + "stats": [ + + ], + "tbi": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,5c81312b7ae36b80af6b0d03f2bc8ccb" + ] + ], + "vcf": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,33489879283c66421422713049b690fa" + ] + ], + "versions_mitorsaw": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T14:45:56.442960353" + }, + "homo_sapiens - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + + ], + "11": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "coverage_stats": [ + + ], + "custom_alignments_bai": [ + + ], + "custom_alignments_bam": [ + + ], + "custom_ref_fa": [ + + ], + "custom_ref_fai": [ + + ], + "custom_regions": [ + + ], + "igv_session": [ + + ], + "sequences_chrM": [ + + ], + "stats": [ + + ], + "tbi": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "sample_1" + }, + "sample_1.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_mitorsaw": [ + [ + "MITORSAW_HAPLOTYPE", + "mitorsaw", + "0.2.7-8ee9b5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T14:46:02.066588859" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index f008047ea..8f8af87ad 100644 --- a/nextflow.config +++ b/nextflow.config @@ -120,7 +120,7 @@ params { hifiasm_mode = 'trio-binning' snv_caller = 'deepvariant' str_caller = params.preset == 'ONT_R10' ? 'strdust' : 'trgt' - mitochondrial_caller = 'deepvariant' + mitochondrial_caller = 'mitorsaw' tiddit_bin_size = 500 vep_cache_version = 110 vep_mitochondrial_genome_distance = 0 diff --git a/subworkflows/local/call_mitochondrial_variants/main.nf b/subworkflows/local/call_mitochondrial_variants/main.nf new file mode 100644 index 000000000..a4d43d572 --- /dev/null +++ b/subworkflows/local/call_mitochondrial_variants/main.nf @@ -0,0 +1,57 @@ +/* + * Workflow to call mitochondrial variants + */ + +include { DEEPVARIANT_RUNDEEPVARIANT } from '../../../modules/nf-core/deepvariant/rundeepvariant/main' +include { MITORSAW_HAPLOTYPE } from '../../../modules/nf-core/mitorsaw/haplotype/main' + +workflow CALL_MITOCHONDRIAL_VARIANTS { + take: + ch_bam_bai // channel: [val(meta), path(bam), path(bai)] + ch_fasta // channel: [val(meta), path(fasta)] + ch_fai // channel: [val(meta), path(fai)] + ch_par_bed // channel: [val(meta), path(bed)] – PAR regions (deepvariant) + ch_mitochondrial_bed // channel: [val(meta), path(bed)] – mitochondrial interval (deepvariant) + mitochondrial_caller // string + + main: + ch_fasta.join(ch_fai).set { ch_fasta_fai } + + if (mitochondrial_caller == "mitorsaw") { + + MITORSAW_HAPLOTYPE( + ch_bam_bai, + ch_fasta_fai, + [], + [], + ) + + ch_vcf = MITORSAW_HAPLOTYPE.out.vcf + ch_tbi = MITORSAW_HAPLOTYPE.out.tbi + + } else if (mitochondrial_caller == "deepvariant") { + + // Broadcast the single mito BED to every sample + ch_bam_bai + .combine(ch_mitochondrial_bed) + .map { bam_meta, bam, bai, mito_meta, bed -> + [bam_meta + [genome: mito_meta.genome, num_intervals: 1], bam, bai, bed, 1] } + .set { call_snvs_input } + + DEEPVARIANT_RUNDEEPVARIANT( + call_snvs_input, + ch_fasta, + ch_fai, + [[], []], + ch_par_bed, + ) + + ch_vcf = DEEPVARIANT_RUNDEEPVARIANT.out.gvcf + ch_tbi = DEEPVARIANT_RUNDEEPVARIANT.out.gvcf_tbi + + } + + emit: + mitochondrial_vcf = ch_vcf // channel: [val(meta), path(vcf/gvcf)] + mitochondrial_tbi = ch_tbi // channel: [val(meta), path(tbi)] +} diff --git a/subworkflows/local/gvcf_glnexus_norm_variants/main.nf b/subworkflows/local/gvcf_glnexus_norm_variants/main.nf index a8a231b21..c4100dde3 100644 --- a/subworkflows/local/gvcf_glnexus_norm_variants/main.nf +++ b/subworkflows/local/gvcf_glnexus_norm_variants/main.nf @@ -5,6 +5,7 @@ include { BCFTOOLS_PLUGINFIXPLOIDY } from '../../../modules/nf-core/bcftools/pluginfixploidy/main' include { BCFTOOLS_NORM as BCFTOOLS_NORM_MULTISAMPLE } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' include { GLNEXUS } from '../../../modules/nf-core/glnexus/main' include { SENTIEON_GVCFTYPER } from '../../../modules/nf-core/sentieon/gvcftyper/main' include { VCFEXPRESS } from '../../../modules/nf-core/vcfexpress/main' @@ -16,68 +17,100 @@ workflow GVCF_GLNEXUS_NORM_VARIANTS { ch_bed // channel: [optional] [ val(meta), path(input_bed) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_fai // channel: [mandatory] [ val(meta), path(fai) ] - variant_caller // string: variant caller to use ch_vcfexpress_prelude // path: [mandatory] lua file main: ch_merged_family_gvcf = channel.empty() - if (variant_caller.equals("deepvariant")) { - GLNEXUS( - ch_gvcfs.map { meta, gvcfs -> [meta, gvcfs, []] }, - ch_bed, - ) - - ch_merged_family_gvcf = GLNEXUS.out.bcf - - } else if (variant_caller.equals("sentieon")) { - - ch_gvcfs - .join(ch_tbis, failOnMismatch: true, failOnDuplicate: true) - .map { meta, gvcfs, tbis -> - [meta, gvcfs, tbis, []] - } - .set { ch_gvcftyper_in } - - SENTIEON_GVCFTYPER( - ch_gvcftyper_in, - ch_fasta, - ch_fai, - [[], []], - [[], []], - ) - - // Re-encoding haploid GTs to diploid needs to occur _after_ GVCFtyper - // in the gvcf joint-calling path, as the altered ploidy of haploid - // to diploid GT no longer matches the sample PL field, which leads to a - // crash in GVCFTyper - BCFTOOLS_PLUGINFIXPLOIDY( - SENTIEON_GVCFTYPER.out.vcf_gz.join(SENTIEON_GVCFTYPER.out.vcf_gz_tbi), - [], - [], - [], - [], - ) - - ch_merged_family_gvcf = BCFTOOLS_PLUGINFIXPLOIDY.out.vcf - } + // Branching gVCFs and TBI channels by caller, as they need to be processed differently in the next steps + ch_gvcfs + .branch { meta, _gvcfs -> + mitorsaw: meta.caller == "mitorsaw" + sentieon: meta.caller == "sentieon" + deepvariant: meta.caller == "deepvariant" + } + .set { branched_gvcfs } + + ch_tbis + .branch { meta, _tbi -> + mitorsaw: meta.caller == "mitorsaw" + sentieon: meta.caller == "sentieon" + deepvariant: meta.caller == "deepvariant" + } + .set { branched_tbis } + + // GLNEXUS processes deepvariant gVCFs + GLNEXUS( + branched_gvcfs.deepvariant.map { meta, gvcfs -> [meta, gvcfs, []] }, + ch_bed, + ) + + ch_merged_family_gvcf_glnexus = GLNEXUS.out.bcf + + // SENTIEON_GVCFTYPER processes sentieon gVCFs + branched_gvcfs.sentieon + .join(branched_tbis.sentieon, failOnMismatch: true, failOnDuplicate: true) + .map { meta, gvcfs, tbis -> + [meta, gvcfs, tbis, []] + } + .set { ch_gvcftyper_in } + + SENTIEON_GVCFTYPER( + ch_gvcftyper_in, + ch_fasta, + ch_fai, + [[], []], + [[], []], + ) + + // Re-encoding haploid GTs to diploid needs to occur _after_ GVCFtyper + // in the gvcf joint-calling path, as the altered ploidy of haploid + // to diploid GT no longer matches the sample PL field, which leads to a + // crash in GVCFTyper + BCFTOOLS_PLUGINFIXPLOIDY( + SENTIEON_GVCFTYPER.out.vcf_gz.join(SENTIEON_GVCFTYPER.out.vcf_gz_tbi), + [], + [], + [], + [], + ) + ch_merged_family_gvcf_sentieon = BCFTOOLS_PLUGINFIXPLOIDY.out.vcf + + // BCFTOOLS_MERGE is used for the mitochondrial specific caller + branched_gvcfs.mitorsaw.join(branched_tbis.mitorsaw, failOnMismatch: true, failOnDuplicate: true) + .map { meta, gvcfs, tbis -> + [meta, gvcfs, tbis, []] + } + .set { ch_bcftools_merge_in } + + ch_fasta.join(ch_fai).set { ch_fasta_fai } + + BCFTOOLS_MERGE( + ch_bcftools_merge_in, + ch_fasta_fai.collect(), + ) + ch_merged_family_gvcf_bcftools = BCFTOOLS_MERGE.out.vcf // Annotate with FOUND_IN tag - not sure what would happen if we do this before glnexus instead? // Add caller information to meta so vcfexpress can add the FOUND_IN tag based on sv_caller - ch_merged_family_gvcf - .map { meta, vcf -> - [meta + [sv_caller: variant_caller], vcf] - } - .set { ch_vcfexpress_input } + // ch_merged_family_gvcf + // .map { meta, vcf -> + // [meta + [sv_caller: meta.], vcf] + // } + // .set { ch_vcfexpress_input } + ch_merged_family_gvcf_glnexus + .mix(ch_merged_family_gvcf_sentieon) + .mix(ch_merged_family_gvcf_bcftools) + .set { ch_merged_family_gvcf } VCFEXPRESS( - ch_vcfexpress_input, + ch_merged_family_gvcf, ch_vcfexpress_prelude, ) // Remove added caller information in meta VCFEXPRESS.out.vcf .map { meta, vcf -> - [meta - meta.subMap('sv_caller'), vcf, []] + [meta - meta.subMap('caller'), vcf, []] } .set { ch_bcftools_norm_input } diff --git a/subworkflows/local/scatter_genome/main.nf b/subworkflows/local/scatter_genome/main.nf index f71eb3247..0c554443f 100644 --- a/subworkflows/local/scatter_genome/main.nf +++ b/subworkflows/local/scatter_genome/main.nf @@ -75,7 +75,8 @@ workflow SCATTER_GENOME { add_bed_count( ch_bed_genomes.nuclear.mix(ch_bed_mitochondrial_to_mix) - ).map { meta, bed, num_intervals -> [meta.subMap('genome'), bed, num_intervals] }.set { ch_bed_nuclear_mitochondrial_intervals } + ).map { meta, bed, num_intervals -> [meta.subMap('genome'), bed, num_intervals] } + .set { ch_bed_nuclear_mitochondrial_intervals } // Make bed interval if split_n > 1, otherwise just pass the bed file through if (split_n > 1) { @@ -97,8 +98,10 @@ workflow SCATTER_GENOME { // Remove num_intervals for add_bed_count function. Then recalculate the total bed count (nuclear + mitochondrial) and mix the two channels add_bed_count( - ch_bed_nuclear_intervals.map { meta, bed, _num_intervals -> [meta, bed] }.mix(ch_bed_mitochondrial_to_mix) - ).map { meta, bed, num_intervals -> [meta.subMap('genome'), bed, num_intervals] }.set { ch_bed_nuclear_mitochondrial_intervals } + ch_bed_nuclear_intervals.map { meta, bed, _num_intervals -> [meta, bed] } + .mix(ch_bed_mitochondrial_to_mix) + ).map { meta, bed, num_intervals -> [meta.subMap('genome'), bed, num_intervals] } + .set { ch_bed_nuclear_mitochondrial_intervals } /* * Since we don't check beforehand how many intervals it's possible to split the bed file into, @@ -121,8 +124,8 @@ workflow SCATTER_GENOME { } emit: - bed = BEDTOOLS_MERGE.out.bed // channel: [ val(meta), path(bed) ] - bed_nuclear_intervals = ch_bed_nuclear_intervals // channel: [ val(meta), path(bed), val(num_intervals) ] + bed = BEDTOOLS_MERGE.out.bed // channel: [ val(meta), path(bed) ] + bed_nuclear_intervals = ch_bed_nuclear_intervals // channel: [ val(meta), path(bed), val(num_intervals) ] bed_nuclear_mitochondrial_intervals = ch_bed_nuclear_mitochondrial_intervals // channel: [ val(meta), path(bed), val(num_intervals) ] } diff --git a/workflows/nallo.nf b/workflows/nallo.nf index 83bc92a04..3fec3ad98 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -16,6 +16,7 @@ include { CONVERT_INPUT_FILES as CONVERT_INPUT_FASTQS } from '../subw include { CONVERT_INPUT_FILES as CONVERT_INPUT_BAMS } from '../subworkflows/local/convert_input_files' include { CHROMOGRAPH } from '../subworkflows/local/chromograph' include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex' +include { CALL_MITOCHONDRIAL_VARIANTS } from '../subworkflows/local/call_mitochondrial_variants' include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs' include { CALL_REPEAT_EXPANSIONS_STRDUST } from '../subworkflows/local/call_repeat_expansions_strdust' include { CALL_REPEAT_EXPANSIONS_TRGT } from '../subworkflows/local/call_repeat_expansions_trgt' @@ -137,6 +138,7 @@ workflow NALLO { val_phaser val_plot_chromograph_autozygosity val_plot_chromograph_coverage + val_preset val_pre_vep_snv_filter_expression val_run_methbat val_run_modkit @@ -436,12 +438,54 @@ workflow NALLO { val_snv_calling_processes, ) - if (val_mitochondrial_caller == "deepvariant") { - - SCATTER_GENOME.out.bed_nuclear_mitochondrial_intervals.set { ch_bed_intervals } + // Prevents running Mitorsaw with ONT data, which is not supported. + if (val_mitochondrial_caller == "mitorsaw" && val_preset == "ONT_R10") { + error("Mitorsaw does not support ONT data. Please use a compatible mitochondrial caller (e.g. deepvariant) with ONT sequencing.") } - else { - ch_bed_intervals = SCATTER_GENOME.out.bed_nuclear_intervals + + if (val_snv_caller == "deepvariant" && val_mitochondrial_caller == "deepvariant") { + // Deepvariant handles nuclear and mitochondrial + SCATTER_GENOME.out.bed_nuclear_mitochondrial_intervals + .map { meta, bed, num_intervals -> [ meta + [caller: val_snv_caller], bed, num_intervals] } + .set { ch_bed_intervals } + ch_mitochondrial_vcf = channel.empty() + ch_mitochondrial_tbi = channel.empty() + } else { + // SNV caller gets nuclear-only intervals + // Sentieon gets nuclear BED only; deepvariant + mito caller, Deepvariant also gets nuclear only) + SCATTER_GENOME.out.bed_nuclear_intervals + .map { meta, bed, num_intervals -> [ meta + [caller: val_snv_caller], bed, num_intervals] } + .set { ch_bed_intervals } + + // Mitochondrial BED interval needed by deepvariant in CALL_MITOCHONDRIAL_VARIANTS + SCATTER_GENOME.out.bed_nuclear_mitochondrial_intervals + .filter { meta, _bed, _num_intervals -> meta.genome == "mitochondrial" } + .map { meta, bed, _num_intervals -> [meta, bed] } + .set { ch_mito_bed } + CALL_MITOCHONDRIAL_VARIANTS( + ch_bam_bai, + ch_fasta, + ch_fai, + ch_par, + ch_mito_bed, + val_mitochondrial_caller, + ) + CALL_MITOCHONDRIAL_VARIANTS.out.mitochondrial_vcf + .map { meta, vcf -> [meta + [caller: val_mitochondrial_caller], vcf] } + .set { ch_mitochondrial_vcf } + CALL_MITOCHONDRIAL_VARIANTS.out.mitochondrial_tbi + .map { meta, tbi -> [meta + [caller: val_mitochondrial_caller], tbi] } + .set { ch_mitochondrial_tbi } + + // Add nuclear interval count + 1 (mitochondrial) to get total num_intervals. ////// add for nuclear too + ch_mitochondrial_vcf + .combine(ch_bed_intervals.map { _meta, _bed, num_intervals -> num_intervals }.first()) + .map {meta, vcf, num_intervals -> [meta + [num_intervals: num_intervals + 1], vcf] } + .set { ch_mitochondrial_vcf } + ch_mitochondrial_tbi + .combine(ch_bed_intervals.map { _meta, _bed, num_intervals -> num_intervals }.first()) + .map {meta, tbi, num_intervals -> [meta + [num_intervals: num_intervals + 1], tbi] } + .set { ch_mitochondrial_tbi } } // Combine the BED intervals with BAM/BAI files to create a region-bam-bai for each sample. @@ -465,19 +509,37 @@ workflow NALLO { val_snv_caller, val_sentieon_tech, ) - + call_snvs_vcf = CALL_SNVS.out.vcf + call_snvs_index = CALL_SNVS.out.index + call_snvs_gvcf = CALL_SNVS.out.gvcf + call_snvs_gvcf_index = CALL_SNVS.out.gvcf_index + + // add 1 to the num intervals count for the mitochondrial region + if (val_snv_caller == "deepvariant" && val_mitochondrial_caller != "deepvariant") { + call_snvs_gvcf + .map {meta, gvcf -> [meta + [num_intervals: meta.num_intervals + 1], gvcf] } + .set { call_snvs_gvcf } + } // Group GVCFs per region and family (one region with all samples) - CALL_SNVS.out.gvcf + call_snvs_gvcf .map { meta, gvcf -> - [[id: meta.region.name, family_id: meta.family_id, num_intervals: meta.num_intervals], gvcf] + [[id: meta.region.name, family_id: meta.family_id, num_intervals: meta.num_intervals, caller: val_snv_caller], gvcf] } + .mix(ch_mitochondrial_vcf + .map { meta, vcf -> + [[id: "mitochondrial", family_id: meta.family_id, num_intervals: meta.num_intervals, caller: meta.caller], vcf]} + ) .groupTuple() .set { variants_to_merge_per_family } - CALL_SNVS.out.gvcf_index + call_snvs_index .map { meta, tbi -> - [[id: meta.region.name, family_id: meta.family_id, num_intervals: meta.num_intervals], tbi] + [[id: meta.region.name, family_id: meta.family_id, num_intervals: meta.num_intervals, caller: val_snv_caller], tbi] } + .mix(ch_mitochondrial_tbi + .map { meta, tbi -> + [[id: "mitochondrial", family_id: meta.family_id, num_intervals: meta.num_intervals, caller: meta.caller], tbi]} + ) .groupTuple() .set { gvcf_tbis_per_family } @@ -489,12 +551,12 @@ workflow NALLO { SCATTER_GENOME.out.bed, ch_fasta, ch_fai, - val_snv_caller, ch_vcfexpress_prelude, ) + GVCF_GLNEXUS_NORM_VARIANTS.out.vcf // Grouping VCF, containing one sample with all regions - CALL_SNVS.out.vcf + call_snvs_vcf .map { meta, vcf -> def new_meta = meta - meta.subMap('region', 'genome') [groupKey(new_meta, new_meta.num_intervals), vcf] @@ -505,6 +567,7 @@ workflow NALLO { } .set { variants_to_concat_per_sample } + // Create a concatenated and normalized VCF, containing one sample with all regions. VCF_CONCAT_NORM_VARIANTS( variants_to_concat_per_sample, @@ -523,7 +586,7 @@ workflow NALLO { // SNV QC // Can we use the normalized VCF here, for DV vcfstatsreport? QC_SNVS( - VCF_CONCAT_NORM_VARIANTS.out.bcftools_concat_vcf, + VCF_CONCAT_NORM_VARIANTS.out.bcftools_concat_vcf.map { meta, vcf -> [meta - meta.subMap('caller'), vcf] }, sample_snv_vcf, sample_snv_index, val_snv_caller.equals("deepvariant"), @@ -535,8 +598,8 @@ workflow NALLO { .set { ch_vcf_tbi_per_region } } if (!val_skip_prepare_gens_input) { - CALL_SNVS.out.gvcf - .join(CALL_SNVS.out.gvcf_index) + call_snvs_gvcf + .join(call_snvs_gvcf_index) .map { meta, gvcf, gvcf_index -> def sample_meta = meta - meta.subMap(['region', 'num_intervals', 'genome']) [sample_meta, gvcf, gvcf_index] @@ -818,7 +881,7 @@ workflow NALLO { ch_vcf_tbi_per_region .map { meta, vcf, tbi -> def new_meta = [id: meta.family_id, set: meta.set, sample_ids: meta.sample_ids, num_intervals: meta.num_intervals] - [groupKey(new_meta, meta.num_intervals), vcf, tbi] + [groupKey(new_meta, 2), vcf, tbi] } .groupTuple() .set { ch_concat_sort_input }