-
Notifications
You must be signed in to change notification settings - Fork 24
Split hifiasm #969
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Split hifiasm #969
Changes from all commits
9f1b855
f2fed59
6683f38
fdf5540
4e49b04
f665f27
2032f05
5df5fc0
ee24447
4b9d671
183d219
c135aca
e0fc0a9
8fa55cb
cb43236
f16be8d
fe3eb5a
fec8384
f43182b
7bbe899
3e86f7b
7261fe4
c3be48e
0f988c3
a7643be
4b39334
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,25 +1,25 @@ | ||
| include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' | ||
| include { HIFIASM } from '../../../modules/nf-core/hifiasm' | ||
| include { YAK_COUNT } from '../../../modules/nf-core/yak/count/main' | ||
| include { GFASTATS } from '../../../modules/nf-core/gfastats/main' | ||
| include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' | ||
| include { HIFIASM as HIFIASM_BINS } from '../../../modules/nf-core/hifiasm' | ||
| include { HIFIASM as HIFIASM_ASSEMBLY } from '../../../modules/nf-core/hifiasm' | ||
| include { YAK_COUNT } from '../../../modules/nf-core/yak/count/main' | ||
| include { GFASTATS } from '../../../modules/nf-core/gfastats/main' | ||
|
|
||
| // This subworkflow assembles and outputs haplotypes from a set of reads (grouped per sample), using hifiasm and gfastats. | ||
| // It assumes that while each sample can have multiple files, each sample belongs to one family at most. | ||
| workflow GENOME_ASSEMBLY { | ||
|
|
||
| take: | ||
| ch_reads // channel: [ val(meta), fastqs ] | ||
| trio_binning // bool: Should we use trio binning mode where possible? | ||
| ch_reads // channel: [ val(meta), fastqs ] | ||
| trio_binning // bool: Should we use trio binning mode where possible? | ||
|
|
||
| main: | ||
| if (trio_binning) { | ||
| // First, we need to branch the samples based on their relationship | ||
| ch_reads | ||
| .branch { meta, _reads -> | ||
| def is_parent = meta.relationship in ['father', 'mother'] | ||
| paired_parents : is_parent && meta.has_other_parent | ||
| children_with_both_parents : meta.relationship == 'child' && meta.two_parents | ||
| other : true | ||
| paired_parents: is_parent && meta.has_other_parent | ||
| children_with_both_parents: meta.relationship == 'child' && meta.two_parents | ||
| other: true | ||
| } | ||
| .set { ch_branched_samples } | ||
|
|
||
|
|
@@ -32,85 +32,102 @@ workflow GENOME_ASSEMBLY { | |
| } | ||
| .set { ch_paired_parents_for_yak } | ||
|
|
||
| CAT_FASTQ ( | ||
| CAT_FASTQ( | ||
| ch_paired_parents_for_yak.cat | ||
| ) | ||
|
|
||
| YAK_COUNT ( | ||
| YAK_COUNT( | ||
| CAT_FASTQ.out.reads.concat(ch_paired_parents_for_yak.no_cat) | ||
| ) | ||
|
|
||
| YAK_COUNT.out.yak | ||
| // Because a parent can have multiple children, and meta.children is a list of all children, | ||
| // we need to return one tuple per child. | ||
| .flatMap { meta, yak -> | ||
| (meta.children ?: []).collect { child_id -> | ||
| [child_id, meta, yak] | ||
| } | ||
| } | ||
| .branch { child_id, meta, yak -> | ||
| paternal: meta.relationship == 'father' | ||
| return [ child_id, yak ] | ||
| return [child_id, yak] | ||
| maternal: meta.relationship == 'mother' | ||
| return [ child_id, yak ] | ||
| return [child_id, yak] | ||
| } | ||
| .set { ch_yak_output } | ||
|
|
||
| // Creates the input for trio-binned assemblies (children with both parents) | ||
| ch_branched_samples.children_with_both_parents | ||
| .map { meta, reads -> [ meta.id, meta, reads ] } | ||
| .map { meta, reads -> [meta.id, meta, reads] } | ||
| .join(ch_yak_output.paternal) | ||
| .join(ch_yak_output.maternal) | ||
| .map { _id, meta, reads, yak_paternal, yak_maternal -> | ||
| [ meta, reads, yak_paternal, yak_maternal ] | ||
| [meta, reads, yak_paternal, yak_maternal] | ||
| } | ||
| .set { ch_with_both_parents } | ||
|
|
||
| // Create the input for hifiasm by combining the non-trio binned samples with the trio-binned samples. | ||
| ch_branched_samples.other | ||
| .concat(ch_branched_samples.paired_parents) | ||
| .map { meta, fastqs -> | ||
| [ meta, fastqs, [], [] ] | ||
| [meta, fastqs, [], []] | ||
| } | ||
| .concat(ch_with_both_parents) | ||
| .multiMap { meta, reads, yak_paternal, yak_maternal -> | ||
| reads : [ meta, reads , [] ] | ||
| yak : [ meta, yak_paternal, yak_maternal ] | ||
| reads: [meta, reads, []] | ||
| yak: [meta, yak_paternal, yak_maternal] | ||
| } | ||
| .set { ch_hifiasm_in } | ||
| } else { | ||
| } | ||
| else { | ||
| ch_reads | ||
| .multiMap { meta, reads -> | ||
| reads : [ meta, reads, [] ] | ||
| yak : [ [], [], [] ] | ||
| reads: [meta, reads, []] | ||
| yak: [meta, [], []] | ||
| } | ||
| .set { ch_hifiasm_in } | ||
| } | ||
|
|
||
| HIFIASM ( | ||
| HIFIASM_BINS( | ||
| ch_hifiasm_in.reads, | ||
| ch_hifiasm_in.yak, | ||
| [[],[],[]], | ||
| [[],[]] | ||
| [[], [], []], | ||
| [[], []], | ||
| ) | ||
|
|
||
| // Explicitly key bins/reads/yak by sample ID before assembly so each sample gets its own bins and yaks. | ||
| ch_hifiasm_in.reads | ||
| .join(ch_hifiasm_in.yak, failOnMismatch: true, failOnDuplicate: true) | ||
| .join(HIFIASM_BINS.out.bin_files, failOnMismatch: true, failOnDuplicate: true) | ||
| .multiMap { meta, reads, ul_reads, yak_paternal, yak_maternal, bin_files -> | ||
| reads: [meta, reads, ul_reads] | ||
| bins: [meta, bin_files] | ||
| yak: [meta, yak_paternal, yak_maternal] | ||
| } | ||
| .set { ch_hifiasm_assembly_in } | ||
|
Comment on lines
+100
to
+105
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done! :) Should I use it on all the main.nf files for nallo in the meantime to make sure they have the right format?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks. There is an issue where it removes inline comments, so it's not perfect yet. We will have to do it manually for a bit longer on the files we modify. |
||
|
|
||
| HIFIASM_ASSEMBLY( | ||
| ch_hifiasm_assembly_in.reads, | ||
| ch_hifiasm_assembly_in.yak, | ||
| [[], [], []], | ||
| ch_hifiasm_assembly_in.bins, | ||
| ) | ||
|
|
||
| HIFIASM.out.hap1_contigs | ||
| .map { meta, fasta -> [ meta + [ 'haplotype': 1 ], fasta ] } | ||
| HIFIASM_ASSEMBLY.out.hap1_contigs | ||
| .map { meta, fasta -> [meta + ['haplotype': 1], fasta] } | ||
| .set { ch_gfastats_paternal_in } | ||
|
|
||
| HIFIASM.out.hap2_contigs | ||
| .map { meta, fasta -> [ meta + [ 'haplotype': 2 ], fasta ] } | ||
| HIFIASM_ASSEMBLY.out.hap2_contigs | ||
| .map { meta, fasta -> [meta + ['haplotype': 2], fasta] } | ||
| .set { ch_gfastats_maternal_in } | ||
|
|
||
| GFASTATS( | ||
| ch_gfastats_paternal_in.mix(ch_gfastats_maternal_in), | ||
| 'fasta', | ||
| '', | ||
| '', | ||
| [[],[]], | ||
| [[],[]], | ||
| [[],[]], | ||
| [[],[]] | ||
| [[], []], | ||
| [[], []], | ||
| [[], []], | ||
| [[], []], | ||
| ) | ||
|
|
||
| emit: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍