Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deps/libvgio
8 changes: 4 additions & 4 deletions src/minimizer_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -884,7 +884,7 @@ class MinimizerMapper : public AlignerClient {
* If we do gapless extension, turn good full-length gapless extensions into alignments and return them in alignments
* Gapless extensions are considered good enough if they have fewer than default_max_extension_mismatches mismatches
*/
void do_chaining_on_trees(Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector<Seed>& seeds, const VectorView<MinimizerMapper::Minimizer>& minimizers,
void do_chaining_on_trees(const Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector<Seed>& seeds, const VectorView<MinimizerMapper::Minimizer>& minimizers,
const vector<algorithms::Anchor>& seed_anchors,
std::vector<std::vector<size_t>>& chains, std::vector<std::vector<bool>>& chain_rec_flags,
std::vector<size_t>& chain_rec_counts, std::vector<size_t>& chain_source_tree,
Expand All @@ -896,7 +896,7 @@ class MinimizerMapper : public AlignerClient {
/**
* Collect stats about the best chains for annotating the final alignment
*/
void get_best_chain_stats( Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector<Seed>& seeds,
void get_best_chain_stats(const Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector<Seed>& seeds,
const VectorView<MinimizerMapper::Minimizer>& minimizers,
const std::vector<std::vector<size_t>>& chains,
const std::vector<size_t>& chain_source_tree,
Expand All @@ -906,7 +906,7 @@ class MinimizerMapper : public AlignerClient {
double& best_chain_average_jump, size_t& best_chain_anchors, size_t& best_chain_anchor_length,
Funnel& funnel) const ;

void do_alignment_on_chains(Alignment& aln, const std::vector<Seed>& seeds,
void do_alignment_on_chains(const Alignment& aln, const std::vector<Seed>& seeds,
const VectorView<MinimizerMapper::Minimizer>& minimizers,
const vector<algorithms::Anchor>& seed_anchors,
const std::vector<std::vector<size_t>>& chains,
Expand All @@ -925,7 +925,7 @@ class MinimizerMapper : public AlignerClient {
*
* If no alignments have a positive score, responsible for creating an unmapped mapping.
*/
void pick_mappings_from_alignments(Alignment& aln, const std::vector<Alignment>& alignments,
void pick_mappings_from_alignments(const Alignment& aln, const std::vector<Alignment>& alignments,
const std::vector<double>& multiplicity_by_alignment, const std::vector<size_t>& alignments_to_source,
const std::vector<int>& chain_score_estimates,
std::vector<Alignment>& mappings,
Expand Down
47 changes: 26 additions & 21 deletions src/minimizer_mapper_from_chains.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,28 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
return aln.sequence();
});

// Create a new alignment object to get rid of old annotations.
{
Alignment temp;
temp.set_sequence(aln.sequence());
temp.set_name(aln.name());
temp.set_quality(aln.quality());
if (has_annotation(aln, "tags")) {
// Preserve any BAM tags, which might really have come in as FASTQ
// comments and which we want to keep.
// TODO: What if these came from a previous Giraffe run though???
set_annotation(temp, "tags", get_annotation<string>(aln, "tags"));
}
aln = std::move(temp);
}

// Annotate the read with metadata
if (!sample_name.empty()) {
aln.set_sample_name(sample_name);
}
if (!read_group.empty()) {
aln.set_read_group(read_group);
}

// Minimizers sorted by position
std::vector<Minimizer> minimizers_in_read = this->find_minimizers(aln.sequence(), funnel);
Expand Down Expand Up @@ -1138,7 +1160,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
return mappings;
}

void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& zip_code_forest,
void MinimizerMapper::do_chaining_on_trees(const Alignment& aln, const ZipCodeForest& zip_code_forest,
const std::vector<Seed>& seeds, const VectorView<MinimizerMapper::Minimizer>& minimizers,
const vector<algorithms::Anchor>& seed_anchors,
std::vector<std::vector<size_t>>& chains, std::vector<std::vector<bool>>& chain_rec_flags,
Expand Down Expand Up @@ -1811,7 +1833,7 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest&



void MinimizerMapper::get_best_chain_stats(Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector<Seed>& seeds,
void MinimizerMapper::get_best_chain_stats(const Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector<Seed>& seeds,
const VectorView<MinimizerMapper::Minimizer>& minimizers,
const std::vector<std::vector<size_t>>& chains,
const std::vector<size_t>& chain_source_tree,
Expand Down Expand Up @@ -1872,7 +1894,7 @@ void MinimizerMapper::get_best_chain_stats(Alignment& aln, const ZipCodeForest&

}

void MinimizerMapper::do_alignment_on_chains(Alignment& aln, const std::vector<Seed>& seeds,
void MinimizerMapper::do_alignment_on_chains(const Alignment& aln, const std::vector<Seed>& seeds,
const VectorView<MinimizerMapper::Minimizer>& minimizers,
const vector<algorithms::Anchor>& seed_anchors,
const std::vector<std::vector<size_t>>& chains,
Expand All @@ -1897,23 +1919,6 @@ void MinimizerMapper::do_alignment_on_chains(Alignment& aln, const std::vector<S
vector<size_t> minimizer_kept_count(minimizers.size(), 0);
#endif

// Create a new alignment object to get rid of old annotations.
{
Alignment temp;
temp.set_sequence(aln.sequence());
temp.set_name(aln.name());
temp.set_quality(aln.quality());
aln = std::move(temp);
}

// Annotate the read with metadata
if (!sample_name.empty()) {
aln.set_sample_name(sample_name);
}
if (!read_group.empty()) {
aln.set_read_group(read_group);
}

// Compute lower limit on chain score to actually investigate
int chain_min_score = (int) (min_chain_score_per_base * aln.sequence().size());
// Apply the max in chain score limit
Expand Down Expand Up @@ -2226,7 +2231,7 @@ void MinimizerMapper::do_alignment_on_chains(Alignment& aln, const std::vector<S
}
}

void MinimizerMapper::pick_mappings_from_alignments(Alignment& aln, const std::vector<Alignment>& alignments,
void MinimizerMapper::pick_mappings_from_alignments(const Alignment& aln, const std::vector<Alignment>& alignments,
const std::vector<double>& multiplicity_by_alignment,
const std::vector<size_t>& alignments_to_source,
const std::vector<int>& chain_score_estimates,
Expand Down
14 changes: 11 additions & 3 deletions test/t/15_vg_surject.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 75
plan tests 77

vg construct -r small/x.fa >j.vg
vg index -x j.xg j.vg
Expand Down Expand Up @@ -125,9 +125,17 @@ is "$(cat surjected.sam | grep '@RG' | grep 'RG1' | grep 'Sample1' | wc -l)" "1"

# a uniform random sequence
printf "@read TG:Z:val\nGGCGACGTACTAGGGACTACAGTCCTTCGTCTTTCTCTCTCGACTCCGAA\n+\nHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n" > x.fq
is $(vg map -d x -t 1 -f x.fq -5 bam --comments-as-tags | samtools view -f 4 | grep "TG:Z:val" | wc -l | sed 's/^[[:space:]]*//') 1 "Tags are preserved on unmapped reads"
vg map -d x -t 1 -f x.fq --comments-as-tags >x.gam
is $(vg surject -p x -x x.xg --bam-output x.gam | samtools view -f 4 | grep "TG:Z:val" | wc -l | sed 's/^[[:space:]]*//') 1 "Tags are preserved on unmapped reads"
vg map -d x -t 1 -f x.fq --comments-as-tags --gaf >x.gaf
is $(vg surject -p x -x x.xg --bam-output --gaf-input x.gaf | samtools view -f 4 | grep "TG:Z:val" | wc -l | sed 's/^[[:space:]]*//') 1 "Tags are preserved on unmapped reads in GAF"

rm -rf j.vg x.vg j.gam x.gam x.idx j.xg x.xg x.gcsa read.gam reads.gam surjected.sam x.fq
# A sequence that should map
printf "@read TG:Z:val\nACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGA\n+\nHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n" > x2.fq
vg map -d x -t 1 -f x2.fq --comments-as-tags --gaf >x2.gaf
is $(vg surject -p x -x x.xg --bam-output --gaf-input x2.gaf | samtools view | grep "TG:Z:val" | wc -l | sed 's/^[[:space:]]*//') 1 "Tags are preserved on mapped reads in GAF"

rm -rf j.vg x.vg j.gam x.gam x.gaf x.idx j.xg x.xg x.gcsa read.gam reads.gam surjected.sam x.fq

vg mod -c graphs/fail.vg >f.vg
vg index -k 11 -g f.gcsa -x f.xg f.vg
Expand Down
21 changes: 17 additions & 4 deletions test/t/50_vg_giraffe.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 83
plan tests 87

vg construct -a -r small/x.fa -v small/x.vcf.gz >x.vg
vg index -x x.xg x.vg
Expand Down Expand Up @@ -116,22 +116,35 @@ vg giraffe -x x.xg -H x.gbwt -m x.shortread.withzip.min -z x.shortread.zipcodes
vg giraffe -x x.xg -H x.gbwt -m x.shortread.withzip.min -z x.shortread.zipcodes -d x.dist -f tagged1.fq -f tagged2.fq --comments-as-tags -o BAM > t3.bam
vg giraffe -x x.xg -H x.gbwt -m x.shortread.withzip.min -z x.shortread.zipcodes -d x.dist -f tagged1.fq --comments-as-tags -o GAF > t1.gaf

vg giraffe -b chaining-sr -x x.xg -H x.gbwt -m x.shortread.withzip.min -z x.shortread.zipcodes -d x.dist -f tagged1.fq --comments-as-tags -o BAM > t1_chaining.bam
vg giraffe -b chaining-sr -x x.xg -H x.gbwt -m x.shortread.withzip.min -z x.shortread.zipcodes -d x.dist -f tagged1.fq --comments-as-tags -o GAM > t1_chaining.gam
vg giraffe -b chaining-sr -x x.xg -H x.gbwt -m x.shortread.withzip.min -z x.shortread.zipcodes -d x.dist -f tagged1.fq --comments-as-tags -o GAF > t1_chaining.gaf


is "$(samtools view t1.bam | grep T1 | grep T2 | grep T3 | wc -l | sed 's/^[[:space:]]*//')" "1" "BAM tags are preserved on read 1"
is "$(samtools view t2.bam | grep T4 | grep T5 | grep T6 | wc -l | sed 's/^[[:space:]]*//')" "1" "BAM tags are preserved on read 2"
is "$(samtools view t3.bam | grep T1 | grep T2 | grep T3 | grep read1 | wc -l | sed 's/^[[:space:]]*//')" "1" "BAM tags are preserved on paired read 1"
is "$(samtools view t3.bam | grep T4 | grep T5 | grep T6 | grep read2 | wc -l | sed 's/^[[:space:]]*//')" "1" "BAM tags are preserved on paired read 2"
is "$(cat t1.gaf | grep T1 | grep T2 | grep T3 | wc -l | sed 's/^[[:space:]]*//')" "1" "GAF tags are preserved on read 1"
is "$(samtools view t1_chaining.bam | grep T1 | grep T2 | grep T3 | wc -l | sed 's/^[[:space:]]*//')" "1" "BAM tags are preserved in chaining mode"
is "$(vg view -aj t1_chaining.gam | jq -r '.annotation.tags' | grep T1 | grep T2 | grep T3 | wc -l | sed 's/^[[:space:]]*//')" "1" "BAM tags are present in GAM in chaining mode"
is "$(cat t1_chaining.gaf | grep T1 | grep T2 | grep T3 | wc -l | sed 's/^[[:space:]]*//')" "1" "GAF tags are preserved in chaining mode"

# a uniform random sequence
printf "@read TG:Z:val\nGGCGACGTACTAGGGACTACAGTCCTTCGTCTTTCTCTCTCGACTCCGAA\n+\nHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n" > x.fq
vg giraffe -b chaining-sr -x x.xg -H x.gbwt -m x.shortread.withzip.min -z x.shortread.zipcodes -d x.dist -f x.fq --comments-as-tags -o GAF >x.gaf
is "$(cat x.gaf | grep 'TG:Z' | wc -l | sed 's/^[[:space:]]*//')" "1" "GAF tags are preserved in chaining mode for unmapped reads"


rm t1.bam t2.bam t3.bam t1_chaining.bam t1_chaining.gam t1_chaining.gaf t1.gaf tagged1.fq tagged2.fq x.fq
rm -f read.fq read.gam

# Attempts to use mismatched files fail
vg gbwt -G graphs/components_walks.gfa -g wrong.gbz
vg giraffe -Z wrong.gbz -d x.dist -m x.shortread.withzip.min -z x.shortread.zipcodes -f reads/small.middle.ref.fq > /dev/null 2> log.txt
is "${?}" "1" "mapping with mismatched GBZ and minimizer index fails"
is "$(grep -c 'error.*are not compatible' log.txt)" "1" "appropriate error message is printed"

rm t1.bam t2.bam t3.bam t1.gaf tagged1.fq tagged2.fq
rm -f read.fq read.gam

rm -rf explanation_*

vg giraffe -Z x.giraffe.gbz -f reads/small.middle.ref.indel.multi.fq --show-work --track-position -b chaining-sr > /dev/null 2>&1
Expand Down
Loading