diff --git a/src/Alignment.h b/src/Alignment.h index 04367c4..8b767df 100644 --- a/src/Alignment.h +++ b/src/Alignment.h @@ -43,6 +43,8 @@ struct ALIGNMENT { int endpos; // Copy number float copynum; + // Keep track of other STRs spanned by this read + std::string other_spanned_strs; // provide overloaded operators to compare // when used as a map key bool operator<(const ALIGNMENT& ref_pos1) const { diff --git a/src/BWAReadAligner.cpp b/src/BWAReadAligner.cpp index 35234f6..02f6f74 100644 --- a/src/BWAReadAligner.cpp +++ b/src/BWAReadAligner.cpp @@ -645,12 +645,22 @@ bool BWAReadAligner::SetSTRCoordinates(vector* good_left_alignments, msg << "[SetSTRCoordinates]: Found " << spanned_ref_strs.size() << " spanned STRs"; PrintMessageDieOnError(msg.str(), DEBUG); } + if (spanned_ref_strs.size() == 0) continue; + ALIGNMENT l_spanning_alignment, r_spanning_alignment; for (size_t j = 0; j < spanned_ref_strs.size(); j++) { - if (j >= 1) continue; // TODO: remove this, put field to keep track of if read spans more than 1 STR + if (j >= 1) { + // If a single read spans multiple STRs, keep track of the other ones + stringstream other_spanned_strs; + other_spanned_strs << spanned_ref_strs.at(j).chrom << ":" + << spanned_ref_strs.at(j).start << ":" + << repseqs.at(j) << ";"; + l_spanning_alignment.other_spanned_strs += other_spanned_strs.str(); + r_spanning_alignment.other_spanned_strs += other_spanned_strs.str(); + continue; + } float copynum = static_cast(spanned_ref_strs.at(j).stop-spanned_ref_strs.at(j).start+1)/ static_cast(repseqs.at(j).size()); copynum = ceilf(copynum * 10) / 10; // Round to 1 digit to match reference - ALIGNMENT l_spanning_alignment; l_spanning_alignment.id = refid; l_spanning_alignment.chrom = spanned_ref_strs.at(j).chrom; l_spanning_alignment.start = spanned_ref_strs.at(j).start; @@ -659,7 +669,6 @@ bool BWAReadAligner::SetSTRCoordinates(vector* good_left_alignments, l_spanning_alignment.strand = lalign.strand; l_spanning_alignment.pos = lalign.pos; l_spanning_alignment.copynum = copynum; - ALIGNMENT r_spanning_alignment; r_spanning_alignment.id = refid; r_spanning_alignment.chrom = spanned_ref_strs.at(j).chrom; r_spanning_alignment.start = spanned_ref_strs.at(j).start; @@ -675,9 +684,9 @@ bool BWAReadAligner::SetSTRCoordinates(vector* good_left_alignments, l_spanning_alignment.left = false; r_spanning_alignment.left = true; } - processed_left_alignments.push_back(l_spanning_alignment); - processed_right_alignments.push_back(r_spanning_alignment); } + processed_left_alignments.push_back(l_spanning_alignment); + processed_right_alignments.push_back(r_spanning_alignment); } *good_left_alignments = processed_left_alignments; *good_right_alignments = processed_right_alignments; @@ -771,6 +780,7 @@ bool BWAReadAligner::OutputAlignment(ReadPair* read_pair, read_pair->reads.at(aligned_read_num).right_flank_nuc.length(); read_pair->alternate_mappings = alternate_mappings; + read_pair->other_spanned_strs = left_alignment.other_spanned_strs; // checks to make sure the alignment is reasonable // coords make sense on forward strand diff --git a/src/ReadPair.cpp b/src/ReadPair.cpp index 77f6748..959afa4 100644 --- a/src/ReadPair.cpp +++ b/src/ReadPair.cpp @@ -27,4 +27,6 @@ void ReadPair::ResetAlignmentFlags() { read2_passed_alignment = false; found_unique_alignment = false; aligned_read_num = -1; + alternate_mappings = ""; + other_spanned_strs = ""; } diff --git a/src/ReadPair.h b/src/ReadPair.h index a532d7b..59e4d14 100644 --- a/src/ReadPair.h +++ b/src/ReadPair.h @@ -56,6 +56,8 @@ class ReadPair { bool treat_as_paired; // String for alternate mappings std::string alternate_mappings; + // Keep track of other STRs spanned by the primary read + std::string other_spanned_strs; /* Reset alignment flags */ void ResetAlignmentFlags(); diff --git a/src/SamFileWriter.cpp b/src/SamFileWriter.cpp index da7e95b..1a2be83 100644 --- a/src/SamFileWriter.cpp +++ b/src/SamFileWriter.cpp @@ -175,6 +175,10 @@ void SamFileWriter::WriteRecord(const ReadPair& read_pair) { if (!read_pair.alternate_mappings.empty()) { bam_alignment.AddTag("XA", "Z", read_pair.alternate_mappings); } + // XO: other spanned STRs + if (!read_pair.other_spanned_strs.empty()) { + bam_alignment.AddTag("XO", "Z", read_pair.other_spanned_strs); + } // XS: start pos of matching STR bam_alignment.AddTag("XS", "i", read_pair.reads. at(aligned_read_num).msStart); diff --git a/wishlist.txt b/wishlist.txt index b43a212..1a366ae 100644 --- a/wishlist.txt +++ b/wishlist.txt @@ -1,5 +1,6 @@ lobSTR wish list... some day these features will be added +- report STRs spanned by mate pair reads - ability to call more than 2 alleles at loci (such as DYS464) with multiple copies - improved remove duplicates algorithm - ability to easily compare lobSTR results to capillary electrophoresis callsets