Skip to content

Commit

Permalink
added tag to annotate reads spanning more than 1 STR
Browse files Browse the repository at this point in the history
mgymrek committed Sep 6, 2014
1 parent c8c7595 commit 3f36669
Showing 6 changed files with 26 additions and 5 deletions.
2 changes: 2 additions & 0 deletions src/Alignment.h
Original file line number Diff line number Diff line change
@@ -43,6 +43,8 @@ struct ALIGNMENT {
int endpos;
// Copy number
float copynum;
// Keep track of other STRs spanned by this read
std::string other_spanned_strs;
// provide overloaded operators to compare
// when used as a map key
bool operator<(const ALIGNMENT& ref_pos1) const {
20 changes: 15 additions & 5 deletions src/BWAReadAligner.cpp
Original file line number Diff line number Diff line change
@@ -645,12 +645,22 @@ bool BWAReadAligner::SetSTRCoordinates(vector<ALIGNMENT>* good_left_alignments,
msg << "[SetSTRCoordinates]: Found " << spanned_ref_strs.size() << " spanned STRs";
PrintMessageDieOnError(msg.str(), DEBUG);
}
if (spanned_ref_strs.size() == 0) continue;
ALIGNMENT l_spanning_alignment, r_spanning_alignment;
for (size_t j = 0; j < spanned_ref_strs.size(); j++) {
if (j >= 1) continue; // TODO: remove this, put field to keep track of if read spans more than 1 STR
if (j >= 1) {
// If a single read spans multiple STRs, keep track of the other ones
stringstream other_spanned_strs;
other_spanned_strs << spanned_ref_strs.at(j).chrom << ":"
<< spanned_ref_strs.at(j).start << ":"
<< repseqs.at(j) << ";";
l_spanning_alignment.other_spanned_strs += other_spanned_strs.str();
r_spanning_alignment.other_spanned_strs += other_spanned_strs.str();
continue;
}
float copynum = static_cast<float>(spanned_ref_strs.at(j).stop-spanned_ref_strs.at(j).start+1)/
static_cast<float>(repseqs.at(j).size());
copynum = ceilf(copynum * 10) / 10; // Round to 1 digit to match reference
ALIGNMENT l_spanning_alignment;
l_spanning_alignment.id = refid;
l_spanning_alignment.chrom = spanned_ref_strs.at(j).chrom;
l_spanning_alignment.start = spanned_ref_strs.at(j).start;
@@ -659,7 +669,6 @@ bool BWAReadAligner::SetSTRCoordinates(vector<ALIGNMENT>* good_left_alignments,
l_spanning_alignment.strand = lalign.strand;
l_spanning_alignment.pos = lalign.pos;
l_spanning_alignment.copynum = copynum;
ALIGNMENT r_spanning_alignment;
r_spanning_alignment.id = refid;
r_spanning_alignment.chrom = spanned_ref_strs.at(j).chrom;
r_spanning_alignment.start = spanned_ref_strs.at(j).start;
@@ -675,9 +684,9 @@ bool BWAReadAligner::SetSTRCoordinates(vector<ALIGNMENT>* good_left_alignments,
l_spanning_alignment.left = false;
r_spanning_alignment.left = true;
}
processed_left_alignments.push_back(l_spanning_alignment);
processed_right_alignments.push_back(r_spanning_alignment);
}
processed_left_alignments.push_back(l_spanning_alignment);
processed_right_alignments.push_back(r_spanning_alignment);
}
*good_left_alignments = processed_left_alignments;
*good_right_alignments = processed_right_alignments;
@@ -771,6 +780,7 @@ bool BWAReadAligner::OutputAlignment(ReadPair* read_pair,
read_pair->reads.at(aligned_read_num).right_flank_nuc.length();

read_pair->alternate_mappings = alternate_mappings;
read_pair->other_spanned_strs = left_alignment.other_spanned_strs;

// checks to make sure the alignment is reasonable
// coords make sense on forward strand
2 changes: 2 additions & 0 deletions src/ReadPair.cpp
Original file line number Diff line number Diff line change
@@ -27,4 +27,6 @@ void ReadPair::ResetAlignmentFlags() {
read2_passed_alignment = false;
found_unique_alignment = false;
aligned_read_num = -1;
alternate_mappings = "";
other_spanned_strs = "";
}
2 changes: 2 additions & 0 deletions src/ReadPair.h
Original file line number Diff line number Diff line change
@@ -56,6 +56,8 @@ class ReadPair {
bool treat_as_paired;
// String for alternate mappings
std::string alternate_mappings;
// Keep track of other STRs spanned by the primary read
std::string other_spanned_strs;

/* Reset alignment flags */
void ResetAlignmentFlags();
4 changes: 4 additions & 0 deletions src/SamFileWriter.cpp
Original file line number Diff line number Diff line change
@@ -175,6 +175,10 @@ void SamFileWriter::WriteRecord(const ReadPair& read_pair) {
if (!read_pair.alternate_mappings.empty()) {
bam_alignment.AddTag("XA", "Z", read_pair.alternate_mappings);
}
// XO: other spanned STRs
if (!read_pair.other_spanned_strs.empty()) {
bam_alignment.AddTag("XO", "Z", read_pair.other_spanned_strs);
}
// XS: start pos of matching STR
bam_alignment.AddTag("XS", "i", read_pair.reads.
at(aligned_read_num).msStart);
1 change: 1 addition & 0 deletions wishlist.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
lobSTR wish list... some day these features will be added

- report STRs spanned by mate pair reads
- ability to call more than 2 alleles at loci (such as DYS464) with multiple copies
- improved remove duplicates algorithm
- ability to easily compare lobSTR results to capillary electrophoresis callsets

0 comments on commit 3f36669

Please sign in to comment.