From bf5d326ab60795b80b63a33dc7282465776a0d9a Mon Sep 17 00:00:00 2001 From: Michael Kotliar Date: Thu, 21 Mar 2019 20:24:01 -0400 Subject: [PATCH] Bug fix When we are looking for the start and end intervals for the specific read, we iterate over our interval map using iterator that should be restored to its original value in following cases: 1. we failed to find start or end interval 2. we processed sliced read If not restored, we miss some reads due to the "jump" in coordinates of our annotation --- src/interval_map.cpp | 3 +++ src/thread.cpp | 36 ++++++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/interval_map.cpp b/src/interval_map.cpp index ae0fc7a..dc4c548 100644 --- a/src/interval_map.cpp +++ b/src/interval_map.cpp @@ -14,17 +14,20 @@ bool find_start_segment_annotation (BamRecordPtr current_bam_record, BamRecord p assert (current_bam_record.use_count() > 0); if (current_bam_record->read_id == previous_bam_record.read_id and *allow_skip_rest){ freeze = false; + cout << " MOVE BAM" << endl; return false; } if (current_bam_record->start_pose < current_gtf_records_splitted_it->first.lower()) { freeze = false; // Set freeze to false to change current_bam_record allow_skip_rest.reset (new bool(true)); + cout << " MOVE BAM" << endl; return false; } allow_skip_rest.reset (new bool(false)); if (current_bam_record->start_pose >= current_gtf_records_splitted_it->first.upper()) { current_gtf_records_splitted_it++; + cout << " MOVE GTF" << endl; freeze = true; // Set freeze to true to prevent changing current_bam_record return false; } diff --git a/src/thread.cpp b/src/thread.cpp index 5a96a78..bf511a1 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -178,12 +178,6 @@ void process ( vector < std::map >::ite assert (current_bam_record.use_count() > 0); - // FOR DEBUG USE ONLY - // cout << endl << "Process read " << current_bam_record->read_id << " [" << - // current_bam_record->start_pose << "," << - // current_bam_record->end_pose << "] - " << - // current_bam_record->slices << " parts" << endl; - // get the number of parts from current bam record slice_number = current_bam_record->slices; @@ -198,7 +192,11 @@ void process ( vector < std::map >::ite current_slice = 0; iso_map.clear(); backup_current_gtf_records_splitted_it = current_gtf_records_splitted_it; // update the backup for gff iterator -// cout << "iso_map is cleared because of the new read" << endl; + cout << "GOT NEW READ: [" + << current_bam_record->start_pose << ", " + << current_bam_record->end_pose << "] - " + << current_bam_record->read_id << " - " + << current_bam_record->slices << " slice(s)" << endl; } // increment current_slice (position inside a spliced read) @@ -206,6 +204,12 @@ void process ( vector < std::map >::ite current_slice++; } + cout << " CURSOR COORDINATES:" << endl; + cout << " BAM - " << current_bam_record->start_pose << endl; + cout << " GTF - " << current_gtf_records_splitted_it->first.lower() << endl; + cout << " READ_ID - " << current_bam_record->read_id << endl; + cout << " SLICE - " << current_slice << endl; + // Find start segment annotation // If false call get_bam_record again. freeze is true if we need to skip the read and false if we want to skip annotation if (not find_start_segment_annotation(current_bam_record, previous_bam_record, current_gtf_records_splitted_it, freeze)) { @@ -214,6 +218,7 @@ void process ( vector < std::map >::ite if (current_bam_record->slices > 1 && current_slice > 1) { // cerr << "Rewind current_gtf_records_splitted_it" << endl; current_gtf_records_splitted_it = backup_current_gtf_records_splitted_it; + cout << " REWIND GTF: failed to find start segment" << endl; freeze = false; continue; } else { @@ -230,6 +235,7 @@ void process ( vector < std::map >::ite } if (current_slice == current_bam_record->slices && current_bam_record->slices > 1 && (not freeze)) { current_gtf_records_splitted_it = backup_current_gtf_records_splitted_it; + cout << " REWIND GTF: failed to find start segment" << endl; } continue; } @@ -242,8 +248,9 @@ void process ( vector < std::map >::ite // If false call get_bam_record with freeze = false ===> skip current bam record and get the next one interval_map::iterator temp_gtf_records_splitted_it = current_gtf_records_splitted_it; - if (not find_stop_segment_annotation(current_bam_record, temp_gtf_records_splitted_it, - gtf_records_splitted.end(), freeze)) { + if (not find_stop_segment_annotation(current_bam_record, temp_gtf_records_splitted_it, gtf_records_splitted.end(), freeze)) { + current_gtf_records_splitted_it = backup_current_gtf_records_splitted_it; + cout << " REWIND GTF: failed to find stop segment" << endl; continue; } @@ -332,7 +339,7 @@ void process ( vector < std::map >::ite gff_it->annotation->reads_count++; assert (gff_it->annotation.use_count() > 0); assert (bam_record_to_put_in_array.use_count() > 0); -// cout << " into annotation " << gff_it->annotation->exon_id << " from " << gff_it->annotation->isoform_id << " added read " << bam_record_to_put_in_array->read_id << endl; + cout << " into exon " << gff_it->annotation->exon_id << " from " << gff_it->annotation->isoform_id << " added read " << bam_record_to_put_in_array->read_id << endl; // updated weight array // string isoform = gff_it->annotation->isoform_id; // int j = iso_var_map[chrom][isoform].index; @@ -349,10 +356,10 @@ void process ( vector < std::map >::ite throw ("Error: dividing by zero"); } double weight = 1 / (global_koef * vertical_koef * (double) horizontal_koef); -// cout << "global_koef = " << global_koef << endl; -// cout << "vertical_koef = " << vertical_koef << endl; -// cout << "horizontal_koef = " << horizontal_koef << endl; -// cout << "weight = " << weight << endl; + cout << "global_koef = " << global_koef << endl; + cout << "vertical_koef = " << vertical_koef << endl; + cout << "horizontal_koef = " << horizontal_koef << endl; + cout << "weight = " << weight << endl; for (int i = start_i; i <= stop_i; i++) { weight_array[j][i] += weight; } @@ -371,6 +378,7 @@ void process ( vector < std::map >::ite // from its backup version if (slice_number > 1) { // rewind iterator back only if it was spliced read current_gtf_records_splitted_it = backup_current_gtf_records_splitted_it; // get iterator from the backup + cout << " REWIND GTF: after processing the sliced read" << endl; } } // set freeze to false to get new read from the bam file when calling function get_bam_record