From fad48b22afda146d6bc883c6cf9a3dda9fb94efe Mon Sep 17 00:00:00 2001 From: Thomas Willems Date: Wed, 19 Mar 2014 17:10:53 -0400 Subject: [PATCH] Added function documentation and integer casts to remove signed-unsigned comparison warnings --- src/AlignmentFilters.cpp | 20 ++++----- src/AlignmentFilters.h | 6 ++- src/ZAlgorithm.cpp | 93 ++-------------------------------------- src/ZAlgorithm.h | 25 +++++++++++ 4 files changed, 42 insertions(+), 102 deletions(-) diff --git a/src/AlignmentFilters.cpp b/src/AlignmentFilters.cpp index 3fb3a2c..23cc72a 100644 --- a/src/AlignmentFilters.cpp +++ b/src/AlignmentFilters.cpp @@ -186,10 +186,7 @@ namespace AlignmentFilters { bases = aln->nucleotides.substr(start_index, num_bases); } - - /* - - */ + bool HasLargestEndMatches(AlignedRead* aln, const string& ref_seq, int ref_seq_start, int max_external, int max_internal){ // Extract sequence, start and end coordinates of read after clipping string bases; @@ -197,16 +194,16 @@ namespace AlignmentFilters { GetUnclippedInfo(aln, bases, start, end); // Check that the prefix match is the longest - if (start >= ref_seq_start && start < ref_seq_start + ref_seq.size()){ + if (start >= ref_seq_start && start < ref_seq_start + static_cast<int>(ref_seq.size())){ int start_index = start - ref_seq_start; int start = max(0, start_index - max_external); - int stop = min((int)(ref_seq.size()-1), start_index + max_internal); + int stop = min(static_cast<int>((ref_seq.size()-1)), start_index + max_internal); vector<int> match_counts; ZAlgorithm::GetPrefixMatchCounts(bases, ref_seq, start, stop, match_counts); int align_index = start_index - start; int num_matches = match_counts[align_index]; - for (unsigned int i = 0; i < match_counts.size(); i++){ + for (int i = 0; i < static_cast<int>(match_counts.size()); i++){ if (i == align_index) continue; if (match_counts[i] >= num_matches) @@ -215,23 +212,22 @@ namespace AlignmentFilters { } // Check that the suffix match is the longest - if (end >= ref_seq_start && end < 
ref_seq_start + ref_seq.size()){ + if (end >= ref_seq_start && end < ref_seq_start + static_cast<int>(ref_seq.size())){ int end_index = end - ref_seq_start; int start = max(0, end_index - max_internal); - int stop = min((int)(ref_seq.size()-1), end_index + max_external); + int stop = min(static_cast<int>(ref_seq.size()-1), end_index + max_external); vector<int> match_counts; ZAlgorithm::GetSuffixMatchCounts(bases, ref_seq, start, stop, match_counts); int align_index = end_index - start; int num_matches = match_counts[align_index]; - for (unsigned int i = 0; i < match_counts.size(); i++){ + for (int i = 0; i < static_cast<int>(match_counts.size()); i++){ if (i == align_index) continue; if (match_counts[i] >= num_matches) return false; } - } - + } return true; } } diff --git a/src/AlignmentFilters.h b/src/AlignmentFilters.h index ec45bf7..e548ae2 100644 --- a/src/AlignmentFilters.h +++ b/src/AlignmentFilters.h @@ -40,7 +40,11 @@ namespace AlignmentFilters { /* Minimum distances from 5' and 3' end of reads to first indel. If no such indel exists, returns (-1,-1). */ std::pair<int,int> GetEndDistToIndel(AlignedRead* aln); - /* Returns true iff the alignment ends match maximally compared to other positions within the specified window. */ + /* Returns true iff the alignment has: + 1) a maximal matching prefix compared to alignments that start [-max_upstream, max_downstream] from the 5' alignment position of the read + 2) a maximal matching suffix compared to alignments that end [-max_downstream, max_upstream] from the 3' alignment position of the read + Ignores clipped bases when performing these comparisons + */ bool HasLargestEndMatches(AlignedRead* aln, const std::string& ref_seq, int ref_seq_start, int max_upstream, int max_downstream); } diff --git a/src/ZAlgorithm.cpp b/src/ZAlgorithm.cpp index d4745c4..9ec1a9b 100644 --- a/src/ZAlgorithm.cpp +++ b/src/ZAlgorithm.cpp @@ -28,47 +28,6 @@ along with lobSTR. If not, see . 
namespace ZAlgorithm{ - /* - void suffix_helper(int start, const std::string& s1, const std::string& s2, - std::vector<int>& s1_matches, std::vector<int>& num_matches){ - num_matches = std::vector<int>(s2.size(), -1); - int leftmost = s2.size(), right_index = s2.size(); - for (int i = start; i >= 0; i--){ - if (i <= leftmost){ - int index_a = s1.size()-1, index_b = i; - while (index_a >= 0 && index_b >= 0 && s1[index_a] == s2[index_b]){ - index_a--; - index_b--; - } - num_matches[i] = i - index_b; - if (index_b < i){ - right_index = i; - leftmost = index_b + 1; - } - } - else { - int twin = i - right_index + s1.size()-1; - int new_left = i - s1_matches[twin] + 1; - if (new_left > leftmost) - num_matches[i] = s1_matches[twin]; - else if (new_left < leftmost) - num_matches[i] = i-leftmost+1; - else { - int index_a = s1.size()-2-i+leftmost, index_b = leftmost-1; - while (index_a >= 0 && index_b >= 0 && s1[index_a] == s2[index_b]){ - index_a--; - index_b--; - } - num_matches[i] = i-index_b; - right_index = i; - leftmost = index_b + 1; - } - } - } - } - */ - - void suffix_helper(const std::string& s1, const std::string& s2, int s2_left, int s2_right, std::vector<int>& s1_matches, std::vector<int>& num_matches){ num_matches = std::vector<int>(s2_right - s2_left + 1, -1); @@ -107,49 +66,6 @@ namespace ZAlgorithm{ } } - - - - /* - void prefix_helper(unsigned int start, const std::string& s1, const std::string& s2, - std::vector<int>& s1_matches, std::vector<int>& num_matches){ - num_matches = std::vector<int>(s2.size(), -1); - int rightmost = 0, left_index = 0; - for (int i = start; i < s2.size(); i++){ - if (i >= rightmost){ - int index_a = 0, index_b = i; - while (index_a < s1.size() && index_b < s2.size() && s1[index_a] == s2[index_b]){ - index_a++; - index_b++; - } - num_matches[i] = index_b - i; - if (index_b > i){ - left_index = i; - rightmost = index_b - 1; - } - } - else { - int twin = i - left_index; - int new_right = i + s1_matches[twin] - 1; - if (new_right < rightmost) - num_matches[i] = s1_matches[twin]; 
- else if (new_right > rightmost) - num_matches[i] = rightmost-i+1; - else { - int index_a = rightmost+1-i, index_b = rightmost+1; - while (index_a < s1.size() && index_b < s2.size() && s1[index_a] == s2[index_b]){ - index_a++; - index_b++; - } - num_matches[i] = index_b - i; - left_index = i; - rightmost = index_b - 1; - } - } - } - } - */ - void prefix_helper(const std::string& s1, const std::string& s2, int s2_left, int s2_right, std::vector<int>& s1_matches, std::vector<int>& num_matches, int offset){ num_matches = std::vector<int>(s2_right-s2_left+1+offset, -1); @@ -157,7 +73,7 @@ namespace ZAlgorithm{ for (int i = s2_left; i <= s2_right; i++){ if (i >= rightmost){ int index_a = 0, index_b = i; - while (index_a < s1.size() && index_b < s2.size() && s1[index_a] == s2[index_b]){ + while (index_a < static_cast<int>(s1.size()) && index_b < static_cast<int>(s2.size()) && s1[index_a] == s2[index_b]){ index_a++; index_b++; } @@ -176,7 +92,7 @@ namespace ZAlgorithm{ num_matches[i-s2_left+offset] = rightmost-i+1; else { int index_a = rightmost+1-i, index_b = rightmost+1; - while (index_a < s1.size() && index_b < s2.size() && s1[index_a] == s2[index_b]){ + while (index_a < static_cast<int>(s1.size()) && index_b < static_cast<int>(s2.size()) && s1[index_a] == s2[index_b]){ index_a++; index_b++; } @@ -189,7 +105,6 @@ namespace ZAlgorithm{ } - void GetPrefixMatchCounts(const std::string& s1, const std::string& s2, std::vector<int>& num_matches) { std::vector<int> s1_matches; prefix_helper(s1, s1, 1, s1.size()-1, s1_matches, s1_matches, 1); @@ -203,7 +118,7 @@ namespace ZAlgorithm{ } void GetPrefixMatchCounts(const std::string& s1, const std::string& s2, int s2_start, int s2_stop, std::vector<int>& num_matches) { - if (s2_start < 0 or s2_stop >= s2.size()) + if (s2_start < 0 or s2_stop >= static_cast<int>(s2.size())) PrintMessageDieOnError("Invalid string indices provided to GetPrefixMatchCounts", ERROR); std::vector<int> s1_matches; prefix_helper(s1, s1, 1, s1.size()-1, s1_matches, s1_matches, 1); @@ -211,7 +126,7 @@ namespace 
ZAlgorithm{ } void GetSuffixMatchCounts(const std::string& s1, const std::string& s2, int s2_start, int s2_stop, std::vector<int>& num_matches) { - if (s2_start < 0 or s2_stop >= s2.size()) + if (s2_start < 0 or s2_stop >= static_cast<int>(s2.size())) PrintMessageDieOnError("Invalid string indices provided to GetSuffixMatchCounts", ERROR); std::vector<int> s1_matches; suffix_helper(s1, s1, 0, s1.size()-2, s1_matches, s1_matches); diff --git a/src/ZAlgorithm.h b/src/ZAlgorithm.h index 14ce501..777fbc8 100644 --- a/src/ZAlgorithm.h +++ b/src/ZAlgorithm.h @@ -23,8 +23,33 @@ along with lobSTR. If not, see . namespace ZAlgorithm{ + /* + * For each position in s2, calculates the length of the matching prefix of s1 and s2[i...] + * and stores it in num_matches[i]. The provided vector is cleared and sized appropriately. + * Runs in O(length_of_s1 + length_of_s2) + */ void GetPrefixMatchCounts(const std::string& s1, const std::string& s2, std::vector<int>& num_matches); + + /* + * For each position in s2, calculates the length of the matching suffix of s1 and s2[0...i] + * and stores it in num_matches[i]. The provided vector is cleared and sized appropriately. + * Runs in O(length_of_s1 + length_of_s2) + */ void GetSuffixMatchCounts(const std::string& s1, const std::string& s2, std::vector<int>& num_matches); + + /* + * For each position i in s2 in the range [s2_start, s2_stop], calculates the length of + * the matching prefix of s1 and s2[i...] and stores it in num_matches[i-s2_start]. + * The provided vector is cleared and sized appropriately. + * Runs in O(length_of_s1 + size_of_s2_range) + */ void GetPrefixMatchCounts(const std::string& s1, const std::string& s2, int s2_start, int s2_stop, std::vector<int>& num_matches); + + /* + * For each position i in s2 in the range [s2_start, s2_stop], calculates the length of + * the matching suffix of s1 and s2[0...i] and stores it in num_matches[i-s2_start]. + * The provided vector is cleared and sized appropriately. 
+ * Runs in O(length_of_s1 + size_of_s2_range) + */ void GetSuffixMatchCounts(const std::string& s1, const std::string& s2, int s2_start, int s2_stop, std::vector<int>& num_matches); }