diff --git a/src/owler/owler.h b/src/owler/owler.h index fa3319b..e3f26d5 100644 --- a/src/owler/owler.h +++ b/src/owler/owler.h @@ -38,129 +38,7 @@ //#include "index/index_spaced_hash_fast.h" #include "index/index_owler.h" -class OverlapResult { - public: - int64_t read_id; - int64_t ref_id; - float jaccard_score; - int64_t shared_minmers; - bool read_is_reverse; /// In the MHAP-like output, read is always considered to be forward oriented. - int64_t read_start; - int64_t read_end; - int64_t read_length; - bool ref_is_reverse; - int64_t ref_start; - int64_t ref_end; - int64_t ref_length; - - int64_t front_id; - int64_t back_id; - - std::string read_header; - std::string ref_header; - int64_t cov_bases_read; - int64_t cov_bases_ref; - - OverlapResult() { - read_id = ref_id = shared_minmers = read_start = read_end = read_length = ref_start = ref_end = ref_length = 0; - jaccard_score = 0.0f; - read_is_reverse = ref_is_reverse = false; - front_id = back_id = 0; - cov_bases_read = cov_bases_ref = 0; - read_header = ref_header = ""; - } - - std::string GenerateMHAPLine() { - std::stringstream ret; - ret << (read_id + 1) << " "; /// read1_id - ret << (ref_id + 1) << " "; /// read2_id - ret << jaccard_score << " "; /// Jaccard score - ret << shared_minmers << " "; /// Shared minmers - ret << (read_is_reverse ? 1 : 0) << " "; /// A is reverse - ret << read_start << " "; - ret << read_end << " "; - ret << read_length << " "; - ret << (ref_is_reverse ? 1 : 0) << " "; - ret << ref_start << " "; - ret << ref_end << " "; - ret << ref_length; - return ret.str(); - } - - std::string GenerateAFGLine() { - std::stringstream ret; - -// int64_t front_id = lcskpp_indices.front(); -// int64_t back_id = lcskpp_indices.back(); -// -// int64_t A_start = seed_hits[hits_start + back_id].query_pos; -// int64_t A_end = seed_hits[hits_start + front_id].query_pos + 12; -// int64_t B_start = seed_hits[hits_start + back_id].ref_pos; -// int64_t B_end = seed_hits[hits_start + front_id].ref_pos + 12; -// -// int64_t read1_id = read_id + 1; -// int64_t read2_id = ref_id + 1; -// int64_t score = std::min((A_end - A_start), (B_end - B_start)); -// -// //ahg - Ahang. Length of the non-overlapping portion of the first read. -// //bhg - Bhang. Length of the non-overlapping portion of the second read. -// /// The position (alignment_start - clip_count_front) would be roughly where alignment of one reads starts on another. -// /// If this value is > 0, the first read starts within read2, and thus ahang needs to be negative (hence the '-' sign). -// int64_t ahang = 0; // - (alignment_start - clip_count_front); -// int64_t bhang = 0; // reference_length - (alignment_end + clip_count_back); -// -// std::string adj = (ref_is_reverse == false) ? OVERLAP_NORMAL : OVERLAP_INNIE; -// ret << "{OVL" << "\n"; -// ret << "adj:" << adj << "\n"; -// ret << "rds:" << (read_id + 1) << "," << (ref_id + 1) << "\n"; -// ret << "scr:" << score << "\n"; -// ret << "ahg:" << ahang << "\n"; -// ret << "bhg:" << bhang << "\n"; -// ret << "}"; - - return ret.str(); - } - - std::string GeneratePAFLine() { - std::stringstream ret; -// ret << ref_header << "\t"; - if (read_header == "") { ret << (read_id + 1) << "\t"; } else { ret << TrimToFirstSpace(read_header) << "\t"; } - ret << read_length << "\t"; - ret << read_start << "\t"; - ret << read_end << "\t"; - - ret << (ref_is_reverse ? "-" : "+") << "\t"; /// Or maybe this should be (read_is_reverse)? In this case, upper and lower parts need to be switched (ref and read). - if (ref_header == "") { ret << (ref_id + 1) << "\t"; } else { ret << TrimToFirstSpace(ref_header) << "\t"; } - ret << ref_length << "\t"; - ret << ref_start << "\t"; - ret << ref_end << "\t"; - - ret << std::min(cov_bases_read, cov_bases_ref) << "\t"; - ret << (((read_end - read_start) > (ref_end - ref_start)) ? (read_end - read_start) : (ref_end - ref_start)) << "\t"; - ret << "255" << "\t"; - ret << "cm:i:" << shared_minmers; - - return ret.str(); - } - -}; - -struct overlapresult_sort_key -{ - inline bool operator() (const OverlapResult& op1, const OverlapResult& op2) { - if (op1.read_id == op2.read_id) { - if (op1.ref_id == op2.ref_id) { - return (op1.jaccard_score > op2.jaccard_score); - } else { - return (op1.ref_id < op2.ref_id); - } - } else { - return (op1.read_id < op2.read_id); - } - return false; - } -}; class Owler { public: diff --git a/src/owler/owler_data.cc b/src/owler/owler_data.cc index 6f976be..c7fcb91 100644 --- a/src/owler/owler_data.cc +++ b/src/owler/owler_data.cc @@ -95,3 +95,92 @@ void OwlerData::Init(SingleSequence *read, std::vector &indexes) { seed_types.push_back(((IndexSpacedHash *) indexes[i])->get_shape_index()); } } + + + + + +OverlapResult::OverlapResult() { + read_id = ref_id = shared_minmers = read_start = read_end = read_length = ref_start = ref_end = ref_length = 0; + jaccard_score = 0.0f; + read_is_reverse = ref_is_reverse = false; + front_id = back_id = 0; + cov_bases_read = cov_bases_ref = 0; + read_header = ref_header = ""; +} + +std::string OverlapResult::GenerateMHAPLine() { + std::stringstream ret; + ret << (read_id + 1) << " "; /// read1_id + ret << (ref_id + 1) << " "; /// read2_id + ret << jaccard_score << " "; /// Jaccard score + ret << shared_minmers << " "; /// Shared minmers + ret << (read_is_reverse ? 1 : 0) << " "; /// A is reverse + ret << read_start << " "; + ret << read_end << " "; + ret << read_length << " "; + ret << (ref_is_reverse ? 1 : 0) << " "; + ret << ref_start << " "; + ret << ref_end << " "; + ret << ref_length; + return ret.str(); +} + +std::string OverlapResult::GenerateAFGLine() { + std::stringstream ret; + +// int64_t front_id = lcskpp_indices.front(); +// int64_t back_id = lcskpp_indices.back(); +// +// int64_t A_start = seed_hits[hits_start + back_id].query_pos; +// int64_t A_end = seed_hits[hits_start + front_id].query_pos + 12; +// int64_t B_start = seed_hits[hits_start + back_id].ref_pos; +// int64_t B_end = seed_hits[hits_start + front_id].ref_pos + 12; +// +// int64_t read1_id = read_id + 1; +// int64_t read2_id = ref_id + 1; +// int64_t score = std::min((A_end - A_start), (B_end - B_start)); +// +// //ahg - Ahang. Length of the non-overlapping portion of the first read. +// //bhg - Bhang. Length of the non-overlapping portion of the second read. +// /// The position (alignment_start - clip_count_front) would be roughly where alignment of one reads starts on another. +// /// If this value is > 0, the first read starts within read2, and thus ahang needs to be negative (hence the '-' sign). +// int64_t ahang = 0; // - (alignment_start - clip_count_front); +// int64_t bhang = 0; // reference_length - (alignment_end + clip_count_back); +// + +// std::string adj = (ref_is_reverse == false) ? OVERLAP_NORMAL : OVERLAP_INNIE; +// ret << "{OVL" << "\n"; +// ret << "adj:" << adj << "\n"; +// ret << "rds:" << (read_id + 1) << "," << (ref_id + 1) << "\n"; +// ret << "scr:" << score << "\n"; +// ret << "ahg:" << ahang << "\n"; +// ret << "bhg:" << bhang << "\n"; +// ret << "}"; + + return ret.str(); +} + +std::string OverlapResult::GeneratePAFLine() { + std::stringstream ret; + if (read_header == "") { ret << (read_id + 1) << "\t"; } else { ret << TrimToFirstSpace(read_header) << "\t"; } + ret << read_length << "\t"; + ret << read_start << "\t"; + ret << read_end << "\t"; + + ret << (ref_is_reverse ? "-" : "+") << "\t"; /// Or maybe this should be (read_is_reverse)? In this case, upper and lower parts need to be switched (ref and read). + if (ref_header == "") { ret << (ref_id + 1) << "\t"; } else { ret << TrimToFirstSpace(ref_header) << "\t"; } + ret << ref_length << "\t"; + ret << ref_start << "\t"; + ret << ref_end << "\t"; + + float idty_read = ((float) cov_bases_read) / ((float) (read_end - read_start)); + float idty_ref = ((float) cov_bases_ref) / ((float) (ref_end - ref_start)); + ret << ((idty_read < idty_ref) ? cov_bases_ref : cov_bases_read) << "\t"; + ret << ((idty_read < idty_ref) ? (ref_end - ref_start) : (read_end - read_start)) << "\t"; + + ret << "255" << "\t"; + ret << "cm:i:" << shared_minmers; + + return ret.str(); +} diff --git a/src/owler/owler_data.h b/src/owler/owler_data.h index cda98ee..743332a 100644 --- a/src/owler/owler_data.h +++ b/src/owler/owler_data.h @@ -142,6 +142,51 @@ class OwlerData { std::vector *indexes_; }; +class OverlapResult { + public: + int64_t read_id; + int64_t ref_id; + float jaccard_score; + int64_t shared_minmers; + bool read_is_reverse; /// In the MHAP-like output, read is always considered to be forward oriented. + int64_t read_start; + int64_t read_end; + int64_t read_length; + bool ref_is_reverse; + int64_t ref_start; + int64_t ref_end; + int64_t ref_length; + + int64_t front_id; + int64_t back_id; + + std::string read_header; + std::string ref_header; + int64_t cov_bases_read; + int64_t cov_bases_ref; + + OverlapResult(); + std::string GenerateMHAPLine(); + std::string GenerateAFGLine(); + std::string GeneratePAFLine(); +}; + +struct overlapresult_sort_key +{ + inline bool operator() (const OverlapResult& op1, const OverlapResult& op2) { + if (op1.read_id == op2.read_id) { + if (op1.ref_id == op2.ref_id) { + return (op1.jaccard_score > op2.jaccard_score); + } else { + return (op1.ref_id < op2.ref_id); + } + } else { + return (op1.read_id < op2.read_id); + } + return false; + } +}; + struct overlaps_greater_than_key { inline bool operator() (const PairwiseOverlapData& op1, const PairwiseOverlapData& op2) {