Skip to content

Commit

Permalink
Added support for PAF output format for overlaps.
Browse files Browse the repository at this point in the history
  • Loading branch information
isovic committed Nov 14, 2015
1 parent 6146f48 commit 8ef2eb0
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 122 deletions.
122 changes: 0 additions & 122 deletions src/owler/owler.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,129 +38,7 @@
//#include "index/index_spaced_hash_fast.h"
#include "index/index_owler.h"

class OverlapResult {
public:
int64_t read_id;
int64_t ref_id;
float jaccard_score;
int64_t shared_minmers;
bool read_is_reverse; /// In the MHAP-like output, read is always considered to be forward oriented.
int64_t read_start;
int64_t read_end;
int64_t read_length;
bool ref_is_reverse;
int64_t ref_start;
int64_t ref_end;
int64_t ref_length;

int64_t front_id;
int64_t back_id;

std::string read_header;
std::string ref_header;
int64_t cov_bases_read;
int64_t cov_bases_ref;

OverlapResult() {
read_id = ref_id = shared_minmers = read_start = read_end = read_length = ref_start = ref_end = ref_length = 0;
jaccard_score = 0.0f;
read_is_reverse = ref_is_reverse = false;
front_id = back_id = 0;
cov_bases_read = cov_bases_ref = 0;
read_header = ref_header = "";
}

std::string GenerateMHAPLine() {
std::stringstream ret;
ret << (read_id + 1) << " "; /// read1_id
ret << (ref_id + 1) << " "; /// read2_id
ret << jaccard_score << " "; /// Jaccard score
ret << shared_minmers << " "; /// Shared minmers
ret << (read_is_reverse ? 1 : 0) << " "; /// A is reverse
ret << read_start << " ";
ret << read_end << " ";
ret << read_length << " ";
ret << (ref_is_reverse ? 1 : 0) << " ";
ret << ref_start << " ";
ret << ref_end << " ";
ret << ref_length;
return ret.str();
}

std::string GenerateAFGLine() {
std::stringstream ret;

// int64_t front_id = lcskpp_indices.front();
// int64_t back_id = lcskpp_indices.back();
//
// int64_t A_start = seed_hits[hits_start + back_id].query_pos;
// int64_t A_end = seed_hits[hits_start + front_id].query_pos + 12;
// int64_t B_start = seed_hits[hits_start + back_id].ref_pos;
// int64_t B_end = seed_hits[hits_start + front_id].ref_pos + 12;
//
// int64_t read1_id = read_id + 1;
// int64_t read2_id = ref_id + 1;
// int64_t score = std::min((A_end - A_start), (B_end - B_start));
//
// //ahg - Ahang. Length of the non-overlapping portion of the first read.
// //bhg - Bhang. Length of the non-overlapping portion of the second read.
// /// The position (alignment_start - clip_count_front) would be roughly where alignment of one reads starts on another.
// /// If this value is > 0, the first read starts within read2, and thus ahang needs to be negative (hence the '-' sign).
// int64_t ahang = 0; // - (alignment_start - clip_count_front);
// int64_t bhang = 0; // reference_length - (alignment_end + clip_count_back);
//

// std::string adj = (ref_is_reverse == false) ? OVERLAP_NORMAL : OVERLAP_INNIE;
// ret << "{OVL" << "\n";
// ret << "adj:" << adj << "\n";
// ret << "rds:" << (read_id + 1) << "," << (ref_id + 1) << "\n";
// ret << "scr:" << score << "\n";
// ret << "ahg:" << ahang << "\n";
// ret << "bhg:" << bhang << "\n";
// ret << "}";

return ret.str();
}

std::string GeneratePAFLine() {
std::stringstream ret;
// ret << ref_header << "\t";
if (read_header == "") { ret << (read_id + 1) << "\t"; } else { ret << TrimToFirstSpace(read_header) << "\t"; }
ret << read_length << "\t";
ret << read_start << "\t";
ret << read_end << "\t";

ret << (ref_is_reverse ? "-" : "+") << "\t"; /// Or maybe this should be (read_is_reverse)? In this case, upper and lower parts need to be switched (ref and read).
if (ref_header == "") { ret << (ref_id + 1) << "\t"; } else { ret << TrimToFirstSpace(ref_header) << "\t"; }
ret << ref_length << "\t";
ret << ref_start << "\t";
ret << ref_end << "\t";

ret << std::min(cov_bases_read, cov_bases_ref) << "\t";
ret << (((read_end - read_start) > (ref_end - ref_start)) ? (read_end - read_start) : (ref_end - ref_start)) << "\t";
ret << "255" << "\t";
ret << "cm:i:" << shared_minmers;

return ret.str();
}

};

struct overlapresult_sort_key
{
inline bool operator() (const OverlapResult& op1, const OverlapResult& op2) {
if (op1.read_id == op2.read_id) {
if (op1.ref_id == op2.ref_id) {
return (op1.jaccard_score > op2.jaccard_score);
} else {
return (op1.ref_id < op2.ref_id);
}
} else {
return (op1.read_id < op2.read_id);
}
return false;
}
};

class Owler {
public:
Expand Down
89 changes: 89 additions & 0 deletions src/owler/owler_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,92 @@ void OwlerData::Init(SingleSequence *read, std::vector<Index*> &indexes) {
seed_types.push_back(((IndexSpacedHash *) indexes[i])->get_shape_index());
}
}





OverlapResult::OverlapResult() {
read_id = ref_id = shared_minmers = read_start = read_end = read_length = ref_start = ref_end = ref_length = 0;
jaccard_score = 0.0f;
read_is_reverse = ref_is_reverse = false;
front_id = back_id = 0;
cov_bases_read = cov_bases_ref = 0;
read_header = ref_header = "";
}

std::string OverlapResult::GenerateMHAPLine() {
std::stringstream ret;
ret << (read_id + 1) << " "; /// read1_id
ret << (ref_id + 1) << " "; /// read2_id
ret << jaccard_score << " "; /// Jaccard score
ret << shared_minmers << " "; /// Shared minmers
ret << (read_is_reverse ? 1 : 0) << " "; /// A is reverse
ret << read_start << " ";
ret << read_end << " ";
ret << read_length << " ";
ret << (ref_is_reverse ? 1 : 0) << " ";
ret << ref_start << " ";
ret << ref_end << " ";
ret << ref_length;
return ret.str();
}

std::string OverlapResult::GenerateAFGLine() {
std::stringstream ret;

// int64_t front_id = lcskpp_indices.front();
// int64_t back_id = lcskpp_indices.back();
//
// int64_t A_start = seed_hits[hits_start + back_id].query_pos;
// int64_t A_end = seed_hits[hits_start + front_id].query_pos + 12;
// int64_t B_start = seed_hits[hits_start + back_id].ref_pos;
// int64_t B_end = seed_hits[hits_start + front_id].ref_pos + 12;
//
// int64_t read1_id = read_id + 1;
// int64_t read2_id = ref_id + 1;
// int64_t score = std::min((A_end - A_start), (B_end - B_start));
//
// //ahg - Ahang. Length of the non-overlapping portion of the first read.
// //bhg - Bhang. Length of the non-overlapping portion of the second read.
// /// The position (alignment_start - clip_count_front) would be roughly where alignment of one reads starts on another.
// /// If this value is > 0, the first read starts within read2, and thus ahang needs to be negative (hence the '-' sign).
// int64_t ahang = 0; // - (alignment_start - clip_count_front);
// int64_t bhang = 0; // reference_length - (alignment_end + clip_count_back);
//

// std::string adj = (ref_is_reverse == false) ? OVERLAP_NORMAL : OVERLAP_INNIE;
// ret << "{OVL" << "\n";
// ret << "adj:" << adj << "\n";
// ret << "rds:" << (read_id + 1) << "," << (ref_id + 1) << "\n";
// ret << "scr:" << score << "\n";
// ret << "ahg:" << ahang << "\n";
// ret << "bhg:" << bhang << "\n";
// ret << "}";

return ret.str();
}

std::string OverlapResult::GeneratePAFLine() {
std::stringstream ret;
if (read_header == "") { ret << (read_id + 1) << "\t"; } else { ret << TrimToFirstSpace(read_header) << "\t"; }
ret << read_length << "\t";
ret << read_start << "\t";
ret << read_end << "\t";

ret << (ref_is_reverse ? "-" : "+") << "\t"; /// Or maybe this should be (read_is_reverse)? In this case, upper and lower parts need to be switched (ref and read).
if (ref_header == "") { ret << (ref_id + 1) << "\t"; } else { ret << TrimToFirstSpace(ref_header) << "\t"; }
ret << ref_length << "\t";
ret << ref_start << "\t";
ret << ref_end << "\t";

float idty_read = ((float) cov_bases_read) / ((float) (read_end - read_start));
float idty_ref = ((float) cov_bases_ref) / ((float) (ref_end - ref_start));
ret << ((idty_read < idty_ref) ? cov_bases_ref : cov_bases_read) << "\t";
ret << ((idty_read < idty_ref) ? (ref_end - ref_start) : (read_end - read_start)) << "\t";

ret << "255" << "\t";
ret << "cm:i:" << shared_minmers;

return ret.str();
}
45 changes: 45 additions & 0 deletions src/owler/owler_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,51 @@ class OwlerData {
std::vector<Index*> *indexes_;
};

class OverlapResult {
public:
int64_t read_id;
int64_t ref_id;
float jaccard_score;
int64_t shared_minmers;
bool read_is_reverse; /// In the MHAP-like output, read is always considered to be forward oriented.
int64_t read_start;
int64_t read_end;
int64_t read_length;
bool ref_is_reverse;
int64_t ref_start;
int64_t ref_end;
int64_t ref_length;

int64_t front_id;
int64_t back_id;

std::string read_header;
std::string ref_header;
int64_t cov_bases_read;
int64_t cov_bases_ref;

OverlapResult();
std::string GenerateMHAPLine();
std::string GenerateAFGLine();
std::string GeneratePAFLine();
};

struct overlapresult_sort_key
{
inline bool operator() (const OverlapResult& op1, const OverlapResult& op2) {
if (op1.read_id == op2.read_id) {
if (op1.ref_id == op2.ref_id) {
return (op1.jaccard_score > op2.jaccard_score);
} else {
return (op1.ref_id < op2.ref_id);
}
} else {
return (op1.read_id < op2.read_id);
}
return false;
}
};

struct overlaps_greater_than_key
{
inline bool operator() (const PairwiseOverlapData& op1, const PairwiseOverlapData& op2) {
Expand Down

0 comments on commit 8ef2eb0

Please sign in to comment.