From ffc8740f7ac9d8efc58e0748a9e06f5422c5655b Mon Sep 17 00:00:00 2001 From: leandro Date: Thu, 27 Jun 2019 18:06:55 +0100 Subject: [PATCH] #151 --- include/kmergraphwithcoverage.h | 16 ++++++++++++---- src/kmergraphwithcoverage.cpp | 24 ++++++++++++++++-------- src/pangenome/pangraph.cpp | 4 ++-- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/include/kmergraphwithcoverage.h b/include/kmergraphwithcoverage.h index 6a33ce29..fd3be754 100644 --- a/include/kmergraphwithcoverage.h +++ b/include/kmergraphwithcoverage.h @@ -22,7 +22,7 @@ class KmerGraphWithCoverage { private: //each coverage is represented as a std::vector, each position of the vector representing the coverage of a sample //each coverage of a sample is represented by a std::pair, for the forward and reverse reads coverage of that sample - std::vector>> nodeIndex2SampleCoverage; + std::vector>> nodeIndex2SampleCoverage; uint32_t exp_depth_covg; float binomial_parameter_p; float negative_binomial_parameter_p; @@ -48,13 +48,18 @@ class KmerGraphWithCoverage { virtual ~KmerGraphWithCoverage() = default; //getter + //TODO: we should return a uint16_t instead of a uint32_t, but I did not want to change the interface + //TODO: converting uint16_t to uint32_t is safe anyway + //TODO: some parts of the code accumulates under the type of this variable, so it might be dangerous to change to uint16_t wihtout careful analysis + //TODO: leaving to uint32_t to be safe uint32_t get_covg(uint32_t node_id, bool strand, uint32_t sample_id) const; + uint32_t get_num_reads() const { return num_reads; } uint32_t get_total_number_samples() const {return total_number_samples; } //setters void increment_covg(uint32_t node_id, bool strand, uint32_t sample_id); - void set_covg(uint32_t node_id, uint32_t value, bool strand, uint32_t sample_id); + void set_covg(uint32_t node_id, uint16_t value, bool strand, uint32_t sample_id); void set_exp_depth_covg(const uint32_t); void set_binomial_parameter_p(const float); void set_negative_binomial_parameters(const float &, const float &); @@ -62,8 +67,11 @@ class KmerGraphWithCoverage { void set_num_reads(uint32_t num_reads) { this->num_reads = num_reads; } void zeroCoverages() { - for (auto &sampleCoverage: nodeIndex2SampleCoverage) - sampleCoverage = std::vector>(total_number_samples); + for (auto &sampleCoverage: nodeIndex2SampleCoverage) { + sampleCoverage = std::vector>(total_number_samples); + sampleCoverage.shrink_to_fit(); //tries to make this information as compact as possible (TODO: use sdsl?) + } + } float nbin_prob(uint32_t, const uint32_t &sample_id); diff --git a/src/kmergraphwithcoverage.cpp b/src/kmergraphwithcoverage.cpp index 0ffebc03..acb4bf05 100644 --- a/src/kmergraphwithcoverage.cpp +++ b/src/kmergraphwithcoverage.cpp @@ -5,6 +5,7 @@ #include /* NULL */ #include /* srand, rand */ #include +#include #include #include @@ -36,10 +37,15 @@ void KmerGraphWithCoverage::set_binomial_parameter_p(const float e_rate) { void KmerGraphWithCoverage::increment_covg(uint32_t node_id, bool strand, uint32_t sample_id) { assert(this->nodeIndex2SampleCoverage[node_id].size() > sample_id); + //get a pointer to the value we want to increment + uint16_t* coverage_ptr = nullptr; if (strand) - this->nodeIndex2SampleCoverage[node_id][sample_id].first++; + coverage_ptr = &(this->nodeIndex2SampleCoverage[node_id][sample_id].first); else - this->nodeIndex2SampleCoverage[node_id][sample_id].second++; + coverage_ptr = &(this->nodeIndex2SampleCoverage[node_id][sample_id].second); + + if ((*coverage_ptr) < UINT16_MAX) //checks if it is safe to increase the coverage (no overflow) + ++(*coverage_ptr); } uint32_t KmerGraphWithCoverage::get_covg(uint32_t node_id, bool strand, uint32_t sample_id) const { @@ -48,12 +54,12 @@ uint32_t KmerGraphWithCoverage::get_covg(uint32_t node_id, bool strand, uint32_t return 0; if (strand) - return this->nodeIndex2SampleCoverage[node_id][sample_id].first; + return (uint32_t)(this->nodeIndex2SampleCoverage[node_id][sample_id].first); else - return this->nodeIndex2SampleCoverage[node_id][sample_id].second; + return (uint32_t)(this->nodeIndex2SampleCoverage[node_id][sample_id].second); } -void KmerGraphWithCoverage::set_covg(uint32_t node_id, uint32_t value, bool strand, uint32_t sample_id) { +void KmerGraphWithCoverage::set_covg(uint32_t node_id, uint16_t value, bool strand, uint32_t sample_id) { assert(this->nodeIndex2SampleCoverage[node_id].size() > sample_id); if (strand) this->nodeIndex2SampleCoverage[node_id][sample_id].first = value; @@ -416,6 +422,7 @@ void KmerGraphWithCoverage::save(const std::string &filepath, const std::shared_ } //TODO: THIS SHOULD BE RECODED, WE ARE DUPLICATING CODE HERE (SEE KmerGraph::load())!!! +//TODO: remove this method? void KmerGraphWithCoverage::load(const std::string &filepath) { //TODO: this might be dangerous, recode this? auto kmer_prg = const_cast(this->kmer_prg); @@ -425,7 +432,8 @@ void KmerGraphWithCoverage::load(const std::string &filepath) { std::string line; std::vector split_line; std::stringstream ss; - uint32_t id = 0, covg, from, to; + uint32_t id = 0, from, to; + uint16_t covg; prg::Path p; uint32_t num_nodes = 0; @@ -466,9 +474,9 @@ void KmerGraphWithCoverage::load(const std::string &filepath) { if (kmer_prg->k == 0 and p.length() > 0) { kmer_prg->k = p.length(); } - covg = stoi(split(split_line[3], "FC:i:")[0]); + covg = (uint16_t)(stoul(split(split_line[3], "FC:i:")[0])); set_covg(n->id, covg, 0, sample_id); - covg = stoi(split(split_line[4], "RC:i:")[0]); + covg = (uint16_t)(stoul(split(split_line[4], "RC:i:")[0])); set_covg(n->id, covg, 1, sample_id); if (split_line.size() >= 6) { n->num_AT = std::stoi(split_line[5]); diff --git a/src/pangenome/pangraph.cpp b/src/pangenome/pangraph.cpp index 06f473a0..2404e315 100644 --- a/src/pangenome/pangraph.cpp +++ b/src/pangenome/pangraph.cpp @@ -346,8 +346,8 @@ void pangenome::Graph::copy_coverages_to_kmergraphs(const Graph &ref_pangraph, c for (auto & kmergraph_node_ptr : pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes){ const auto &knode_id = kmergraph_node_ptr->id; assert(knode_id < ref_node.kmer_prg_with_coverage.kmer_prg->nodes.size()); - pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, ref_node.kmer_prg_with_coverage.get_covg(knode_id, 0, ref_sample_id), 0, sample_id); - pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, ref_node.kmer_prg_with_coverage.get_covg(knode_id, 1, ref_sample_id), 1, sample_id); + pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, (uint16_t)(ref_node.kmer_prg_with_coverage.get_covg(knode_id, 0, ref_sample_id)), 0, sample_id); + pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, (uint16_t)(ref_node.kmer_prg_with_coverage.get_covg(knode_id, 1, ref_sample_id)), 1, sample_id); } } }