Skip to content

Commit

Permalink
Nov 5, 2024: Reserve memory and cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Nov 11, 2024
1 parent f3eb7c6 commit 8e94d76
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 29 deletions.
30 changes: 27 additions & 3 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,33 @@ class Teloscope {
std::vector<float> entropyValues; // Total entropy values
std::vector<float> gcContentValues; // Total GC content values

float getShannonEntropy(const uint32_t nucleotideCounts[4], uint32_t windowSize);
float getGCContent(const uint32_t nucleotideCounts[4], uint32_t windowSize);
void getPatternDensities(WindowData& windowData, uint32_t windowSize);

// Shannon entropy (in bits, since log base 2 is used) of a window's nucleotide counts.
//   nucleotideCounts: four per-nucleotide counts for the window
//   windowSize:       divisor that turns each count into a probability
// Returns the negated sum of p * log2(p) over the counts that are non-zero.
inline float getShannonEntropy(const uint32_t nucleotideCounts[4], uint32_t windowSize) {
    float bits = 0.0f;
    for (const uint32_t *count = nucleotideCounts; count != nucleotideCounts + 4; ++count) {
        if (*count == 0) {
            continue; // a zero count contributes nothing (and log2(0) is not finite)
        }
        const float p = static_cast<float>(*count) / windowSize;
        bits -= p * std::log2(p);
    }
    return bits;
}


// GC content of a window, expressed as a percentage of windowSize.
//   nucleotideCounts: four per-nucleotide counts; index 1 = C, index 2 = G
//   windowSize:       number of positions the percentage is taken over
// Returns (C + G) / windowSize * 100. An empty window (windowSize == 0) yields 0
// instead of dividing by zero, the same "empty input -> 0" convention getMean uses.
inline float getGCContent(const uint32_t nucleotideCounts[4], uint32_t windowSize) {
    if (windowSize == 0) {
        return 0.0f;
    }
    const uint32_t gcCount = nucleotideCounts[1] + nucleotideCounts[2]; // Indices: 1 = C, 2 = G
    // Stay in float throughout; the result is returned as float anyway.
    return static_cast<float>(gcCount) / static_cast<float>(windowSize) * 100.0f;
}


// Updates the density of every entry in windowData.patternMap.
// For each (pattern, data) entry: density = (data.count * pattern.size()) / windowSize.
inline void getPatternDensities(WindowData& windowData, uint32_t windowSize) {
    for (auto it = windowData.patternMap.begin(); it != windowData.patternMap.end(); ++it) {
        // Count times the size of the pattern key, computed before the float conversion.
        const auto covered = it->second.count * it->first.size();
        it->second.density = static_cast<float>(covered) / windowSize;
    }
}


float getMean(const std::vector<float>& values);
float getMedian(std::vector<float> values);
Expand Down
29 changes: 29 additions & 0 deletions include/tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <iostream>
#include <algorithm>
#include <stdexcept>
// #include <numeric>

void getCombinations(const std::string &pattern, std::string &current, size_t index, std::vector<std::string> &combinations);

Expand All @@ -16,4 +17,32 @@ std::unordered_map<std::string, uint8_t> getHammingDistances(
const std::pair<std::string, std::string> &canonicalPatterns
);

// inline float getMean(const std::vector<float>& values) {
// if (values.empty()) return 0.0;
// float sum = std::accumulate(values.begin(), values.end(), 0.0);
// return sum / values.size();
// }

// inline float getMedian(std::vector<float> values) {
// if (values.empty()) return 0.0;
// std::sort(values.begin(), values.end());
// size_t size = values.size();
// if (size % 2 == 0) {
// return (values[size / 2 - 1] + values[size / 2]) / 2;
// } else {
// return values[size / 2];
// }
// }

// inline float getMin(const std::vector<float>& values) {
// if (values.empty()) return 0.0;
// return *std::min_element(values.begin(), values.end());
// }

// inline float getMax(const std::vector<float>& values) {
// if (values.empty()) return 0.0;
// return *std::max_element(values.begin(), values.end());
// }


#endif // TOOLS_H
34 changes: 8 additions & 26 deletions src/teloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,32 +41,6 @@ void Trie::insertPattern(const std::string& pattern) {
}


// Shannon entropy (in bits, since log base 2 is used) of a window's nucleotide counts.
// Each non-zero count is divided by windowSize to obtain a probability p, and
// p * log2(p) is subtracted from the running total; zero counts are skipped.
float Teloscope::getShannonEntropy(const uint32_t nucleotideCounts[4], uint32_t windowSize) {
    float bits = 0.0f;
    int idx = 0;
    while (idx < 4) {
        const uint32_t count = nucleotideCounts[idx++];
        if (count == 0) {
            continue; // nothing to add for an absent nucleotide
        }
        const float p = static_cast<float>(count) / windowSize;
        bits -= p * std::log2(p);
    }
    return bits;
}

// GC content of a window, expressed as a percentage of windowSize.
//   nucleotideCounts: four per-nucleotide counts; index 1 = C, index 2 = G
//   windowSize:       number of positions the percentage is taken over
// Returns (C + G) / windowSize * 100. An empty window (windowSize == 0) yields 0
// instead of dividing by zero, the same "empty input -> 0" convention getMean uses.
float Teloscope::getGCContent(const uint32_t nucleotideCounts[4], uint32_t windowSize) {
    if (windowSize == 0) {
        return 0.0f;
    }
    const uint32_t gcCount = nucleotideCounts[1] + nucleotideCounts[2]; // Index 1 = C, Index 2 = G
    // Named cast (as getShannonEntropy uses) rather than a function-style cast; stay in float throughout.
    return static_cast<float>(gcCount) / static_cast<float>(windowSize) * 100.0f;
}


// Updates the density of every entry in windowData.patternMap.
// For each (pattern, data) entry: density = (data.count * pattern.size()) / windowSize.
void Teloscope::getPatternDensities(WindowData& windowData, uint32_t windowSize) {
    for (auto it = windowData.patternMap.begin(); it != windowData.patternMap.end(); ++it) {
        // Count times the size of the pattern key, computed before the float conversion.
        const auto covered = it->second.count * it->first.size();
        it->second.density = static_cast<float>(covered) / windowSize;
    }
}


float Teloscope::getMean(const std::vector<float>& values) {
if (values.empty()) return 0.0;
float sum = std::accumulate(values.begin(), values.end(), 0.0);
Expand Down Expand Up @@ -344,6 +318,14 @@ SegmentData Teloscope::analyzeSegment(std::string &sequence, UserInputTeloscope

// Prepare and analyze current window
WindowData windowData = prevOverlapData;

// Reserve space for vectors
windowData.winBlocks.reserve(windowSize / 13 + 1);
windowData.hDistances.reserve(windowSize / 6);
windowData.canonicalMatches.reserve(windowSize / 6);
windowData.nonCanonicalMatches.reserve(windowSize / 6);
windowData.windowMatches.reserve(windowSize / 6);

analyzeWindow(window, windowStart, windowData, nextOverlapData);

if (userInput.modeGC) { windowData.gcContent = getGCContent(windowData.nucleotideCounts, window.size()); }
Expand Down

0 comments on commit 8e94d76

Please sign in to comment.