Skip to content

Commit

Permalink
Nov 15, 2024: Bug in densities fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Nov 15, 2024
1 parent 97dc44c commit b1cdfbe
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 26 deletions.
1 change: 0 additions & 1 deletion include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ struct WindowData {
std::vector<uint32_t> windowMatches;
uint16_t canonicalCounts = 0;
uint16_t nonCanonicalCounts = 0;
uint16_t windowCounts = 0;
float canonicalDensity = 0.0f;
float nonCanonicalDensity = 0.0f;

Expand Down
41 changes: 41 additions & 0 deletions src/generate-tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,44 @@ int main(int, char **argv) {




// // JACK: New functions
// void cleanOldTests() {
// std::vector<std::string> oldTests = {"validateFiles/random1.fasta.1.tst", "validateFiles/random2.fasta.1.tst"};
// for (const auto& testFile : oldTests) {
// if (remove(testFile.c_str()) != 0) {
// std::cerr << "Error deleting " << testFile << std::endl;
// }
// }
// }
// void generateTest(const std::string& exePath, const std::string& inputFile, const std::string& args) {
// std::string tstFile = "validateFiles/" + inputFile + ".tst";
// std::ofstream out(tstFile);
// if (!out) {
// std::cerr << "Failed to create test file: " << tstFile << std::endl;
// return;
// }
// out << exePath << " testFiles/" << inputFile << " " << args << " > embedded\n";
// out.close();
// std::cout << "Generated test file: " << tstFile << std::endl;
// }
// int main(int argc, char** argv) {
// if (argc < 2) {
// std::cerr << "Usage: " << argv[0] << " <path_to_teloscope_executable>" << std::endl;
// return EXIT_FAILURE;
// }
// std::string exePath = argv[1];
// std::cout << "WARNING: Previous validate files will be deleted. Continue? (Y/N) ";
// std::string input;
// std::cin >> input;
// if (input != "Y" && input != "y") {
// std::cout << "Validate generation cancelled." << std::endl;
// return EXIT_FAILURE;
// }
// cleanOldTests();
// std::cout << "Generating new validate files..." << std::endl;
// // Generate tests based on specific command line arguments
// generateTest(exePath, "random1.fasta", "-w 3 -s 1");
// generateTest(exePath, "random2.fasta", "-p TTAGGG,CCCTAA -w 10 -s 5");
// return EXIT_SUCCESS;
// }
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <input.h> // check


std::string version = "0.0.1";
std::string version = "0.0.2b";

// global
std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
Expand Down
33 changes: 15 additions & 18 deletions src/teloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,25 +204,29 @@ void Teloscope::analyzeWindow(const std::string &window, uint32_t windowStart, W
auto current = trie.getRoot();
uint32_t scanLimit = std::min(i + longestPatternSize, static_cast<uint32_t>(window.size()));

for (uint32_t j = i; j < scanLimit; ++j) { // Only scan positions in range of patterns
for (uint32_t j = i; j < scanLimit; ++j) { // Scan positions until longest pattern

if (!trie.hasChild(current, window[j])) break;
current = trie.getChild(current, window[j]); // window[j] is a character

if (current->isEndOfWord) {
std::string pattern = window.substr(i, j - i + 1);
bool isCanonical = (pattern == userInput.canonicalPatterns.first || pattern == userInput.canonicalPatterns.second); // Check canonical patterns
uint8_t patternSize = pattern.size();

// Update windowData from prevOverlapData
if (j >= overlapSize || overlapSize == 0 || windowStart == 0 ) {
// windowData.patternMap[pattern].count++;
isCanonical ? windowData.canonicalCounts++ : windowData.nonCanonicalCounts++;
windowData.windowCounts++;

// windowData.patternMap[pattern].patMatches.push_back(i);
isCanonical ? windowData.canonicalMatches.push_back(i) : windowData.nonCanonicalMatches.push_back(i);
windowData.windowMatches.push_back(i); // Ordered by design

// Record the match only when it is not already covered by the previous
// window's overlap region (or when there is no overlap / this is the first window).
if (j >= overlapSize || overlapSize == 0 || windowStart == 0) {
    // Fraction of the window covered by this match. The division must be done in
    // floating point: `patternSize / window.size()` is an unsigned integer division
    // that truncates to 0 for every pattern shorter than the window, which would
    // leave both densities stuck at 0.
    const float patternFraction = static_cast<float>(patternSize) / static_cast<float>(window.size());

    if (isCanonical) {
        windowData.canonicalCounts++;
        windowData.canonicalDensity += patternFraction;
        windowData.canonicalMatches.push_back(i);
    } else {
        windowData.nonCanonicalCounts++;
        windowData.nonCanonicalDensity += patternFraction;
        windowData.nonCanonicalMatches.push_back(i);
    }

    // Match start positions are appended in scan order (increasing i).
    windowData.windowMatches.push_back(i);
    windowData.hDistances.push_back(userInput.hammingDistances[pattern]);
    // windowData.winHDistance += userInput.hammingDistances[pattern];
}
Expand Down Expand Up @@ -280,9 +284,6 @@ SegmentData Teloscope::analyzeSegment(std::string &sequence, UserInputTeloscope
if (userInput.modeGC) { windowData.gcContent = getGCContent(windowData.nucleotideCounts, window.size()); }
if (userInput.modeEntropy) { windowData.shannonEntropy = getShannonEntropy(windowData.nucleotideCounts, window.size()); }

windowData.canonicalDensity = static_cast<float>(windowData.canonicalCounts) / window.size();
windowData.nonCanonicalDensity = static_cast<float>(windowData.nonCanonicalCounts) / window.size();

// Update windowData
windowData.windowStart = windowStart + absPos;
windowData.currentWindowSize = currentWindowSize;
Expand Down Expand Up @@ -505,11 +506,7 @@ void Teloscope::printSummary() {

std::cout << "\n+++Summary Report+++\n";
std::cout << "Total windows analyzed:\t" << totalNWindows << "\n";
std::cout << "Total input patterns found:\n";
for (const auto& [pattern, count] : patternCounts) {
std::cout << "Pattern:\t" << pattern << "\t" << count << "\n";
}

// Print the total canonical and non-canonical matches per path - PENDING
// For each pattern, print the path header with the highest number of matches - PENDING
// For each pattern, print the path header with the lowest number of matches - PENDING
std::cout << "Max Shannon Entropy:\t" << getMax(entropyValues) << "\n";
Expand Down
22 changes: 16 additions & 6 deletions validateFiles/random1.fasta.1.tst
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
testFiles/random1.fasta -w 3 -s 1 -p TT
testFiles/random1.fasta -w 3 -s 1 -c TT -p TT -k
embedded
/// Teloscope v0.0.1
/// Teloscope v0.0.2b
Sliding windows with step size (1) and window size (3).
Tip: A step value close the window size results in faster runs.
Setting canonical pattern: TT and its reverse complement: AA
Adding pattern: TT and its reverse complement: AA
Hamming distances precomputed for all input patterns.
Waiting for jobs to complete
All jobs completed
Reporting window matches and metrics in BED/BEDgraphs...

+++Summary Report+++
Total windows analyzed: 40
Total input patterns found:
Pattern: AA 2
Pattern: TT 4
Total windows analyzed: 48
Max Shannon Entropy: 1.58496
Mean Shannon Entropy: 0.770604
Median Shannon Entropy: 0.918296
Min Shannon Entropy: 0
Max GC Content: 100
Mean GC Content: 26.3889
Median GC Content: 16.6667
Min GC Content: 0

0 comments on commit b1cdfbe

Please sign in to comment.