Skip to content

Commit

Permalink
Nov 22, 2024: Simplify functions
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Nov 22, 2024
1 parent c347b76 commit 970b97f
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 191 deletions.
5 changes: 3 additions & 2 deletions include/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ struct UserInputTeloscope : UserInput {
uint32_t windowSize = 1000;
uint8_t kmerLen = 21;
uint32_t step = 500;
unsigned short int minBlockLen = 50;
unsigned short int maxBlockDist = 50;
unsigned short int minBlockLen = 100; // Not used anymore
unsigned short int maxBlockDist = 200;
unsigned short int minBlockCounts = 2;

bool keepWindowData = false; // Memory intensive
bool modeMatch = true, modeEntropy = true, modeGC = true; // Change to: de novo, user-defined
Expand Down
17 changes: 6 additions & 11 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class Trie {
// Plain record describing one telomere block by its start and length, plus
// two per-block statistics. How the fields are populated is decided by the
// code that builds the blocks, which is not visible in this header.
struct TelomereBlock {
uint64_t start; // Start position of the block
uint16_t blockLen; // End = start + blockLen; 16-bit, so a single block can span at most 65535
uint32_t blockDistance; // NOTE(review): presumably a distance measured between the matches in the block -- confirm against getTelomereBlocks
uint16_t blockCounts; // NOTE(review): presumably the number of matches grouped into this block -- confirm against getTelomereBlocks
};

struct WindowData {
Expand All @@ -60,11 +62,6 @@ struct WindowData {
// uint32_t winHDistance = 0;

std::vector<uint8_t> hDistances;
// std::vector<TelomereBlock> winBlocks;

// std::vector<uint32_t> canonicalMatches;
// std::vector<uint32_t> nonCanonicalMatches;
// std::vector<uint32_t> windowMatches;
uint16_t canonicalCounts = 0;
uint16_t nonCanonicalCounts = 0;
float canonicalDensity = 0.0f;
Expand All @@ -88,6 +85,8 @@ struct PathData {
std::string header;
std::vector<WindowData> windows; // Empty unless specified by user
std::unordered_map<std::string, std::vector<TelomereBlock>> mergedBlocks;
std::vector<uint32_t> canonicalMatches;
std::vector<uint32_t> nonCanonicalMatches;
};


Expand Down Expand Up @@ -134,7 +133,6 @@ class Teloscope {

bool walkPath(InPath* path, std::vector<InSegment*> &inSegments, std::vector<InGap> &inGaps);

// void analyzeWindow(const std::string &window, uint32_t windowStart, WindowData& windowData, WindowData& nextOverlapData);
void analyzeWindow(const std::string &window, uint32_t windowStart, WindowData& windowData, WindowData& nextOverlapData, SegmentData& segmentData);

SegmentData analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint64_t absPos);
Expand All @@ -143,14 +141,11 @@ class Teloscope {

void sortBySeqPos();

// std::vector<TelomereBlock> getTelomereBlocks(const std::vector<uint32_t>& inputMatches, uint64_t windowStart);
std::vector<TelomereBlock> getTelomereBlocks(const std::vector<uint32_t>& inputMatches, uint64_t windowStart, uint32_t currentWindowSize);

std::vector<TelomereBlock> mergeTelomereBlocks(const std::vector<TelomereBlock>& winBlocks);
std::vector<TelomereBlock> getTelomereBlocks(const std::vector<uint32_t>& inputMatches, uint16_t mergeDist);

void writeBEDFile(std::ofstream& windowMetricsFile, std::ofstream& windowRepeatsFile,
std::ofstream& canonicalMatchFile, std::ofstream& noncanonicalMatchFile,
std::ofstream& allBlocksFile, std::ofstream& canonicalBlocksFile, std::ofstream& noncanonicalBlocksFile);
std::ofstream& allBlocksFile, std::ofstream& canonicalBlocksFile);

void handleBEDFile();

Expand Down
35 changes: 28 additions & 7 deletions src/input.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <iostream>
#include <stdint.h>
#include <stdlib.h>
#include <string>
#include <stdexcept> // jack: std::runtime_error
Expand Down Expand Up @@ -64,16 +65,18 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
uint64_t absPos = 0;
unsigned int cUId = 0, gapLen = 0, seqPos = path->getSeqPos();
std::vector<PathComponent> pathComponents = path->getComponents();
// uint64_t pathLen = path->getLen(); // PENDING
uint64_t pathSize = path->getLen();

threadLog.add("\n\tWalking path:\t" + path->getHeader());
std::string header = path->getHeader();
eraseChar(header, '\r');
uint32_t numSegments = (pathComponents.size() + 1) / 2;

// Initialize PathData for this path
PathData pathData;
pathData.seqPos = seqPos;
pathData.header = header;
pathData.windows.reserve((pathSize - (userInput.windowSize + 2 * userInput.step) * (numSegments)) / userInput.step);

for (std::vector<PathComponent>::iterator component = pathComponents.begin(); component != pathComponents.end(); component++) {

Expand All @@ -88,17 +91,35 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
if (component->orientation == '+') {
SegmentData segmentData = analyzeSegment(sequence, userInput, absPos);

if (userInput.keepWindowData) {
pathData.windows.insert(pathData.windows.end(), segmentData.windows.begin(), segmentData.windows.end());
}
// Collect window data
pathData.windows.insert(
pathData.windows.end(),
std::make_move_iterator(segmentData.windows.begin()),
std::make_move_iterator(segmentData.windows.end())
);

for (const auto& [groupName, blocks] : segmentData.mergedBlocks) {
// Collect blocks
for (auto& [groupName, blocks] : segmentData.mergedBlocks) {
pathData.mergedBlocks[groupName].insert(
pathData.mergedBlocks[groupName].end(),
blocks.begin(),
blocks.end()
std::make_move_iterator(blocks.begin()),
std::make_move_iterator(blocks.end())
);
}

// Collect matches
pathData.canonicalMatches.insert(
pathData.canonicalMatches.end(),
std::make_move_iterator(segmentData.canonicalMatches.begin()),
std::make_move_iterator(segmentData.canonicalMatches.end())
);

pathData.nonCanonicalMatches.insert(
pathData.nonCanonicalMatches.end(),
std::make_move_iterator(segmentData.nonCanonicalMatches.begin()),
std::make_move_iterator(segmentData.nonCanonicalMatches.end())
);

} else {
}

Expand Down
Loading

0 comments on commit 970b97f

Please sign in to comment.