Skip to content

Commit

Permalink
DFS subgraph working prototype with test files
Browse files Browse the repository at this point in the history
  • Loading branch information
gf777 committed Jun 17, 2024
1 parent 671bf08 commit ae106fe
Show file tree
Hide file tree
Showing 137 changed files with 131 additions and 6 deletions.
2 changes: 1 addition & 1 deletion include/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class InSequencesDBG : public InSequences {

struct UserInputKreeq : UserInput {

uint32_t covCutOff = 0;
uint32_t covCutOff = 0, kmerDepth = 11; // kmer search is in both directions
uint8_t depth = 3, backtrackingSpan = 5;
uint64_t maxMem = 0;

Expand Down
4 changes: 4 additions & 0 deletions include/kreeq.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { //

void subgraph();

void DFS();

ParallelMap32 DFSpass(ParallelMap32* candidates, std::array<uint16_t, 2> mapRange);

void mergeSubgraphs();

void DBGgraphToGFA();
Expand Down
2 changes: 1 addition & 1 deletion src/generate-tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ int main(void) {
}
// test subgraph
file_args = {
{{"-d testFiles/test1.kreeq -f testFiles/random1.fasta"}, {""}}
{{"-d testFiles/test1.kreeq -f testFiles/random1.fasta --search-depth 0"}, {""}}
// {{set of test inputs}, {list of command line args to run with}}
};
for(const auto &pair : file_args) {
Expand Down
102 changes: 101 additions & 1 deletion src/kreeq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,8 +906,8 @@ void DBG::subgraph() {
deleteMapRange(mapRange);

}

mergeSubgraphs();
DFS();
DBGgraphToGFA();

}
Expand Down Expand Up @@ -981,6 +981,106 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m
return true;
}

void DBG::DFS() {

ParallelMap32 candidates, newCandidates;
ParallelMap32* subgraph = DBGsubgraph;

std::array<uint16_t, 2> mapRange = {0,0};
for (uint8_t i = 0; i < userInput.kmerDepth; ++i) {

mapRange = {0,0};

while (mapRange[1] < mapCount) {

mapRange = computeMapRange(mapRange);
loadMapRange(mapRange);
newCandidates = DFSpass(subgraph, mapRange);
deleteMapRange(mapRange);
candidates.insert(newCandidates.begin(), newCandidates.end());
subgraph = &newCandidates;
}
}
DBGsubgraph->insert(candidates.begin(), candidates.end());
}

/**
 * Performs one expansion step over the kmers in `subgraph`.
 *
 * For every kmer, each non-zero forward/backward edge is turned into the
 * neighbouring kmer sequence, hashed, and looked up in the maps. Only
 * neighbours whose map index falls in [mapRange[0], mapRange[1]) are
 * considered; the others are ignored by this call.
 *
 * @param subgraph kmers whose neighbours are searched
 * @param mapRange half-open range of map indices that may be queried
 * @return the neighbouring kmers that were found in the queried maps
 */
ParallelMap32 DBG::DFSpass(ParallelMap32* subgraph, std::array<uint16_t, 2> mapRange) {

    ParallelMap32 newCandidates;

    // NOTE(review): if k is not a compile-time constant this is a variable-length
    // array (compiler extension) - kept as in the original implementation
    uint8_t nextKmer[k];

    // looks up an encoded kmer and records it in newCandidates when it is found
    auto collect = [&](uint8_t *kmer) {

        bool isFw = false;
        uint64_t key = hash(kmer, &isFw);
        uint64_t m = key % mapCount;

        if (m < mapRange[0] || m >= mapRange[1]) // map not covered by this pass
            return;

        ParallelMap *map = maps[m];
        auto got = map->find(key);

        if (got == map->end())
            return;

        if (got->second.cov != 255) {
            newCandidates.insert(*got);
            return;
        }
        // cov == 255 redirects the lookup to the companion maps32 entry
        // (presumably a saturated 8-bit counter - TODO confirm)
        ParallelMap32 *map32 = maps32[m];
        auto got32 = map32->find(key);

        if (got32 != map32->end()) // never dereference a failed lookup
            newCandidates.insert(*got32);
    };

    for (const auto &pair : *subgraph) {

        const std::string kmerSeq = reverseHash(pair.first); // decoded once per kmer

        for (uint8_t i = 0; i<4; ++i) { // forward edges

            if (pair.second.fw[i] == 0)
                continue;

            // append the edge base and drop the first base
            std::string extended = kmerSeq;
            extended.push_back(itoc[i]);

            for (uint8_t e = 0; e<k; ++e)
                nextKmer[e] = ctoi[(unsigned char)extended[e+1]];

            collect(nextKmer);
        }
        for (uint8_t i = 0; i<4; ++i) { // reverse edges

            if (pair.second.bw[i] == 0)
                continue;

            // prepend the edge base and drop the last base
            std::string extended;
            extended.push_back(itoc[i]);
            extended.append(kmerSeq);

            for (uint8_t e = 0; e<k; ++e)
                nextKmer[e] = ctoi[(unsigned char)extended[e]];

            collect(nextKmer);
        }
    }
    return newCandidates;
}

void DBG::mergeSubgraphs() {

for (ParallelMap32 *map1 : DBGTmpSubgraphs) {
Expand Down
15 changes: 12 additions & 3 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ int main(int argc, char **argv) {
printf("\t-m --max-memory use at most this amount of memory (in Gb, default: 0.9 of max).\n");
printf("\t-j --threads <n> numbers of threads (default: max).\n");
printf("\t-v --version software version.\n");
printf("\t--search-depth the max depth for graph traversal (default: 3).\n");
printf("\t--cmd print $0 to stdout.\n");
exit(0);
}
Expand Down Expand Up @@ -290,7 +291,9 @@ int main(int argc, char **argv) {
static struct option long_options[] = { // struct mapping long options
{"database", required_argument, 0, 'd'},
{"input-sequence", required_argument, 0, 'f'},
{"search-depth", required_argument, 0, 0},
{"out-format", required_argument, 0, 'o'},
{"input-positions", required_argument, 0, 'p'},

{"threads", required_argument, 0, 'j'},
{"verbose", no_argument, &verbose_flag, 1},
Expand All @@ -302,7 +305,7 @@ int main(int argc, char **argv) {
while (true) { // loop through argv

int option_index = 1;
c = getopt_long(argc, argv, "-:d:f:j:o:h",
c = getopt_long(argc, argv, "-:d:f:j:o:p:h",
long_options, &option_index);

if (c == -1) // exit the loop if run out of options
Expand All @@ -321,8 +324,8 @@ int main(int argc, char **argv) {
break;
default: // handle positional arguments
case 0: // case for long options without short options
// if (strcmp(long_options[option_index].name,"line-length") == 0)
// splitLength = atoi(optarg);
if(strcmp(long_options[option_index].name,"search-depth") == 0)
userInput.kmerDepth = atoi(optarg);
break;
case 'd': // input sequence

Expand Down Expand Up @@ -350,13 +353,19 @@ int main(int argc, char **argv) {
case 'o': // handle output (file or stdout)
userInput.outFile = optarg;
break;
case 'p': // input coordinates
ifFileExists(optarg);
userInput.inBedInclude = optarg;
break;
case 'h': // help
printf("kreeq subgraph [options]\n");
printf("\nOptions:\n");
printf("\t-d --database DBG database.\n");
printf("\t-f --input-sequence sequence input file (fasta).\n");
printf("\t--search-depth the max depth for graph traversal (default: 3).\n");
printf("\t-j --threads <n> numbers of threads (default: max).\n");
printf("\t-o --out-format generates various kinds of outputs (currently supported: .gfa1/2).\n");
printf("\t-p --input-positions BED coordinates of positions to extract kmers from.\n");
printf("\t--cmd print $0 to stdout.\n");
exit(0);
}
Expand Down
2 changes: 2 additions & 0 deletions testFiles/random5.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>sequence1 - no errors
CATACTACGATCAGATCGACTGACTCGTACATGCAGTGATGTACGAATGCATGCATCGATCGATCGAAATGATAGCATGACTCAGACTGATCAGATCGA
8 changes: 8 additions & 0 deletions testFiles/random5.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@sequence1 - no errors
CATACTACGATCAGATCGACTGACTCGTACATGCAGTGATGTACGAATGCATGCATCGATCGATCGAAATGATAGCATGACTCAGACTGATCAGATCGA
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@sequence2 - two point errors apart (25:T>A, 65:C>T)
CATACTACGATCAGATCGACTGACaCGTACATGCAGTGATGTACGAATGCATGCATCGATCGATtGAAATGATAGCATGACTCAGACTGATCAGATCGA
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2 changes: 2 additions & 0 deletions testFiles/random5.kreeq/.index
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
21
128
Binary file added testFiles/random5.kreeq/.map.0.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.1.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.10.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.100.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.101.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.102.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.103.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.104.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.105.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.106.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.107.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.108.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.109.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.11.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.110.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.111.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.112.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.113.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.114.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.115.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.116.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.117.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.118.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.119.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.12.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.120.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.121.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.122.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.123.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.124.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.125.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.126.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.127.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.13.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.14.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.15.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.16.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.17.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.18.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.19.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.2.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.20.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.21.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.22.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.23.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.24.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.25.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.26.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.27.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.28.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.29.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.3.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.30.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.31.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.32.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.33.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.34.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.35.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.36.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.37.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.38.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.39.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.4.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.40.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.41.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.42.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.43.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.44.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.45.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.46.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.47.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.48.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.49.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.5.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.50.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.51.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.52.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.53.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.54.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.55.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.56.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.57.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.58.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.59.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.6.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.60.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.61.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.62.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.63.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.64.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.65.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.66.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.67.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.68.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.69.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.7.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.70.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.71.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.72.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.73.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.74.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.75.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.76.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.77.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.78.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.79.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.8.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.80.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.81.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.82.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.83.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.84.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.85.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.86.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.87.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.88.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.89.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.9.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.90.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.91.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.92.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.93.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.94.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.95.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.96.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.97.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.98.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.99.bin
Binary file not shown.
Binary file added testFiles/random5.kreeq/.map.hc.bin
Binary file not shown.

0 comments on commit ae106fe

Please sign in to comment.