diff --git a/gfalibs b/gfalibs index 77cb846..cd3a92d 160000 --- a/gfalibs +++ b/gfalibs @@ -1 +1 @@ -Subproject commit 77cb84617a7e7f07c46b5ff9e97599b32559315f +Subproject commit cd3a92dd0334faafc7cb5017f527315751f29c56 diff --git a/include/input.h b/include/input.h index 8002321..2226f16 100644 --- a/include/input.h +++ b/include/input.h @@ -24,7 +24,8 @@ class InSequencesDBG : public InSequences { struct UserInputKreeq : UserInput { - uint8_t covCutOff = 0, depth = 3, backtrackingSpan = 5; + uint32_t covCutOff = 0; + uint8_t depth = 3, backtrackingSpan = 5; uint64_t maxMem = 0; }; diff --git a/include/kreeq.h b/include/kreeq.h index 8f0e603..b19cad6 100644 --- a/include/kreeq.h +++ b/include/kreeq.h @@ -35,21 +35,21 @@ struct DBGkmer32 { }; -using parallelMap = phmap::parallel_flat_hash_map, std::equal_to, std::allocator>, 8, phmap::NullMutex>; -using parallelMap32 = phmap::parallel_flat_hash_map, +using ParallelMap32 = phmap::parallel_flat_hash_map, std::equal_to, std::allocator>, 8, phmap::NullMutex>; -class DBG : public Kmap { // CRTP +class DBG : public Kmap { // CRTP std::atomic totMissingKmers{0}, totKcount{0}, totEdgeMissingKmers{0}; UserInputKreeq userInput; @@ -57,8 +57,8 @@ class DBG : public Kmap { // CRTP InSequencesDBG *genome; // subgraph objects - parallelMap32 *DBGsubgraph = new parallelMap32; - std::vector DBGTmpSubgraphs; + ParallelMap32 *DBGsubgraph = new ParallelMap32; + std::vector DBGTmpSubgraphs; InSequences GFAsubgraph; uint64_t totEdgeCount = 0; @@ -93,11 +93,11 @@ class DBG : public Kmap { // CRTP bool deleteMap(uint16_t m); - bool mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex, uint16_t m); + bool mergeSubMaps(ParallelMap* map1, ParallelMap* map2, uint8_t subMapIndex, uint16_t m); - bool mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapIndex); + bool mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapIndex); - bool unionSum(parallelMap32* map1, parallelMap32* map2); + bool unionSum(ParallelMap32* map1, ParallelMap32* map2); void kunion(); diff --git a/src/graph-builder.cpp b/src/graph-builder.cpp index d3ce135..12a80af 100644 --- a/src/graph-builder.cpp +++ b/src/graph-builder.cpp @@ -144,8 +144,8 @@ bool DBG::processBuffers(uint16_t m) { }); } - parallelMap& map = *maps[m]; // the map associated to this buffer - parallelMap32& map32 = *maps32[m]; + ParallelMap& map = *maps[m]; // the map associated to this buffer + ParallelMap32& map32 = *maps32[m]; uint64_t map_size = mapSize(map); bufFile.read(reinterpret_cast(&pos), sizeof(uint64_t)); @@ -224,8 +224,8 @@ bool DBG::processBuffers(uint16_t m) { bool DBG::reloadMap32(uint16_t m) { - parallelMap& map = *maps[m]; // the map associated to this buffer - parallelMap32& map32 = *maps32[m]; + ParallelMap& map = *maps[m]; // the map associated to this buffer + ParallelMap32& map32 = *maps32[m]; for (auto pair : map32) { @@ -267,14 +267,14 @@ bool DBG::summary(uint16_t m) { } std::lock_guard lck(mtx); - totKmersUnique += kmersUnique; - totKmersDistinct += kmersDistinct; + totUnique += kmersUnique; + totDistinct += kmersDistinct; totEdgeCount += edgeCount; for (auto pair : hist) { finalHistogram[pair.first] += pair.second; - totKmers += pair.first * pair.second; + tot += pair.first * pair.second; } return true; @@ -283,12 +283,12 @@ bool DBG::summary(uint16_t m) { void DBG::DBstats() { - uint64_t missing = pow(4,k)-totKmersDistinct; + uint64_t missing = pow(4,k)-totDistinct; std::cout<<"DBG Summary statistics:\n" - <<"Total kmers: "<get_inner(subMapIndex); // to retrieve the submap at given index auto& submap1 = inner.set_; // can be a set or a map, depending on the type of map1 auto& inner2 = map2->get_inner(subMapIndex); auto& submap2 = inner2.set_; - parallelMap32& map32 = *maps32[m]; + ParallelMap32& map32 = *maps32[m]; for (auto pair : submap1) { // for each element in map1, find it in map2 and increase its value @@ -433,7 +433,7 @@ bool DBG::mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex // subgraph functions -bool DBG::mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapIndex) { +bool DBG::mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapIndex) { auto& inner = map1->get_inner(subMapIndex); // to retrieve the submap at given index auto& submap1 = inner.set_; // can be a set or a map, depending on the type of map1 @@ -476,7 +476,7 @@ bool DBG::mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapI } -bool DBG::unionSum(parallelMap32* map1, parallelMap32* map2) { +bool DBG::unionSum(ParallelMap32* map1, ParallelMap32* map2) { std::vector> jobs; diff --git a/src/kreeq.cpp b/src/kreeq.cpp index 2afe71b..bcebc44 100644 --- a/src/kreeq.cpp +++ b/src/kreeq.cpp @@ -133,8 +133,8 @@ bool DBG::evaluateSegment(uint32_t s, std::array mapRange) { uint64_t key, i; - parallelMap *map; - parallelMap32 *map32; + ParallelMap *map; + ParallelMap32 *map32; // kreeq QV bool isFw = false; @@ -289,7 +289,7 @@ void DBG::correctSequences() { bool DBG::searchGraph(std::array mapRange) { // stub - parallelMap* genomeDBG = new parallelMap; + ParallelMap* genomeDBG = new ParallelMap; std::vector *inSegments = genome->getInSegments(); @@ -310,7 +310,7 @@ bool DBG::searchGraph(std::array mapRange) { // stub uint64_t key, i; bool isFw = false; - parallelMap *map; + ParallelMap *map; for(uint64_t c = 0; c DBG::findDBGkmer(uint8_t *origin) { bool isFw = false; key = hash(origin, &isFw); i = key % mapCount; - parallelMap *map = maps[i]; + ParallelMap *map = maps[i]; auto got = map->find(key); if (got != map->end()) return std::make_pair(&(got->second), isFw); @@ -560,7 +560,7 @@ bool DBG::detectAnomalies(InSegment *inSegment, std::vector &anomalies threadLog.setId(inSegment->getuId()); std::string sHeader = inSegment->getSeqHeader(); - parallelMap *map; + ParallelMap *map; uint64_t key, i; bool isFw = false, anomaly = false; @@ -612,7 +612,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) { threadLog.setId(inSegment->getuId()); std::string sHeader = inSegment->getSeqHeader(); - parallelMap *map; + ParallelMap *map; uint64_t key, i; bool isFw = false; std::vector> variants; @@ -918,9 +918,9 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array m threadLog.setId(inSegment->getuId()); std::string sHeader = inSegment->getSeqHeader(); - parallelMap *map; - parallelMap32 *map32; - parallelMap32 *segmentSubmap = new parallelMap32; + ParallelMap *map; + ParallelMap32 *map32; + ParallelMap32 *segmentSubmap = new ParallelMap32; uint64_t key, i; bool isFw = false; std::vector segmentCoordinates; @@ -983,7 +983,7 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array m void DBG::mergeSubgraphs() { - for (parallelMap32 *map1 : DBGTmpSubgraphs) { + for (ParallelMap32 *map1 : DBGTmpSubgraphs) { unionSum(map1, DBGsubgraph); delete map1; } diff --git a/src/main.cpp b/src/main.cpp index eea656c..1fd7015 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -80,8 +80,8 @@ int main(int argc, char **argv) { {"input-reads", required_argument, 0, 'r'}, {"tmp-prefix", required_argument, 0, 't'}, {"max-memory", required_argument, 0, 'm'}, - {"threads", required_argument, 0, 'j'}, + {"threads", required_argument, 0, 'j'}, {"verbose", no_argument, &verbose_flag, 1}, {"cmd", no_argument, &cmd_flag, 1}, {"version", no_argument, 0, 'v'}, @@ -173,9 +173,8 @@ int main(int argc, char **argv) { break; case 't': // prefix for temporary files userInput.prefix = optarg; - break; - - case 'm': // prefix for temporary files + break; + case 'm': // max memory userInput.maxMem = atof(optarg); break; case 'v': // software version @@ -185,6 +184,7 @@ int main(int argc, char **argv) { case 'h': // help printf("kreeq [command]\n"); printf("\nOptions:\n"); + printf("\t-c --coverage-cutoff coverage cutoff.\n"); printf("\t-d --database kreeq database to load.\n"); printf("\t-f --input-sequence sequence input file (fasta,gfa1/2).\n"); printf("\t-r --input-reads read input files (fastq).\n");