From 671bf0849ba9e385b0882faf236eea1cb748237d Mon Sep 17 00:00:00 2001
From: gf777 <giulio.formenti@gmail.com>
Date: Sun, 16 Jun 2024 00:30:36 -0400
Subject: [PATCH] updated gfalibs; renamed map aliases, widened covCutOff

---
 gfalibs               |  2 +-
 include/input.h       |  3 ++-
 include/kreeq.h       | 18 +++++++++---------
 src/graph-builder.cpp | 34 +++++++++++++++++-----------------
 src/kreeq.cpp         | 22 +++++++++++-----------
 src/main.cpp          |  8 ++++----
 6 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/gfalibs b/gfalibs
index 77cb846..cd3a92d 160000
--- a/gfalibs
+++ b/gfalibs
@@ -1 +1 @@
-Subproject commit 77cb84617a7e7f07c46b5ff9e97599b32559315f
+Subproject commit cd3a92dd0334faafc7cb5017f527315751f29c56
diff --git a/include/input.h b/include/input.h
index 8002321..2226f16 100644
--- a/include/input.h
+++ b/include/input.h
@@ -24,7 +24,8 @@ class InSequencesDBG : public InSequences {
 
 struct UserInputKreeq : UserInput {
 
-    uint8_t covCutOff = 0, depth = 3, backtrackingSpan = 5;
+    uint32_t covCutOff = 0;
+    uint8_t depth = 3, backtrackingSpan = 5;
     uint64_t maxMem = 0;
 
 };
diff --git a/include/kreeq.h b/include/kreeq.h
index 8f0e603..b19cad6 100644
--- a/include/kreeq.h
+++ b/include/kreeq.h
@@ -35,21 +35,21 @@ struct DBGkmer32 {
     
 };
 
-using parallelMap = phmap::parallel_flat_hash_map<uint64_t, DBGkmer,
+using ParallelMap = phmap::parallel_flat_hash_map<uint64_t, DBGkmer,
                                           std::hash<uint64_t>,
                                           std::equal_to<uint64_t>,
                                           std::allocator<std::pair<const uint64_t, DBGkmer>>,
                                           8,
                                           phmap::NullMutex>;
 
-using parallelMap32 = phmap::parallel_flat_hash_map<uint64_t, DBGkmer32,
-                                          std::hash<uint64_t>,
+using ParallelMap32 = phmap::parallel_flat_hash_map<uint64_t, DBGkmer32,
+                                          std::hash<uint64_t>,
                                           std::equal_to<uint64_t>,
                                           std::allocator<std::pair<const uint64_t, DBGkmer32>>,
                                           8,
                                           phmap::NullMutex>;
 
-class DBG : public Kmap<DBG, UserInputKreeq, DBGkmer, DBGkmer32> { // CRTP
+class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { // CRTP
     
     std::atomic<uint64_t> totMissingKmers{0}, totKcount{0}, totEdgeMissingKmers{0};
     UserInputKreeq userInput;
@@ -57,8 +57,8 @@ class DBG : public Kmap<DBG, UserInputKreeq, DBGkmer, DBGkmer32> { // CRTP
     InSequencesDBG *genome;
     
     // subgraph objects
-    parallelMap32 *DBGsubgraph = new parallelMap32;
-    std::vector<parallelMap32*> DBGTmpSubgraphs;
+    ParallelMap32 *DBGsubgraph = new ParallelMap32;
+    std::vector<ParallelMap32*> DBGTmpSubgraphs;
     InSequences GFAsubgraph;
 
     uint64_t totEdgeCount = 0;
@@ -93,11 +93,11 @@ class DBG : public Kmap<DBG, UserInputKreeq, DBGkmer, DBGkmer32> { // CRTP
     
     bool deleteMap(uint16_t m);
     
-    bool mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex, uint16_t m);
+    bool mergeSubMaps(ParallelMap* map1, ParallelMap* map2, uint8_t subMapIndex, uint16_t m);
     
-    bool mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapIndex);
+    bool mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapIndex);
     
-    bool unionSum(parallelMap32* map1, parallelMap32* map2);
+    bool unionSum(ParallelMap32* map1, ParallelMap32* map2);
     
     void kunion();
     
diff --git a/src/graph-builder.cpp b/src/graph-builder.cpp
index d3ce135..12a80af 100644
--- a/src/graph-builder.cpp
+++ b/src/graph-builder.cpp
@@ -144,8 +144,8 @@ bool DBG::processBuffers(uint16_t m) {
             });
         }
         
-        parallelMap& map = *maps[m]; // the map associated to this buffer
-        parallelMap32& map32 = *maps32[m];
+        ParallelMap& map = *maps[m]; // the map associated to this buffer
+        ParallelMap32& map32 = *maps32[m];
         uint64_t map_size = mapSize(map);
         
         bufFile.read(reinterpret_cast<char *>(&pos), sizeof(uint64_t));
@@ -224,8 +224,8 @@ bool DBG::processBuffers(uint16_t m) {
 
 bool DBG::reloadMap32(uint16_t m) {
     
-    parallelMap& map = *maps[m]; // the map associated to this buffer
-    parallelMap32& map32 = *maps32[m];
+    ParallelMap& map = *maps[m]; // the map associated to this buffer
+    ParallelMap32& map32 = *maps32[m];
     
     for (auto pair : map32) {
         
@@ -267,14 +267,14 @@ bool DBG::summary(uint16_t m) {
     }
  
     std::lock_guard<std::mutex> lck(mtx);
-    totKmersUnique += kmersUnique;
-    totKmersDistinct += kmersDistinct;
+    totUnique += kmersUnique;
+    totDistinct += kmersDistinct;
     totEdgeCount += edgeCount;
     
     for (auto pair : hist) {
         
         finalHistogram[pair.first] += pair.second;
-        totKmers += pair.first * pair.second;
+        tot += pair.first * pair.second;
     }
     
     return true;
@@ -283,12 +283,12 @@ bool DBG::summary(uint16_t m) {
 
 void DBG::DBstats() {
     
-    uint64_t missing = pow(4,k)-totKmersDistinct;
+    uint64_t missing = pow(4,k)-totDistinct;
     
     std::cout<<"DBG Summary statistics:\n"
-             <<"Total kmers: "<<totKmers<<"\n"
-             <<"Unique kmers: "<<totKmersUnique<<"\n"
-             <<"Distinct kmers: "<<totKmersDistinct<<"\n"
+             <<"Total kmers: "<<tot<<"\n"
+             <<"Unique kmers: "<<totUnique<<"\n"
+             <<"Distinct kmers: "<<totDistinct<<"\n"
              <<"Missing kmers: "<<missing<<"\n"
              <<"Total edges: "<<totEdgeCount<<"\n";
     
@@ -296,14 +296,14 @@ void DBG::DBstats() {
 
 void DBG::kunion(){ // concurrent merging of the maps that store the same hashes
     
-    parallelMap32 map32Total; // first merge high-copy kmers
+    ParallelMap32 map32Total; // first merge high-copy kmers
     
     for (unsigned int i = 0; i < userInput.kmerDB.size(); ++i) { // for each kmerdb loads the map and merges it
         
         std::string prefix = userInput.kmerDB[i]; // loads the next map
         prefix.append("/.map.hc.bin");
         
-        parallelMap32 nextMap;
+        ParallelMap32 nextMap;
         phmap::BinaryInputArchive ar_in(prefix.c_str());
         nextMap.phmap_load(ar_in);
         
@@ -350,13 +350,13 @@ void DBG::kunion(){ // concurrent merging of the maps that store the same hashes
     
 }
 
-bool DBG::mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex, uint16_t m) {
+bool DBG::mergeSubMaps(ParallelMap* map1, ParallelMap* map2, uint8_t subMapIndex, uint16_t m) {
     
     auto& inner = map1->get_inner(subMapIndex);   // to retrieve the submap at given index
     auto& submap1 = inner.set_;        // can be a set or a map, depending on the type of map1
     auto& inner2 = map2->get_inner(subMapIndex);
     auto& submap2 = inner2.set_;
-    parallelMap32& map32 = *maps32[m];
+    ParallelMap32& map32 = *maps32[m];
     
     for (auto pair : submap1) { // for each element in map1, find it in map2 and increase its value
         
@@ -433,7 +433,7 @@ bool DBG::mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex
 
 // subgraph functions
 
-bool DBG::mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapIndex) {
+bool DBG::mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapIndex) {
     
     auto& inner = map1->get_inner(subMapIndex);   // to retrieve the submap at given index
     auto& submap1 = inner.set_;        // can be a set or a map, depending on the type of map1
@@ -476,7 +476,7 @@ bool DBG::mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapI
 }
 
 
-bool DBG::unionSum(parallelMap32* map1, parallelMap32* map2) {
+bool DBG::unionSum(ParallelMap32* map1, ParallelMap32* map2) {
     
     std::vector<std::function<bool()>> jobs;
     
diff --git a/src/kreeq.cpp b/src/kreeq.cpp
index 2afe71b..bcebc44 100644
--- a/src/kreeq.cpp
+++ b/src/kreeq.cpp
@@ -133,8 +133,8 @@ bool DBG::evaluateSegment(uint32_t s, std::array<uint16_t, 2> mapRange) {
     
     uint64_t key, i;
     
-    parallelMap *map;
-    parallelMap32 *map32;
+    ParallelMap *map;
+    ParallelMap32 *map32;
     
     // kreeq QV
     bool isFw = false;
@@ -289,7 +289,7 @@ void DBG::correctSequences() {
 
 bool DBG::searchGraph(std::array<uint16_t, 2> mapRange) { // stub
     
-    parallelMap* genomeDBG = new parallelMap;
+    ParallelMap* genomeDBG = new ParallelMap;
     
     std::vector<InSegment*> *inSegments = genome->getInSegments();
     
@@ -310,7 +310,7 @@ bool DBG::searchGraph(std::array<uint16_t, 2> mapRange) { // stub
         
         uint64_t key, i;
         bool isFw = false;
-        parallelMap *map;
+        ParallelMap *map;
         
         for(uint64_t c = 0; c<kcount; ++c){
             
@@ -341,7 +341,7 @@ std::pair<DBGkmer*,bool> DBG::findDBGkmer(uint8_t *origin) {
     bool isFw = false;
     key = hash(origin, &isFw);
     i = key % mapCount;
-    parallelMap *map = maps[i];
+    ParallelMap *map = maps[i];
     auto got = map->find(key);
     if (got != map->end())
         return std::make_pair(&(got->second), isFw);
@@ -560,7 +560,7 @@ bool DBG::detectAnomalies(InSegment *inSegment, std::vector<uint64_t> &anomalies
     threadLog.setId(inSegment->getuId());
         
     std::string sHeader = inSegment->getSeqHeader();
-    parallelMap *map;
+    ParallelMap *map;
     uint64_t key, i;
     bool isFw = false, anomaly = false;
         
@@ -612,7 +612,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
     threadLog.setId(inSegment->getuId());
         
     std::string sHeader = inSegment->getSeqHeader();
-    parallelMap *map;
+    ParallelMap *map;
     uint64_t key, i;
     bool isFw = false;
     std::vector<std::deque<DBGpath>> variants;
@@ -918,9 +918,9 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m
     threadLog.setId(inSegment->getuId());
         
     std::string sHeader = inSegment->getSeqHeader();
-    parallelMap *map;
-    parallelMap32 *map32;
-    parallelMap32 *segmentSubmap = new parallelMap32;
+    ParallelMap *map;
+    ParallelMap32 *map32;
+    ParallelMap32 *segmentSubmap = new ParallelMap32;
     uint64_t key, i;
     bool isFw = false;
     std::vector<uint64_t> segmentCoordinates;
@@ -983,7 +983,7 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m
 
 void DBG::mergeSubgraphs() {
     
-    for (parallelMap32 *map1 : DBGTmpSubgraphs) {
+    for (ParallelMap32 *map1 : DBGTmpSubgraphs) {
         unionSum(map1, DBGsubgraph);
         delete map1;
     }
diff --git a/src/main.cpp b/src/main.cpp
index eea656c..1fd7015 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -80,8 +80,8 @@ int main(int argc, char **argv) {
                 {"input-reads", required_argument, 0, 'r'},
                 {"tmp-prefix", required_argument, 0, 't'},
                 {"max-memory", required_argument, 0, 'm'},
-                {"threads", required_argument, 0, 'j'},
                 
+                {"threads", required_argument, 0, 'j'},
                 {"verbose", no_argument, &verbose_flag, 1},
                 {"cmd", no_argument, &cmd_flag, 1},
                 {"version", no_argument, 0, 'v'},
@@ -173,9 +173,8 @@ int main(int argc, char **argv) {
                         break;
                     case 't': // prefix for temporary files
                         userInput.prefix = optarg;
-                        break;
-                        
-                    case 'm': // prefix for temporary files
+                        break;
+                    case 'm': // max memory
                         userInput.maxMem = atof(optarg);
                         break;
                     case 'v': // software version
@@ -185,6 +184,7 @@ int main(int argc, char **argv) {
                     case 'h': // help
                         printf("kreeq [command]\n");
                         printf("\nOptions:\n");
+                        printf("\t-c --coverage-cutoff coverage cutoff.\n");
                         printf("\t-d --database kreeq database to load.\n");
                         printf("\t-f --input-sequence sequence input file (fasta,gfa1/2).\n");
                         printf("\t-r --input-reads read input files (fastq).\n");