From ac94f9eb892a9836e9d3a2cb593732f22fca5234 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 21 Oct 2020 17:43:03 +1000 Subject: [PATCH 1/9] exclude nonessential GATB files and limit ksize --- ext/gatb.cmake | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/ext/gatb.cmake b/ext/gatb.cmake index c587b9d9..f4a60c3f 100644 --- a/ext/gatb.cmake +++ b/ext/gatb.cmake @@ -1,30 +1,15 @@ include(ExternalProject) -execute_process(COMMAND mkdir -p ${CMAKE_BINARY_DIR}/ext) -execute_process(COMMAND mkdir -p ${CMAKE_BINARY_DIR}/bin) -execute_process(COMMAND mkdir -p ${CMAKE_BINARY_DIR}/lib) -execute_process(COMMAND mkdir -p ${CMAKE_BINARY_DIR}/include) - -set(GATB_DIR ${CMAKE_BINARY_DIR}/ext/gatb-core-1.4.1/gatb-core) -set(GATB_BUILD_DIR ${GATB_DIR}/build) -execute_process(COMMAND mkdir -p ${GATB_BUILD_DIR}) +# We don't want to install some GATB-CORE artifacts +SET (GATB_CORE_EXCLUDE_TOOLS 1) +SET (GATB_CORE_EXCLUDE_TESTS 1) +SET (GATB_CORE_INCLUDE_EXAMPLES 1) ExternalProject_Add(gatb - DOWNLOAD_COMMAND wget https://github.com/GATB/gatb-core/archive/v1.4.1.tar.gz --timestamping -O gatb.tar.gz - DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/ext" - CONFIGURE_COMMAND "" - BUILD_COMMAND bash -c "cd ${GATB_BUILD_DIR} && cmake -D CMAKE_C_COMPILER=${CMAKE_C_COMPILER} -D CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} ${GATB_DIR} && make" - INSTALL_COMMAND "" - TEST_COMMAND "") -ExternalProject_Add_Step(gatb extract_tar - COMMAND tar -xvzf ${CMAKE_BINARY_DIR}/ext/gatb.tar.gz -C ${CMAKE_BINARY_DIR}/ext - DEPENDEES download - DEPENDERS configure) + GIT_REPOSITORY https://github.com/GATB/gatb-core.git + GIT_TAG "v1.4.1" + PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gatb" + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gatb/src/gatb/gatb-core/gatb-core" + CMAKE_ARGS -DKSIZE_LIST=32 + INSTALL_COMMAND "") -ExternalProject_Add_Step(gatb copy - COMMAND bash -c "cp -r ${GATB_BUILD_DIR}/bin/* ${CMAKE_BINARY_DIR}/bin/" - COMMAND bash -c "cp -r ${GATB_BUILD_DIR}/lib/* ${CMAKE_BINARY_DIR}/lib/" - COMMAND bash -c "cp -r ${GATB_BUILD_DIR}/include/* ${CMAKE_BINARY_DIR}/include/" - COMMAND bash -c "cp -r ${GATB_DIR}/src/gatb/* ${CMAKE_BINARY_DIR}/include/gatb/" - DEPENDEES build) From a40c3b36ddd0acbc4bf3554c8f9105363047e186 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 22 Oct 2020 16:18:20 +1000 Subject: [PATCH 2/9] restrict denovo kmer size --- include/denovo_discovery/denovo_discovery.h | 4 ++-- include/denovo_discovery/discover_main.h | 3 ++- src/denovo_discovery/denovo_discovery.cpp | 2 +- src/denovo_discovery/discover_main.cpp | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/denovo_discovery/denovo_discovery.h b/include/denovo_discovery/denovo_discovery.h index 0c28720b..32613954 100644 --- a/include/denovo_discovery/denovo_discovery.h +++ b/include/denovo_discovery/denovo_discovery.h @@ -18,7 +18,7 @@ class DenovoDiscovery { bool clean_assembly_graph { false }; const uint16_t max_insertion_size; - DenovoDiscovery(const uint32_t& kmer_size, const double& read_error_rate, + DenovoDiscovery(const uint16_t& kmer_size, const double& read_error_rate, uint16_t max_insertion_size = 15, uint16_t min_covg_for_node_in_assembly_graph = 1); void find_paths_through_candidate_region( @@ -28,7 +28,7 @@ class DenovoDiscovery { const uint32_t& read_covg, const uint32_t& ref_length) const; private: - const uint32_t kmer_size; + const uint16_t kmer_size; const double read_error_rate; }; diff --git a/include/denovo_discovery/discover_main.h 
b/include/denovo_discovery/discover_main.h index 1188bb9b..408fcd42 100644 --- a/include/denovo_discovery/discover_main.h +++ b/include/denovo_discovery/discover_main.h @@ -13,6 +13,7 @@ #include "denovo_discovery/candidate_region.h" #include "denovo_discovery/denovo_discovery.h" +constexpr auto MAX_DENOVO_K { 32 }; namespace fs = boost::filesystem; /// Collection of all options of discover subcommand. @@ -33,7 +34,7 @@ struct DiscoverOptions { bool clean { false }; bool binomial { false }; uint32_t max_covg { 600 }; - uint32_t denovo_kmer_size { 11 }; + uint16_t denovo_kmer_size { 11 }; uint16_t max_insertion_size { 15 }; uint16_t min_covg_for_node_in_assembly_graph { 2 }; uint32_t min_candidate_covg { 3 }; diff --git a/src/denovo_discovery/denovo_discovery.cpp b/src/denovo_discovery/denovo_discovery.cpp index f13e720f..049196e2 100644 --- a/src/denovo_discovery/denovo_discovery.cpp +++ b/src/denovo_discovery/denovo_discovery.cpp @@ -132,7 +132,7 @@ void DenovoDiscovery::find_paths_through_candidate_region( remove_graph_file(GATB_graph_filepath); } -DenovoDiscovery::DenovoDiscovery(const uint32_t& kmer_size, +DenovoDiscovery::DenovoDiscovery(const uint16_t& kmer_size, const double& read_error_rate, const uint16_t max_insertion_size, const uint16_t min_covg_for_node_in_assembly_graph) : min_covg_for_node_in_assembly_graph { min_covg_for_node_in_assembly_graph } diff --git a/src/denovo_discovery/discover_main.cpp b/src/denovo_discovery/discover_main.cpp index ad82373e..b821c02d 100644 --- a/src/denovo_discovery/discover_main.cpp +++ b/src/denovo_discovery/discover_main.cpp @@ -107,6 +107,7 @@ void setup_discover_subcommand(CLI::App& app) ->add_option("--discover-k", opt->denovo_kmer_size, "K-mer size to use when discovering novel variants") ->capture_default_str() + ->check(CLI::Range(0, MAX_DENOVO_K)) ->type_name("INT"); std::string description = "Max. insertion size for novel variants. 
Warning: " From f72f62151c7c86573252d9758843705ab41f3876 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 22 Oct 2020 17:00:27 +1000 Subject: [PATCH 3/9] expose discover opt to clean local assembly graph --- include/denovo_discovery/denovo_discovery.h | 15 ++++++++------- include/denovo_discovery/discover_main.h | 1 + src/denovo_discovery/denovo_discovery.cpp | 13 ++++++++----- src/denovo_discovery/discover_main.cpp | 8 +++++++- src/denovo_discovery/local_assembly.cpp | 17 ++++++++--------- 5 files changed, 32 insertions(+), 22 deletions(-) diff --git a/include/denovo_discovery/denovo_discovery.h b/include/denovo_discovery/denovo_discovery.h index 32613954..fbaf49b1 100644 --- a/include/denovo_discovery/denovo_discovery.h +++ b/include/denovo_discovery/denovo_discovery.h @@ -13,23 +13,24 @@ namespace fs = boost::filesystem; class DenovoDiscovery { +private: + const uint16_t kmer_size; + const double read_error_rate; + public: - const uint16_t min_covg_for_node_in_assembly_graph; - bool clean_assembly_graph { false }; const uint16_t max_insertion_size; + const uint16_t min_covg_for_node_in_assembly_graph; + bool clean_assembly_graph; DenovoDiscovery(const uint16_t& kmer_size, const double& read_error_rate, - uint16_t max_insertion_size = 15, uint16_t min_covg_for_node_in_assembly_graph = 1); + uint16_t max_insertion_size = 15, + uint16_t min_covg_for_node_in_assembly_graph = 1, bool clean = false); void find_paths_through_candidate_region( CandidateRegion& candidate_region, const fs::path& denovo_output_directory); double calculate_kmer_coverage( const uint32_t& read_covg, const uint32_t& ref_length) const; - -private: - const uint16_t kmer_size; - const double read_error_rate; }; #endif // PANDORA_DENOVO_DISCOVERY_H diff --git a/include/denovo_discovery/discover_main.h b/include/denovo_discovery/discover_main.h index 408fcd42..e797a069 100644 --- a/include/denovo_discovery/discover_main.h +++ b/include/denovo_discovery/discover_main.h @@ -44,6 +44,7 @@ struct DiscoverOptions { uint32_t merge_dist { 22 }; uint32_t min_cluster_size { 10 }; uint32_t max_num_kmers_to_avg { 100 }; + bool clean_dbg { false }; }; void setup_discover_subcommand(CLI::App& app); diff --git a/src/denovo_discovery/denovo_discovery.cpp b/src/denovo_discovery/denovo_discovery.cpp index 049196e2..25160569 100644 --- a/src/denovo_discovery/denovo_discovery.cpp +++ b/src/denovo_discovery/denovo_discovery.cpp @@ -38,8 +38,10 @@ void DenovoDiscovery::find_paths_through_candidate_region( new BankStrings(candidate_region.pileup), "-kmer-size %d -abundance-min %d -verbose 0 -nb-cores 1 -out %s", kmer_size, min_covg_for_node_in_assembly_graph, GATB_graph_filepath_as_string.c_str()); - if (clean_assembly_graph) { + if (this->clean_assembly_graph) { + BOOST_LOG_TRIVIAL(debug) << "Cleaning local assembly graph..."; clean(gatb_graph); + BOOST_LOG_TRIVIAL(debug) << "Local assembly graph cleaned"; } graph = gatb_graph; // TODO: use move constructor @@ -134,11 +136,12 @@ void DenovoDiscovery::find_paths_through_candidate_region( DenovoDiscovery::DenovoDiscovery(const uint16_t& kmer_size, const double& read_error_rate, const uint16_t max_insertion_size, - const uint16_t min_covg_for_node_in_assembly_graph) - : min_covg_for_node_in_assembly_graph { min_covg_for_node_in_assembly_graph } - , max_insertion_size { max_insertion_size } - , kmer_size { kmer_size } + const uint16_t min_covg_for_node_in_assembly_graph, const bool clean) + : kmer_size { kmer_size } , read_error_rate { read_error_rate } + , max_insertion_size { 
max_insertion_size } + , min_covg_for_node_in_assembly_graph { min_covg_for_node_in_assembly_graph } + , clean_assembly_graph { clean } { } diff --git a/src/denovo_discovery/discover_main.cpp b/src/denovo_discovery/discover_main.cpp index b821c02d..f2c6788a 100644 --- a/src/denovo_discovery/discover_main.cpp +++ b/src/denovo_discovery/discover_main.cpp @@ -92,6 +92,11 @@ void setup_discover_subcommand(CLI::App& app) "--clean", opt->clean, "Add a step to clean and detangle the pangraph") ->group("Filtering"); + discover_subcmd + ->add_flag( + "--clean-dbg", opt->clean_dbg, "Clean the local assembly de Bruijn graph") + ->group("Filtering"); + discover_subcmd ->add_flag("--bin", opt->binomial, "Use binomial model for kmer coverages [default: negative binomial]") @@ -343,7 +348,8 @@ int pandora_discover(DiscoverOptions& opt) } DenovoDiscovery denovo { opt.denovo_kmer_size, opt.error_rate, - opt.max_insertion_size, opt.min_covg_for_node_in_assembly_graph }; + opt.max_insertion_size, opt.min_covg_for_node_in_assembly_graph, + opt.clean_dbg }; const fs::path denovo_output_directory { fs::path(opt.outdir) / "denovo_paths" }; fs::create_directories(denovo_output_directory); BOOST_LOG_TRIVIAL(info) diff --git a/src/denovo_discovery/local_assembly.cpp b/src/denovo_discovery/local_assembly.cpp index d763def3..600c7a25 100644 --- a/src/denovo_discovery/local_assembly.cpp +++ b/src/denovo_discovery/local_assembly.cpp @@ -259,15 +259,14 @@ void clean(Graph& graph, const uint16_t& num_cores) graph_simplifications._doBulgeRemoval = false; graph_simplifications._doECRemoval = false; - graph_simplifications._tipLen_Topo_kMult - = 2; // remove all tips of length <= k * X bp [default '2.500000'] set to 0 to - // turn off - graph_simplifications._tipLen_RCTC_kMult - = 0; // remove tips that pass coverage criteria, of length <= k * X bp [default - // '10.000000'] set to 0 to turn off - graph_simplifications._tipRCTCcutoff - = 2; // tip relative coverage coefficient: mean coverage of neighbors > X * tip - // coverage default 2.0 + // remove all tips of length <= k * X bp [default '2.500000'] set to 0 to turn off + graph_simplifications._tipLen_Topo_kMult = 2; + // remove tips that pass coverage criteria, of length <= k * X bp [default + // '10.000000'] set to 0 to turn off + graph_simplifications._tipLen_RCTC_kMult = 0; + // tip relative coverage coefficient: mean coverage of neighbors > X * tip coverage + // default 2.0 + graph_simplifications._tipRCTCcutoff = 2; graph_simplifications.simplify(); } From a3d0797b4708a75332e03226034cc4e992a542f5 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 27 Oct 2020 09:37:09 +1000 Subject: [PATCH 4/9] fix linking issues for gatb libraries --- CMakeLists.txt | 3 +++ ext/gatb.cmake | 7 ++++++- include/denovo_discovery/local_assembly.h | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b541842..3a731a6f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,7 @@ set(EXTERNAL_LIBS gatbcore hdf5 hdf5_tools) +link_directories(${gatb_binary_dir}/lib) include(${PROJECT_SOURCE_DIR}/ext/gtest.cmake) @@ -53,6 +54,8 @@ include(${PROJECT_SOURCE_DIR}/ext/gtest.cmake) include_directories(SYSTEM ${CMAKE_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/cgranges/cpp + ${gatb_source_dir}/gatb-core/src + ${gatb_binary_dir}/include ) #normal includes: warnings will be reported for these diff --git a/ext/gatb.cmake b/ext/gatb.cmake index f4a60c3f..e0b3f25c 100644 --- a/ext/gatb.cmake +++ b/ext/gatb.cmake @@ -9,7 +9,12 @@ 
ExternalProject_Add(gatb GIT_REPOSITORY https://github.com/GATB/gatb-core.git GIT_TAG "v1.4.1" PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gatb" - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gatb/src/gatb/gatb-core/gatb-core" + SOURCE_SUBDIR gatb-core CMAKE_ARGS -DKSIZE_LIST=32 INSTALL_COMMAND "") +ExternalProject_Get_Property(gatb source_dir binary_dir) + +set(gatb_source_dir ${source_dir}) +set(gatb_binary_dir ${binary_dir}) + diff --git a/include/denovo_discovery/local_assembly.h b/include/denovo_discovery/local_assembly.h index 83f117ba..3883d8cd 100644 --- a/include/denovo_discovery/local_assembly.h +++ b/include/denovo_discovery/local_assembly.h @@ -8,8 +8,8 @@ #include #include -#include -#include +#include "gatb/debruijn/impl/Simplifications.hpp" +#include "gatb/gatb_core.hpp" #include #include From c1f764d50383e3ee7b4b96b4ae503e323a3ee320 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 28 Oct 2020 15:13:27 +1000 Subject: [PATCH 5/9] cleanup commented-out code and usage of cerr and cout --- src/de_bruijn/graph.cpp | 163 ++---------------------- src/denovo_discovery/local_assembly.cpp | 3 +- src/estimate_parameters.cpp | 7 +- src/index.cpp | 4 - src/inthash.cpp | 39 ++---- src/kmergraph.cpp | 20 +-- src/kmergraphwithcoverage.cpp | 92 +------------ src/localPRG.cpp | 60 --------- src/localgraph.cpp | 64 +--------- src/localnode.cpp | 13 +- src/minirecord.cpp | 3 - src/noise_filtering.cpp | 129 ++++--------------- src/pangenome/pangraph.cpp | 59 +-------- src/pangenome/panread.cpp | 59 --------- src/prg/path.cpp | 15 +-- src/seq.cpp | 2 - src/utils.cpp | 70 +++------- src/vcf.cpp | 19 --- 18 files changed, 83 insertions(+), 738 deletions(-) diff --git a/src/de_bruijn/graph.cpp b/src/de_bruijn/graph.cpp index 522af13a..9ca2bd2f 100644 --- a/src/de_bruijn/graph.cpp +++ b/src/de_bruijn/graph.cpp @@ -1,9 +1,6 @@ #include -#include -#include #include #include -#include #include #include #include @@ -13,7 +10,6 @@ #include "de_bruijn/graph.h" #include "noise_filtering.h" -#include "utils.h" #define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) @@ -72,24 +68,11 @@ bool edge_is_valid(OrientedNodePtr from, OrientedNodePtr to) std::deque hashed_node_ids_to = to.first->hashed_node_ids; if (!from.second) { hashed_node_ids_from = rc_hashed_node_ids(hashed_node_ids_from); - // cout << "reverse from" << endl; } if (!to.second) { hashed_node_ids_to = rc_hashed_node_ids(hashed_node_ids_to); - // cout << "reverse to" << endl; - } - /*cout << from.second << to.second << " compare ("; - for (const auto &n : hashed_node_ids_from) - { - cout << n << " "; - } - cout << ") to ("; - for (const auto &n : hashed_node_ids_to) - { - cout << n << " "; } - cout << ")" << endl;*/ return overlap_forwards(hashed_node_ids_from, hashed_node_ids_to); } @@ -102,47 +85,26 @@ void debruijn::Graph::add_edge(OrientedNodePtr from, OrientedNodePtr to) or assert_msg( "edge from " << *from.first << " to " << *to.first << " is invalid")); - // uint8_t num_edges_added = 0; if (from.second and from.first->out_nodes.find(to.first->id) == from.first->out_nodes.end()) { from.first->out_nodes.insert(to.first->id); - // cout << "added edge " << from.first->id << " -> " << to.first->id << " so - // out_nodes.size() == " - // << from.first->out_nodes.size() << endl; - ////num_edges_added += 1; } else if (!from.second and from.first->in_nodes.find(to.first->id) == from.first->in_nodes.end()) { from.first->in_nodes.insert(to.first->id); - // cout << "added edge " << from.first->id << " <- " << to.first->id << " so - // 
in_nodes.size() == " - // << from.first->in_nodes.size() << endl; - ////num_edges_added += 1; } if (to.second and to.first->in_nodes.find(from.first->id) == to.first->in_nodes.end()) { to.first->in_nodes.insert(from.first->id); - // cout << "added edge " << to.first->id << " <- " << from.first->id << " so - // in_nodes.size() == " - // << to.first->in_nodes.size() << endl; - ////num_edges_added += 1; } else if (!to.second and to.first->out_nodes.find(from.first->id) == to.first->out_nodes.end()) { to.first->out_nodes.insert(from.first->id); - // cout << "added edge " << to.first->id << " -> " << from.first->id << " so - // out_nodes.size() == " - // << to.first->out_nodes.size() << endl; - ////num_edges_added += 1; } - - // assert(num_edges_added == 2 or assert_msg("did not add edge from " << *from << " - // to " << *to)); } // Remove all mentions of de bruijn node with id given from graph void debruijn::Graph::remove_node(const uint32_t dbg_node_id) { - // cout << "remove node " << dbg_node_id << endl; auto it = nodes.find(dbg_node_id); if (it != nodes.end()) { // remove this node from lists of out nodes from other graph nodes @@ -160,92 +122,36 @@ void debruijn::Graph::remove_node(const uint32_t dbg_node_id) } } -// remove unitig from dbg -/*void Graph::remove_unitig(const deque& tig) -{ - if (tig.size() < size) - { - return; - } - - // first remove nodes - unordered_set read_ids; - bool orientation = overlap_forwards(nodes[tig[0]]->hashed_node_ids, -nodes[tig[1]]->hashed_node_ids); for (uint i=1; iread_ids.begin(), nodes[tig[i]]->read_ids.end()); - orientation *= overlap_forwards(nodes[tig[i]]->hashed_node_ids, -nodes[tig[i+1]]->hashed_node_ids); remove_node[tig[i]]; - } - - // then add nodes and edges back to allow continuity - NodePtr last_node = nodes[tig[0]]; - NodePtr new_node; - for (uint i=1; i new_tig; - for (uint j=i; jhashed_node_ids[i]); - } - for (uint j=size-i; jhashed_node_ids[i]); - } else { - new_tig.push_back(rc_hashed_node_ids(nodes[tig.back()]->hashed_node_ids)[i]); - } - } - assert(new_tig.size() == size); - for(auto r : read_ids) - new_node = add_node(new_tig, r); - add_edge(last_node, new_node); - last_node = new_node; - } - add_edge(last_node, nodes[tig.back()]); -}*/ - // Remove all copies of read from de bruijn node void debruijn::Graph::remove_read_from_node( const uint32_t read_id, const uint32_t dbg_node_id) { - // cout << "remove read " << (uint)read_id << " from node " << (uint)dbg_node_id << - // endl; auto it = nodes.find(dbg_node_id); bool found_read_intersect; if (it != nodes.end()) { - // cout << "found node " << *(it->second) << endl; auto rit = it->second->read_ids.find(read_id); if (rit != it->second->read_ids.end()) { - // cout << "found read id on node" << endl; it->second->read_ids.erase(rit); - // cout << "removed read id" << endl; // if there are no more reads covering it, remove the node - if (it->second->read_ids.empty()) { - // cout << "no more reads, so remove node" << endl; remove_node(dbg_node_id); } else { // otherwise, remove any outnodes which no longer share a read - // cout << "remove outnodes which no longer share a read" << endl; for (std::unordered_set::iterator nit = it->second->out_nodes.begin(); nit != it->second->out_nodes.end();) { - // cout << "out node " << *nit << endl; found_read_intersect = false; for (const auto& r : it->second->read_ids) { if (nodes[*nit]->read_ids.find(r) != nodes[*nit]->read_ids.end()) { found_read_intersect = true; - // cout << " shares a read" << endl; break; } } if (!found_read_intersect) { - // 
cout << " does not share a read" << endl; nodes[*nit]->in_nodes.erase(dbg_node_id); nit = it->second->out_nodes.erase(nit); - // cout << "removed" << endl; } else { nit++; } @@ -253,21 +159,17 @@ void debruijn::Graph::remove_read_from_node( for (std::unordered_set::iterator nit = it->second->in_nodes.begin(); nit != it->second->in_nodes.end();) { - // cout << "out node " << *nit << endl; found_read_intersect = false; for (const auto& r : it->second->read_ids) { if (nodes[*nit]->read_ids.find(r) != nodes[*nit]->read_ids.end()) { found_read_intersect = true; - // cout << " shares a read" << endl; break; } } if (!found_read_intersect) { - // cout << " does not share a read" << endl; nodes[*nit]->out_nodes.erase(dbg_node_id); nit = it->second->in_nodes.erase(nit); - // cout << "removed" << endl; } else { nit++; } @@ -330,27 +232,18 @@ void debruijn::Graph::extend_unitig(std::deque& tig) and (nodes[tig.back()]->out_nodes.size() + nodes[tig.back()]->in_nodes.size()) == 0); if (tig_is_empty or node_is_isolated) { - // cout << "node is isolated or tig empty" << endl; return; } - bool can_extend = nodes[tig.back()]->out_nodes.size() - == 1; // and nodes[tig.back()]->in_nodes.size() <= 1; + bool can_extend = nodes[tig.back()]->out_nodes.size() == 1; bool use_outnodes = true; while (can_extend) { - /*cout << "tig in progress before b: "; - for (const auto &n : tig) { - cout << n << " "; - } - cout << endl;*/ - - if (use_outnodes) // and find(tig.begin(), tig.end(), - // *nodes[tig.back()]->out_nodes.begin())!=tig.end()) + if (use_outnodes) { tig.push_back(*nodes[tig.back()]->out_nodes.begin()); - else // if (find(tig.begin(), tig.end(), - // *nodes[tig.back()]->in_nodes.begin())!=tig.end()) + } else { tig.push_back(*nodes[tig.back()]->in_nodes.begin()); + } if (std::find(nodes[tig.back()]->in_nodes.begin(), nodes[tig.back()]->in_nodes.end(), *----tig.end()) @@ -359,7 +252,6 @@ void debruijn::Graph::extend_unitig(std::deque& tig) and nodes[tig.back()]->in_nodes.size() <= 1 and tig.front() != tig.back(); use_outnodes = true; - // cout << "A"; } else if (std::find(nodes[tig.back()]->out_nodes.begin(), nodes[tig.back()]->out_nodes.end(), *----tig.end()) != nodes[tig.back()]->out_nodes.end()) { @@ -367,24 +259,15 @@ void debruijn::Graph::extend_unitig(std::deque& tig) and nodes[tig.back()]->out_nodes.size() <= 1 and tig.front() != tig.back(); use_outnodes = false; - // cout << "B"; } else { can_extend = false; - // cout << "C"; } - - /*cout << "tig in progress b: "; - for (const auto &n : tig) { - cout << n << " "; - } - cout << endl;*/ } if (tig.size() == 1) { can_extend = nodes[tig.front()]->in_nodes.size() == 1 and nodes[tig.front()]->out_nodes.size() <= 1; use_outnodes = false; - // cout << "D"; } else { if (std::find(nodes[tig.front()]->in_nodes.begin(), @@ -394,7 +277,6 @@ void debruijn::Graph::extend_unitig(std::deque& tig) and nodes[tig.front()]->in_nodes.size() <= 1 and tig.front() != tig.back(); use_outnodes = true; - // cout << "E"; } else if (std::find(nodes[tig.front()]->out_nodes.begin(), nodes[tig.front()]->out_nodes.end(), *++tig.begin()) != nodes[tig.front()]->out_nodes.end()) { @@ -402,25 +284,17 @@ void debruijn::Graph::extend_unitig(std::deque& tig) and nodes[tig.front()]->out_nodes.size() <= 1 and tig.front() != tig.back(); use_outnodes = false; - // cout << "F"; } else { can_extend = false; } } while (can_extend) { - /*cout << "tig in progress before f: "; - for (const auto &n : tig) { - cout << n << " "; - } - cout << endl;*/ - - if (use_outnodes) // and find(tig.begin(), tig.end(), 
- // *nodes[tig.front()]->out_nodes.begin())!=tig.end()) + if (use_outnodes) { tig.push_front(*nodes[tig.front()]->out_nodes.begin()); - else // if (find(tig.begin(), tig.end(), - // *nodes[tig.front()]->in_nodes.begin())!=tig.end()) + } else { tig.push_front(*nodes[tig.front()]->in_nodes.begin()); + } if (std::find(nodes[tig.front()]->in_nodes.begin(), nodes[tig.front()]->in_nodes.end(), *++tig.begin()) @@ -429,7 +303,6 @@ void debruijn::Graph::extend_unitig(std::deque& tig) and nodes[tig.front()]->in_nodes.size() <= 1 and tig.front() != tig.back(); use_outnodes = true; - // cout << "G"; } else if (std::find(nodes[tig.front()]->out_nodes.begin(), nodes[tig.front()]->out_nodes.end(), *++tig.begin()) != nodes[tig.front()]->out_nodes.end()) { @@ -437,24 +310,14 @@ void debruijn::Graph::extend_unitig(std::deque& tig) and nodes[tig.front()]->out_nodes.size() <= 1 and tig.front() != tig.back(); use_outnodes = false; - // cout << "H"; } else { can_extend = false; - // cout << "I"; - } - - /*cout << "tig in progress f: "; - for (const auto &n : tig) - { - cout << n << " "; } - cout << endl;*/ } - while (tig.size() > 1 - and tig.front() - == tig.back()) // find(tig.begin(), --tig.end(), tig.back()) == tig.end()) + while (tig.size() > 1 and tig.front() == tig.back()) { tig.pop_back(); + } std::stringstream tig_ss; for (const auto& n : tig) @@ -470,8 +333,6 @@ bool debruijn::Graph::found_in_out_nodes( for (const auto& i : node_ptr_to_search->out_nodes) { if (*nodes.at(i) == *node_ptr_to_find) { return true; - } else { - std::cout << *nodes.at(i) << " != " << *node_ptr_to_find << std::endl; } } return false; @@ -484,8 +345,6 @@ bool debruijn::Graph::found_in_in_nodes( for (const auto& i : node_ptr_to_search->in_nodes) { if (*nodes.at(i) == *node_ptr_to_find) { return true; - } else { - std::cout << *nodes.at(i) << " != " << *node_ptr_to_find << std::endl; } } return false; @@ -506,9 +365,6 @@ bool debruijn::Graph::operator==(const Graph& y) const bool found = false; for (const auto& s : y.nodes) { if (*t.second == *s.second) { - // cout << t.first << " " << *t.second << " == " << *s.second << " " << - // s.first << endl; - found = true; // also check the outnodes are the same @@ -545,7 +401,6 @@ bool debruijn::Graph::operator==(const Graph& y) const } // nodes can't be equal within a graph so don't need to check vice versa - // cout << "found all nodes" << endl; return true; } diff --git a/src/denovo_discovery/local_assembly.cpp b/src/denovo_discovery/local_assembly.cpp index 600c7a25..003e8611 100644 --- a/src/denovo_discovery/local_assembly.cpp +++ b/src/denovo_discovery/local_assembly.cpp @@ -180,6 +180,8 @@ std::pair LocalAssemblyGraph::get_paths_between( abandoned = true; break; } + + BOOST_LOG_TRIVIAL(trace) << "Trying local assembly with " << std::to_string(required_percent_of_expected_covg) << " * "; build_paths_between(start_kmer, end_kmer, path_accumulator, tree, node_to_distance_to_the_end_node, paths_between_queries, max_path_length, expected_coverage, required_percent_of_expected_covg); @@ -224,7 +226,6 @@ void LocalAssemblyGraph::build_paths_between(const std::string& start_kmer, return; } } - path_accumulator.push_back(start_kmer.back()); if (string_ends_with(path_accumulator, end_kmer) diff --git a/src/estimate_parameters.cpp b/src/estimate_parameters.cpp index dcd1cf1e..250fb06a 100644 --- a/src/estimate_parameters.cpp +++ b/src/estimate_parameters.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -250,14 +249,14 @@ uint32_t 
estimate_parameters(std::shared_ptr pangraph, // evaluate error rate auto mean = fit_mean_covg(kmer_covg_dist, covg / 10); auto var = fit_variance_covg(kmer_covg_dist, mean, covg / 10); - std::cout << "mean, var: " << mean << " " << var << std::endl; + BOOST_LOG_TRIVIAL(debug) << "mean, var: " << mean << ", " << var; if (mean > var) { auto zero_thresh = 2; mean = fit_mean_covg(kmer_covg_dist, zero_thresh); var = fit_variance_covg(kmer_covg_dist, mean, zero_thresh); - std::cout << "new mean, var: " << mean << " " << var << std::endl; + BOOST_LOG_TRIVIAL(debug) << "new mean, var: " << mean << ", " << var; } - std::cout << bin << " " << num_reads << " " << covg << std::endl; + BOOST_LOG_TRIVIAL(debug) << "nb reads, covg" << num_reads << ", " << covg; if ((bin and num_reads > 30 and covg > 30) or (not bin and abs(var - mean) < 2 and mean > 10 and num_reads > 30 and covg > 2)) { diff --git a/src/index.cpp b/src/index.cpp index bb379457..cb8ae97e 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -22,8 +22,6 @@ void Index::add_record(const uint64_t kmer, const uint32_t prg_id, const prg::Path& path, const uint32_t knode_id, const bool strand) { - // cout << "Add kmer " << kmer << " id, path, strand " << prg_id << ", " << path << - // ", " << strand << endl; auto it = minhash.find(kmer); // checks if kmer is in minhash if (it == minhash.end()) { // no auto* newv = new std::vector; // get a new vector of MiniRecords - @@ -32,7 +30,6 @@ void Index::add_record(const uint64_t kmer, const uint32_t prg_id, newv->emplace_back(MiniRecord(prg_id, path, knode_id, strand)); minhash.insert(std::pair*>( kmer, newv)); // add this record to minhash - // cout << "New minhash size: " << minhash.size() << endl; } else { // yes MiniRecord mr(prg_id, path, knode_id, strand); // create a new MiniRecord from this minimizer kmer @@ -41,7 +38,6 @@ void Index::add_record(const uint64_t kmer, const uint32_t prg_id, // vector of this minimizer it->second->push_back(mr); // no, add it } - // cout << "New minhash entry for kmer " << kmer << endl; } } diff --git a/src/inthash.cpp b/src/inthash.cpp index 07b49f52..b659cc6e 100644 --- a/src/inthash.cpp +++ b/src/inthash.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include "inthash.h" @@ -38,24 +37,16 @@ * means hash_64(x, mask)==hash_64(y, mask) if and only if x==y. 
*/ -unsigned char seq_nt4_table[256] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 -}; +unsigned char seq_nt4_table[256] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, + 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; uint32_t nt4(char c) { return seq_nt4_table[(uint8_t)c]; } @@ -138,27 +129,17 @@ std::pair KmerHash::kmerhash(const std::string& s, const uin assert(s.size() == k); int c; uint64_t shift1 = 2 * (k - 1), mask = (1ULL << 2 * k) - 1, kmer[2] = { 0, 0 }; - // uint64_t mask1 = pow(4,k) - 1, kh = 0, rckh = 0; char myArray[s.size() + 1]; // as 1 char space for null is also required strcpy(myArray, s.c_str()); for (char i : s) { c = seq_nt4_table[(uint8_t)i]; - // cout << s[i] << " -> " << c; if (c < 4) { // not an ambiguous base kmer[0] = (kmer[0] << 2 | c) & mask; // forward k-mer kmer[1] = (kmer[1] >> 2) | (3ULL ^ c) << shift1; // reverse k-mer - // kh += c*pow(4,i); - // rckh += (3-c)*pow(4,k-i-1); } - // cout << "after adding " << s[i] << "=" << c << " kh is now " << kh << " and - // kmer[0] is now " << kmer[0] << ", rckh is now " << rckh << " and kmer[1] is - // now " << kmer[1] << endl; } - // cout << "s: " << s << " -> " << kh << endl; kmer[0] = hash64(kmer[0], mask); kmer[1] = hash64(kmer[1], mask); - // cout << "kmer " << s << " has kmer[0] " << kmer[0] << " and kmer[1] " << kmer[1] - // << endl; auto ret = std::make_pair(kmer[0], kmer[1]); lookup[s] = ret; diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index e17dd125..bcca5b68 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -1,17 +1,13 @@ #include -#include #include #include -#include /* NULL */ #include /* srand, rand */ -#include #include #include "utils.h" #include "kmernode.h" #include "kmergraph.h" -#include "kmergraphwithcoverage.h" #include "localPRG.h" #define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) @@ -109,7 +105,6 @@ KmerNodePtr KmerGraph::add_node(const prg::Path& p) KmerNodePtr n(std::make_shared(nodes.size(), p)); // create the node nodes.push_back(n); // add it to nodes sorted_nodes.insert(n); - // nodes[next_id] = make_shared(next_id, p); 
assert(k == 0 or p.length() == 0 or p.length() == k); if (k == 0 and p.length() > 0) { k = p.length(); @@ -150,7 +145,6 @@ void KmerGraph::add_edge(KmerNodePtr from, KmerNodePtr to) if (from->find_node_ptr_in_out_nodes(to) == from->out_nodes.end()) { from->out_nodes.emplace_back(to); to->in_nodes.emplace_back(from); - // cout << "added edge from " << from->id << " to " << to->id << endl; } } @@ -163,7 +157,6 @@ void KmerGraph::remove_shortcut_edges() std::deque> d; for (const auto& n : nodes) { - // cout << n.first << endl; for (const auto& out : n->out_nodes) { auto out_node_as_shared_ptr = out.lock(); for (auto nextOut = out_node_as_shared_ptr->out_nodes.begin(); @@ -262,8 +255,7 @@ void KmerGraph::save( // TODO: leave as coverage 0 or change this? handle << "\tFC:i:" << 0 << "\t" - << "\tRC:i:" << 0 - << std::endl; //"\t" << (unsigned)nodes[i].second->num_AT << endl; + << "\tRC:i:" << 0 << std::endl; for (uint32_t j = 0; j < c->out_nodes.size(); ++j) { handle << "L\t" << c->id << "\t+\t" << c->out_nodes[j].lock()->id @@ -272,7 +264,7 @@ void KmerGraph::save( } handle.close(); } else { - std::cerr << "Unable to open kmergraph file " << filepath << std::endl; + BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; std::exit(EXIT_FAILURE); } } @@ -379,12 +371,10 @@ void KmerGraph::load(const std::string& filepath) to = std::stoi(split_line[1]); } add_edge(nodes[from], nodes[to]); - // nodes[from]->outNodes.push_back(nodes.at(to)); - // nodes[to]->inNodes.push_back(nodes.at(from)); } } } else { - std::cerr << "Unable to open kmergraph file " << filepath << std::endl; + BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; exit(1); } } @@ -421,8 +411,7 @@ uint32_t KmerGraph::min_path_length() bool KmerGraph::operator==(const KmerGraph& other_graph) const { // false if have different numbers of nodes - if (other_graph.nodes.size() - != nodes.size()) { // cout << "different numbers of nodes" << endl; + if (other_graph.nodes.size() != nodes.size()) { return false; } @@ -450,7 +439,6 @@ bool KmerGraph::operator==(const KmerGraph& other_graph) const } } } - // otherwise is true return true; } diff --git a/src/kmergraphwithcoverage.cpp b/src/kmergraphwithcoverage.cpp index b77abc31..ae4bc90d 100644 --- a/src/kmergraphwithcoverage.cpp +++ b/src/kmergraphwithcoverage.cpp @@ -1,8 +1,6 @@ #include -#include #include #include -#include /* NULL */ #include /* srand, rand */ #include #include @@ -31,7 +29,6 @@ void KmerGraphWithCoverage::set_binomial_parameter_p(const float e_rate) assert(kmer_prg->k != 0); assert(0 < e_rate and e_rate < 1); binomial_parameter_p = 1 / exp(e_rate * kmer_prg->k); - // cout << "using p: " << p << endl; } void KmerGraphWithCoverage::increment_covg( @@ -82,7 +79,6 @@ void KmerGraphWithCoverage::set_negative_binomial_parameters( { if (nbin_prob == 0 and nb_fail == 0) return; - // qcout << "set nb" << endl; assert((negative_binomial_parameter_p > 0 and negative_binomial_parameter_p < 1) || assert_msg( "nb_p " << negative_binomial_parameter_p << " was not set in kmergraph")); @@ -279,47 +275,6 @@ float KmerGraphWithCoverage::find_max_path(std::vector& maxpath, return prob_path(maxpath, sample_id, prob_model); } -/* -std::vector> KmerGraph::find_max_paths(uint32_t num, - const uint32_t -&sample_id) { - - // save original coverges so can put back at the end - std::vector original_covgs0, original_covgs1; - for (uint32_t i = 0; i != nodes.size(); ++i) { - original_covgs0.push_back(nodes[i]->get_covg(0, sample_id)); - 
original_covgs1.push_back(nodes[i]->get_covg(1, sample_id)); - } - - // find num max paths - //cout << "expected covg " << (uint)(p*num_reads/num) << endl; - std::vector> paths; - std::vector maxpath; - find_max_path(maxpath, <#initializer#>); - //uint min_covg; - paths.push_back(maxpath); - - while (paths.size() < num) { - for (uint32_t i = 0; i != maxpath.size(); ++i) { - maxpath[i]->covg[0] -= std::min(maxpath[i]->covg[0], (uint32_t) (p * -num_reads / num)); maxpath[i]->covg[1] -= std::min(maxpath[i]->covg[1], (uint32_t) (p * -num_reads / num)); - } - maxpath.clear(); - find_max_path(maxpath, <#initializer#>); - paths.push_back(maxpath); - } - - // put covgs back - for (uint32_t i = 0; i != nodes.size(); ++i) { - nodes[i]->covg[0] = original_covgs0[i]; - nodes[i]->covg[1] = original_covgs1[i]; - } - - return paths; -} - */ - std::vector> KmerGraphWithCoverage::get_random_paths( uint32_t num_paths) { @@ -373,44 +328,6 @@ float KmerGraphWithCoverage::prob_path(const std::vector& kpath, return return_prob_path / len; } -/* -float KmerGraph::prob_paths(const std::vector> &kpaths) { - if (kpaths.empty()) { - return 0; // is this the correct default? - } - - // collect how much coverage we expect on each node from these paths - std::vector path_node_covg(nodes.size(), 0); - for (uint32_t i = 0; i != kpaths.size(); ++i) { - for (uint32_t j = 0; j != kpaths[i].size(); ++j) { - path_node_covg[kpaths[i][j]->id] += 1; - } - } - - // now calculate max likelihood assuming independent paths - float ret_p = 0; - uint32_t len = 0; - for (uint32_t i = 0; i != path_node_covg.size(); ++i) { - if (path_node_covg[i] > 0) { - //cout << "prob of node " << nodes[i]->id << " which has path covg " << -path_node_covg[i] << " and so we expect to see " << -num_reads*path_node_covg[i]/kpaths.size() << " times IS " << prob(nodes[i]->id, -num_reads*path_node_covg[i]/kpaths.size()) << endl; ret_p += prob(nodes[i]->id, -num_reads * path_node_covg[i] / kpaths.size(), <#initializer#>); if -(nodes[i]->path.length() > 0) { len += 1; - } - } - } - - if (len == 0) { - len = 1; - } - - //cout << "len " << len << endl; - return ret_p / len; -} -*/ - void KmerGraphWithCoverage::save_covg_dist(const std::string& filepath) { std::ofstream handle; @@ -452,8 +369,7 @@ void KmerGraphWithCoverage::save( } handle << "\tFC:i:" << get_covg(c->id, 0, sample_id) << "\t" - << "\tRC:i:" << get_covg(c->id, 1, sample_id) - << std::endl; //"\t" << (unsigned)nodes[i].second->num_AT << endl; + << "\tRC:i:" << get_covg(c->id, 1, sample_id) << std::endl; for (uint32_t j = 0; j < c->out_nodes.size(); ++j) { handle << "L\t" << c->id << "\t+\t" << c->out_nodes[j].lock()->id @@ -462,7 +378,7 @@ void KmerGraphWithCoverage::save( } handle.close(); } else { - std::cerr << "Unable to open kmergraph file " << filepath << std::endl; + BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; std::exit(EXIT_FAILURE); } } @@ -578,12 +494,10 @@ void KmerGraphWithCoverage::load(const std::string& filepath) to = std::stoi(split_line[1]); } kmer_prg->add_edge(kmer_prg->nodes[from], kmer_prg->nodes[to]); - // nodes[from]->out_nodes.push_back(nodes.at(to)); - // nodes[to]->in_nodes.push_back(nodes.at(from)); } } } else { - std::cerr << "Unable to open kmergraph file " << filepath << std::endl; + BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; exit(1); } } \ No newline at end of file diff --git a/src/localPRG.cpp b/src/localPRG.cpp index 78c4c7f1..12f58c6e 100644 --- a/src/localPRG.cpp +++ b/src/localPRG.cpp @@ -1,9 +1,7 @@ 
#include #include #include -#include #include -#include #include #include @@ -47,7 +45,6 @@ bool LocalPRG::isalpha_string(const std::string& s) const // Returns if a string s is entirely alphabetic for (char j : s) if (isalpha(j) == 0) { - // std::cout << "Found non-alpha char: " << s[j] << std::endl; return false; } return true; @@ -55,15 +52,12 @@ bool LocalPRG::isalpha_string(const std::string& s) const std::string LocalPRG::string_along_path(const prg::Path& p) const { - // std::cout << p << std::endl; assert(p.get_start() <= seq.length()); assert(p.get_end() <= seq.length()); std::string s; for (const auto& it : p) { s += seq.substr(it.start, it.length); - // std::cout << s << std::endl; } - // std::cout << "lengths: " << s.length() << ", " << p.length << std::endl; assert(s.length() == p.length() || assert_msg("sequence length " << s.length() << " is not equal to path length " << p.length())); @@ -174,9 +168,6 @@ std::vector LocalPRG::nodes_along_path_core(const prg::Path& p) co std::vector LocalPRG::split_by_site(const Interval& i) const { // Splits interval by next_site based on substring of seq in the interval - // std::cout << "splitting by site " << next_site << " in interval " << i << - // std::endl; - // Split first by var site std::vector v; // contains the intervals split by the variant site v.reserve(4); @@ -384,7 +375,6 @@ std::vector LocalPRG::shift(prg::Path p) const std::deque short_paths = { std::make_shared(q) }; std::vector k_paths; bool non_terminus; - // uint exp_num_return_seqs = 0; // first find extensions of the path while (!short_paths.empty()) { @@ -654,7 +644,6 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 kmer = string_along_path(*(v[j])); kh = hash.kmerhash(kmer, k); smallest = std::min(smallest, std::min(kh.first, kh.second)); - // std::cout << min(kh.first, kh.second) << " "; } for (uint32_t j = 0; j != w; j++) { kmer = string_along_path(*(v[j])); @@ -850,18 +839,13 @@ std::vector LocalPRG::localnode_path_from_kmernode_path( } } if (localnode_path[0]->id != 0) { - // std::cout << "localnode path still does not start with 0" << std::endl; // add the first path to start LocalNodePtr next = nullptr; while (localnode_path[0]->id != 0 and next != localnode_path[0]) { - // std::cout << "look for a previous node to " << *localnode_path[0] << - // std::endl; next = prg.get_previous_node(localnode_path[0]); if (next != nullptr) { - // std::cout << "add " << *next << std::endl; localnode_path.insert(localnode_path.begin(), next); } - // std::cout << "new start node is " << *localnode_path[0] << std::endl; } } } @@ -899,14 +883,10 @@ std::vector LocalPRG::localnode_path_from_kmernode_path( } } if (localnode_path.back()->id != prg.nodes.size() - 1) { - // std::cout << "localnode path still does not end with " << - // prg.nodes.size()-1 << std::endl; // add the first path to end while (localnode_path.back()->id != prg.nodes.size() - 1 and !localnode_path.back()->outNodes.empty()) { - // std::cout << "extend " << *localnode_path.back(); localnode_path.push_back(localnode_path.back()->outNodes[0]); - // std::cout << " with " << *localnode_path.back() << std::endl; } } } @@ -1188,17 +1168,6 @@ void LocalPRG:: const std::string& sample_name) const { BOOST_LOG_TRIVIAL(debug) << "Update VCF with sample path"; - /*for (uint i=0; i!=sample_path.size(); ++i) - { - std::cout << *sample_path[i] << " "; - } - std::cout << std::endl; - std::cout << now() << "Using ref path" << std::endl; - for (uint i=0; i!=rpath.size(); ++i) - { - std::cout << 
*rpath[i] << " "; - } - std::cout << std::endl;*/ assert(!prg.nodes.empty()); // otherwise empty nodes -> segfault // if prg has only one node, simple case @@ -1231,27 +1200,19 @@ void LocalPRG:: found_new_site = true; sample_id++; } else if (found_new_site) { - // refpath back == samplepath back // add ref allele from previous site to this one - // std::cout << "update with ref alleles from " << pos << " to " << pos_to - // << std::endl; vcf.set_sample_gt_to_ref_allele_for_records_in_the_interval( sample_name, name, pos, pos_to); pos = pos_to; // add new site to vcf - // std::cout << "find ref seq" << std::endl; for (uint32_t j = 1; j < refpath.size() - 1; ++j) { ref += refpath[j]->seq; - // std::cout << ref << std::endl; } - // std::cout << "find alt seq" << std::endl; for (uint32_t j = 1; j < samplepath.size() - 1; ++j) { alt += samplepath[j]->seq; - // std::cout << alt << std::endl; } - // std::cout << "add sample gt" << std::endl; vcf.add_a_new_record_discovered_in_a_sample_and_genotype_it( sample_name, name, pos, ref, alt); found_new_site = false; @@ -1276,7 +1237,6 @@ void LocalPRG:: } pos_to = pos; } else { - // std::cout << pos_to << std::endl; refpath.erase(refpath.begin(), refpath.end() - 1); if (refpath.back()->id != prg.nodes.size() - 1) { ref = ""; @@ -1293,10 +1253,8 @@ void LocalPRG:: } } } - // std::cout << "add last ref alleles" << std::endl; vcf.set_sample_gt_to_ref_allele_for_records_in_the_interval( sample_name, name, pos, pos_to); - // std::cout << "done" << std::endl; } // Find the path through the PRG which deviates at pos from the ref path with alt @@ -1305,8 +1263,6 @@ std::vector LocalPRG::find_alt_path( const std::vector& ref_path, const uint32_t pos, const std::string& ref, const std::string& alt) const { - // std::cout << now() << "Find alt path for PRG " << name << " variant " << pos << " - // " << ref << " " << alt << std::endl; std::vector alt_path, considered_path; std::deque> paths_in_progress; uint32_t ref_added = 0, pos_along_ref_path = 0; @@ -1327,8 +1283,6 @@ std::vector LocalPRG::find_alt_path( break; } } - // std::cout << "pos " << pos << " pos_along_ref_path " << pos_along_ref_path << " - // ref_path.size() " << ref_path.size() << std::endl; // find the localnodeptr we want to make our way back to while (pos_along_ref_path < ref_path.size() - 1 @@ -1339,7 +1293,6 @@ std::vector LocalPRG::find_alt_path( } assert(pos_along_ref_path < ref_path.size()); auto ref_node_to_find = ref_path[pos_along_ref_path]; - // std::cout << "trying to find " << *ref_node_to_find; // find an alt path with the required sequence if (alt_path.empty() and not ref_path.empty() and ref_path[0]->pos.length == 0) @@ -1353,14 +1306,7 @@ std::vector LocalPRG::find_alt_path( considered_path = paths_in_progress.front(); paths_in_progress.pop_front(); - /*std::cout << "consider path "; - for (const auto &t : considered_path){ - std::cout << t->pos << " "; - } - std::cout << std::endl;*/ - auto considered_seq = string_along_path(considered_path); - // std::cout << "considered_seq " << considered_seq << std::endl; if (considered_seq == working_alt) { // check if merge with ref path @@ -1371,11 +1317,6 @@ std::vector LocalPRG::find_alt_path( alt_path.end(), considered_path.begin(), considered_path.end()); alt_path.insert(alt_path.end(), ref_path.begin() + pos_along_ref_path, ref_path.end()); - /*std::cout << "found alt path "; - for (const auto &t : considered_path){ - std::cout << t->pos << " "; - } - std::cout << std::endl;*/ return alt_path; } else { for (const auto& m : 
considered_path.back()->outNodes) { @@ -1544,7 +1485,6 @@ void LocalPRG::add_sample_covgs_to_vcf(VCF& vcf, const KmerGraphWithCoverage& kg for (auto& recordPointer : vcf.get_records()) { auto& record = *recordPointer; - // std::cout << record << std::endl; // find corresponding ref kmers auto end_pos = record.get_ref_end_pos(); diff --git a/src/localgraph.cpp b/src/localgraph.cpp index 5f8e0eee..2ce6de6f 100644 --- a/src/localgraph.cpp +++ b/src/localgraph.cpp @@ -9,19 +9,9 @@ #define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) -LocalGraph::LocalGraph() -{ - // reserve space in index - // index.reserve(10); -} +LocalGraph::LocalGraph() { } -LocalGraph::~LocalGraph() -{ - /*for (const auto &c: nodes) { - delete c.second; - }*/ - nodes.clear(); -} +LocalGraph::~LocalGraph() { nodes.clear(); } // add the node with a given id and seq if the id is not already in the nodes void LocalGraph::add_node( const uint32_t& id, const std::string& seq, const Interval& pos) @@ -33,9 +23,6 @@ void LocalGraph::add_node( if (it == nodes.end()) { LocalNodePtr n(std::make_shared(seq, pos, id)); nodes[id] = n; // add the node to the map - // nodes[id] = make_shared(seq, pos, id); - // cout << "Added node " << id << endl; - // add the node to the interval indexes for fast overlap queries if (pos.length == 0) startIndexOfZeroLengthIntervals[pos.start] = n; @@ -60,25 +47,9 @@ void LocalGraph::add_edge(const uint32_t& from, const uint32_t& to) << ">" << t->pos.start << " so cannot add edge from node " << *f << " to node " << *t)); f->outNodes.push_back(t); - // cout << "Added edge (" << f->id << ", " << t->id << ")" << endl; } } -/*void LocalGraph::add_varsite (const uint16_t level, const uint32_t pre_site_id, const -uint32_t post_site_id) -{ - assert(pre_site_id <= post_site_id); - while (level >= index.size()) - { - vector> levelv; - levelv.reserve(400); - //levelv = {}; - index.insert(index.end(), 1, levelv); - } - index[level].push_back(make_pair(pre_site_id, post_site_id)); - return; -}*/ - void LocalGraph::write_gfa(const std::string& filepath) const { std::ofstream handle; @@ -140,7 +111,7 @@ void LocalGraph::read_gfa(const std::string& filepath) } } } else { - std::cerr << "Unable to open GFA file " << filepath << std::endl; + BOOST_LOG_TRIVIAL(error) << "Unable to open GFA file " << filepath; std::exit(1); } } @@ -149,8 +120,6 @@ std::vector LocalGraph::walk( const uint32_t& node_id, const uint32_t& pos, const uint32_t& len) const { // node_id: where to start the walk, pos: the position in the node_id, len = k+w-1 -> // the length that the walk has to go through - we are sketching kmers in a graph - // cout << "walking graph from node " << node_id << " pos " << pos << " for length " - // << len << endl; // walks from position pos in node node for length len bases assert( (nodes.at(node_id)->pos.start <= pos && nodes.at(node_id)->pos.get_end() >= pos) @@ -163,35 +132,25 @@ std::vector LocalGraph::walk( prg::Path p, p2; std::deque d; - // cout << "pos+len: " << pos+len << " nodes.at(node_id)->pos.get_end(): " << - // nodes.at(node_id)->pos.get_end() << endl; if (pos + len <= nodes.at(node_id) ->pos.get_end()) { // checks if we can go until the end of the kmer p.initialize(Interval( pos, pos + len)); // create the path containing this interval of the node - // cout << "return path: " << p << endl; return_paths.push_back(std::make_shared(p)); - // cout << "return_paths size: " << return_paths.size() << endl; return return_paths; } uint32_t len_added = 
std::min(nodes.at(node_id)->pos.get_end() - pos, len); - // cout << "len: " << len << " len_added: " << len_added << endl; if (len_added < len) { for (auto it = nodes.at(node_id)->outNodes.begin(); it != nodes.at(node_id)->outNodes.end(); ++it) { - // cout << "Following node: " << (*it)->id << " to add " << len-len_added << - // " more bases" << endl; walk_paths = walk((*it)->id, (*it)->pos.start, len - len_added); - // cout << "walk paths size: " << walk_paths.size() << endl; for (auto& walk_path : walk_paths) { // Note, would have just added start interval to each item in // walk_paths, but can't seem to force result of it2 to be non-const - // cout << (*it2) << endl; p2.initialize(Interval(pos, nodes.at(node_id)->pos.get_end())); p2.insert_to_the_end(walk_path->begin(), walk_path->end()); - // cout << "path: " << p2 << " p2.length: " << p2.length << endl; if (p2.length() == len) { return_paths.push_back(std::make_shared(p2)); } @@ -204,8 +163,6 @@ std::vector LocalGraph::walk( std::vector LocalGraph::walk_back( const uint32_t& node_id, const uint32_t& pos, const uint32_t& len) const { - // cout << "start walking back from " << pos << " in node " << node_id << " for - // length " << len << endl; // walks from position pos in node back through prg for length len bases assert( (nodes.at(node_id)->pos.start <= pos && nodes.at(node_id)->pos.get_end() >= pos) @@ -220,13 +177,11 @@ std::vector LocalGraph::walk_back( if (nodes.at(node_id)->pos.start + len <= pos) { p.initialize(Interval(pos - len, pos)); - // cout << "return path: " << p << endl; return_paths.push_back(std::make_shared(p)); return return_paths; } uint32_t len_added = std::min(pos - nodes.at(node_id)->pos.start, len); - // cout << "len: " << len << " len_added: " << len_added << endl; std::vector::iterator innode; if (len_added < len) { @@ -239,9 +194,7 @@ std::vector LocalGraph::walk_back( for (uint32_t i = 0; i != walk_paths.size(); ++i) { p2.initialize(*(walk_paths[i])); p2.add_end_interval(Interval(nodes.at(node_id)->pos.start, pos)); - // cout << p2 << endl; if (p2.length() == len) { - // cout << "output path: " << p2 << endl; return_paths.push_back(std::make_shared(p2)); } } @@ -306,9 +259,6 @@ std::vector LocalGraph::nodes_along_string( for (uint32_t j = 0; j != p.back()->outNodes.size(); ++j) { // if the start of query_string matches extended candidate_string, want // to query candidate path extensions - // if ( - // query_string.substr(0,candidate_string.size()+u[i].back()->outNodes[j]->seq.size()) - // == candidate_string+u[i].back()->outNodes[j]->seq) auto comp_string = candidate_string + p.back()->outNodes[j]->seq; auto comp_length = std::min(query_string.size(), comp_string.size()); if (strcasecmp(query_string.substr(0, comp_length).c_str(), @@ -406,8 +356,7 @@ std::vector LocalGraph::bottom_path() const bool LocalGraph::operator==(const LocalGraph& y) const { // false if have different numbers of nodes - if (y.nodes.size() - != nodes.size()) { // cout << "different numbers of nodes" << endl; + if (y.nodes.size() != nodes.size()) { return false; } @@ -415,17 +364,14 @@ bool LocalGraph::operator==(const LocalGraph& y) const for (const auto& c : nodes) { // if node id doesn't exist auto it = y.nodes.find(c.first); - if (it == y.nodes.end()) { // cout << "node id doesn't exist" << endl; + if (it == y.nodes.end()) { return false; } // or node entries are different if (!(*c.second == *(it->second))) { - // cout << "node id " << c.first << " exists but has different values" << - // endl; return false; } } - // otherwise 
is true return true; } diff --git a/src/localnode.cpp b/src/localnode.cpp index fc228f1d..b153d3e0 100644 --- a/src/localnode.cpp +++ b/src/localnode.cpp @@ -22,23 +22,18 @@ std::ostream& operator<<(std::ostream& out, LocalNode const& n) bool LocalNode::operator==(const LocalNode& y) const { - if (seq != y.seq) { // cout << "different seq" << endl; + if (seq != y.seq) { return false; } - // if (!(pos == y.pos)) {//cout << "different interval" << endl; - // return false;} - if (id != y.id) { // cout << "different id" << endl; + if (id != y.id) { return false; } - if (outNodes.size() - != y.outNodes.size()) { // cout << "differnet numbers of out edges" << endl; + if (outNodes.size() != y.outNodes.size()) { return false; } for (uint32_t i = 0; i != outNodes.size(); ++i) { spointer_values_equal eq = { outNodes[i] }; - if (find_if(y.outNodes.begin(), y.outNodes.end(), eq) - == y.outNodes.end()) { // cout << "the out edge points to a different node" - // << endl; + if (find_if(y.outNodes.begin(), y.outNodes.end(), eq) == y.outNodes.end()) { return false; } } diff --git a/src/minirecord.cpp b/src/minirecord.cpp index 8e0b84d0..a84b7b10 100644 --- a/src/minirecord.cpp +++ b/src/minirecord.cpp @@ -1,4 +1,3 @@ -#include #include #include #include "minirecord.h" @@ -16,8 +15,6 @@ MiniRecord::~MiniRecord() {}; bool MiniRecord::operator==(const MiniRecord& y) const { - // cout << prg_id << "," << y.prg_id << endl; - // cout << path << "," << y.path << endl; if (prg_id != y.prg_id) { return false; } diff --git a/src/noise_filtering.cpp b/src/noise_filtering.cpp index 4249d301..c22f76dc 100644 --- a/src/noise_filtering.cpp +++ b/src/noise_filtering.cpp @@ -4,12 +4,10 @@ #include #include #include -#include #include #include "utils.h" #include "pangenome/pangraph.h" #include "pangenome/pannode.h" -#include "pangenome/panread.h" #include "de_bruijn/graph.h" #include "minihit.h" @@ -88,20 +86,10 @@ bool overlap_backwards( std::deque rc_hashed_node_ids( const std::deque& hashed_node_ids) { - /*for (const auto &n : hashed_node_ids) - { - cout << n << " "; - }*/ std::deque d; for (const auto& i : hashed_node_ids) { d.push_front(rc_num(i)); } - /*cout << " is now "; - for (const auto &n : d) - { - cout << n << " "; - } - cout << endl;*/ return d; } @@ -168,17 +156,8 @@ void dbg_node_ids_to_ids_and_orientations(const debruijn::Graph& dbg, std::deque hashed_pg_node_ids = extend_hashed_pg_node_ids_backwards(dbg, dbg_node_ids); - if (hashed_pg_node_ids.empty()) - hashed_pg_node_ids = extend_hashed_pg_node_ids_forwards(dbg, dbg_node_ids); if (hashed_pg_node_ids.empty()) { - for (const auto& n : dbg_node_ids) { - std::cout << "("; - for (const auto& m : dbg.nodes.at(n)->hashed_node_ids) { - std::cout << m << " "; - } - std::cout << ") "; - } - std::cout << std::endl; + hashed_pg_node_ids = extend_hashed_pg_node_ids_forwards(dbg, dbg_node_ids); } assert(!hashed_pg_node_ids.empty()); hashed_node_ids_to_ids_and_orientations(hashed_pg_node_ids, node_ids, node_orients); @@ -189,7 +168,6 @@ void construct_debruijn_graph( { dbg.nodes.clear(); dbg.node_hash.clear(); - // cout << "Removed old nodes" << endl; debruijn::OrientedNodePtr prev, current; std::deque hashed_ids; @@ -224,10 +202,10 @@ void construct_debruijn_graph( void remove_leaves(std::shared_ptr pangraph, debruijn::Graph& dbg, uint_least32_t covg_thresh) { - std::cout << now() << "Remove leaves of debruijn graph from pangraph" << std::endl; - std::cout << "Start with " << pangraph->nodes.size() << " pg.nodes, " - << pangraph->reads.size() << " pg.reads, and " << 
dbg.nodes.size() - << " dbg.nodes" << std::endl; + BOOST_LOG_TRIVIAL(debug) << "Remove leaves of debruijn graph from pangraph"; + BOOST_LOG_TRIVIAL(debug) << "Start with " << pangraph->nodes.size() << " pg.nodes, " + << pangraph->reads.size() << " pg.reads, and " + << dbg.nodes.size() << " dbg.nodes"; bool leaves_exist = true; std::unordered_set leaves; std::vector node_ids; @@ -238,41 +216,25 @@ void remove_leaves(std::shared_ptr pangraph, debruijn::Graph& while (leaves_exist) { leaves = dbg.get_leaves(covg_thresh); - std::cout << "there are " << leaves.size() << " leaves" << std::endl; + BOOST_LOG_TRIVIAL(trace) << "there are " << leaves.size() << " leaves"; if (leaves.empty()) { leaves_exist = false; } - std::cout << "leaves exist is " << leaves_exist << std::endl; + BOOST_LOG_TRIVIAL(trace) << "leaves exist is " << leaves_exist; for (const auto& i : leaves) { - std::cout << std::endl << "looking at leaf " << i << ": "; - for (const auto& j : dbg.nodes[i]->hashed_node_ids) { - std::cout << j << " "; - } - std::cout << std::endl; - // look up the node ids and orientations associated with this node hashed_node_ids_to_ids_and_orientations( dbg.nodes[i]->hashed_node_ids, node_ids, node_orients); - std::cout << "looked up node ids" << std::endl; // remove the last node from corresponding reads assert(not dbg.nodes[i]->read_ids.empty()); for (const auto& r : dbg.nodes[i]->read_ids) { - std::cout << "remove from read " << r << ": "; - for (const auto& n : pangraph->reads[r]->get_nodes()) { - std::cout << n.lock()->node_id << " "; - } - std::cout << std::endl; if (pangraph->reads[r]->get_nodes().size() == dbg.size) { - std::cout << "remove read"; pangraph->remove_read(r); - std::cout << " done" << std::endl; } else { - std::cout << "remove from read "; pos = pangraph->reads[r]->find_position(node_ids, node_orients); - std::cout << " pos " << pos.first << " " << pos.second; assert(pos.first == 0 or pos.first + node_ids.size() == pangraph->reads[r]->get_nodes().size()); @@ -288,11 +250,6 @@ void remove_leaves(std::shared_ptr pangraph, debruijn::Graph& (--pangraph->reads[r]->get_nodes().end())->lock()->node_id); node.lock()->remove_read(pangraph->reads[r]); } - std::cout << "read is now " << r << ": "; - for (const auto& n : pangraph->reads[r]->get_nodes()) { - std::cout << n.lock()->node_id << " "; - } - std::cout << "done" << std::endl; } } auto node_shared_ptr_from_weak_ptr = node.lock(); @@ -303,13 +260,11 @@ void remove_leaves(std::shared_ptr pangraph, debruijn::Graph& // remove dbg node dbg.remove_node(i); - - // cout << "pg is now: " << endl << pg << endl; } } - std::cout << "There are now " << pangraph->nodes.size() << " pg.nodes, " - << pangraph->reads.size() << " pg.reads, and " << dbg.nodes.size() - << " dbg.nodes" << std::endl; + BOOST_LOG_TRIVIAL(debug) << "There are now " << pangraph->nodes.size() + << " pg.nodes, " << pangraph->reads.size() + << " pg.reads, and " << dbg.nodes.size() << " dbg.nodes"; } void find_reads_along_tig(const debruijn::Graph& dbg, @@ -323,16 +278,9 @@ void find_reads_along_tig(const debruijn::Graph& dbg, reads_along_tig.insert(pangraph->reads.at(r)); } } - std::cout << "candidate reads "; - for (const auto& r : reads_along_tig) { - std::cout << r->id << " "; - } - std::cout << std::endl; - // filter out some which don't really overlap the unitig, keeping those // which overlap at least consecutive 2 dbg nodes or only one node all_reads_along_tig = true; - std::cout << "kept reads along tig: "; for (auto r = reads_along_tig.begin(); r != 
reads_along_tig.end();) { if ((*r)->get_nodes().size() > dbg.size and (*r)->find_position(pg_node_ids, pg_node_orients, dbg.size + 1).first @@ -340,11 +288,9 @@ void find_reads_along_tig(const debruijn::Graph& dbg, r = reads_along_tig.erase(r); all_reads_along_tig = false; } else { - std::cout << (*r)->id << " "; ++r; } } - // cout << endl; } void remove_middle_nodes_of_tig_from_read(std::shared_ptr pangenome, @@ -357,7 +303,7 @@ void remove_middle_nodes_of_tig_from_read(std::shared_ptr pang // if pos = 0 (ie read does not include start of tig) std::pair pos = r->find_position(node_ids, node_orients); - std::cout << "found pos " << pos.first << " " << pos.second << std::endl; + BOOST_LOG_TRIVIAL(trace) << "found pos " << pos.first << " " << pos.second; auto start_shift = pos.first; if (pos.first > 0 or pos.second < r->get_nodes().size() - 1 or node_ids.size() == r->get_nodes().size()) @@ -389,7 +335,7 @@ void remove_middle_nodes_of_tig_from_read(std::shared_ptr pang end_shift = sub_pos.second + 1; } - std::cout << "remove nodes " << start_shift << " to " << end_shift << std::endl; + BOOST_LOG_TRIVIAL(trace) << "remove nodes " << start_shift << " to " << end_shift; auto it = r->get_nodes().begin() + start_shift; for (auto shift = start_shift; shift < end_shift; ++shift) { if (it == r->get_nodes().end()) { @@ -408,22 +354,17 @@ void remove_middle_nodes_of_tig_from_read(std::shared_ptr pang void filter_unitigs(std::shared_ptr pangraph, debruijn::Graph& dbg, const uint_least32_t& threshold) { - std::cout << now() << "Filter unitigs using threshold " << threshold << std::endl; + BOOST_LOG_TRIVIAL(debug) << "Filter unitigs using threshold " << threshold; std::vector node_ids; std::vector node_orients; std::unordered_set reads_along_tig; bool all_reads_tig; std::set> unitigs = dbg.get_unitigs(); - std::cout << "have " << unitigs.size() << " tigs" << std::endl; + BOOST_LOG_TRIVIAL(debug) << "have " << unitigs.size() << " tigs"; for (auto d : unitigs) { // look up the node ids and orientations associated with this node dbg_node_ids_to_ids_and_orientations(dbg, d, node_ids, node_orients); - std::cout << "tig: "; - for (const auto& n : node_ids) { - std::cout << n << " "; - } - std::cout << std::endl; // collect the reads covering that tig find_reads_along_tig( @@ -432,31 +373,18 @@ void filter_unitigs(std::shared_ptr pangraph, debruijn::Graph& // now if the number of reads covering tig falls below threshold, remove the // middle nodes of this tig from the reads if (reads_along_tig.size() <= threshold) { - std::cout << "not enough reads, so remove the tig from the reads" - << std::endl; + BOOST_LOG_TRIVIAL(trace) + << "not enough reads, so remove the tig from the reads"; for (const auto& r : reads_along_tig) { - std::cout << "read " << r->id << " was "; - for (const auto& n : r->get_nodes()) { - std::cout << n.lock()->node_id << " "; - } - std::cout << std::endl; remove_middle_nodes_of_tig_from_read( pangraph, dbg, r, node_ids, node_orients); - std::cout << "read " << r->id << " is now "; - for (const auto& n : r->get_nodes()) { - std::cout << n.lock()->node_id << " "; - } - std::cout << std::endl; } // also remove read_ids from each of the corresponding nodes of dbg - // cout << "now remove read from dbg nodes" << endl; for (uint32_t i = 1; i < d.size() - 1; ++i) { for (const auto& r : reads_along_tig) { dbg.remove_read_from_node(r->id, d[i]); } } - } else { - std::cout << "tig had enough reads" << std::endl; } reads_along_tig.clear(); } @@ -465,7 +393,7 @@ void filter_unitigs(std::shared_ptr 
pangraph, debruijn::Graph& void detangle_pangraph_with_debruijn_graph( std::shared_ptr pangraph, debruijn::Graph& dbg) { - std::cout << now() << "Detangle pangraph with debruijn graph" << std::endl; + BOOST_LOG_TRIVIAL(debug) << "Detangle pangraph with debruijn graph"; std::vector node_ids; std::vector node_orients; bool all_reads_tig; @@ -475,13 +403,6 @@ void detangle_pangraph_with_debruijn_graph( for (auto d : unitigs) { // look up the node ids and orientations associated with this node dbg_node_ids_to_ids_and_orientations(dbg, d, node_ids, node_orients); - /*cout << "tig: "; - for (const auto &n : node_ids) - { - cout << n << " "; - } - cout << endl;*/ - // collect the reads covering that tig find_reads_along_tig( dbg, d, pangraph, node_ids, node_orients, reads_along_tig, all_reads_tig); @@ -490,20 +411,14 @@ void detangle_pangraph_with_debruijn_graph( // if we find a read which doesn't lie along whole tig, // split that node by reads and create a new node on the tig if (!all_reads_tig and !reads_along_tig.empty()) { - // cout << "not all reads contain tig" << endl; for (uint32_t i = 0; i < node_ids.size(); ++i) { - // cout << "for node " << pg->nodes[node_ids[i]]->node_id << endl; for (const auto& r : pangraph->nodes[node_ids[i]]->reads) { - // cout << "for read " << r->id << endl; if (reads_along_tig.find(r) == reads_along_tig.end()) { - // cout << "split node" << endl; pangraph->split_node_by_reads( reads_along_tig, node_ids, node_orients, node_ids[i]); - // cout << "done" << endl; break; } } - // cout << pg << endl; } } } @@ -512,21 +427,21 @@ void detangle_pangraph_with_debruijn_graph( void clean_pangraph_with_debruijn_graph(std::shared_ptr pangraph, const uint_least32_t size, const uint_least32_t threshold, const bool illumina) { - std::cout << now() << "Construct de Bruijn Graph from PanGraph with size " - << (uint32_t)size << std::endl; + BOOST_LOG_TRIVIAL(debug) << "Construct de Bruijn Graph from PanGraph with size " + << (uint32_t)size; debruijn::Graph dbg(size); construct_debruijn_graph(pangraph, dbg); if (not illumina) remove_leaves(pangraph, dbg, threshold); filter_unitigs(pangraph, dbg, threshold); - std::cout << "Finished filtering tigs" << std::endl; + BOOST_LOG_TRIVIAL(debug) << "Finished filtering tigs"; // update dbg now that have removed leaves and some inner nodes - std::cout << "Reconstruct dbg" << std::endl; + BOOST_LOG_TRIVIAL(trace) << "Reconstruct dbg"; construct_debruijn_graph(pangraph, dbg); - std::cout << "Now detangle" << std::endl; + BOOST_LOG_TRIVIAL(trace) << "Now detangle"; detangle_pangraph_with_debruijn_graph(pangraph, dbg); } diff --git a/src/pangenome/pangraph.cpp b/src/pangenome/pangraph.cpp index 9f668f83..3cd3bd2e 100644 --- a/src/pangenome/pangraph.cpp +++ b/src/pangenome/pangraph.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -14,8 +13,6 @@ #include "pangenome/pannode.h" #include "pangenome/panread.h" #include "pangenome/pansample.h" -#include "minihit.h" -#include "kmergraphwithcoverage.h" #include "fastaq_handler.h" #define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) @@ -150,7 +147,6 @@ void pangenome::Graph::add_hits_between_PRG_and_sample( // Remove the node n, and all references to it std::unordered_map::iterator pangenome::Graph::remove_node(NodePtr n) { - // cout << "Remove graph node " << *n << endl; // removes all instances of node n and references to it in reads for (const auto& r : n->reads) { r->remove_all_nodes_with_this_id(n->node_id); @@ -169,7 +165,6 @@ 
std::unordered_map::iterator pangenome::Graph::remove_node(No void pangenome::Graph::remove_read(const uint32_t read_id) { for (const auto& n : reads[read_id]->nodes) { - // cout << "looking at read node " << n->node_id; auto nSharedPtr = n.lock(); nSharedPtr->covg -= 1; nSharedPtr->reads.erase(reads[read_id]); @@ -209,11 +204,8 @@ void pangenome::Graph::remove_low_covg_nodes(const uint32_t& thresh) { BOOST_LOG_TRIVIAL(debug) << "Remove nodes with covg <= " << thresh << std::endl; for (auto it = nodes.begin(); it != nodes.end();) { - // cout << "look at node " << *(it->second) << endl; if (it->second->covg <= thresh) { - // cout << "delete node " << it->second->name; it = remove_node(it->second); - // cout << " so pangraph now has " << nodes.size() << " nodes" << endl; } else { ++it; } @@ -262,10 +254,7 @@ void pangenome::Graph::split_node_by_reads(std::unordered_set& reads_al // replace the node in the read if (it != r->nodes.end()) { - // cout << "replace node " << (*it)->node_id << " in this read" << endl; - // cout << "read was " << *r << endl; r->replace_node_with_iterator(it, n); - // cout << "read is now " << *r << endl; nodes[node_id]->reads.erase(rit); nodes[node_id]->covg -= 1; if (nodes[node_id]->covg == 0) { @@ -273,9 +262,6 @@ void pangenome::Graph::split_node_by_reads(std::unordered_set& reads_al } n->reads.insert(r); n->covg += 1; - //} else { - // cout << "read does not contain this node in context of the tig" << - // endl; } } @@ -288,34 +274,6 @@ void pangenome::Graph::split_node_by_reads(std::unordered_set& reads_al } } -/*Graph::unordered_set find_reads_on_node_path(const vector -node_path_ids, const vector node_path_orients) -{ - unordered_set reads_on_node_path; - - // collect all reads on nodes - for (const auto &i : node_path_ids) - { - for (const auto &r : nodes[n]->reads) - { - reads_on_node_path.insert(r); - } - } - - // remove reads which deviate from path - for (auto rit = reads_on_node_path.begin(); rit != reads_on_node_path.end();) - { - if ((*rit)->find_position(node_path_ids, node_path_orients) == -std::numeric_limits::max()) - { - rit = reads_on_node_path.erase(rit); - } else { - rit++; - } - } - return reads_on_node_path; -}*/ - // For each node in pangraph, make a copy of the kmergraph and use the hits // stored on each read containing the node to add coverage to this graph void pangenome::Graph::add_hits_to_kmergraphs( @@ -467,12 +425,6 @@ bool same_prg_id::operator()(const std::pair& n) const bool pangenome::Graph::operator==(const Graph& y) const { - // false if have different numbers of nodes - /*if (y.nodes.size() != nodes.size()) { - cout << "different num nodes " << nodes.size() << "!=" << y.nodes.size() << - endl; return false; - }*/ - // false if have different nodes for (const auto& c : nodes) { // if node id doesn't exist @@ -538,9 +490,9 @@ void pangenome::Graph::save_mapped_read_strings( // for each node in pangraph, find overlaps and write to a file std::vector> read_overlap_coordinates; for (const auto& node_ptr : nodes) { - std::cout << "Find coordinates for node " << node_ptr.second->name; + BOOST_LOG_TRIVIAL(debug) + << "Find coordinates for node " << node_ptr.second->name; node_ptr.second->get_read_overlap_coordinates(read_overlap_coordinates); - std::cout << "." 
<< std::endl; fs::create_directories(outdir + "/" + node_ptr.second->get_name()); outhandle.open(outdir + "/" + node_ptr.second->get_name() + "/" + node_ptr.second->get_name() + ".reads.fa"); @@ -571,13 +523,12 @@ void pangenome::Graph::save_mapped_read_strings( std::ostream& pangenome::operator<<(std::ostream& out, const pangenome::Graph& m) { - // cout << "printing pangraph" << endl; for (const auto& n : m.nodes) { - std::cout << n.second->prg_id << std::endl; - std::cout << *n.second << std::endl; + out << n.second->prg_id << std::endl; + out << *n.second << std::endl; } for (const auto& n : m.reads) { - std::cout << *(n.second) << std::endl; + out << *(n.second) << std::endl; } return out; diff --git a/src/pangenome/panread.cpp b/src/pangenome/panread.cpp index 4dcd54c1..6a87e50e 100644 --- a/src/pangenome/panread.cpp +++ b/src/pangenome/panread.cpp @@ -1,7 +1,5 @@ #include -#include #include -#include #include #include #include @@ -110,42 +108,21 @@ std::pair Read::find_position( const std::vector& node_ids, const std::vector& node_orients, const uint16_t min_overlap) { - /*cout << "searching for "; - for (const auto &n : node_ids) - { - cout << n << " "; - } - cout << " in "; - for (const auto &n : nodes) - { - cout << n->node_id << " "; - } - cout << endl;*/ - assert(node_ids.size() == node_orients.size()); assert(not node_ids.empty()); uint32_t search_pos = 0; uint32_t found_pos = 0; - // cout << "searching forwards for " << node_ids[0] << " " << node_orients[0]; - // cout << " and backwards for " << node_ids.back() << " " << !node_orients.back() - // << endl; - for (uint32_t i = 0; i < nodes.size(); ++i) { - // cout << "compare nodes pos " << i << " with node_ids 0" << endl; // if first node matches at position i going forwards... if (nodes[i].lock()->node_id == node_ids[0] and node_orientations[i] == node_orients[0]) { - // cout << "start node " << i << " fwd " << nodes[i]->node_id << " " << - // node_orientations[i]; cout << " matches " << node_ids[0] << " and " << - // node_orients[0] << endl; search_pos = 0; found_pos = 0; while (i + found_pos < nodes.size() and nodes[i + found_pos].lock()->node_id == node_ids[search_pos] and node_orientations[i + found_pos] == node_orients[search_pos]) { - // cout << "fwd " << search_pos << " " << i + found_pos << endl; if (search_pos == node_ids.size() - 1 or i + found_pos == nodes.size() - 1) { if (found_pos + 1 >= min_overlap) { @@ -157,30 +134,20 @@ std::pair Read::find_position( search_pos++; found_pos++; } - // cout << "end fwd" << endl; } // if i+node_ids.size() is over the end of nodes, consider partial matches which // skip the first nodes - // cout << "compare nodes pos " << 0 << " with node_ids " << i + node_ids.size() - // - nodes.size() << endl; if (i + node_ids.size() > nodes.size() and nodes[0].lock()->node_id == node_ids[i + node_ids.size() - nodes.size()] and node_orientations[0] == node_orients[i + node_orients.size() - nodes.size()]) { - // cout << "start node " << i << " truncated fwd " << nodes[0]->node_id; - // cout << " " << node_orientations[0]; - // cout << " matches " << node_ids[i + node_ids.size() - nodes.size()]; - // cout << " and " << node_orients[i + node_ids.size() - nodes.size()] << - // endl; - search_pos = i + node_ids.size() - nodes.size(); found_pos = 0; while (found_pos < nodes.size() and nodes[found_pos].lock()->node_id == node_ids[search_pos] and node_orientations[found_pos] == node_orients[search_pos]) { - // cout << "fwd " << search_pos << " " << found_pos << endl; if (search_pos == node_ids.size() - 
1 or found_pos == nodes.size() - 1) { if (found_pos + 1 >= min_overlap) { @@ -194,14 +161,9 @@ std::pair Read::find_position( } } - // cout << "compare nodes pos " << nodes.size() -1 -i << " with node_ids " << 0 - // << endl; if (nodes[nodes.size() - 1 - i].lock()->node_id == node_ids[0] and node_orientations[node_orientations.size() - 1 - i] == !node_orients[0]) { - // cout << "start node " << i << " bwd " << nodes[nodes.size() -1 - // -i]->node_id; cout << " " << node_orientations[nodes.size() -1 -i]; cout - // << " matches " << node_ids[0] << " and " << !node_orients[0] << endl; search_pos = 0; found_pos = 0; @@ -210,8 +172,6 @@ std::pair Read::find_position( == node_ids[search_pos] and node_orientations[nodes.size() - 1 - i - found_pos] == !node_orients[search_pos]) { - // cout << "bwd " << search_pos << " " << nodes.size() -1 -i -found_pos - // << endl; if (search_pos == node_ids.size() - 1 or i + 1 + found_pos == nodes.size()) { if (found_pos + 1 >= min_overlap) { @@ -224,29 +184,15 @@ std::pair Read::find_position( search_pos++; found_pos++; } - // cout << "end bwd" << endl; } // if we are considering matches which overlap the start backwards, also // consider ones which overlap the end backwards by the same amount - // cout << "compare nodes pos " << nodes.size() -1 << " with node_ids " << - // nodes.size() -1 - i << endl; if (i + node_ids.size() > nodes.size() - // and nodes[nodes.size() -1 -i]->node_id == node_ids[i + node_ids.size() - - // nodes.size()] and node_orientations[node_orientations.size() -1 -i] == - // !node_orients[i + node_orients.size() - nodes.size()]) and nodes.back().lock()->node_id == node_ids[i + node_ids.size() - nodes.size()] and node_orientations.back() == !node_orients[i + node_orients.size() - nodes.size()]) { - // cout << "start node " << i << " truncated bwd " << nodes.back()->node_id; - // cout << " " << node_orientations.back(); - // //cout << " matches " << node_ids[node_ids.size() - nodes.size()-1 - i]; - // //cout << " and " << !node_orients[node_ids.size() - nodes.size()-1 - i] - // << endl; - // cout << " matches " << node_ids[i + node_ids.size() - nodes.size()]; - // cout << " and " << !node_orients[i + node_ids.size() - nodes.size()] << - // endl; search_pos = i + node_ids.size() - nodes.size(); found_pos = 0; @@ -255,7 +201,6 @@ std::pair Read::find_position( == node_ids[search_pos] and node_orientations[nodes.size() - 1 - found_pos] == !node_orients[search_pos]) { - // cout << "bwd " << search_pos << " " << found_pos << endl; if (search_pos == node_ids.size() - 1 or i + 1 + found_pos == nodes.size()) { if (found_pos + 1 >= min_overlap) { @@ -289,7 +234,6 @@ void Read::remove_all_nodes_with_this_id(const uint32_t node_id) std::vector::iterator Read::remove_node_with_iterator( std::vector::iterator nit) { - //(*nit)->covg -= 1; uint32_t d = distance(nodes.begin(), nit); node_orientations.erase(node_orientations.begin() + d); nit = nodes.erase(nit); @@ -299,11 +243,8 @@ std::vector::iterator Read::remove_node_with_iterator( void Read::replace_node_with_iterator( std::vector::iterator n_original, NodePtr n) { - // hits[n->node_id].insert(hits[(*n_original)->node_id].begin(),hits[(*n_original)->node_id].end() - // ); auto it = nodes.erase(n_original); nodes.insert(it, n); - // hits.erase((*n_original)->node_id); } bool Read::operator==(const Read& y) const diff --git a/src/prg/path.cpp b/src/prg/path.cpp index 4d39988a..39f82bb1 100644 --- a/src/prg/path.cpp +++ b/src/prg/path.cpp @@ -69,8 +69,7 @@ std::vector prg::Path::nodes_along_path(const 
LocalPRG& localPrg) prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const { // function now returns the path starting at position start along the path, rather - // than at position start on linear PRG, and for length len cout << "find subpath of - // " << *this << " from " << start << " for length " << len << endl; + // than at position start on linear PRG, and for length len assert(start + len <= length()); prg::Path p; std::deque d; @@ -87,17 +86,12 @@ prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const p.initialize(d); added_len += std::min(len - added_len, interval.length - start + covered_length); - /// cout << "added first interval " << p.path.back() << " and added length - /// is now " << added_len << endl; } else if (covered_length >= start and added_len <= len) { p.add_end_interval(Interval(interval.start, std::min(interval.get_end(), interval.start + len - added_len))); added_len += std::min(len - added_len, interval.length); - // cout << "added interval " << p.path.back() << " and added length is now " - // << added_len << endl; } covered_length += interval.length; - // cout << "covered length is now " << covered_length << endl; if (added_len >= len) { break; } @@ -121,7 +115,6 @@ bool prg::Path::is_branching(const prg::Path& y) if (overlap) { if (it->start != it2->start) { // had paths which overlapped and now don't - // cout << "branch" << endl; return true; // represent branching paths at this point } ++it2; @@ -131,17 +124,14 @@ bool prg::Path::is_branching(const prg::Path& y) } } else { for (it2 = y.path.begin(); it2 != y.path.end(); ++it2) { - // cout << *it << " " << *it2 << endl; if ((it->get_end() > it2->start and it->start < it2->get_end()) or (*it == *it2)) { // then the paths overlap overlap = true; - // cout << "overlap" << endl; // first query the previous intervals if they exist, to see if they // overlap if (it != path.begin() && it2 != y.path.begin() && (--it)->get_end() != (--it2)->get_end()) { - // cout << "coal" << endl; return true; // represent coalescent paths at this point } else { ++it2; @@ -163,9 +153,6 @@ bool prg::Path::is_subpath(const prg::Path& big_path) const { if (big_path.length() < length() or big_path.get_start() > get_start() or big_path.get_end() < get_end() or is_branching(big_path)) { - // cout << "fell at first hurdle " << (big_path.length() < length()); - // cout << (big_path.start > start) << (big_path.end < end); - // cout << (is_branching(big_path)) << endl; return false; } diff --git a/src/seq.cpp b/src/seq.cpp index 5331ee4b..646a24e1 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -75,7 +75,6 @@ void Seq::add_minimizing_kmers_to_sketch( for (const auto& minimizer : window) { if (minimizer.canonical_kmer_hash == smallest) { sketch.insert(minimizer); - // num_minis_found += 1; } } } @@ -138,7 +137,6 @@ void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) "still has size " << window.size())); } - // cout << now() << "Sketch size " << sketch.size() << " for read " << name << endl; } std::ostream& operator<<(std::ostream& out, Seq const& data) diff --git a/src/utils.cpp b/src/utils.cpp index 206cf71c..4c12119d 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,13 +1,10 @@ #include -#include #include #include #include #include -#include #include #include -#include #include #include #include @@ -18,7 +15,6 @@ #include "seq.h" #include "localPRG.h" #include "pangenome/pangraph.h" -#include "pangenome/panread.h" #include "noise_filtering.h" #include "minihit.h" #include 
"fastaq_handler.h" @@ -76,7 +72,6 @@ char complement(char n) case 'c': return 'G'; } - // assert(false); return 'N'; } @@ -134,7 +129,7 @@ void read_prg_file(std::vector>& prgs, prgs.push_back(s); id++; } else { - std::cerr << "Failed to make LocalPRG for " << fh.name << std::endl; + BOOST_LOG_TRIVIAL(error) << "Failed to make LocalPRG for " << fh.name; exit(1); } } @@ -151,12 +146,10 @@ void load_PRG_kmergraphs(std::vector>& prgs, prefix += prgfile.substr(0, pos); prefix += "/"; } - // cout << "prefix for kmerprgs dir is " << prefix << endl; auto dir_num = 0; std::string dir; for (const auto& prg : prgs) { - // cout << "Load kmergraph for " << prg->name << endl; if (prg->id % 4000 == 0) { dir = prefix + "kmer_prgs/" + int_to_string(dir_num + 1); dir_num++; @@ -189,9 +182,6 @@ void load_vcf_refs_file(const std::string& filepath, VCFRefs& vcf_refs) void add_read_hits(const Seq& sequence, const std::shared_ptr& minimizer_hits, const Index& index) { - // cout << now() << "Search for hits for read " << s->name << " which has sketch - // size " << s->sketch.size() << " against index of size " << idx->minhash.size() << - // endl; uint32_t hit_count = 0; // creates Seq object for the read, then looks up minimizers in the Seq sketch and // adds hits to a global MinimizerHits object // Seq s(id, name, seq, w, k); @@ -202,14 +192,9 @@ void add_read_hits(const Seq& sequence, // yes, add all hits of this minimizer hit to this kmer for (const MiniRecord& miniRecord : *(minhashIt->second)) { minimizer_hits->add_hit(sequence.id, *sequenceSketchIt, miniRecord); - //++hit_count; } } } - // hits->sort(); - // cout << now() << "Found " << hit_count << " hits found for read " << s->name << " - // so size of MinimizerHits is now " - // << hits->hits.size() + hits->uhits.size() << endl; } void define_clusters(std::set& clusters_of_hits, @@ -218,7 +203,7 @@ void define_clusters(std::set& clusters_of_hit const float& fraction_kmers_required_for_cluster, const uint32_t min_cluster_size, const uint32_t expected_number_kmers_in_short_read_sketch) { - BOOST_LOG_TRIVIAL(debug) << "Define clusters of hits from the " + BOOST_LOG_TRIVIAL(trace) << "Define clusters of hits from the " << minimizer_hits->hits.size() << " hits"; if (minimizer_hits->hits.empty()) { @@ -245,7 +230,7 @@ void define_clusters(std::set& clusters_of_hit prgs[(*mh_previous)->get_prg_id()]->kmer_prg.min_path_length(), expected_number_kmers_in_short_read_sketch) * fraction_kmers_required_for_cluster; - BOOST_LOG_TRIVIAL(debug) + BOOST_LOG_TRIVIAL(trace) << "Length based cluster threshold min(" << prgs[(*mh_previous)->get_prg_id()]->kmer_prg.min_path_length() << ", " << expected_number_kmers_in_short_read_sketch << ") * " @@ -255,9 +240,8 @@ void define_clusters(std::set& clusters_of_hit if (current_cluster.size() > std::max(length_based_threshold, min_cluster_size)) { clusters_of_hits.insert(current_cluster); - // cout << "Found cluster of size " << current_cluster.size() << endl; } else { - BOOST_LOG_TRIVIAL(debug) + BOOST_LOG_TRIVIAL(trace) << "Rejected cluster of size " << current_cluster.size() << " < max(" << length_based_threshold << ", " << min_cluster_size << ")"; @@ -271,7 +255,7 @@ void define_clusters(std::set& clusters_of_hit = std::min(prgs[(*mh_previous)->get_prg_id()]->kmer_prg.min_path_length(), expected_number_kmers_in_short_read_sketch) * fraction_kmers_required_for_cluster; - BOOST_LOG_TRIVIAL(debug) + BOOST_LOG_TRIVIAL(trace) << "Length based cluster threshold min(" << prgs[(*mh_previous)->get_prg_id()]->kmer_prg.min_path_length() 
<< ", " << expected_number_kmers_in_short_read_sketch << ") * " @@ -279,39 +263,27 @@ void define_clusters(std::set& clusters_of_hit if (current_cluster.size() > std::max(length_based_threshold, min_cluster_size)) { clusters_of_hits.insert(current_cluster); } else { - BOOST_LOG_TRIVIAL(debug) + BOOST_LOG_TRIVIAL(trace) << "Rejected cluster of size " << current_cluster.size() << " < max(" << length_based_threshold << ", " << min_cluster_size << ")"; } - BOOST_LOG_TRIVIAL(debug) << "Found " << clusters_of_hits.size() + BOOST_LOG_TRIVIAL(trace) << "Found " << clusters_of_hits.size() << " clusters of hits"; } void filter_clusters(std::set& clusters_of_hits) { // Next order clusters, choose between those that overlap by too much - BOOST_LOG_TRIVIAL(debug) << "Filter the " << clusters_of_hits.size() + BOOST_LOG_TRIVIAL(trace) << "Filter the " << clusters_of_hits.size() << " clusters of hits"; if (clusters_of_hits.empty()) { return; } // to do this consider pairs of clusters in turn auto c_previous = clusters_of_hits.begin(); - /*cout << "first cluster" << endl; - for (set::iterator p=c_previous->begin(); - p!=c_previous->end(); ++p) - { - cout << **p << endl; - }*/ for (auto c_current = ++clusters_of_hits.begin(); c_current != clusters_of_hits.end(); ++c_current) { - /*cout << "current cluster" << endl; - for (set::iterator p=c_current->begin(); - p!=c_current->end(); ++p) - { - cout << **p << endl; - }*/ if (((*(*c_current).begin())->get_read_id() == (*(*c_previous).begin())->get_read_id()) && // if on same read and either @@ -330,16 +302,14 @@ void filter_clusters(std::set& clusters_of_hit { if (c_previous->size() >= c_current->size()) { clusters_of_hits.erase(c_current); - // cout << "erase current" << endl; c_current = c_previous; } else { clusters_of_hits.erase(c_previous); - // cout << "erase previous" << endl; } } c_previous = c_current; } - BOOST_LOG_TRIVIAL(debug) << "Now have " << clusters_of_hits.size() + BOOST_LOG_TRIVIAL(trace) << "Now have " << clusters_of_hits.size() << " clusters of hits"; } @@ -348,7 +318,7 @@ void filter_clusters2(std::set& clusters_of_hi { // Sort clusters by size, and filter out those small clusters which are entirely // contained in bigger clusters on reads - BOOST_LOG_TRIVIAL(debug) << "Filter2 the " << clusters_of_hits.size() + BOOST_LOG_TRIVIAL(trace) << "Filter2 the " << clusters_of_hits.size() << " clusters of hits"; if (clusters_of_hits.empty()) { return; @@ -359,25 +329,18 @@ void filter_clusters2(std::set& clusters_of_hi auto it = clusters_by_size.begin(); std::vector read_v(genome_size, 0); - // cout << "fill from " << (*(it->begin()))->read_start_position() << " to " << - // (*--(it->end()))->read_start_position() << endl; fill(read_v.begin() + (*(it->begin()))->get_read_start_position(), read_v.begin() + (*--(it->end()))->get_read_start_position(), 1); bool contained; for (auto it_next = ++clusters_by_size.begin(); it_next != clusters_by_size.end(); ++it_next) { - // cout << "read id " << (*(it_next->begin()))->get_prg_id() << endl; if ((*(it_next->begin()))->get_read_id() == (*(it->begin()))->get_read_id()) { // check if have any 0s in interval of read_v between first and last contained = true; for (uint32_t i = (*(it_next->begin()))->get_read_start_position(); i < (*--(it_next->end()))->get_read_start_position(); ++i) { - // cout << i << ":" << read_v[i] << "\t"; if (read_v[i] == 0) { contained = false; - // cout << "found unique element at read position " << i << endl; - // cout << "fill from " << i << " to " << - // 
(*--(it_next->end()))->get_read_start_position() << endl; fill(read_v.begin() + i, read_v.begin() + (*--(it_next->end()))->get_read_start_position(), @@ -385,20 +348,16 @@ void filter_clusters2(std::set& clusters_of_hi break; } } - // cout << endl; if (contained) { - // cout << "erase cluster so clusters_of_hits has size decrease from " - // << clusters_of_hits.size(); clusters_of_hits.erase(*it_next); - // cout << " to " << clusters_of_hits.size() << endl; } } else { - // cout << "consider new read" << endl; + // consider new read fill(read_v.begin(), read_v.end(), 0); } ++it; } - BOOST_LOG_TRIVIAL(debug) << "Now have " << clusters_of_hits.size() + BOOST_LOG_TRIVIAL(trace) << "Now have " << clusters_of_hits.size() << " clusters of hits"; } @@ -407,7 +366,7 @@ void add_clusters_to_pangraph( std::shared_ptr pangraph, const std::vector>& prgs) { - BOOST_LOG_TRIVIAL(debug) << "Add inferred order to PanGraph"; + BOOST_LOG_TRIVIAL(trace) << "Add inferred order to PanGraph"; if (clusters_of_hits.empty()) { return; } @@ -645,6 +604,7 @@ uint32_t strtogs(const char* str) return (uint32_t)(x + .499); } -std::string transform_cli_gsize(std::string str) { +std::string transform_cli_gsize(std::string str) +{ return int_to_string(strtogs(str.c_str())); } diff --git a/src/vcf.cpp b/src/vcf.cpp index 9b2f0a8a..fdd858f8 100644 --- a/src/vcf.cpp +++ b/src/vcf.cpp @@ -78,7 +78,6 @@ ptrdiff_t VCF::get_sample_index(const std::string& name) // if this sample has not been added before, add a column for it auto sample_it = find(samples.begin(), samples.end(), name); if (sample_it == samples.end()) { - // cout << "this is the first time this sample has been added" << endl; samples.push_back(name); for (auto& record_ptr : records) { record_ptr->add_new_samples(1); @@ -95,14 +94,10 @@ void VCF::add_a_new_record_discovered_in_a_sample_and_genotype_it( const std::string& sample_name, const std::string& chrom, const uint32_t pos, const std::string& ref, const std::string& alt) { - // cout << "adding gt " << chrom << " " << pos << " " << ref << " vs " << - // sample_name << " " << alt << endl; if (ref == "" and alt == "") { return; } - // cout << "adding gt " << ref << " vs " << alt << endl; - ptrdiff_t sample_index = get_sample_index(sample_name); VCFRecord vcf_record(this, chrom, pos, ref, alt); @@ -112,14 +107,11 @@ void VCF::add_a_new_record_discovered_in_a_sample_and_genotype_it( vcf_record); // TODO: improve this search to log(n) using alt map or sth bool vcf_record_was_found = vcf_record_iterator != records.end(); if (vcf_record_was_found) { - // cout << "found record with this ref and alt" << endl; (*vcf_record_iterator) ->sampleIndex_to_sampleInfo[sample_index] .set_gt_from_max_likelihood_path(1); vcf_record_pointer = vcf_record_iterator->get(); } else { - // cout << "didn't find alt record for pos " << pos << " ref " << ref << " and - // alt " << alt << endl; // either we have the ref allele, an alternative allele for alt too nested site, // or alt mistake bool vcf_record_was_processed = false; @@ -137,7 +129,6 @@ void VCF::add_a_new_record_discovered_in_a_sample_and_genotype_it( } } } else { - // cout << "have new allele not in graph" << endl; add_record( chrom, pos, ref, alt, "SVTYPE=COMPLEX", "GRAPHTYPE=TOO_MANY_ALTS"); records.back() @@ -174,8 +165,6 @@ void VCF::update_other_samples_of_this_record(VCFRecord* reference_record) and other_record->sampleIndex_to_sampleInfo[sample_index] .get_gt_from_max_likelihood_path() == 0) { - // cout << "update my record to have ref allele also for sample " << - // 
sample_index << endl; cout << records[record_index] << endl; reference_record->sampleIndex_to_sampleInfo[sample_index] .set_gt_from_max_likelihood_path(0); } @@ -195,7 +184,6 @@ void VCF::set_sample_gt_to_ref_allele_for_records_in_the_interval( if (record->ref_allele_is_inside_given_interval(chrom, pos_from, pos_to)) { record->sampleIndex_to_sampleInfo[sample_index] .set_gt_from_max_likelihood_path(0); - // cout << "update record " << records[i] << endl; } } } @@ -316,13 +304,6 @@ void VCF::merge_multi_allelic_core(VCF& merged_VCF, uint32_t max_allele_length) .empty()) { assert_msg("VCF::merge_multi_allelic: vcf_record_to_be_merged_in " "has no samples"); - /* - records.at(current_vcf_record_position)->clear(); - records.at(previous_vcf_record_position)->clear(); - merged_VCF.add_record_core(vcf_record_merged); - previous_vcf_record_position = records.size() - 1; - vcf_record_merged = *(records.at(previous_vcf_record_position)); - */ } vcf_record_merged->merge_record_into_this( *vcf_record_to_be_merged_in_pointer); From 23328483e1f31488cc71d32c27b16da0bce3d0cd Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 29 Oct 2020 14:20:39 +1000 Subject: [PATCH 6/9] update readme and type name for discover-k --- README.md | 5 +++-- src/denovo_discovery/discover_main.cpp | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e9115ac8..d0f02230 100644 --- a/README.md +++ b/README.md @@ -261,7 +261,7 @@ This will look for regions in the pangraph where the reads do not map and attemp ``` $ pandora discover --help Quasi-map reads to an indexed PRG, infer the sequence of present loci in the sample and discover novel variants. -Usage: ./pandora discover [OPTIONS] +Usage: pandora discover [OPTIONS] Positionals: FILE [required] An indexed PRG file (in fasta format) @@ -269,7 +269,7 @@ Positionals: Options: -h,--help Print this help message and exit - --discover-k INT K-mer size to use when discovering novel variants [default: 11] + --discover-k INT:[0-32) K-mer size to use when discovering novel variants [default: 11] --max-ins INT Max. insertion size for novel variants. Warning: setting too long may impair performance [default: 15] --covg-threshold INT Positions with coverage less than this will be tagged for variant discovery [default: 3] -l INT Min. length of consecutive positions below coverage threshold to trigger variant discovery [default: 1] @@ -303,6 +303,7 @@ Preset: Filtering: --clean Add a step to clean and detangle the pangraph + --clean-dbg Clean the local assembly de Bruijn graph --max-covg INT Maximum coverage of reads to accept [default: 600] Consensus/Variant Calling: diff --git a/src/denovo_discovery/discover_main.cpp b/src/denovo_discovery/discover_main.cpp index f2c6788a..52976ef6 100644 --- a/src/denovo_discovery/discover_main.cpp +++ b/src/denovo_discovery/discover_main.cpp @@ -112,7 +112,8 @@ void setup_discover_subcommand(CLI::App& app) ->add_option("--discover-k", opt->denovo_kmer_size, "K-mer size to use when discovering novel variants") ->capture_default_str() - ->check(CLI::Range(0, MAX_DENOVO_K)) + ->check(CLI::Range(0, MAX_DENOVO_K) + .description("[0-" + std::to_string(MAX_DENOVO_K) + ")")) ->type_name("INT"); std::string description = "Max. insertion size for novel variants. 
Warning: " From 8988378250f05f8e0edd19a772cf316e0931c3ac Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 29 Oct 2020 16:17:20 +1000 Subject: [PATCH 7/9] add build and test stage to travis change boost install method remove travis_retry change boost build dir add boost root to cmake WIP: trying to get cmake building WIP: dont rely on previous CMAKE_OPTIONS variable WIP: add missing boost variables to script WIP: fiddling with boost root WIP: comment out cache WIP: try and make boost install simpler WIP: trying to get pandora to include boost WIP: try to install boost in /usr WIP: add sudo WIP: fix ctest verbose flag WIP: try and cache boost WIP: format readme and update travis badges WIP: remove caching of /usr dirs --- .travis.yml | 41 +++++++++----- README.md | 149 +++++++++++++++++++++++++++++++++++++++----------- ci/install.sh | 17 ++++++ ci/script.sh | 12 ++++ 4 files changed, 172 insertions(+), 47 deletions(-) create mode 100644 ci/install.sh create mode 100644 ci/script.sh diff --git a/.travis.yml b/.travis.yml index 20a626ca..00b7e9b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,20 @@ language: cpp compiler: gcc os: linux dist: bionic +env: + - OMP_NUM_THREADS=4 # https://docs.travis-ci.com/user/languages/cpp/#openmp-projects + +git: + depth: 3 # https://docs.travis-ci.com/user/customizing-the-build/#git-clone-depth + +cache: + directories: + - ${TRAVIS_BUILD_DIR}/deps/boost-1.62.0 + +branches: + only: # build all branches https://docs.travis-ci.com/user/customizing-the-build/#safelisting-or-blocklisting-branches + - gh-pages + - /.*/ include: - os: linux @@ -9,28 +23,27 @@ include: apt: packages: - clang-format-8 + - zlib1g-dev jobs: + fast_finish: true include: - - stage: "Lint" # naming the Tests stage - name: "Format" # names the first Tests stage job + - stage: "Format" script: - find src/ include/ test/ -type f \( -iname \*.h -o -iname \*.cpp \) | xargs -I _ clang-format -style=file -output-replacements-xml _ | grep -c "/dev/null - if [ $? 
-ne 1 ]; then echo "Not all source and header files are formatted with clang-format"; exit 1; fi - - stage: "Build" - script: echo "This is where we build Pandora" - - stage: "Test" - name: "Unit tests" - script: echo "This is where we run the unit tests" - - stage: "Deploy" - name: "DockerHub" - script: echo "This is where we deploy a container to Docker Hub" + - stage: "Build and Test" + env: + - BOOST_VERSION="1.62.0" + - BOOST_LIBS="system,filesystem,iostreams,log,thread,date_time" + - BUILD_TYPE="Debug" + - DEPS_DIR="${TRAVIS_BUILD_DIR}/deps" + install: bash ci/install.sh + script: bash ci/script.sh stages: - - "Lint" - - "Build" - - "Test" - - "Deploy" + - "Format" + - "Build and Test" notifications: email: false diff --git a/README.md b/README.md index d0f02230..0722b151 100644 --- a/README.md +++ b/README.md @@ -1,49 +1,89 @@ -[![Build Status](https://travis-ci.org/rmcolq/pandora.svg?branch=master)](https://travis-ci.org/rmcolq/pandora) master +| Branch | Status | +|:-------------------|:---------------------------------------------------------------------------------| +| [`master`][master] | ![Travis (.com) branch](https://img.shields.io/travis/com/rmcolq/pandora/master) | +| [`dev`][dev] | ![Travis (.com) branch](https://img.shields.io/travis/com/rmcolq/pandora/dev) | -[![Build Status](https://travis-ci.com/rmcolq/pandora.svg?token=mxzxNwUzHrkcpsL2i7zU&branch=dev)](https://travis-ci.com/rmcolq/pandora) dev +[master]: https://github.com/rmcolq/pandora/tree/master +[dev]: https://github.com/rmcolq/pandora/tree/dev -![Docker Cloud Build Status](https://img.shields.io/docker/cloud/build/rmcolq/pandora) # Pandora -## Contents -* [Introduction](#introduction) -* [Quick Start](#quick-start) -* [Installation](#installation) -* [Usage](#usage) +[TOC]: # + +# Table of Contents +- [Introduction](#introduction) +- [Quick Start](#quick-start) +- [Installation](#installation) + - [Containers](#containers) + - [Installation from source](#installation-from-source) +- [Usage](#usage) + - [Population Reference Graphs](#population-reference-graphs) + - [Build index](#build-index) + - [Map reads to index](#map-reads-to-index) + - [Compare reads from several samples](#compare-reads-from-several-samples) + - [Discover novel variants](#discover-novel-variants) + ## Introduction -Pandora is a tool for bacterial genome analysis using a pangenome reference graph (PanRG). It allows gene presence/absence detection and genotyping of SNPs, indels and longer variants in one or a number of samples. Pandora works with Illumina or Nanopore data. Core ideas behind the method are: - - new genomes look like recombinants (plus mutations) of things seen before - - we should be analysing nucleotide-level variation everywhere, not just in core genes - - arbitrary single reference genomes are unnatural and limit comparisons of diverse sets of genomes -The pangenome reference graph (PanRG) is a collection of 'floating' local graphs, each representing some orthologous region of interest (e.g. genes, mobile elements or intergenic regions). See https://github.com/rmcolq/make_prg for a pipeline which can construct these PRGs from a set of aligned sequence files. +Pandora is a tool for bacterial genome analysis using a pangenome +reference graph (PanRG). It allows gene presence/absence detection and +genotyping of SNPs, indels and longer variants in one or a number of +samples. Pandora works with Illumina or Nanopore data. 
Core ideas behind +the method are: +- new genomes look like recombinants (plus mutations) of things seen + before +- we should be analysing nucleotide-level variation everywhere, not just + in core genes +- arbitrary single reference genomes are unnatural and limit comparisons + of diverse sets of genomes + +The pangenome reference graph (PanRG) is a collection of 'floating' +local graphs, each representing some orthologous region of interest +(e.g. genes, mobile elements or intergenic regions). See +https://github.com/rmcolq/make_prg for a pipeline which can construct +these PRGs from a set of aligned sequence files. Pandora can do the following for a single sample (read dataset): -- Output inferred mosaic of reference sequences for loci (eg genes) from the PanRG which are present -- Output a VCF showing the variation found within these loci, with respect to any reference path in the PRG. +- Output inferred mosaic of reference sequences for loci (eg genes) from + the PanRG which are present +- Output a VCF showing the variation found within these loci, with + respect to any reference path in the PRG. Soon, in a galaxy not so far away, it will allow: - discovery of new variation not in the PRG For a collection of samples, it can: -- Output a matrix showing inferred copy-number of each locus in each sample genome -- Output a multisample pangenome VCF showing how including genotype calls for each sample in each of the loci -- Output one VCF per orthologous-chunk, showing how samples which contained this chunk differed in their gene sequence. Variation is shown with respect to the most informative recombinant path in the PRG. +- Output a matrix showing inferred copy-number of each locus in each + sample genome +- Output a multisample pangenome VCF showing how including genotype + calls for each sample in each of the loci +- Output one VCF per orthologous-chunk, showing how samples which + contained this chunk differed in their gene sequence. Variation is + shown with respect to the most informative recombinant path in the + PRG. Warning - this code is still in development. ## Quick Start + Index PanRG file: + ``` pandora index -t 8 ``` -Compare first 30X of each Illumina sample to get pangenome matrix and VCF + +Compare first 30X of each Illumina sample to get pangenome matrix and +VCF + ``` pandora compare --genotype --illumina --max-covg 30 ``` -Map Nanopore reads from a single sample to get approximate sequence for genes present + +Map Nanopore reads from a single sample to get approximate sequence for +genes present + ``` pandora map ``` @@ -51,21 +91,34 @@ pandora map ## Installation ### Containers -We highly recommend that you download a containerized image of Pandora. Pandora is hosted on Dockerhub and images can be downloaded with the command: + +![Docker Cloud Build Status](https://img.shields.io/docker/cloud/build/rmcolq/pandora) + +We highly recommend that you download a containerized image of Pandora. +Pandora is hosted on Dockerhub and images can be downloaded with the +command: + ``` docker pull rmcolq/pandora:latest ``` + Alternatively, using singularity: + ``` singularity pull docker://rmcolq/pandora:latest ``` + NB For consistency, we no longer maintain images on singularity hub. ### Installation from source -This is not recommended because the required zlib and boost system installs do not always play nicely. -If you want to take the risk: + +This is not recommended because the required zlib and boost system +installs do not always play nicely. 
If you want to take the risk: - Requires a Unix or Mac OS. -- Requires a system install of `zlib`. If this is not already installed, [this](https://geeksww.com/tutorials/libraries/zlib/installation/installing_zlib_on_ubuntu_linux.php) tutorial is helpful or try the following. +- Requires a system install of `zlib`. If this is not already installed, + [this](https://geeksww.com/tutorials/libraries/zlib/installation/installing_zlib_on_ubuntu_linux.php) + tutorial is helpful or try the following. + ``` wget http://www.zlib.net/zlib-1.2.11.tar.gz -O - | tar xzf - cd zlib-1.2.11 @@ -73,14 +126,23 @@ cd zlib-1.2.11 make make install ``` -- Requires a system installation of `boost` containing the `system`, `filesystem`, `log` (which also depends on `thread` and `date_time`) and `iostreams` libraries. If not already installed use the following or look at [this](https://www.boost.org/doc/libs/1_62_0/more/getting_started/unix-variants.html) guide. + +- Requires a system installation of `boost` containing the `system`, + `filesystem`, `log` (which also depends on `thread` and `date_time`) + and `iostreams` libraries. If not already installed use the following + or look at + [this](https://www.boost.org/doc/libs/1_62_0/more/getting_started/unix-variants.html) + guide. + ``` wget https://sourceforge.net/projects/boost/files/boost/1.62.0/boost_1_62_0.tar.gz -O - | tar xzf - cd boost_1_62_0 ./bootstrap.sh [--prefix=/prefix/path] --with-libraries=system,filesystem,iostreams,log,thread,date_time ./b2 install ``` + - Download and install `pandora` as follows: + ``` git clone --single-branch https://github.com/rmcolq/pandora.git --recursive cd pandora @@ -113,11 +175,20 @@ Subcommands: ``` ### Population Reference Graphs -Pandora assumes you have already constructed a fasta-like file of graphs, one entry for each gene/ genome region of interest. -If you haven't, you will need a multiple sequence alignment for each graph. Precompiled collections of MSA representing othologous gene clusters for a number of species can be downloaded from [here](http://pangenome.de/) and converted to graphs using the pipeline from [here](https://github.com/rmcolq/make_prg). + +Pandora assumes you have already constructed a fasta-like file of +graphs, one entry for each gene/ genome region of interest. If you +haven't, you will need a multiple sequence alignment for each graph. +Precompiled collections of MSA representing othologous gene clusters for +a number of species can be downloaded from [here](http://pangenome.de/) +and converted to graphs using the pipeline from +[here](https://github.com/rmcolq/make_prg). ### Build index -Takes a fasta-like file of PanRG sequences and constructs an index, and a directory of gfa files to be used by `pandora map` or `pandora compare`. These are output in the same directory as the PanRG file. + +Takes a fasta-like file of PanRG sequences and constructs an index, and +a directory of gfa files to be used by `pandora map` or `pandora +compare`. These are output in the same directory as the PanRG file. ``` $ pandora index --help @@ -136,10 +207,15 @@ Options: -v Verbosity of logging. Repeat for increased verbosity ``` -The index stores (w,k)-minimizers for each PanRG path found. These parameters can be specified, but default to w=14, k=15. +The index stores (w,k)-minimizers for each PanRG path found. These +parameters can be specified, but default to w=14, k=15. ### Map reads to index -This takes a fasta/q of Nanopore or Illumina reads and compares to the index. 
It infers which of the PanRG genes/elements is present, and for those that are present it outputs the inferred sequence and a genotyped VCF. + +This takes a fasta/q of Nanopore or Illumina reads and compares to the +index. It infers which of the PanRG genes/elements is present, and for +those that are present it outputs the inferred sequence and a genotyped +VCF. ``` $ pandora map --help @@ -199,7 +275,12 @@ Genotyping: ``` ### Compare reads from several samples -This takes Nanopore or Illumina read fasta/q for a number of samples, mapping each to the index. It infers which of the PanRG genes/elements is present in each sample, and outputs a presence/absence pangenome matrix, the inferred sequences for each sample and a genotyped multisample pangenome VCF. + +This takes Nanopore or Illumina read fasta/q for a number of samples, +mapping each to the index. It infers which of the PanRG genes/elements +is present in each sample, and outputs a presence/absence pangenome +matrix, the inferred sequences for each sample and a genotyped +multisample pangenome VCF. ``` $ pandora compare --help @@ -256,7 +337,8 @@ Genotyping: ### Discover novel variants -This will look for regions in the pangraph where the reads do not map and attempt to locally assemble these regions to find novel variants. +This will look for regions in the pangraph where the reads do not map +and attempt to locally assemble these regions to find novel variants. ``` $ pandora discover --help @@ -308,4 +390,5 @@ Filtering: Consensus/Variant Calling: --kmer-avg INT Maximum number of kmers to average over when selecting the maximum likelihood path [default: 100] -``` \ No newline at end of file +``` + diff --git a/ci/install.sh b/ci/install.sh new file mode 100644 index 00000000..f473a7bf --- /dev/null +++ b/ci/install.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -evu + +BOOST_URL="http://sourceforge.net/projects/boost/files/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz" +BOOST_DIR="${DEPS_DIR}/boost-${BOOST_VERSION}" +BOOST_ROOT="/usr" +export BOOST_DIR +export BOOST_ROOT + +# we cache this boost directory so we don't build it every time +if [[ -z "$(ls -A "${BOOST_DIR}")" ]]; then + mkdir -p "${BOOST_DIR}" + { wget --quiet -O - "${BOOST_URL}" | tar --strip-components=1 -xz -C "${BOOST_DIR}"; } || exit 1 +fi + +cd "$BOOST_DIR" || exit 1 +{ sudo ./bootstrap.sh --with-libraries="$BOOST_LIBS" --prefix="$BOOST_ROOT" && sudo ./b2 install; } || exit 1 diff --git a/ci/script.sh b/ci/script.sh new file mode 100644 index 00000000..f2ca0134 --- /dev/null +++ b/ci/script.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -evu + +export CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=$BUILD_TYPE " + +echo "$CMAKE_OPTIONS" +mkdir build +cd build || exit 1 +{ cmake "$CMAKE_OPTIONS" .. && make -j4; } || exit 1 +export PATH="${PWD}:${PATH}" +./pandora --help +ctest -V || exit 1 From abc2508e050f4e72ab98c5f0652c237f47052170 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 30 Oct 2020 16:58:22 +1000 Subject: [PATCH 8/9] change default merge_dist to 15 --- README.md | 2 +- include/denovo_discovery/discover_main.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0722b151..44e0bf79 100644 --- a/README.md +++ b/README.md @@ -357,7 +357,7 @@ Options: -l INT Min. length of consecutive positions below coverage threshold to trigger variant discovery [default: 1] -L INT Max. 
length of consecutive positions below coverage threshold to trigger variant discovery [default: 50]
   -P,--pad INT                Padding either side of candidate variant intervals [default: 22]
-  -d,--merge INT              Merge candidate variant intervals within distance [default: 22]
+  -d,--merge INT              Merge candidate variant intervals within distance [default: 15]
   --min-dbg-dp INT            Minimum node/kmer depth in the de Bruijn graph used for discovering variants [default: 2]
   -v                          Verbosity of logging. Repeat for increased verbosity

diff --git a/include/denovo_discovery/discover_main.h b/include/denovo_discovery/discover_main.h
index e797a069..614f912c 100644
--- a/include/denovo_discovery/discover_main.h
+++ b/include/denovo_discovery/discover_main.h
@@ -41,7 +41,7 @@ struct DiscoverOptions {
     uint32_t min_candidate_len { 1 };
     uint32_t max_candidate_len { 50 };
     uint16_t candidate_padding { 22 };
-    uint32_t merge_dist { 22 };
+    uint32_t merge_dist { 15 };
     uint32_t min_cluster_size { 10 };
     uint32_t max_num_kmers_to_avg { 100 };
     bool clean_dbg { false };

From f39d8c51e2b6ef3237d3beab801a03e995431a61 Mon Sep 17 00:00:00 2001
From: Michael Hall
Date: Mon, 2 Nov 2020 15:46:20 +1000
Subject: [PATCH 9/9] make clean asm graph member variable const

---
 include/denovo_discovery/denovo_discovery.h     | 2 +-
 test/denovo_discovery/denovo_discovery_test.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/denovo_discovery/denovo_discovery.h b/include/denovo_discovery/denovo_discovery.h
index fbaf49b1..8c8431a0 100644
--- a/include/denovo_discovery/denovo_discovery.h
+++ b/include/denovo_discovery/denovo_discovery.h
@@ -20,7 +20,7 @@ class DenovoDiscovery {
 public:
     const uint16_t max_insertion_size;
     const uint16_t min_covg_for_node_in_assembly_graph;
-    bool clean_assembly_graph;
+    const bool clean_assembly_graph;

     DenovoDiscovery(const uint16_t& kmer_size, const double& read_error_rate,
         uint16_t max_insertion_size = 15,
diff --git a/test/denovo_discovery/denovo_discovery_test.cpp b/test/denovo_discovery/denovo_discovery_test.cpp
index 462e9901..661ecb79 100644
--- a/test/denovo_discovery/denovo_discovery_test.cpp
+++ b/test/denovo_discovery/denovo_discovery_test.cpp
@@ -211,10 +211,10 @@ TEST(FindPathsThroughCandidateRegionTest, endKmerExistsInStartKmersFindPathAndCy
 TEST(FindPathsThroughCandidateRegionTest,
     doGraphCleaningtwoIdenticalReadsPlusNoiseReturnOnePath)
 {
+    const auto clean { true };
     const int k { 9 };
     const double error_rate { 0.11 };
-    DenovoDiscovery denovo { k, error_rate };
-    denovo.clean_assembly_graph = true;
+    DenovoDiscovery denovo { k, error_rate, 15, 1, clean };
     CandidateRegion candidate_region { Interval(0, 1), "test" };
     candidate_region.max_likelihood_sequence = "ATGCGCTGAGAGTCGGACT";
     candidate_region.pileup