Summary of this patch (text before the first "diff --git" line is ignored by patch tools):
  * cgid_types.hpp: the cmp_query_bucket comparator additionally compares
    mapRefPosBin so that the sort output is deterministic [issue #46].
  * winSketch.hpp: percentageThreshold changes from 0.001 to 0.0; the sketch
    build step logs the minimizer-sampling window size from thread 0; the
    "consider all minimizers" log message no longer prints the threshold.
NOTE(review): line breaks, blank context lines and leading indentation were
reconstructed from the hunk headers' line counts; confirm against the target
files (or apply with --ignore-whitespace) before relying on exact whitespace.

diff --git a/src/cgi/include/cgid_types.hpp b/src/cgi/include/cgid_types.hpp
index 98285c9..df64ae4 100644
--- a/src/cgi/include/cgid_types.hpp
+++ b/src/cgi/include/cgid_types.hpp
@@ -32,8 +32,9 @@ namespace cgi
   {
     bool operator() (const MappingResult_CGI &x, const MappingResult_CGI &y)
     {
-      return std::tie(x.genomeId, x.querySeqId, x.nucIdentity)
-        < std::tie(y.genomeId, y.querySeqId, y.nucIdentity);
+      return std::tie(x.genomeId, x.querySeqId, x.nucIdentity, x.mapRefPosBin)
+        < std::tie(y.genomeId, y.querySeqId, y.nucIdentity, y.mapRefPosBin);
+      //Added ref. bin also to make sort output deterministic [issue #46]
     }
   } cmp_query_bucket;
 
diff --git a/src/map/include/winSketch.hpp b/src/map/include/winSketch.hpp
index 2cdfcda..dbc62c3 100644
--- a/src/map/include/winSketch.hpp
+++ b/src/map/include/winSketch.hpp
@@ -49,7 +49,7 @@ namespace skch
       const skch::Parameters &param;
 
       //Ignore top % most frequent minimizers while lookups
-      const float percentageThreshold = 0.001;
+      const float percentageThreshold = 0.0;
 
       //Minimizers that occur this or more times will be ignored (computed based on percentageThreshold)
       int freqThreshold = std::numeric_limits<int>::max();
@@ -123,6 +123,9 @@ namespace skch
         //sequence counter while parsing file
         seqno_t seqCounter = 0;
 
+        if ( omp_get_thread_num() == 0)
+          std::cerr << "INFO [thread 0], skch::Sketch::build, window size for minimizer sampling = " << param.windowSize << std::endl;
+
         for(const auto &fileName : param.refSequences)
         {
 
@@ -238,7 +241,7 @@ namespace skch
         else
         {
           if ( omp_get_thread_num() == 0)
-            std::cerr << "INFO [thread 0], skch::Sketch::computeFreqHist, With threshold " << this->percentageThreshold << "\%, consider all minimizers during lookup." << std::endl;
+            std::cerr << "INFO [thread 0], skch::Sketch::computeFreqHist, consider all minimizers during lookup." << std::endl;
         }
       }
 