Skip to content

Commit

Permalink
fixed a weird bcalm bug regarding output id's in multithread mode
Browse files Browse the repository at this point in the history
  • Loading branch information
rchikhi committed Dec 11, 2017
1 parent 13174d2 commit dd651dd
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
22 changes: 16 additions & 6 deletions gatb-core/src/gatb/bcalm2/bglue_algo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,10 @@ void bglue(Storage *storage,
const typename ModelCanon::Kmer kmmerBegin = modelCanon.codeSeed(kmerBegin.c_str(), Data::ASCII);
const typename ModelCanon::Kmer kmmerEnd = modelCanon.codeSeed(kmerEnd.c_str(), Data::ASCII);

ufkmers.union_(uf_mphf.lookup(hasher(kmmerBegin)), uf_mphf.lookup(hasher(kmmerEnd)));
uint32_t v1 = uf_mphf.lookup(hasher(kmmerBegin));
uint32_t v2 = uf_mphf.lookup(hasher(kmmerEnd));

ufkmers.union_(v1,v2);
//ufkmers.union_((hasher(kmmerBegin)), (hasher(kmmerEnd)));

#if 0
Expand All @@ -818,10 +821,9 @@ void bglue(Storage *storage,

};

//setDispatcher (new SerialDispatcher()); // force single thread
Dispatcher dispatcher (nb_threads);
dispatcher.iterate (in->iterator(), createUF);

#if 0
ufmin.printStats("uf minimizers");

Expand All @@ -836,6 +838,7 @@ void bglue(Storage *storage,
if (debug_uf_stats) // for debugging
{
ufkmers.printStats("uf kmers");
//ufkmers.dumpUF("uf.dump");
logging("after computing UF stats");
}

Expand Down Expand Up @@ -929,6 +932,7 @@ void bglue(Storage *storage,

logging( "Allowed " + to_string((max_buffer * nbGluePartitions) /1024 /1024) + " MB memory for buffers");


// partition the glue into many files, à la dsk
auto partitionGlue = [k, &modelCanon /* crashes if copied!*/, \
&get_UFclass, &gluePartitions,
Expand Down Expand Up @@ -983,7 +987,7 @@ void bglue(Storage *storage,
delete gluePartitions[i]; // takes care of the final flush (this doesn't delete the file, just closes it)
free_memory_vector(gluePartitions);
out.flush();


logging("Done disk partitioning of glue");

Expand All @@ -1010,10 +1014,11 @@ void bglue(Storage *storage,

// glue all partitions using a thread pool
ThreadPool pool(nb_threads);
std::mutex mtx; // lock to avoid a nasty bug when calling output()
for (int partition = 0; partition < nbGluePartitions; partition++)
{
auto glue_partition = [&modelCanon, &ufkmers, partition, &gluePartition_prefix, nbGluePartitions, &copy_nb_seqs_in_partition,
&get_UFclass, &out, &outLock, &out_id, kmerSize]( int thread_id)
&get_UFclass, &out, &outLock, &out_id, kmerSize, &mtx]( int thread_id)
{
int k = kmerSize;

Expand Down Expand Up @@ -1101,7 +1106,12 @@ void bglue(Storage *storage,

float mean_abundance = get_mean_abundance(abs);
uint32_t sum_abundances = get_sum_abundance(abs);
output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
{
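// For some reason a lock_guard is needed here, even though output() is itself lock-guarded.
// Most likely cause (to be confirmed): the header string, including to_string(out_id++), is evaluated
// before output() is entered, so the increment of the shared out_id happens outside output()'s own lock.
// Anyway, holding mtx around both the increment and the call fixes the problem.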
std::lock_guard<std::mutex> lock(mtx);
output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
}
}

free_memory_vector(ordered_sequences_idxs);
Expand Down
2 changes: 1 addition & 1 deletion gatb-core/src/gatb/bcalm2/ograph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ template<size_t span>
void graph3<span>::compaction(uint iL, uint iR,typename graph3<span>::kmerType kmmer){
if(iR!=iL){
typename graph3<span>::kmerType RC=rcb(kmmer);
uint s1(unitigs[iL].size()),s2(unitigs[iR].size());
//uint s1(unitigs[iL].size()),s2(unitigs[iR].size());
bool b1(isNumber(unitigs[iL][0])),b2(isNumber(unitigs[iR][0]));
if(b1 and b2){return compaction(stoi(unitigs[iL]),stoi(unitigs[iR]),kmmer);}
if(b1){return compaction(stoi(unitigs[iL]),iR,kmmer);}
Expand Down
10 changes: 10 additions & 0 deletions gatb-core/src/gatb/bcalm2/unionFind.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <set>
#include <atomic>
#include <iostream>
#include <fstream>
#include <unordered_map>

/**
Expand Down Expand Up @@ -130,6 +131,15 @@ class unionFind {
std::cout << "raw space of UF hash data: " << ( 2*getNumKeys * sizeof(T) ) /1024/1024 << " MB" << std::endl; // 2x because each key of type T is associated to a value of type T
}

/**
 * Debug helper: writes the raw contents of the union-find table to a text file.
 *
 * One line is emitted per slot, formatted as "<index> <value>", where <value>
 * is the 64-bit word currently stored in mData[index].
 *
 * @param file path of the file to create (an existing file is overwritten).
 *
 * If the file cannot be opened, a warning is printed on stderr and nothing is
 * written.
 */
void dumpUF(std::string file)
{
std::ofstream dumpfile(file);
if (!dumpfile)
{
std::cerr << "dumpUF: cannot open '" << file << "' for writing" << std::endl;
return;
}

// Index with the same type size() returns, so the counter cannot wrap
// around before reaching the end of a very large table.
auto nb_elements = size();
for (decltype(nb_elements) i = 0; i < nb_elements; ++i)
{
// '\n' instead of std::endl: avoids flushing the stream on every line.
dumpfile << i << ' ' << mData[i].load() << '\n';
}

// Explicit close flushes the buffered output (the destructor would too).
dumpfile.close();
}


mutable std::vector<std::atomic<uint64_t>> mData;
Expand Down

0 comments on commit dd651dd

Please sign in to comment.