Skip to content

Commit

Permalink
updated gfalibs
Browse files Browse the repository at this point in the history
  • Loading branch information
gf777 committed Jun 16, 2024
1 parent 54c1f62 commit 671bf08
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 43 deletions.
2 changes: 1 addition & 1 deletion gfalibs
Submodule gfalibs updated 2 files
+12 −39 include/fastx.h
+140 −123 include/kmer.h
3 changes: 2 additions & 1 deletion include/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class InSequencesDBG : public InSequences {

struct UserInputKreeq : UserInput {

uint8_t covCutOff = 0, depth = 3, backtrackingSpan = 5;
uint32_t covCutOff = 0;
uint8_t depth = 3, backtrackingSpan = 5;
uint64_t maxMem = 0;

};
Expand Down
18 changes: 9 additions & 9 deletions include/kreeq.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,30 +35,30 @@ struct DBGkmer32 {

};

using parallelMap = phmap::parallel_flat_hash_map<uint64_t, DBGkmer,
using ParallelMap = phmap::parallel_flat_hash_map<uint64_t, DBGkmer,
std::hash<uint64_t>,
std::equal_to<uint64_t>,
std::allocator<std::pair<const uint64_t, DBGkmer>>,
8,
phmap::NullMutex>;

using parallelMap32 = phmap::parallel_flat_hash_map<uint64_t, DBGkmer32,
std::hash<uint64_t>,
using ParallelMap32 = phmap::parallel_flat_hash_map<uint64_t, DBGkmer32,
std::hash<uint64_t>,
std::equal_to<uint64_t>,
std::allocator<std::pair<const uint64_t, DBGkmer32>>,
8,
phmap::NullMutex>;

class DBG : public Kmap<DBG, UserInputKreeq, DBGkmer, DBGkmer32> { // CRTP
class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { // CRTP

std::atomic<uint64_t> totMissingKmers{0}, totKcount{0}, totEdgeMissingKmers{0};
UserInputKreeq userInput;

InSequencesDBG *genome;

// subgraph objects
parallelMap32 *DBGsubgraph = new parallelMap32;
std::vector<parallelMap32*> DBGTmpSubgraphs;
ParallelMap32 *DBGsubgraph = new ParallelMap32;
std::vector<ParallelMap32*> DBGTmpSubgraphs;
InSequences GFAsubgraph;

uint64_t totEdgeCount = 0;
Expand Down Expand Up @@ -93,11 +93,11 @@ class DBG : public Kmap<DBG, UserInputKreeq, DBGkmer, DBGkmer32> { // CRTP

bool deleteMap(uint16_t m);

bool mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex, uint16_t m);
bool mergeSubMaps(ParallelMap* map1, ParallelMap* map2, uint8_t subMapIndex, uint16_t m);

bool mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapIndex);
bool mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapIndex);

bool unionSum(parallelMap32* map1, parallelMap32* map2);
bool unionSum(ParallelMap32* map1, ParallelMap32* map2);

void kunion();

Expand Down
34 changes: 17 additions & 17 deletions src/graph-builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ bool DBG::processBuffers(uint16_t m) {
});
}

parallelMap& map = *maps[m]; // the map associated to this buffer
parallelMap32& map32 = *maps32[m];
ParallelMap& map = *maps[m]; // the map associated to this buffer
ParallelMap32& map32 = *maps32[m];
uint64_t map_size = mapSize(map);

bufFile.read(reinterpret_cast<char *>(&pos), sizeof(uint64_t));
Expand Down Expand Up @@ -224,8 +224,8 @@ bool DBG::processBuffers(uint16_t m) {

bool DBG::reloadMap32(uint16_t m) {

parallelMap& map = *maps[m]; // the map associated to this buffer
parallelMap32& map32 = *maps32[m];
ParallelMap& map = *maps[m]; // the map associated to this buffer
ParallelMap32& map32 = *maps32[m];

for (auto pair : map32) {

Expand Down Expand Up @@ -267,14 +267,14 @@ bool DBG::summary(uint16_t m) {
}

std::lock_guard<std::mutex> lck(mtx);
totKmersUnique += kmersUnique;
totKmersDistinct += kmersDistinct;
totUnique += kmersUnique;
totDistinct += kmersDistinct;
totEdgeCount += edgeCount;

for (auto pair : hist) {

finalHistogram[pair.first] += pair.second;
totKmers += pair.first * pair.second;
tot += pair.first * pair.second;
}

return true;
Expand All @@ -283,27 +283,27 @@ bool DBG::summary(uint16_t m) {

void DBG::DBstats() {

uint64_t missing = pow(4,k)-totKmersDistinct;
uint64_t missing = pow(4,k)-totDistinct;

std::cout<<"DBG Summary statistics:\n"
<<"Total kmers: "<<totKmers<<"\n"
<<"Unique kmers: "<<totKmersUnique<<"\n"
<<"Distinct kmers: "<<totKmersDistinct<<"\n"
<<"Total kmers: "<<tot<<"\n"
<<"Unique kmers: "<<totUnique<<"\n"
<<"Distinct kmers: "<<totDistinct<<"\n"
<<"Missing kmers: "<<missing<<"\n"
<<"Total edges: "<<totEdgeCount<<"\n";

}

void DBG::kunion(){ // concurrent merging of the maps that store the same hashes

parallelMap32 map32Total; // first merge high-copy kmers
ParallelMap32 map32Total; // first merge high-copy kmers

for (unsigned int i = 0; i < userInput.kmerDB.size(); ++i) { // for each kmerdb loads the map and merges it

std::string prefix = userInput.kmerDB[i]; // loads the next map
prefix.append("/.map.hc.bin");

parallelMap32 nextMap;
ParallelMap32 nextMap;
phmap::BinaryInputArchive ar_in(prefix.c_str());
nextMap.phmap_load(ar_in);

Expand Down Expand Up @@ -350,13 +350,13 @@ void DBG::kunion(){ // concurrent merging of the maps that store the same hashes

}

bool DBG::mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex, uint16_t m) {
bool DBG::mergeSubMaps(ParallelMap* map1, ParallelMap* map2, uint8_t subMapIndex, uint16_t m) {

auto& inner = map1->get_inner(subMapIndex); // to retrieve the submap at given index
auto& submap1 = inner.set_; // can be a set or a map, depending on the type of map1
auto& inner2 = map2->get_inner(subMapIndex);
auto& submap2 = inner2.set_;
parallelMap32& map32 = *maps32[m];
ParallelMap32& map32 = *maps32[m];

for (auto pair : submap1) { // for each element in map1, find it in map2 and increase its value

Expand Down Expand Up @@ -433,7 +433,7 @@ bool DBG::mergeSubMaps(parallelMap* map1, parallelMap* map2, uint8_t subMapIndex

// subgraph functions

bool DBG::mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapIndex) {
bool DBG::mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapIndex) {

auto& inner = map1->get_inner(subMapIndex); // to retrieve the submap at given index
auto& submap1 = inner.set_; // can be a set or a map, depending on the type of map1
Expand Down Expand Up @@ -476,7 +476,7 @@ bool DBG::mergeSubMaps(parallelMap32* map1, parallelMap32* map2, uint8_t subMapI
}


bool DBG::unionSum(parallelMap32* map1, parallelMap32* map2) {
bool DBG::unionSum(ParallelMap32* map1, ParallelMap32* map2) {

std::vector<std::function<bool()>> jobs;

Expand Down
22 changes: 11 additions & 11 deletions src/kreeq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ bool DBG::evaluateSegment(uint32_t s, std::array<uint16_t, 2> mapRange) {

uint64_t key, i;

parallelMap *map;
parallelMap32 *map32;
ParallelMap *map;
ParallelMap32 *map32;

// kreeq QV
bool isFw = false;
Expand Down Expand Up @@ -289,7 +289,7 @@ void DBG::correctSequences() {

bool DBG::searchGraph(std::array<uint16_t, 2> mapRange) { // stub

parallelMap* genomeDBG = new parallelMap;
ParallelMap* genomeDBG = new ParallelMap;

std::vector<InSegment*> *inSegments = genome->getInSegments();

Expand All @@ -310,7 +310,7 @@ bool DBG::searchGraph(std::array<uint16_t, 2> mapRange) { // stub

uint64_t key, i;
bool isFw = false;
parallelMap *map;
ParallelMap *map;

for(uint64_t c = 0; c<kcount; ++c){

Expand Down Expand Up @@ -341,7 +341,7 @@ std::pair<DBGkmer*,bool> DBG::findDBGkmer(uint8_t *origin) {
bool isFw = false;
key = hash(origin, &isFw);
i = key % mapCount;
parallelMap *map = maps[i];
ParallelMap *map = maps[i];
auto got = map->find(key);
if (got != map->end())
return std::make_pair(&(got->second), isFw);
Expand Down Expand Up @@ -560,7 +560,7 @@ bool DBG::detectAnomalies(InSegment *inSegment, std::vector<uint64_t> &anomalies
threadLog.setId(inSegment->getuId());

std::string sHeader = inSegment->getSeqHeader();
parallelMap *map;
ParallelMap *map;
uint64_t key, i;
bool isFw = false, anomaly = false;

Expand Down Expand Up @@ -612,7 +612,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
threadLog.setId(inSegment->getuId());

std::string sHeader = inSegment->getSeqHeader();
parallelMap *map;
ParallelMap *map;
uint64_t key, i;
bool isFw = false;
std::vector<std::deque<DBGpath>> variants;
Expand Down Expand Up @@ -918,9 +918,9 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m
threadLog.setId(inSegment->getuId());

std::string sHeader = inSegment->getSeqHeader();
parallelMap *map;
parallelMap32 *map32;
parallelMap32 *segmentSubmap = new parallelMap32;
ParallelMap *map;
ParallelMap32 *map32;
ParallelMap32 *segmentSubmap = new ParallelMap32;
uint64_t key, i;
bool isFw = false;
std::vector<uint64_t> segmentCoordinates;
Expand Down Expand Up @@ -983,7 +983,7 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m

void DBG::mergeSubgraphs() {

for (parallelMap32 *map1 : DBGTmpSubgraphs) {
for (ParallelMap32 *map1 : DBGTmpSubgraphs) {
unionSum(map1, DBGsubgraph);
delete map1;
}
Expand Down
8 changes: 4 additions & 4 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ int main(int argc, char **argv) {
{"input-reads", required_argument, 0, 'r'},
{"tmp-prefix", required_argument, 0, 't'},
{"max-memory", required_argument, 0, 'm'},
{"threads", required_argument, 0, 'j'},

{"threads", required_argument, 0, 'j'},
{"verbose", no_argument, &verbose_flag, 1},
{"cmd", no_argument, &cmd_flag, 1},
{"version", no_argument, 0, 'v'},
Expand Down Expand Up @@ -173,9 +173,8 @@ int main(int argc, char **argv) {
break;
case 't': // prefix for temporary files
userInput.prefix = optarg;
break;

case 'm': // prefix for temporary files
break;
case 'm': // max memory
userInput.maxMem = atof(optarg);
break;
case 'v': // software version
Expand All @@ -185,6 +184,7 @@ int main(int argc, char **argv) {
case 'h': // help
printf("kreeq [command]\n");
printf("\nOptions:\n");
printf("\t-c --coverage-cutoff coverage cutoff.\n");
printf("\t-d --database kreeq database to load.\n");
printf("\t-f --input-sequence sequence input file (fasta,gfa1/2).\n");
printf("\t-r --input-reads read input files (fastq).\n");
Expand Down

0 comments on commit 671bf08

Please sign in to comment.