Skip to content

Commit

Permalink
include original sequence and colored nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
gf777 committed Jun 20, 2024
1 parent 76f460c commit 18f4273
Show file tree
Hide file tree
Showing 136 changed files with 100 additions and 30 deletions.
39 changes: 25 additions & 14 deletions include/kreeq.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,29 @@ struct DBGkmer32 {

};

using ParallelMap = phmap::parallel_flat_hash_map<uint64_t, DBGkmer,
// DBGkmer32 record that additionally carries a one-byte color tag.
struct DBGkmer32color : public DBGkmer32 {

    uint8_t color = 0; // stays 0 until a caller assigns a color

    // Default construction: the inherited fields are whatever DBGkmer32's
    // own default constructor leaves them as.
    DBGkmer32color() {}

    // Converting constructor from an uncolored record.
    // Only fw, bw and cov are taken from the argument; color keeps its default.
    DBGkmer32color(const DBGkmer32& dbgkmer32) {

        cov = dbgkmer32.cov;

        // element-wise copy of the fw array
        auto fwDst = std::begin(fw);
        for (auto fwSrc = std::begin(dbgkmer32.fw); fwSrc != std::end(dbgkmer32.fw); ++fwSrc, ++fwDst)
            *fwDst = *fwSrc;

        // element-wise copy of the bw array
        auto bwDst = std::begin(bw);
        for (auto bwSrc = std::begin(dbgkmer32.bw); bwSrc != std::end(dbgkmer32.bw); ++bwSrc, ++bwDst)
            *bwDst = *bwSrc;
    }
};

template<typename T>
class PM : public phmap::parallel_flat_hash_map<uint64_t, T,
std::hash<uint64_t>,
std::equal_to<uint64_t>,
std::allocator<std::pair<const uint64_t, DBGkmer>>,
std::allocator<std::pair<const uint64_t, T>>,
8,
phmap::NullMutex>;
phmap::NullMutex> {};

using ParallelMap32 = phmap::parallel_flat_hash_map<uint64_t, DBGkmer32,
std::hash<uint64_t>,
std::equal_to<uint64_t>,
std::allocator<std::pair<const uint64_t, DBGkmer32>>,
8,
phmap::NullMutex>;
using ParallelMap = PM<DBGkmer>;
using ParallelMap32 = PM<DBGkmer32>;
using ParallelMap32color = PM<DBGkmer32color>;

class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { // CRTP

Expand All @@ -145,8 +155,8 @@ class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { //
InSequencesDBG *genome;

// subgraph objects
ParallelMap32 *DBGsubgraph = new ParallelMap32;
std::vector<ParallelMap32*> DBGTmpSubgraphs;
ParallelMap32color *DBGsubgraph = new ParallelMap32color;
std::vector<ParallelMap32color*> DBGTmpSubgraphs;
InSequences GFAsubgraph;

uint64_t totEdgeCount = 0;
Expand Down Expand Up @@ -185,7 +195,8 @@ class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { //

bool mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapIndex);

bool unionSum(ParallelMap32* map1, ParallelMap32* map2);
template<typename MAPTYPE>
bool unionSum(MAPTYPE* map1, MAPTYPE* map2);

void kunion();

Expand Down Expand Up @@ -229,9 +240,9 @@ class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { //

void DFS();

void summary(ParallelMap32& DBGsubgraph);
void summary(ParallelMap32color& DBGsubgraph);

ParallelMap32 DFSpass(ParallelMap32* candidates, std::array<uint16_t, 2> mapRange);
ParallelMap32color DFSpass(ParallelMap32color* candidates, std::array<uint16_t, 2> mapRange);

void mergeSubgraphs();

Expand Down
4 changes: 2 additions & 2 deletions src/graph-builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,8 +475,8 @@ bool DBG::mergeSubMaps(ParallelMap32* map1, ParallelMap32* map2, uint8_t subMapI

}


bool DBG::unionSum(ParallelMap32* map1, ParallelMap32* map2) {
template<typename MAPTYPE>
bool DBG::unionSum(MAPTYPE* map1, MAPTYPE* map2) {

std::vector<std::function<bool()>> jobs;

Expand Down
62 changes: 48 additions & 14 deletions src/kreeq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,7 @@ void DBG::subgraph() {

}

void DBG::summary(ParallelMap32& DBGsubgraph) {
void DBG::summary(ParallelMap32color& DBGsubgraph) {

uint64_t tot = 0, kmersUnique = 0, kmersDistinct = DBGsubgraph.size(), edgeCount = 0;
phmap::parallel_flat_hash_map<uint64_t, uint64_t> hist;
Expand Down Expand Up @@ -948,7 +948,7 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m
std::string sHeader = inSegment->getSeqHeader();
ParallelMap *map;
ParallelMap32 *map32;
ParallelMap32 *segmentSubmap = new ParallelMap32;
ParallelMap32color *segmentSubmap = new ParallelMap32color;
uint64_t key, i;
bool isFw = false;
std::vector<uint64_t> segmentCoordinates;
Expand Down Expand Up @@ -990,12 +990,43 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m

if (got != map->end()) {
if (got->second.cov != 255) {
segmentSubmap->insert(*got);
DBGkmer32color dbgKmer32color(got->second);
dbgKmer32color.color = 1;
segmentSubmap->insert(std::make_pair(got->first,dbgKmer32color));
}else{
map32 = maps32[i];
auto got = map32->find(key);
segmentSubmap->insert(*got);
DBGkmer32color dbgKmer32color(got->second);
dbgKmer32color.color = 1;
segmentSubmap->insert(std::make_pair(got->first,dbgKmer32color));
}
}else{ // construct the kmer

DBGkmer32color dbgKmer32color;
dbgKmer32color.color = 2;
edgeBit edges;

if (isFw){
if (ctoi[*(first+p+k)] <= 3)
edges.assign(ctoi[*(first+p+k)]);
if (p > 0 && *(str+p-1) <= 3)
edges.assign(4+*(str+p-1));
}else{
if (p > 0 && *(str+p-1) <= 3)
edges.assign(3-*(str+p-1));
if (ctoi[*(first+p+k)] <= 3)
edges.assign(4+3-ctoi[*(first+p+k)]);
}

for (uint64_t w = 0; w<4; ++w) { // update weights

dbgKmer32color.fw[w] += edges.read(w);
dbgKmer32color.bw[w] += edges.read(4+w);
}
if (dbgKmer32color.cov < LARGEST)
++dbgKmer32color.cov; // increase kmer coverage

segmentSubmap->insert(std::make_pair(key,dbgKmer32color));
}
}
}
Expand All @@ -1011,8 +1042,8 @@ bool DBG::DBGsubgraphFromSegment(InSegment *inSegment, std::array<uint16_t, 2> m

void DBG::DFS() {

ParallelMap32 candidates, newCandidates;
ParallelMap32* subgraph = DBGsubgraph;
ParallelMap32color candidates, newCandidates;
ParallelMap32color* subgraph = DBGsubgraph;

std::array<uint16_t, 2> mapRange = {0,0};
for (uint8_t i = 0; i < userInput.kmerDepth; ++i) {
Expand All @@ -1032,9 +1063,9 @@ void DBG::DFS() {
DBGsubgraph->insert(candidates.begin(), candidates.end());
}

ParallelMap32 DBG::DFSpass(ParallelMap32* subgraph, std::array<uint16_t, 2> mapRange) {
ParallelMap32color DBG::DFSpass(ParallelMap32color* subgraph, std::array<uint16_t, 2> mapRange) {

ParallelMap32 newCandidates;
ParallelMap32color newCandidates;

for (auto pair : *subgraph) {

Expand Down Expand Up @@ -1066,11 +1097,13 @@ ParallelMap32 DBG::DFSpass(ParallelMap32* subgraph, std::array<uint16_t, 2> mapR
if (got != map->end()) {

if (got->second.cov != 255) {
newCandidates.insert(*got);
DBGkmer32color dbgKmer32color(got->second);
newCandidates.insert(std::make_pair(got->first,dbgKmer32color));
}else{
map32 = maps32[m];
auto got = map32->find(key);
newCandidates.insert(*got);
DBGkmer32color dbgKmer32color(got->second);
newCandidates.insert(std::make_pair(got->first,dbgKmer32color));
}
}
}
Expand Down Expand Up @@ -1106,11 +1139,13 @@ ParallelMap32 DBG::DFSpass(ParallelMap32* subgraph, std::array<uint16_t, 2> mapR

if (got != map->end()) {
if (got->second.cov != 255) {
newCandidates.insert(*got);
DBGkmer32color dbgKmer32color(got->second);
newCandidates.insert(std::make_pair(got->first,dbgKmer32color));
}else{
map32 = maps32[m];
auto got = map32->find(key);
newCandidates.insert(*got);
DBGkmer32color dbgKmer32color(got->second);
newCandidates.insert(std::make_pair(got->first,dbgKmer32color));
}
}
}
Expand All @@ -1123,11 +1158,10 @@ ParallelMap32 DBG::DFSpass(ParallelMap32* subgraph, std::array<uint16_t, 2> mapR

void DBG::mergeSubgraphs() {

for (ParallelMap32 *map1 : DBGTmpSubgraphs) {
for (ParallelMap32color *map1 : DBGTmpSubgraphs) {
unionSum(map1, DBGsubgraph);
delete map1;
}

}

void DBG::DBGgraphToGFA() {
Expand Down
17 changes: 17 additions & 0 deletions test.gfa
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
H VN:Z:1.2
S 0 GAAATGATAGCATGACTCAGACTGATCAGATCGA RC:i:42
S 1 ATCGATCGATGCATGCATTCGTACATCACTGCATGTACG RC:i:42
S 2 CTGAGTCATGCTATCATTTCAATCGATCGATGCATGCATTC RC:i:21
S 3 CTACGATCAGATCGACTGACTCGTACATGCAGTGATGTACG RC:i:21
S 4 GAATGCATGCATCGATCGATCGAAATGATAGCATGACTCAG RC:i:21
S 5 CTACGATCAGATCGACTGACACGTACATGCAGTGATGTACG RC:i:21
S 6 GTCAGTCGATCTGATCGTAGTATG RC:i:42
L 0 - 2 + 1N20M KC:i:1
L 2 + 1 + 1N20M KC:i:1
L 4 - 1 + 1N20M KC:i:1
L 1 + 3 - 1N20M KC:i:1
L 3 - 6 + 1N20M KC:i:1
L 5 + 1 - 1N20M KC:i:1
L 4 + 0 + 1N20M KC:i:1
L 6 - 5 + 1N20M KC:i:1
P path1 0-,2+,1+,3-,6+ 1N20M,1N20M,1N20M,1N20M
2 changes: 2 additions & 0 deletions testFiles/random11.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>sequence7 - two point errors overlapping (25:T>A, 35:A>T)
CATACTACGATCAGATCGACTGACaCGTACATGCtGTGATGTACGAATGCATGCATCGATCGATCGAAATGATAGCATGACTCAGACTGATCAGATCGA
4 changes: 4 additions & 0 deletions testFiles/random11.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@sequence1 - no errors
CATACTACGATCAGATCGACTGACTCGTACATGCAGTGATGTACGAATGCATGCATCGATCGATCGAAATGATAGCATGACTCAGACTGATCAGATCGA
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2 changes: 2 additions & 0 deletions testFiles/random11.kreeq/.index
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
21
128
Binary file added testFiles/random11.kreeq/.map.0.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.1.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.10.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.100.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.101.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.102.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.103.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.104.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.105.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.106.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.107.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.108.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.109.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.11.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.110.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.111.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.112.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.113.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.114.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.115.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.116.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.117.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.118.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.119.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.12.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.120.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.121.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.122.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.123.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.124.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.125.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.126.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.127.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.13.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.14.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.15.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.16.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.17.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.18.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.19.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.2.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.20.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.21.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.22.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.23.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.24.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.25.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.26.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.27.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.28.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.29.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.3.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.30.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.31.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.32.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.33.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.34.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.35.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.36.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.37.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.38.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.39.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.4.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.40.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.41.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.42.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.43.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.44.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.45.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.46.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.47.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.48.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.49.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.5.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.50.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.51.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.52.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.53.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.54.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.55.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.56.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.57.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.58.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.59.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.6.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.60.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.61.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.62.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.63.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.64.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.65.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.66.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.67.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.68.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.69.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.7.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.70.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.71.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.72.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.73.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.74.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.75.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.76.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.77.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.78.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.79.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.8.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.80.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.81.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.82.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.83.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.84.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.85.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.86.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.87.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.88.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.89.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.9.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.90.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.91.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.92.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.93.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.94.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.95.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.96.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.97.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.98.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.99.bin
Binary file not shown.
Binary file added testFiles/random11.kreeq/.map.hc.bin
Binary file not shown.

0 comments on commit 18f4273

Please sign in to comment.