-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* divide into modules. * refactoring into C++ form * add sequence analyze
- Loading branch information
Showing
7 changed files
with
336 additions
and
140 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#include "SequenceAnalyzer.h" | ||
#include <fstream> | ||
#include <iostream> | ||
|
||
|
||
/**
 * Splits a trace file into one sequence per thread.
 *
 * Every line is handled as follows:
 *   - the text before the first delimiter character (any character of `del`)
 *     is converted with atoi() and used as the thread number;
 *   - the text after that delimiter, cut at the first '(', becomes the body;
 *   - the body is appended to that thread's sequence together with the
 *     1-based line number it was read from.
 * A line without any delimiter is used in full both as the number source and
 * as the body (again cut at the first '(').
 *
 * @param filename path of the file to read.
 * @return a table allocated with `new` (the caller is responsible for
 *         deleting it). It is empty when `filename` is null or the file
 *         cannot be opened.
 */
sequence_table* SequenceAnalyzer::divide_by_thread(const char* filename)
{
    sequence_table* table = new sequence_table;
    if (filename == 0)
        return table;

    std::ifstream in(filename);
    if (!in.is_open())
        return table;   // previously this case spun forever in while(!in.eof())

    std::string line;
    int num_line = 0;
    // Looping on the read itself stops cleanly on EOF or on any stream error,
    // has no line-length limit, and does not fabricate an extra empty entry
    // after the final newline.
    while (std::getline(in, line))
    {
        ++num_line;

        const std::string::size_type split = line.find_first_of(del);
        const int num_thread = atoi(line.substr(0, split).c_str());

        // No delimiter found: keep the whole line as the remainder.
        std::string rest = (split == std::string::npos) ? line : line.substr(split + 1);

        // Drop the parameter list and everything after it.
        rest = rest.substr(0, rest.find('('));

        struct contents entry;
        entry.pos = num_line;
        entry.body = rest;
        (*table)[num_thread].push_back(entry);
    }

    return table;
}
/**
 * Dumps each thread's sequence into its own file.
 *
 * For every key in `table` a file named `prefix_filename` + <key> is opened
 * and the `body` of each entry is written to it, one per line.
 *
 * @param table           thread number -> sequence mapping to write out.
 * @param prefix_filename text prepended to the thread number to form the
 *                        output file name.
 */
void SequenceAnalyzer::print_sequence_table_into_file(sequence_table& table, const char* prefix_filename )
{
    std::ofstream out;
    for (auto& per_thread : table)
    {
        std::string path(prefix_filename);
        path += std::to_string(per_thread.first);

        out.open(path.c_str());
        for (const auto& entry : per_thread.second)
        {
            out << entry.body << std::endl;
        }
        out.close();
    }
}
|
||
void SequenceAnalyzer::compare(const sequence& s1, const sequence& s2) | ||
{ | ||
int i=0; | ||
int comp_size = s1.size() < s2.size(); | ||
int size = comp_size ? s1.size(): s2.size(); | ||
|
||
for(int i=0; i< size ; i++) | ||
{ | ||
if( s1[i].body != s2[i].body) | ||
{ | ||
std::cout<<s1[i].pos <<" : <<< "<< s1[i].body<< std::endl; | ||
std::cout<<s2[i].pos <<" : >>> "<< s2[i].body<< std::endl; | ||
return ; | ||
} | ||
} | ||
|
||
if(comp_size < 0 ) | ||
std::cout<< "left sequence is subset of right sequence"<<std::endl; | ||
else if(comp_size >0) | ||
std::cout<<"right sequence is subset of left sequence"<<std::endl; | ||
else | ||
std::cout<<"exactly equal sequence"<<std::endl; | ||
|
||
|
||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#ifndef SEQUENCE_ANALYZER_H_ | ||
#define SEQUENCE_ANALYZER_H_ | ||
|
||
#include <vector> | ||
#include <map> | ||
#include <string> | ||
|
||
// Size, in chars, of the fixed buffers used for line-oriented reads.
enum
{
    MAXBUFFER = 8192
};
|
||
// One entry of a sequence: a piece of text and the line it was read from.
struct contents
{
    int         pos;   // line number where the contents exist
    std::string body;  // the text kept for that line
};
|
||
// Ordered list of entries belonging to a single thread.
using sequence = std::vector<struct contents>;
// Maps a thread number to the sequence collected for that thread.
using sequence_table = std::map<int, sequence>;
/* | ||
* Analyzing format | ||
* thread_number key(some parameters) any other things | ||
* ex) | ||
* 1 fopen("myfile.txt","rt") somebodyhelpme | ||
* | ||
* | ||
** | ||
*/ | ||
class SequenceAnalyzer | ||
{ | ||
private: | ||
std::string del; | ||
|
||
|
||
public: | ||
SequenceAnalyzer() : del(" \t") {} | ||
inline void setdelimeter(const char* delimeter) | ||
{ | ||
del = delimeter; | ||
} | ||
sequence_table* divide_by_thread(const char* filename); | ||
void print_sequence_table_into_file(sequence_table& table, const char* prefix_filename ); | ||
void compare(const sequence& s1, const sequence& s2); | ||
|
||
|
||
|
||
}; | ||
|
||
|
||
|
||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
#include "SetAnalyzer.h"

#include <fstream>
#include <iostream>
#include <map>
#include <vector>
|
||
|
||
int SetAnalyzer::read_with_delimeter(std::set<std::string>& filtered_set, const char* filename, bool append) | ||
{ | ||
std::ifstream in; | ||
char buffer[255]; | ||
|
||
if(!append) | ||
filtered_set.clear(); | ||
in.open(filename); | ||
while(!in.eof()) | ||
{ | ||
std::vector<std::string> temp_storage; | ||
in.getline(buffer, 255); | ||
//delimeter process | ||
std::string str = buffer; | ||
std::size_t prepos=0, pos = str.find_first_of(del.c_str()); | ||
while(pos != std::string::npos) | ||
{ | ||
temp_storage.push_back(str.substr(prepos, pos-prepos)); | ||
prepos = pos+1; | ||
while( ( pos = str.find_first_of(del.c_str(),pos+1)) != std::string::npos ) | ||
if( pos != prepos) | ||
break; | ||
else | ||
prepos = pos+1; | ||
} | ||
temp_storage.push_back(str.substr(prepos, pos-prepos)); | ||
str=""; | ||
for(std::vector<std::string>::iterator it= temp_storage.begin(); it!= temp_storage.end(); it++) | ||
str += *it + " "; | ||
//delimeter process end | ||
|
||
filtered_set.insert(str); | ||
} | ||
in.close(); | ||
return 0; | ||
} | ||
/**
 * Builds the set difference set1 - set2.
 *
 * @param set1 elements to start from.
 * @param set2 elements to leave out.
 * @return a set allocated with `new` holding every element of `set1` that is
 *         not in `set2`; the caller is responsible for deleting it.
 */
std::set<std::string>* SetAnalyzer::get_diffset(const std::set<std::string>& set1, const std::set<std::string>& set2)
{
    std::set<std::string>* remaining = new std::set<std::string>;
    for (const std::string& candidate : set1)
    {
        const bool also_in_set2 = set2.find(candidate) != set2.end();
        if (!also_in_set2)
            remaining->insert(candidate);
    }
    return remaining;
}
|
||
/**
 * Pairs up entries of two difference sets by key and prints the matches.
 * (Still under development.)
 *
 * The key of an entry is the text before its first space and the value is
 * the text after it. For an entry without a space, key and value are both
 * the whole entry. For every entry of `diff2_1` whose key also occurs in
 * `diff1_2` with a non-empty value, the line "<key> <value from diff1_2>"
 * is written to std::cout.
 *
 * @param diff1_2 entries that provide the key -> value table.
 * @param diff2_1 entries whose keys are looked up in that table.
 */
void analyze_with_key(const std::set<std::string>& diff1_2, const std::set<std::string>& diff2_1)// developing....
{
    std::map<std::string, std::string> orghash;
    for (std::set<std::string>::const_iterator it = diff1_2.begin(); it != diff1_2.end(); ++it)
    {
        const std::string::size_type pos = it->find(' ');
        // When there is no space, pos + 1 wraps to 0, so the value is the
        // whole entry (same as the key).
        orghash[it->substr(0, pos)] = it->substr(pos + 1);
    }

    for (std::set<std::string>::const_iterator it = diff2_1.begin(); it != diff2_1.end(); ++it)
    {
        const std::string key = it->substr(0, it->find(' '));

        // A single find() replaces the repeated operator[] lookups, which
        // also inserted an empty value for every unknown key.
        const std::map<std::string, std::string>::const_iterator match = orghash.find(key);
        if (match == orghash.end() || match->second.empty())
            continue;

        std::cout << key << " " << match->second << std::endl;

        // TODO: the field-by-field comparison of the two values is not
        // implemented yet. The earlier draft only walked the fields of the
        // stored value and discarded them, so that no-op loop was removed.
    }
}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#ifndef SETANALYZER_H_ | ||
#define SETANALYZER_H_ | ||
|
||
#include <string> | ||
#include <set> | ||
|
||
|
||
|
||
// Reads delimiter-separated text files into string sets and computes
// differences between such sets.
class SetAnalyzer
{
private:
    // std::map<std::string, std::string> orghash;
    std::string del;  // delimiter characters used to split a line into fields

public:
    // Starts with the tab character as the only delimiter.
    SetAnalyzer() : del("\t")
    {
    }

    // Replaces the set of delimiter characters. A null pointer is ignored,
    // because assigning it to std::string is undefined behaviour.
    inline void setdelimeter(const char* delimeter)
    {
        if (delimeter)
            del = delimeter;
    }

    // Reads `filename` into `filtered_set`; the set is cleared first unless
    // `append` is true.
    int read_with_delimeter(std::set<std::string>& filtered_set, const char* filename, bool append=false);

    // Returns a newly allocated set containing set1 - set2.
    // Declared without the `SetAnalyzer::` prefix: an extra qualification on
    // an in-class member declaration is ill-formed and rejected by GCC/Clang.
    std::set<std::string>* get_diffset(const std::set<std::string>& set1, const std::set<std::string>& set2);
};
|
||
|
||
|
||
#endif | ||
|
Oops, something went wrong.