Skip to content

Commit

Permalink
added option to get output as matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
cjain7 committed Jul 24, 2018
1 parent ae39308 commit 94a0d7a
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/cgi/core_genome_identity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ int main(int argc, char** argv)

//Initialize the files to delete the existing content
{
std::ofstream outstrm1(fileName);
#ifdef DEBUG
std::ofstream outstrm2(fileName + ".map.1way");
std::ofstream outstrm3(fileName + ".map.2way");
Expand Down Expand Up @@ -88,4 +87,7 @@ int main(int argc, char** argv)
}

cgi::outputCGI (parameters, finalResults, fileName);

if (parameters.matrixOutput)
cgi::outputPhylip (parameters, finalResults, fileName);
}
92 changes: 91 additions & 1 deletion src/cgi/include/computeCoreIdentity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ namespace cgi
//sort result by identity
std::sort(CGI_ResultsVector.rbegin(), CGI_ResultsVector.rend());

std::ofstream outstrm(fileName, std::ios::app);
std::ofstream outstrm(fileName);

//Report results
for(auto &e : CGI_ResultsVector)
Expand All @@ -307,6 +307,96 @@ namespace cgi

outstrm.close();
}

/**
* @brief output FastANI results as lower triangular matrix
* @param[in] parameters algorithm parameters
* @param[in] CGI_ResultsVector results
* @param[in] fileName file name where results will be reported
*/
void outputPhylip(skch::Parameters &parameters,
std::vector<cgi::CGI_Results> &CGI_ResultsVector,
std::string &fileName)
{
std::unordered_map <std::string, int> genome2Int; // name of genome -> integer
std::unordered_map <int, std::string> genome2Int_rev; // integer -> name of genome

//Assign unique index to the set of query and reference genomes
for(auto &e : parameters.querySequences)
{
auto id = genome2Int.size();
if( genome2Int.find(e) == genome2Int.end() )
{
genome2Int [e] = id;
genome2Int_rev [id] = e;
}
}

for(auto &e : parameters.refSequences)
{
auto id = genome2Int.size();
if( genome2Int.find(e) == genome2Int.end() )
{
genome2Int [e] = id;
genome2Int_rev [id] = e;
}
}

int totalGenomes = genome2Int.size();

//create a square 2-d matrix
std::vector< std::vector<float> > fastANI_matrix (totalGenomes, std::vector<float> (totalGenomes, 0.0));

//transform FastANI results into 3-tuples
for(auto &e : CGI_ResultsVector)
{
if(e.countSeq >= parameters.minFragments)
{
int qGenome = genome2Int [ parameters.querySequences[e.qryGenomeId] ];
int rGenome = genome2Int [ parameters.refSequences[e.refGenomeId] ];

if (qGenome != rGenome) //ignore if both genomes are same
{
if (qGenome > rGenome)
{
if (fastANI_matrix[qGenome][rGenome] > 0)
fastANI_matrix[qGenome][rGenome] = (fastANI_matrix[qGenome][rGenome] + e.identity)/2;
else
fastANI_matrix[qGenome][rGenome] = e.identity;
}
else
{
if (fastANI_matrix[rGenome][qGenome] > 0)
fastANI_matrix[rGenome][qGenome] = (fastANI_matrix[rGenome][qGenome] + e.identity)/2;
else
fastANI_matrix[rGenome][qGenome] = e.identity;
}
}
}
}

std::ofstream outstrm(fileName + ".matrix");

outstrm << totalGenomes << "\n";

//Report matrix
for (int i = 0; i < totalGenomes; i++)
{
//output genome name
outstrm << genome2Int_rev[i];

for (int j = 0; j < i; j++)
{
//output ani values
//average if computed twice
std::string val = fastANI_matrix[i][j] > 0.0 ? std::to_string (fastANI_matrix[i][j]) : "NA";
outstrm << "\t" << val;
}
outstrm << "\n";
}

outstrm.close();
}
}

#endif
1 change: 1 addition & 0 deletions src/map/include/map_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ namespace skch
std::string outFileName; //output file name
bool reportAll; //Report all alignments if this is true
bool visualize; //Visualize the conserved regions of two genomes
bool matrixOutput; //report fastani results as lower triangular matrix
};
}

Expand Down
10 changes: 9 additions & 1 deletion src/map/include/parseCmdArgs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Example usage: \n\
cmd.defineOption("queryList", "a file containing list of query genome files, one genome per line", ArgvParser::OptionRequiresValue);
cmd.defineOptionAlternative("queryList","ql");

cmd.defineOption("kmer", "kmer size <= 16 [default 16]", ArgvParser::OptionRequiresValue);
cmd.defineOption("kmer", "kmer size <= 16 [default : 16]", ArgvParser::OptionRequiresValue);
cmd.defineOptionAlternative("kmer","k");

cmd.defineOption("fragLen", "fragment length [default : 3,000]", ArgvParser::OptionRequiresValue);
Expand All @@ -58,6 +58,8 @@ Example usage: \n\

cmd.defineOption("visualize", "output mappings for visualization, can be enabled for single genome to single genome comparison only [disabled by default]");

cmd.defineOption("matrix", "also output ANI values as lower triangular matrix (format inspired from phylip). If enabled, you should expect an output file with .matrix extension [disabled by default]");

cmd.defineOption("output", "output file name", ArgvParser::OptionRequired | ArgvParser::OptionRequiresValue);
cmd.defineOptionAlternative("output","o");
}
Expand Down Expand Up @@ -238,6 +240,12 @@ Example usage: \n\
else
parameters.visualize = false;


if(cmd.foundOption("matrix"))
parameters.matrixOutput = true;
else
parameters.matrixOutput = false;

//Parse algorithm parameters
if(cmd.foundOption("kmer"))
{
Expand Down

0 comments on commit 94a0d7a

Please sign in to comment.