Skip to content

Commit

Permalink
parse_pangenome_matrix.pl now checks that taxon names in -A/-B lists match those in pangenome_matrix.tab
Browse files Browse the repository at this point in the history
  • Loading branch information
eead-csic-compbio committed Mar 23, 2016
1 parent fe5038f commit fef10c9
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,4 @@
11032016: created pfam_enrich.pl to calculate Pfam-domain enrichment of get_homologues[-est] clusters
11032016: created sub parse_Pfam_freqs in marfil_homology.pm
11032016: manuals updated
23032016: parse_pangenome_matrix.pl now checks that taxon names in -A/-B lists match those in pangenome_matrix.tab
33 changes: 25 additions & 8 deletions parse_pangenome_matrix.pl
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
my (%cluster_names,%pangemat,$col,$cluster_dir);
my (%included_input_filesA,%included_input_filesB);
my ($n_of_clusters,$n_of_includedA,$n_of_includedB) = (0,0,0);
my ($outfile_root,$outpanfileA,$outexpanfileA);
my ($outfile_root,$outpanfileA,$outexpanfileA,$taxon);
my ($shell_input,$shell_output_png,$shell_output_pdf,$shell_circle_png,$shell_circle_pdf,$shell_estimates);
my ($cloudlistfile,$shelllistfile,$softcorelistfile,$corelistfile);
my (@pansetA,@pansetB,@expA,@expB,@shell);
Expand Down Expand Up @@ -208,11 +208,16 @@
while(<INCL>)
{
next if(/^#/ || /^$/);
$included_input_filesB{(split)[0]} = 1;
$taxon = (split)[0];
$included_input_filesB{$taxon} = 1;
if(!$pangemat{$taxon})
{
die "# cannot match $taxon in $INP_matrix (included in $INP_includeB)\n";
}
}
close(INCL);
$n_of_includedB = scalar(keys(%included_input_filesB));
print "# taxa included in group B = $n_of_includedB\n\n";
print "# taxa included in group B = $n_of_includedB\n\n";
}
elsif($needAB)
{
Expand All @@ -221,7 +226,12 @@
while(<INCL>)
{
next if(/^#/ || /^$/);
$included_input_filesA{(split)[0]} = 1;
$taxon = (split)[0];
$included_input_filesA{$taxon} = 1;
if(!$pangemat{$taxon})
{
die "# cannot match $taxon in $INP_matrix (included in $INP_includeA)\n";
}
}
close(INCL);
$n_of_includedA = scalar(keys(%included_input_filesA));
Expand All @@ -232,21 +242,28 @@
while(<INCL>)
{
next if(/^#/ || /^$/);
$included_input_filesB{(split)[0]} = 1;
$taxon = (split)[0];
$included_input_filesB{$taxon} = 1;
if(!$pangemat{$taxon})
{
die "# cannot match $taxon in $INP_matrix (included in $INP_includeB)\n";
}
}
close(INCL);
$n_of_includedB = scalar(keys(%included_input_filesB));
print "# taxa included in group B = $n_of_includedB\n\n";
}



## 3) perform requested operations
if($INP_absentB)
{
print "\n# finding genes which are absent in B ...\n";
foreach $col (1 .. $n_of_clusters)
{
my ($presentA,$absentA,$absentB,$presentB) = (0,0,0,0);
foreach my $taxon (keys(%pangemat))
foreach $taxon (keys(%pangemat))
{
if($pangemat{$taxon}[$col])
{
Expand Down Expand Up @@ -287,7 +304,7 @@
foreach $col (1 .. $n_of_clusters)
{
my ($presentA,$absentA,$absentB,$presentB) = (0,0,0,0);
foreach my $taxon (keys(%pangemat))
foreach $taxon (keys(%pangemat))
{
if($pangemat{$taxon}[$col])
{
Expand Down Expand Up @@ -340,7 +357,7 @@
{
my ($presentA,$presentB,@sizeA,@sizeB) = (0,0);
my ($minA,$maxA,$minB,$maxB);
foreach my $taxon (keys(%pangemat))
foreach $taxon (keys(%pangemat))
{
if($pangemat{$taxon}[$col])
{
Expand Down

0 comments on commit fef10c9

Please sign in to comment.