-
Notifications
You must be signed in to change notification settings - Fork 0
/
raxml_nexusPartConvert.pl
executable file
·108 lines (76 loc) · 2.68 KB
/
raxml_nexusPartConvert.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/perl -w
=head1 NAME
raxml_nexusPartConvert.pl
=head1 SYNOPSIS
perl raxml_nexusPartConvert.pl -m aln.nex -r DNA > raxml_partition_file
Options:
--help Show brief help and exit
--matrixfile Your input alignment in Nexus format
--raxstring DNA or AA model to be used
=head1 DESCRIPTION
Given a Nexus matrix, generate a RAxML partition guide file
=head1 AUTHOR
Apurva Narechania
anarechania *a|t* amnh.org
=head1 COPYRIGHT
Copyright (c) 2008-2009 American Museum of Natural History
This library is free software;
you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
# ----------------------------------------------------
#####TODO:
#####
#####SETUP#####
use strict;
use Getopt::Long;
use Pod::Usage;
my ($help, $matrixfile, $raxstring);
GetOptions(
'h|help' => \$help,
'm|matrixfile=s' => \$matrixfile,
'r|raxstring=s' => \$raxstring,
) or pod2usage;
pod2usage if $help;
#####MAIN#####
# parse some basic stuff out of the Nexus file
my $nexus = parse_nexus ($matrixfile);
# write out the raxml format for partitions
foreach my $cs (sort keys %{$nexus->{'charset'}}){
print "$raxstring, $cs=$nexus->{'charset'}->{$cs}\n";
}
#####SUBS#####
sub parse_nexus{
my $alignfile = shift;
open (NEX, "$alignfile");
my $charset = {};
my $nexus = {};
while (my $line = <NEX>){
chomp $line;
# take only first instances of all of these things
# header only
($nexus->{'nchar'} = $1) if (($line =~m/nchar\s*=\s*(\d+)/i) and (!$nexus->{'nchar'}));
($nexus->{'ntax'} = $1) if (($line =~m/ntax\s*=\s*(\d+)/i) and (!$nexus->{'ntax'}));
($nexus->{'datatype'} = $1) if (($line =~m/datatype\s*=\s*(\w+)/i) and (!$nexus->{'datatype'}));
($nexus->{'missing'} = $1) if (($line =~m/missing\s*=\s*(.{1})/i) and (!$nexus->{'missing'}));
($nexus->{'gap'} = $1) if (($line =~m/gap\s*=\s*(.{1})/i) and (!$nexus->{'gap'}));
if ($line =~m/outgroup/i){
$line =~s/outgroup//ig;
$line =~s/\s+//g;
$line =~s/\;//g;
# any instances of more than one outgroup???? <====FIX
$nexus->{'outgroup'} = $line;
}
if ($line =~m/charset/i){
$line =~s/charset//ig;
$line =~s/\s+//g;
$line =~s/\;//g;
my ($partition, $coords) =
split (/\=/, $line);
$charset->{$partition} = $coords;
}
$nexus->{'charset'} = $charset;
}
close (NEX);
return ($nexus);
}