-
Notifications
You must be signed in to change notification settings - Fork 0
/
concat-afas.pl
executable file
·102 lines (87 loc) · 2.02 KB
/
concat-afas.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#! /usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Carp::Always;
# use FindBin;
# use lib "$FindBin::Bin";
# use Xyzzy;
use constant { TRUE => 1, FALSE => 0 };
# ------------------------------------------------------------------------
# Process the command line
# ------------------------------------------------------------------------
use File::Basename;
use Getopt::Std;
our $opt_h;
sub usage {
my $progname = basename($0);
print STDERR "Usage: $progname [options] a.afa b.afa ...\n";
print STDERR "-h - print help\n";
exit(@_);
}
my $stat = getopts('h');
if (!$stat) {
usage(1);
}
if ($opt_h) {
usage();
}
if (scalar(@ARGV) == 0) {
usage(1);
}
# ------------------------------------------------------------------------
my %strains;
my @sequences;
sub emit {
my ($seqs,$seq,$defline) = @_;
if (!defined($defline)) {
return;
}
($defline =~ /^>([^ ]*)/) || die "defline=\"$defline\",";
my $strain = $1;
$strains{$strain} = TRUE;
(!defined($seqs->{$strain})) || die "more than one sequence: $strain,";
$seqs->{$strain} = $seq;
}
foreach my $afa_name ( @ARGV ) {
my $defline = undef;
my $seq = "";
my $seqs = {};
open(my $afa_fh,"<", $afa_name) || die "cannot open <<$afa_name>>,";
while (<$afa_fh>) {
chomp;
if ( /^>/ ) {
emit($seqs,$seq,$defline);
$defline = $_;
$seq = "";
} else {
s/ //g;
$seq .= $_;
}
}
emit($seqs,$seq,$defline);
close $afa_fh;
push @sequences, $seqs;
}
# ------------------------------------------------------------------------
use constant { WIDTH => 70 };
strain:
foreach my $strain (sort (keys %strains)) {
my $seq = "";
foreach my $seqs ( @sequences ) {
my $subseq = $seqs->{$strain};
if (!defined($subseq)) {
print STDERR "# Dropping $strain\n";
next strain;
}
$seq .= $subseq;
}
print ">$strain\n";
while (length($seq) > WIDTH) {
my $s = substr($seq,0,WIDTH);
$seq = substr($seq,WIDTH);
print "$s\n";
}
if (length($seq) > 0) {
print "$seq\n";
}
}