-
Notifications
You must be signed in to change notification settings - Fork 5
/
batch_fasta_revcom.pl
143 lines (111 loc) · 3.44 KB
/
batch_fasta_revcom.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/perl -w
use Bio::SeqIO; # Get seq objects
use Getopt::Long; # Get command line options
#-----------------------------+
# VARIABLE SCOPE |
#-----------------------------+
# Input and output directories; both stay undefined until the command
# line (-i / -o) has been parsed.
my ($indir, $outdir);
# Boolean switches, every one of them off (0) by default.
# $do_seq_data: create files in outdir with sequence data.
my (
    $quiet,
    $verbose,
    $show_help,
    $show_usage,
    $show_man,
    $show_version,
    $do_copy,
    $do_seq_data,
) = (0) x 8;
#-----------------------------+
# COMMAND LINE OPTIONS |
#-----------------------------+
# Parse the command line into the option variables declared above.
# GetOptions returns false when it meets an unknown or malformed option.
my $ok = GetOptions(# REQUIRED OPTIONS
                    "i|indir=s"   => \$indir,
                    "o|outdir=s"  => \$outdir,
                    # ADDITIONAL OPTIONS
                    # $verbose is tested further down, so it needs a switch
                    "v|verbose"   => \$verbose,
                    "usage"       => \$show_usage,
                    "version"     => \$show_version,
                    "man"         => \$show_man,
                    "h|help"      => \$show_help,);

# Stop on a bad command line instead of silently carrying on, and answer
# an explicit request for help with the same short usage line.
if ( !$ok || $show_help || $show_usage ) {
    my $usage = "Usage: $0 -i indir -o outdir [--verbose]\n";
    die $usage unless $ok;
    print $usage;
    exit;
}
#-----------------------------+
# CHECK FOR SLASH IN DIR |
# VARIABLES |
#-----------------------------+
# Both directories are required. Without this check an omitted option is
# undefined here and would turn into "/" once the slash is appended, making
# the script read from (or write to) the filesystem root.
for my $required ( [ 'input directory (-i)',  $indir  ],
                   [ 'output directory (-o)', $outdir ] ) {
    my ($label, $value) = @{$required};
    die "The $label must be specified\n"
        unless defined $value && length $value;
}
# If a directory does not end in a slash then append one
# TO DO: Allow for backslash
$indir  .= "/" unless $indir  =~ m{/$};
$outdir .= "/" unless $outdir =~ m{/$};
#-----------------------------+
# CREATE THE OUT DIR |
# IF IT DOES NOT EXIST |
#-----------------------------+
# Create the output directory when it is not already a directory.
# The -d test (rather than -e) means a plain file sitting at that path is
# reported by the failing mkdir instead of being mistaken for the directory.
unless (-d $outdir) {
    print "Creating output dir ...\n" if $verbose;
    # Low-precedence "or" so the die applies to mkdir's return value;
    # with "||" it would bind to $outdir and never fire.
    mkdir($outdir)
        or die "Could not create the output directory:\n$outdir\n$!\n";
}
#-----------------------------+
# Get the FASTA files from the|
# directory provided by the |
# var $indir |
#-----------------------------+
# Collect the candidate FASTA files (.txt, .fasta or .fa) from $indir.
# A lexical handle replaces the global bareword DIR, and $! is reported
# so the reason for a failed open is visible.
opendir( my $dir_handle, $indir )
    || die "Can't open directory:\n$indir\n$!\n";
# Keep plain files only, so a subdirectory whose name happens to end in one
# of the suffixes is not treated as sequence data. readdir order is
# unspecified, so sort to make the processing order reproducible.
my @fasta_files = sort { $a cmp $b }
                  grep { /\.(?:txt|fasta|fa)$/ && -f "$indir$_" }
                  readdir $dir_handle;
closedir( $dir_handle );
# Number of files to process; also used for the progress messages.
my $num_fasta_files = @fasta_files;
# Nothing to do: ring the terminal bell, explain which extensions are
# accepted by the file filter, and stop.
if ($num_fasta_files == 0) {
    print "\a";
    print "No fasta files were found in the input directory:\n";
    print "$indir\n";
    print "Files MUST have the txt, fasta or fa extension to be".
        " recognized as fasta files\n";
    exit;
}
# Reverse complement every sequence of every input file and write the
# result to a file of the same name in the output directory.

# Opening the output stream truncates its target, so writing into the
# directory being read would empty each input before it is parsed.
# NOTE: this is a plain string comparison; two different spellings of the
# same directory are not detected.
die "The input and output directories are the same:\n$indir\n"
    if $indir eq $outdir;

my $fasta_file_num = 0;
for my $ind_fasta_file (@fasta_files) {

    $fasta_file_num++;
    print STDERR "Processing $fasta_file_num of $num_fasta_files\n";

    my $in_fasta_path  = $indir.$ind_fasta_file;
    my $out_fasta_path = $outdir.$ind_fasta_file;

    print STDERR "IN: $in_fasta_path\n" if $verbose;
    print STDERR "OUT: $out_fasta_path\n" if $verbose;

    # State the format explicitly rather than letting Bio::SeqIO guess it
    # from the file name; the accepted .txt suffix says nothing about the
    # format.
    my $seq_in  = Bio::SeqIO->new('-file'   => "<$in_fasta_path",
                                  '-format' => 'fasta');
    my $seq_out = Bio::SeqIO->new('-file'   => ">$out_fasta_path",
                                  '-format' => 'fasta');

    #-----------------------------+
    # PROCESS AND FLIP            |
    #-----------------------------+
    while (my $seq = $seq_in->next_seq) {
        my $revcom = $seq->revcom();
        # Tag the identifier so flipped records can be told apart
        $revcom->display_id( $seq->display_id() . "_rev" );
        $seq_out->write_seq($revcom);
    }

}

exit;