-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_sequences_blastTable.pl
59 lines (59 loc) · 2.03 KB
/
extract_sequences_blastTable.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/perl -w
use strict;
use warnings;
# *************************************************
# * Copyright 2013 Shakti Kumar
# *
# * extract_sequences_blastTable.pl is a part of highly diverse and essential protease extraction.
# * It is free software. Users can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * (at your option) any later version.
# *
# * It is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License.
# * If not, see <http://www.gnu.org/licenses/>.
# ***************************************************
# Purpose: copy from the protease FASTA file every record whose header line
# contains an ID taken from the first column of the filtered BLAST table, and
# write those records (header plus sequence lines) to the output file.
#
# Inputs  : $blast_table_file - whitespace-separated table, ID in column 1
#           $fasta_file       - FASTA records, each starting with a '>' line
# Output  : $output_file      - the selected FASTA records
# Failure : dies with the file name and system error if a file cannot be
#           opened, written or closed.
my $blast_table_file = "those_sequences_whose_Eval_lessthaneq_04_and_seqID_lessthaneq_35";
my $fasta_file       = "helminth_protozoan_protease_sequences.fasta";
my $output_file      = "Highly_Diverse_and_Essential_sequences";

# Read the FASTA file once and group its lines into records. Each record keeps
# the header separately (for matching) and the full text (for printing).
open(my $fasta_fh, '<', $fasta_file)
    or die "Cannot open '$fasta_file' for reading: $!";
my @records;
while (my $fasta_line = <$fasta_fh>) {
    # A final line without a newline would otherwise be glued to the next
    # record written to the output file.
    $fasta_line .= "\n" unless $fasta_line =~ /\n\z/;

    if ($fasta_line =~ /^>/) {
        push @records, { header => $fasta_line, text => $fasta_line };
    }
    elsif (@records) {
        $records[-1]{text} .= $fasta_line;
    }
    # Lines that precede the first header belong to no record and are ignored.
}
close($fasta_fh) or die "Cannot close '$fasta_file': $!";

# Open the table before the output so that a missing input does not truncate
# an existing output file.
open(my $table_fh, '<', $blast_table_file)
    or die "Cannot open '$blast_table_file' for reading: $!";
open(my $out_fh, '>', $output_file)
    or die "Cannot open '$output_file' for writing: $!";

while (my $table_line = <$table_fh>) {
    # split ' ' skips leading whitespace and splits on runs of whitespace, so
    # the first element is the ID column.
    my ($query_id) = split(' ', $table_line);

    # Blank lines carry no ID; an empty pattern would match every header.
    next unless defined $query_id;

    for my $record (@records) {
        # index() compares literally, so IDs containing regex metacharacters
        # such as '|' or '.' cannot match unrelated headers. It also keeps no
        # per-string match position, so repeated IDs behave the same each time.
        next if index($record->{header}, $query_id) < 0;

        # A record is written once for every table row that matches it.
        print {$out_fh} $record->{text}
            or die "Cannot write to '$output_file': $!";
    }
}

close($table_fh) or die "Cannot close '$blast_table_file': $!";
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot close '$output_file': $!";