-
Notifications
You must be signed in to change notification settings - Fork 24
/
agp2fasta.pl
executable file
·49 lines (37 loc) · 1.02 KB
/
agp2fasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/perl -w
#
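# Usage: agp2fasta.pl AGPFILE FASTA
# Builds each object listed in AGPFILE from the sequences in FASTA and
# writes the result to supercontigs.fa.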
use strict;
use Bio::DB::Fasta;
use Bio::Seq;
use Bio::SeqIO;
# Main flow: read the AGP file line by line, assemble every object from its
# component and gap lines, and write each finished object to supercontigs.fa.
#
# AGP columns used (0-based, after splitting the line on whitespace):
#   0 object id      1 object_beg (1-based)   2 object_end (inclusive)
#   4 component_type 5 component_id, or gap length on gap lines
#   6 component_beg  7 component_end          8 orientation
#
# Dies on missing arguments, an unreadable AGP file, a malformed line, or a
# component that cannot be fetched from the FASTA index.
@ARGV >= 2 or die "Usage: $0 AGPFILE FASTA\n";
my $agp_file   = shift();
my $fasta_file = shift();

open(my $agp_fh, '<', $agp_file)
    or die "Cannot open AGP file '$agp_file': $!\n";
my $db = Bio::DB::Fasta->new($fasta_file);
my $seq_out = Bio::SeqIO->new('-file' => ">supercontigs.fa",'-format' => 'fasta');

# $lastid is the object being assembled; $last_seq is its sequence buffer.
# The buffer is padded with spaces, which print_seq strips before writing.
my ($lastid, $last_seq) = (undef, '');

while (my $line = <$agp_fh>) {
    chomp $line;

    # AGP files may carry '#' header/comment lines; those and blank lines
    # are not records.
    next if $line =~ /^\s*(?:#|$)/;

    # split ' ' ignores leading whitespace and a trailing carriage return.
    my @F = split ' ', $line;

    @F >= 6
        && $F[1] =~ /^\d+$/ && $F[2] =~ /^\d+$/
        && $F[1] >= 1       && $F[2] >= $F[1]
        or die "Malformed AGP line $. in '$agp_file'\n";

    # Flush the previous object when the object id changes. Use defined()
    # rather than truth so that an object literally named "0" still works.
    if (!defined $lastid) {
        $lastid = $F[0];
    }
    elsif ($F[0] ne $lastid) {
        print_seq($lastid, $last_seq);
        $lastid   = $F[0];
        $last_seq = '';
    }

    my ($obj_beg, $obj_end) = @F[1, 2];
    my $span = $obj_end - $obj_beg + 1;

    # Grow the buffer only as far as this line needs.
    my $missing = $obj_end - length $last_seq;
    $last_seq .= ' ' x $missing if $missing > 0;

    my $piece;
    if ($F[4] =~ /^[NU]$/) {
        # Gap line: both N (known size) and U (unknown size) gaps carry the
        # gap length in column 5.
        $F[5] =~ /^\d+$/
            or die "Bad gap length '$F[5]' on AGP line $. in '$agp_file'\n";
        $piece = 'N' x $F[5];
    }
    else {
        @F >= 9
            or die "Component line $. in '$agp_file' has fewer than 9 columns\n";

        # Only '-' means reverse strand; '+', '?', '0' and 'na' are read as
        # forward. Passing start > stop to Bio::DB::Fasta::seq requests the
        # reverse complement.
        my ($start, $stop) = $F[8] eq '-' ? ($F[7], $F[6]) : ($F[6], $F[7]);
        $piece = $db->seq($F[5], $start, $stop);
        defined $piece
            or die "Component '$F[5]' ($F[6]-$F[7]) not found in '$fasta_file'"
                 . " (AGP line $.)\n";
    }

    # object_beg is 1-based, substr offsets are 0-based; replace exactly the
    # span this line covers in the object.
    substr($last_seq, $obj_beg - 1, $span, $piece);
}
close($agp_fh);

# Write the final object, unless the AGP file held no records at all.
print_seq($lastid, $last_seq) if defined $lastid;
# Write one assembled object as a FASTA record.
#   first argument  - identifier, used as the record's display id
#   second argument - sequence text; every whitespace character is removed
#                     before the record is written
# Output goes through the file-level $seq_out writer.
sub print_seq {
    my $name     = shift;
    my $residues = shift;

    # Drop the padding (and any other whitespace) left in the buffer.
    $residues =~ s/\s//g;

    $seq_out->write_seq(
        Bio::Seq->new( -display_id => "$name", -seq => $residues )
    );
}