# genmark_to_gff.pl
# Convert the CDS table of a GeneMark report into a GFF file.
#
# Usage: perl genmark_to_gff.pl [INPUT [OUTPUT [SEQID]]]
#   INPUT   report to read          (default: genmark.out)
#   OUTPUT  GFF file to write       (default: genmark.gff)
#   SEQID   value for GFF column 1  (default: 1_5404)
#
# Every data row produces two tab-separated records:
#   * a "CDS" feature spanning the row's two coordinates, scored with
#     whitespace-separated field 6 and tagged GM_CDS_<n>[.<alt>];
#   * an "ATG" feature covering the first coordinate and the two bases after
#     it, scored with field 7.
# Consecutive rows that share the same second coordinate are treated as
# alternates of the same CDS: they keep the CDS number and get a ".<alt>"
# suffix that restarts at 1 for each CDS.
#
# Reading stops at the first line that begins with "List".
# Dies if either file cannot be opened or the output cannot be closed.
use strict;
use warnings;

my ($input_path, $output_path, $seq_id) = @ARGV;
$input_path  = 'genmark.out' unless defined $input_path;
$output_path = 'genmark.gff' unless defined $output_path;
$seq_id      = '1_5404'      unless defined $seq_id;

# Per GeneMark's report layout, the CDS rows always begin at line 22.
my $FIRST_DATA_LINE = 22;

# The frame/phase column is not derived from the report; always emitted as '.'.
my $frame = '.';

open(my $in, '<', $input_path)
    or die "Cannot open $input_path for reading: $!\n";
open(my $out, '>', $output_path)
    or die "Cannot open $output_path for writing: $!\n";

my $cds_count = 0;    # number of distinct CDS seen so far
my $alt_count = 0;    # alternates seen for the current CDS
my $prev_right_end;   # second coordinate of the previous data row, if any

while (my $line = <$in>) {
    my $line_no = $.;
    next if $line_no < $FIRST_DATA_LINE;

    # The next section header ("List ...") marks the end of the CDS table.
    # Checked before any parsing so it cannot disturb the counters.
    last if substr($line, 0, 4) eq 'List';

    # Split on whitespace. Field indices are kept exactly as in the original
    # script: index 0 is unused (it is the empty leading field when the row
    # starts with whitespace -- presumably data rows are indented; confirm
    # against a real report).
    my @fields = split(/\s+/, $line);
    my ($left_end, $right_end, $strand_word) = @fields[1, 2, 3];

    # Blank or too-short lines carry no feature and must not use up a CDS
    # number.
    next unless defined $left_end && $left_end ne '';

    # Coordinates are used arithmetically below, so anything that is not a
    # plain integer is reported and skipped instead of being written as a
    # bogus feature.
    unless ($left_end =~ /\A\d+\z/
        && defined $right_end
        && $right_end =~ /\A\d+\z/)
    {
        warn "Skipping line $line_no of $input_path: non-numeric coordinates\n";
        next;
    }

    # NOTE(review): grouping compares the second coordinate only, for both
    # strands -- confirm this is also the shared end for 'complement' rows.
    my $note;
    if (defined $prev_right_end && $prev_right_end == $right_end) {
        $alt_count++;
        $note = "GM_CDS_$cds_count.$alt_count";
    }
    else {
        $cds_count++;
        $alt_count = 0;    # alternate numbering restarts with every new CDS
        $note = "GM_CDS_$cds_count";
    }

    # 'complement' is 10 characters long; compare exactly that prefix.
    my $strand =
        (defined $strand_word && substr($strand_word, 0, 10) eq 'complement')
        ? '-'
        : '+';

    # Missing score columns are written as '.', the same placeholder this
    # script already uses for the frame column.
    my $score       = defined $fields[6] ? $fields[6] : '.';
    my $start_score = defined $fields[7] ? $fields[7] : '.';

    # NOTE(review): the ATG feature is always anchored on the first
    # coordinate, as in the original; for '-' rows the start codon may sit at
    # the other end -- confirm against the report format before changing.
    my $atg_end = $left_end + 2;

    print {$out} join("\t",
        $seq_id, 'GenMark', 'CDS', $left_end, $right_end,
        $score, $strand, $frame, $note), "\n";
    print {$out} join("\t",
        $seq_id, 'GenMark', 'ATG', $left_end, $atg_end,
        $start_score, $strand, $frame), "\n";

    $prev_right_end = $right_end;
}

close($in);
# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot close $output_path: $!\n";