-
Notifications
You must be signed in to change notification settings - Fork 7
/
gff2gtf.pl
executable file
·98 lines (79 loc) · 1.99 KB
/
gff2gtf.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use File::Basename;
use Bio::FeatureIO;
use FileHandle;
use POSIX 'isatty';
my $infile=$ARGV[0];
my $fh;
if ($infile) {
die "Wrong input file ($infile)" unless (-e $infile);
$fh = FileHandle->new;
$fh->open("<$infile");
} else {
unless (isatty(*STDIN)) {
$fh = \*STDIN;
} else {
die "Missing input file or STDIN data";
}
}
my %parent;
my @data;
my %GeneID;
while(<$fh>) {
if ($_=~/^#/) {
next;
} else {
chomp;
my (@F) = split(/\t/, $_);
if ($F[2] =~ /exon|CDS/i) {
push(@data, \@F);
} else {
my ($ID)=$F[8]=~/ID=([^;]+)\b/;
die "Missing ID ($_)" unless ($ID);
if ($F[2] ne 'gene') {
my ($Parent)=$F[8]=~/Parent=([^;]+)\b/;
die "Missing Parent ($_)" unless ($Parent);
$parent{ $ID } = $Parent;
} else {
my ($GID)=$F[8]=~/GeneID:([^;,]+)\b/;
unless ($GID) {
my ($gene)=$F[8]=~/gene=([^;]+)\b/;
if ($gene) {
($GID)=$gene=~/LOC(\d+)/;
}
}
if ($GID) {
$GeneID{ $ID } = $GID;
}
}
}
}
}
$fh->close();
foreach my $ar_data (@data) {
my @F = @{ $ar_data };
my ($ID)=$F[8]=~/ID=([^;]+)\b/;
die "Missing ID ($_)" unless ($ID);
my ($Parent)=$F[8]=~/Parent=([^;]+)\b/;
die "Missing Parent ($_)" unless ($Parent);
my ($gene)=$F[8]=~/gene=([^;]+)\b/;
my (@transcript_ids, $gene_id);
@transcript_ids=split(/,/,$Parent);
foreach my $transcript_id (@transcript_ids) {
unless (exists $parent{ $transcript_id }) {
die "Not found Parent (TranscriptID) for $transcript_id"
} else {
if ($F[2] =~/RNA/) {
unless (exists $GeneID{ $parent{ $transcript_id } }) {
die "Not found Parent (GeneID) for $parent{ $transcript_id }";
}
}
}
$gene_id=((exists $GeneID{ $parent{ $transcript_id } }) ? $GeneID{ $parent{ $transcript_id } } : $parent{ $transcript_id });
$F[8]="gene_id \"$gene_id\"; ".(($gene) ? "gene_name=\"$gene\"; " : "")."transcript_id \"$transcript_id\";";
print join("\t", @F),"\n";
}
}