-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathannotate_admire.pl
143 lines (119 loc) · 4.71 KB
/
annotate_admire.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
=head1 LICENSE
Copyright [2017] [Ninad Oak]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut
=head2 USAGE
## Step-by-step Guide:
(1) Download the github repository nroak/ADmiRE/annotate_tsv_using_perl/
(2) cd /annotate_tsv_using_perl
(3) gzip -d ADmiRE.tab.gz
(4) perl annotate_admire.pl [--input INPUT_FILE] [--output OUTPUT_FILE] [--admire_path=PATH] [--chr=NUMBER] [--pos=NUMBER]
# Detailed description of the input options:
--input: INPUT_FILE
--output: OUTPUT_FILE (Default: INPUT_FILE.admire.tab)
--admire_path: Path to the directory containing the ADmiRE.tab database. (Default: the directory containing annotate_admire.pl)
--chr: Column number in the INPUT_FILE with chromosome information. (Default: 1, i.e. the 1st column)
--pos: Column number in the INPUT_FILE with base position information. (Default: 2, i.e. the 2nd column)
=cut
=head1 HEADERS
ADmiRE Headers
1 Chromosome
2 Start
3 Stop
4 MIRNA
5 MIRNA_Domain
6 Family_Name
7 Precursor_Pos
8 Predicted_Motif
9 Mature_Name
10 Mature_Pos
11 High_Confidence
12 Robust_FANTOM5
13 Conserved_ADmiRE
14 AF_Quantile_gnomAD
15 HMDD_KnownDisease_PMID
16 HMDD_ValidatedTargets_Disease_PMID
17 miRTarBase_NumberOfTargets
18 miRTarBase_TargetGene_ValidationType
19 miRTarBase_Reference_PMID
20 TranscriptionFactor_PMID
21 Phylop_100way
22 Phastcons_100way
23 gnomAD_Count
24 gnomAD_MAF
=cut
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long qw(GetOptions);
use Pod::Usage;
use File::Basename;
# ---------------------------------------------------------------------------
# Command-line handling.
#
# Sets the file-scope variables used by the rest of the script:
#   $input        variant file to annotate (required)
#   $output       output file (default: "<input>.admire.tab")
#   $admire_path  directory prefix of ADmiRE.tab (default: this script's dir)
#   $chr, $pos    1-based column numbers of chromosome / base position
# Exits with status 1 when --input is missing and dies on invalid options.
# ---------------------------------------------------------------------------
my $dirname = dirname(__FILE__);

# List separator used when an array is interpolated into a string. Kept as a
# global setting because array interpolation elsewhere in the script expects
# tab-separated output.
$" = "\t";

my $usage = "Usage: perl annotate_admire.pl [--input FILE] [--output FILE] [--admire_path=PATH] [--chr=NUMBER] [--pos=NUMBER]\n";

my $input;
my $output;
my $admire_path = $dirname . "/";
my $chr = "1";
my $pos = "2";
my $help;

GetOptions(
    "input|i=s"       => \$input,          # string, value required
    # A bare "--output" would give an empty file name that cannot be opened,
    # so the value is mandatory.
    "output|o=s"      => \$output,         # string, value required
    "admire_path|p:s" => \$admire_path,    # string, value optional
    # Values are mandatory for the column options: with an optional value a
    # bare "--chr" yields 0, which turns into array index -1 (last column).
    "chr|c=i"         => \$chr,            # numeric, value required
    # No short alias: "c" already belongs to --chr, and reusing it made the
    # meaning of "-c" depend on declaration order.
    "pos=i"           => \$pos,            # numeric, value required
    'help|h'          => \$help,           # flag
) or die($usage);

if ($help) {
    print $usage;
    exit 0;
}

if (!defined $input) {
    print STDERR "Option input requires an argument\n", $usage;
    exit 1;
}

# Column numbers are 1-based; anything below 1 would silently address columns
# from the end of the row once converted to an array index.
foreach my $column_option ([ 'chr', $chr ], [ 'pos', $pos ]) {
    my ($option_name, $column_number) = @{$column_option};
    if ($column_number < 1) {
        die "Option $option_name must be a column number >= 1\n", $usage;
    }
}

$output = $input . ".admire.tab" unless defined $output;
##### READ ADMIRE INTO HASH
# Builds %mirbase, a two-level lookup keyed on genomic position and miRNA:
#   $mirbase{"<column 1>_<column 2>"}{"<column 4>"} = [ every column of the row ]
# i.e. "Chromosome_Start" and MIRNA according to the ADmiRE header listing.
# Rows sharing both keys are appended to the same flat list.

# Accept --admire_path with or without a trailing path separator; an empty
# path still resolves to ADmiRE.tab in the current directory.
my $input_db = $admire_path;
$input_db .= "/" if length($input_db) && $input_db !~ m{[/\\]\z};
$input_db .= "ADmiRE.tab";

my %mirbase;
open(my $db_fh, '<', $input_db)
    or die "No Database: cannot open '$input_db': $!\n";

# Stream the database line by line instead of loading every line into a
# temporary list first.
while (my $line_db = <$db_fh>) {
    $line_db =~ s/[\r\n]//g;
    my @entry_db = split(/\t/, $line_db);

    # Blank or truncated rows carry no usable position/miRNA key.
    next if @entry_db < 4;
    # Skip the column-header row.
    next if $entry_db[0] eq "Chromosome";

    my $hash_key1 = $entry_db[0] . "_" . $entry_db[1];
    my $hash_key2 = $entry_db[3];
    push @{ $mirbase{$hash_key1}{$hash_key2} }, @entry_db;
}
close $db_fh;
## READ VARIANT FILE AND ANNOTATE
# Copies every line of the variant file to the output file and appends 21
# annotation columns (ADmiRE columns 4-24):
#   * header lines get the annotation column names,
#   * data lines found in %mirbase get one output row per miRNA at that
#     position (sorted by miRNA name so the output is reproducible),
#   * data lines without a match get "NA" in every annotation column.
open(my $variant_fh, '<', $input)
    or die "No Variant File: cannot open '$input': $!\n";
open(my $out_fh, '>', $output)
    or die "No Output File: cannot open '$output': $!\n";

my @annotation_header = qw(
    MIRNA MIRNA_Domain Family_Name Precursor_Pos Predicted_Motif
    Mature_Name Mature_Pos High_Confidence Robust_FANTOM5 Conserved_ADmiRE
    AF_Quantile_gnomAD HMDD_KnownDisease_PMID
    HMDD_ValidatedTargets_Disease_PMID miRTarBase_NumberOfTargets
    miRTarBase_TargetGene_ValidationType miRTarBase_Reference_PMID
    TranscriptionFactor_PMID Phylop_100way Phastcons_100way
    gnomAD_Count gnomAD_MAF
);
my @empty = ("NA") x scalar(@annotation_header);

# Convert the 1-based column numbers into 0-based array indices.
$chr -= 1;
$pos -= 1;

ALLMUT: while (my $line_maf = <$variant_fh>) {
    # Strip the line ending for both LF and CRLF files.
    $line_maf =~ s/[\r\n]+\z//;

    # Limit -1 keeps trailing empty columns so rows stay aligned.
    my @entry_maf = split(/\t/, $line_maf, -1);
    my $row = join("\t", @entry_maf);

    # Header lines: comments, "CHROM...", "Hugo_Symbol...", or a "chr..."
    # column title. The look-ahead keeps chromosome values such as chr1,
    # chrX, chrM or chrUn_... from being mistaken for a header.
    if ($line_maf =~ /^(?:\#|CHROM|Hugo_Symbol|chr(?!(?i:[0-9XYM]|Un)))/) {
        print {$out_fh} $row, "\t", join("\t", @annotation_header), "\n";
        next ALLMUT;
    }

    # Rows too short to contain the key columns simply fail the lookup.
    my ($chrom_value, $pos_value) = @entry_maf[ $chr, $pos ];
    $chrom_value = '' unless defined $chrom_value;
    $pos_value   = '' unless defined $pos_value;
    my $db_key = $chrom_value . "_" . $pos_value;

    # "exists" avoids autovivifying an empty entry for every variant that
    # is not in the database.
    my $hits = exists $mirbase{$db_key} ? $mirbase{$db_key} : undef;
    if (!$hits || !%{$hits}) {
        print {$out_fh} $row, "\t", join("\t", @empty), "\n";
        next ALLMUT;
    }

    foreach my $db_key2 (sort keys %{$hits}) {
        # Columns 4-24 of the database row; missing columns print as empty.
        my @annotation = map { defined $_ ? $_ : '' } @{ $hits->{$db_key2} }[ 3 .. 23 ];
        print {$out_fh} $row, "\t", join("\t", @annotation), "\n";
    }
}

close $variant_fh;
# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close($out_fh) or die "Error writing '$output': $!\n";