-
Notifications
You must be signed in to change notification settings - Fork 5
/
blast_matches_n_inx_to_dagchainer.pl
89 lines (65 loc) · 2.13 KB
/
blast_matches_n_inx_to_dagchainer.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env perl
use strict;
use warnings;
use FindBin;
use lib ("$FindBin::Bin/../PerlLib");
use Gene_obj;
use Gene_obj_indexer;
use Carp;
use Getopt::Long qw(:config no_ignore_case bundling);
my $usage = <<_EOH_;
################################################################
#
# --match_file btab or m2fmt (wu-blast mformat=2 file)
# --format btab | m2fmt
# --query_inx query inx filel (see index_gff3_file.pl)
# --search_inx search database inx file
#
################################################################
_EOH_
;
my ($match_file, $query_inx, $search_inx, $format);
&GetOptions ( "match_file=s" => \$match_file,
"format=s" => \$format,
"query_inx=s" => \$query_inx,
"search_inx=s" => \$search_inx,
);
unless ($match_file && $query_inx && $search_inx) {
die $usage;
}
unless ($format && $format =~ /^(btab|m2fmt)$/) {
die $usage;
}
main: {
my $query_gene_indexer = new Gene_obj_indexer( { "use" => $query_inx } );
my $search_gene_indexer = new Gene_obj_indexer( { "use" => $search_inx } );
open (my $fh, $match_file) or die $!;
while (<$fh>) {
chomp;
my @x = split (/\t/);
my ($query_acc, $search_acc, $p_value);
if ($format eq 'btab') {
($query_acc, $search_acc, $p_value) = ($x[0], $x[5], $x[19]);
}
elsif ($format eq 'm2fmt') {
($query_acc, $search_acc, $p_value) = (@x[0..2]);
}
else {
die "Error, don't understand format: $format";
# should never get here anyway due to format check at top
}
eval {
my $query_gene = $query_gene_indexer->get_gene($query_acc);
my $search_gene = $search_gene_indexer->get_gene($search_acc);
my ($query_mol, $query_end5, $query_end3) = ($query_gene->{asmbl_id}, $query_gene->get_coords());
my ($search_mol, $search_end5, $search_end3) = ($search_gene->{asmbl_id}, $search_gene->get_coords());
print "$query_mol\t$query_mol:$query_acc\t$query_end5\t$query_end3\t"
. "$search_mol\t$search_mol:$search_acc\t$search_end5\t$search_end3\t"
. "$p_value\n";
};
if ($@) {
print STDERR $@;
}
}
exit(0);
}