-
Notifications
You must be signed in to change notification settings - Fork 0
/
taxid2wgs.pl
130 lines (100 loc) · 3.49 KB
/
taxid2wgs.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env perl
# $Id: taxid2wgs.pl,v 1.6 2017/06/23 13:53:09 camacho Exp $
use strict;
use warnings;
use LWP::UserAgent;
use Getopt::Long;
use Pod::Usage;
use autodie;
use constant URL => "https://www.ncbi.nlm.nih.gov/blast/BDB2EZ/taxid2wgs.cgi";
use constant INCLUDE => "INCLUDE_TAXIDS";
use constant EXCLUDE => "EXCLUDE_TAXIDS";
use constant USER_AGENT => "taxid2wgs.pl/1.1";
use constant EXTN => ".nvl";
my @includes = ();
my @excludes = ();
my $url_api_ready = 0;
my ($title, $alias_file);
my $verbose = 0;
my $help_requested = 0;
GetOptions("excludes=i" => \@excludes,
"title=s" => \$title,
"alias_file=s" => \$alias_file,
"verbose|v+" => \$verbose,
"url_api_ready" => \$url_api_ready,
"help|?" => \$help_requested) || pod2usage(2);
pod2usage(-verbose=>2) if ($help_requested);
pod2usage(-msg => "Must provide taxids", -verbose=>2) unless (@ARGV);
pod2usage(-msg => "Alias file cannot be empty", -verbose=>2)
if (defined($alias_file) and (length($alias_file) == 0));
pod2usage(-msg => "Title must be provided with alias_file", -verbose=>2)
if (defined($title) and not defined($alias_file));
pod2usage(-msg => "Title cannot be empty", -verbose=>2)
if (defined($title) and (length($title) == 0));
my @projects = &get_wgs_projects();
die "Failed to get taxids\n" unless (@projects);
if (defined $alias_file) {
map { s,WGS_VDB://,, } @projects;
&print_alias($alias_file, $title, \@projects);
} else {
map { s,WGS_VDB://,, } @projects unless ($url_api_ready);
print join("\n", @projects);
}
sub get_wgs_projects
{
my $url = URL . "?" . INCLUDE . "=" . join(",", @ARGV);
$url .= "&" . EXCLUDE . "=" . join(",", @excludes) if scalar (@excludes);
my $ua = LWP::UserAgent->new;
$ua->agent(USER_AGENT);
$ua->show_progress(1) if ($verbose >= 3);
if ($verbose >= 2) {
$ua->add_handler("request_send", sub { shift->dump; return; });
$ua->add_handler("response_done", sub { shift->dump; return; });
}
my $req = HTTP::Request->new(POST => $url);
my $res = $ua->request($req);
print $res->decoded_content if $verbose;
my @retval;
if ($res->is_success) {
@retval = split(/\s+/, $res->content);
} else {
print STDERR $res->status_line, "\n";
}
return @retval;
}
sub print_alias
{
use Time::localtime;
my $basename = shift;
my $title = shift;
my $projects = shift;
open(my $out, ">", $basename . EXTN);
print $out "#\n# Alias file created by " . USER_AGENT;
print $out " on " . ctime(time) . "\n";
print $out "TITLE $title\n" if (defined $title and length $title);
print $out "VDBLIST " . join(" ", @$projects) . "\n";
close ($out);
}
__END__
=head1 NAME
B<taxid2wgs.pl> - Retrieve WGS projects for given NCBI taxonomy IDs
=head1 SYNOPSIS
taxid2wgs.pl [options] <taxid1> [ <taxid2> ... <taxidN> ]
=head1 ARGUMENTS
=over
=item B<-exclude>
Accepts taxids to exclude from the output (default: None)
=item B<-alias_file>
File base name (no extension) to save results into an alias file.
=item B<-title>
Title to include in the generated alias file. Required if alias_file is provided.
=item B<-url_api_ready>
Produce output that can be used in the NCBI URL API (default: false)
=item B<-verbose>, B<-v>
Produce verbose output, can be specified multiple times for increased verbosity (default: false)
=item B<-help>, B<-?>
Displays this man page.
=back
=head1 AUTHOR
Christiam Camacho ([email protected])
=cut