-
Notifications
You must be signed in to change notification settings - Fork 4
/
Dimob.pl
executable file
·156 lines (119 loc) · 4.17 KB
/
Dimob.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env perl
=head1 NAME
IslandPath-DIMOB
=head1 DESCRIPTION
Script to run IslandPath-DIMOB genomic island prediction on a given genome
=head1 SYNOPSIS
./Dimob.pl <genome.gbk> <outputfile.txt>
Example:
./Dimob.pl example/NC_003210.gbk NC_003210_GIs.txt
=head1 AUTHORS
Claire Bertelli
Email: [email protected]
Brinkman Laboratory
Simon Fraser University
=head1 LAST MAINTAINED
December 16th, 2016
=cut
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
use Data::Dumper;
use File::Copy;
use File::Basename;
use File::Spec;
use File::Path;
use Log::Log4perl qw(get_logger :nowarn);
use File::Temp qw/ :mktemp /;
use Cwd;
# use local Dimob libraries
use FindBin qw($RealBin);
use lib "$RealBin/lib";
use GenomeUtils;
use Dimob;
MAIN: {
# config files
my $cwd = getcwd;
my $cfname = "$RealBin/Dimob.config";
my $logger;
#my $logger_conf = "$RealBin/logger.conf";
# usage help
my $usage = "Usage:\n./Dimob.pl <genome.gbk> <outputfile.txt>\nExample:\n./Dimob.pl example/NC_003210.gbk NC_003210_GIs.txt\n";
my ($inputfile, $outputfile) = @ARGV;
# Check that input file and output file are specified or die and print help message
unless(defined($inputfile) && defined($outputfile)){
print $usage;
exit;
}
# Transform relative path to absolute path and
# check that input file is readable
$inputfile = File::Spec -> rel2abs($inputfile);
unless( -f $inputfile && -r $inputfile ) {
print "Error: $inputfile is not readable";
exit;
}
# Create a dimob object
my $dimob_obj = Dimob->new(
{cfg_file => $cfname,
bindir => $RealBin,
workdir => $cwd,
MIN_GI_SIZE => 2000,
extended_ids => 1
}
);
# Recover the config from file, initialized during creation dimob_obj
my $cfg = Dimob::Config->config;
$cfg->{logger_conf} = "$RealBin/" . $cfg->{logger_conf};
$cfg->{hmmer_db} = "$RealBin/" . $cfg->{hmmer_db};
# Check that the logger exists and initializes it
print $cfg->{logger_conf};
if($cfg->{logger_conf} && ( -r $cfg->{logger_conf})) {
Log::Log4perl::init($cfg->{logger_conf});
$logger = Log::Log4perl->get_logger;
$logger->debug("Logging initialized");
}
# Create a tmp directory to store intermediate results, copy the input file to the tmp
$logger->info("Creating temp directory with needed files");
my($filename, $dirs, $suffix) = fileparse($inputfile, qr/\.[^.]+$/);
my $tmp_path = mkdtemp($dirs . "dimob_tmpXXXXXXXXXX");
if (! -d $tmp_path)
{
my $dirs = eval { mkpath($tmp_path) };
die "Failed to create $tmp_path: $@\n" unless $dirs;
}
copy($inputfile,$tmp_path) or die "Failed to copy $inputfile: $!\n";
$inputfile = File::Spec->catfile($tmp_path,$filename);
# update workdir in genome_obj with the temporary directory
$dimob_obj -> {workdir} = $tmp_path;
######
# From an embl or genbank file regenerate a ptt, ffn, and faa file needed by dimob.pm
# create a genome object from package GenomeUtils
my $genome_obj = GenomeUtils->new();
$logger->info("This is the $inputfile");
# check the gbk/embl file format
$genome_obj->read_and_check($genome_obj, $inputfile . $suffix);
# read the gbk/embl file and convert it to all files needed
my $genome_name = $genome_obj->{'base_filename'};
$genome_obj->read_and_convert($inputfile . $suffix, $genome_name);
######
# Runs IslandPath-DIMOB on the genome files
$logger->info("Running IslandPath-DIMOB");
my @islands = $dimob_obj->run_dimob($inputfile);
$logger->info("Printing results");
my $i = 1;
open my $fhgd, '>', $outputfile or die "Cannot open output.txt: $!";
foreach my $island (@islands) {
my $start = $island->[0];
my $end = $island->[1];
print $fhgd "GI_$i\t$start\t$end\n";
$i++;
}
close $fhgd;
$logger->info("Removing tmp files");
unless(unlink glob "$inputfile.*") {
$logger->error("Can't remove $inputfile: $!");
}
unless(rmdir $tmp_path) {
$logger->error("Can't remove $tmp_path: $!");
}
}