-
Notifications
You must be signed in to change notification settings - Fork 21
/
NCList.pm
129 lines (106 loc) · 3.82 KB
/
NCList.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#After
#Alekseyenko, A., and Lee, C. (2007).
#Nested Containment List (NCList): A new algorithm for accelerating
# interval query of genome alignment and interval databases.
#Bioinformatics, doi:10.1093/bioinformatics/btl647
#http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btl647v1
package NCList;
use strict;
use warnings;
use List::Util qw(max);
=head2 new
Title : new
Usage : NCList->new($start, $end, $setSublist, $featList)
Function: create an NCList
Returns : an NCList object
Args : $featList is a reference to an array of arrays;
each of the inner arrays represents an interval.
$start is a reference to a sub that, given an inner array from
$featList, returns the start position of the interval
represented by that inner array.
$end is a reference to a sub that, given an inner array from
$featList, returns the end position of the interval
represented by that inner array.
$setSublist is a reference to a sub that, given an inner array from
$featList and a sublist reference, sets the "Sublist" attribute
on the array to the sublist.
=cut
sub new {
my ($class, $start, $end, $setSublist, $featList) = @_;
my @features = sort {
if ($start->($a) != $start->($b)) {
$start->($a) - $start->($b);
} else {
$end->($b) - $end->($a);
}
} @$featList;
#@sublistStack is a list of all the currently relevant sublists
#(one for each level of nesting)
my @sublistStack;
#$curlist is the currently active sublist
my $curList = [];
my $self = { 'topList' => $curList,
'setSublist' => $setSublist,
'count' => scalar( @features ),
'minStart' => ( @features ? $start->($features[0]) : undef ),
};
bless $self, $class;
push @$curList, $features[0] if @features;
my $maxEnd = @features ? $end->($features[0]) : undef;
my $topSublist;
for (my $i = 1; $i < @features; $i++) {
$maxEnd = max( $maxEnd, $end->( $features[$i] ));
#if this interval is contained in the previous interval,
if ($end->($features[$i]) < $end->($features[$i - 1])) {
#create a new sublist starting with this interval
push @sublistStack, $curList;
$curList = [$features[$i]];
$setSublist->($features[$i - 1], $curList);
} else {
#find the right sublist for this interval
while (1) {
#if we're at the top level list,
if ($#sublistStack < 0) {
#just add the current feature
push @$curList, $features[$i];
last;
} else {
$topSublist = $sublistStack[$#sublistStack];
#if the last interval in the top sublist ends
#after the end of the current interval,
if ($end->($topSublist->[$#{$topSublist}])
> $end->($features[$i]) ) {
#then curList is the first (deepest) sublist
#that the current feature fits into, and
#we add the current feature to curList
push @$curList, $features[$i];
last;
} else {
#move on to the next shallower sublist
$curList = pop @sublistStack;
}
}
}
}
}
$self->{maxEnd} = $maxEnd;
return $self;
}
sub maxEnd {
return shift->{maxEnd};
}
sub minStart {
return shift->{minStart};
}
sub nestedList {
return shift->{topList};
}
1;
=head1 AUTHOR
Mitchell Skinner E<lt>[email protected]<gt>
Copyright (c) 2007-2009 The Evolutionary Software Foundation
This package and its accompanying libraries are free software; you can
redistribute it and/or modify it under the terms of the LGPL (either
version 2.1, or at your option, any later version) or the Artistic
License 2.0. Refer to LICENSE for the full license text.
=cut