-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
Copy pathstrongly_connected_components.h
223 lines (203 loc) · 9.88 KB
/
strongly_connected_components.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
// Copyright 2010-2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This code computes the strongly connected components of a directed graph,
// and presents them sorted by reverse topological order.
//
// It implements an efficient version of Tarjan's strongly connected components
// algorithm published in: Tarjan, R. E. (1972), "Depth-first search and linear
// graph algorithms", SIAM Journal on Computing.
//
// A description can also be found here:
// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
//
// SIMPLE EXAMPLE:
//
// Fill a vector<vector<int>> graph; representing your graph adjacency lists.
// That is, graph[i] contains the nodes adjacent to node #i. The nodes must be
// integers in [0, num_nodes). Then just do:
//
// vector<vector<int>> components;
// FindStronglyConnectedComponents(
// static_cast<int>(graph.size()), graph, &components);
//
// The nodes of each strongly connected components will be listed in each
// subvector of components. The components appear in reverse topological order:
// outgoing arcs from a component will only be towards earlier components.
//
// IMPORTANT: num_nodes will be the number of nodes of the graph. Its type
// is the type used internally by the algorithm. It is why it is better to
// convert it to int or even int32_t rather than using size_t which takes 64
// bits.
#ifndef UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
#define UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_
#include <limits>
#include <vector>
#include "absl/log/check.h"
#include "absl/types/span.h"
// Finds the strongly connected components of a directed graph. It is templated
// so it can be used in many contexts. See the simple example above for the
// easiest use case.
//
// The requirement of the different types are:
// - The type NodeIndex must be an integer type representing a node of the
// graph. The nodes must be in [0, num_nodes). It can be unsigned.
// - The type Graph must provide a [] operator such that the following code
// iterates over the adjacency list of the given node:
// for (const NodeIndex head : graph[node]) {}
// - The type SccOutput must implement the function:
// emplace_back(NodeIndex const* begin, NodeIndex const* end);
// It will be called with the connected components of the given graph as they
// are found (In the reverse topological order).
//
// More practical details on the algorithm:
// - It deals properly with self-loop and duplicate nodes.
// - It is really fast! and work in O(nodes + edges).
// - Its memory usage is also bounded by O(nodes + edges) but in practice it
// uses less than the input graph.
template <typename NodeIndex, typename Graph, typename SccOutput>
void FindStronglyConnectedComponents(NodeIndex num_nodes, const Graph& graph,
SccOutput* components);
// A simple custom output class that just counts the number of SCC. Not
// allocating many vectors can save both space and speed if your graph is large.
//
// Note: If this matters, you probably don't want to use vector<vector<int>> as
// an input either. See StaticGraph in ortools/graph/graph.h
// for an efficient graph data structure compatible with this algorithm.
template <typename NodeIndex>
struct SccCounterOutput {
int number_of_components = 0;
void emplace_back(NodeIndex const* b, NodeIndex const* e) {
++number_of_components;
}
// This is just here so this class can transparently replace a code that
// use vector<vector<int>> as an SccOutput, and get its size with size().
int size() const { return number_of_components; }
void clear() { number_of_components = 0; }
};
// This implementation is slightly different than a classical iterative version
// of Tarjan's strongly connected components algorithm. But basically it is
// still an iterative DFS. We use a class so memory can be reused if one needs
// to compute many SCC in a row. It also allows more complex behavior in the
// Graph or SccOutput class that might inspect the current state of the
// algorithm.
//
// TODO(user): Possible optimizations:
// - Try to reserve the vectors which sizes are bounded by num_nodes.
// - Use an index rather than doing push_back(), pop_back() on them.
template <typename NodeIndex, typename Graph, typename SccOutput>
class StronglyConnectedComponentsFinder {
public:
void FindStronglyConnectedComponents(const NodeIndex num_nodes,
const Graph& graph,
SccOutput* components) {
// Reset the class fields.
scc_stack_.clear();
scc_start_index_.clear();
node_index_.assign(num_nodes, 0);
node_to_process_.clear();
// Caching the pointer to this vector.data() avoid re-fetching it and help.
absl::Span<NodeIndex> node_index = absl::MakeSpan(node_index_);
// Optimization. This will always be equal to scc_start_index_.back() except
// when scc_stack_ is empty, in which case its value does not matter.
NodeIndex current_scc_start = 0;
// Loop over all the nodes not yet settled and start a DFS from each of
// them.
for (NodeIndex base_node = 0; base_node < num_nodes; ++base_node) {
if (node_index[base_node] != 0) continue;
DCHECK_EQ(0, node_to_process_.size());
node_to_process_.push_back(base_node);
do {
const NodeIndex node = node_to_process_.back();
const NodeIndex index = node_index[node];
if (index == 0) {
// We continue the dfs from this node and set its 1-based index.
scc_stack_.push_back(node);
current_scc_start = scc_stack_.size();
node_index[node] = current_scc_start;
scc_start_index_.push_back(current_scc_start);
// Enqueue all its adjacent nodes.
NodeIndex min_head_index = kSettledIndex;
for (const NodeIndex head : graph[node]) {
const NodeIndex head_index = node_index[head];
if (head_index == 0) {
node_to_process_.push_back(head);
} else {
// Note that if head_index == kSettledIndex, nothing happens.
min_head_index = std::min(min_head_index, head_index);
}
}
// Update the start of this strongly connected component.
// Note that scc_start_index_ can never be empty since it first
// element is 1 and by definition min_head_index is 1-based and can't
// be 0.
while (current_scc_start > min_head_index) {
scc_start_index_.pop_back();
current_scc_start = scc_start_index_.back();
}
} else {
node_to_process_.pop_back();
if (current_scc_start == index) {
// We found a strongly connected component.
components->emplace_back(&scc_stack_[current_scc_start - 1],
&scc_stack_[0] + scc_stack_.size());
for (int i = current_scc_start - 1; i < scc_stack_.size(); ++i) {
node_index[scc_stack_[i]] = kSettledIndex;
}
scc_stack_.resize(current_scc_start - 1);
scc_start_index_.pop_back();
current_scc_start =
scc_start_index_.empty() ? 0 : scc_start_index_.back();
}
}
} while (!node_to_process_.empty());
}
}
// Advanced usage. This can be used in either the Graph or SccOutput template
// class to query the current state of the algorithm. It allows to build more
// complex variant based on the core DFS algo.
bool NodeIsInCurrentDfsPath(NodeIndex node) const {
return node_index_[node] > 0 && node_index_[node] < kSettledIndex;
}
private:
static constexpr NodeIndex kSettledIndex =
std::numeric_limits<NodeIndex>::max();
// Each node expanded by the DFS will be pushed on this stack. A node is only
// popped back when its strongly connected component has been explored and
// outputted.
std::vector<NodeIndex> scc_stack_;
// This is equivalent to the "low link" of a node in Tarjan's algorithm.
// Basically, scc_start_index_.back() represent the 1-based index in
// scc_stack_ of the beginning of the current strongly connected component.
// All the nodes after this index will be on the same component.
std::vector<NodeIndex> scc_start_index_;
// Each node is assigned an index which changes 2 times in the algorithm:
// - Everyone starts with an index of 0 which means unexplored.
// - The first time they are explored by the DFS and pushed on scc_stack_,
// they get their 1-based index on this stack.
// - Once they have been processed and outputted to components, they are said
// to be settled, and their index become kSettledIndex.
std::vector<NodeIndex> node_index_;
// This is a well known way to do an efficient iterative DFS. Each time a node
// is explored, all its adjacent nodes are pushed on this stack. The iterative
// dfs processes the nodes one by one by popping them back from here.
std::vector<NodeIndex> node_to_process_;
};
// Simple wrapper function for most usage.
template <typename NodeIndex, typename Graph, typename SccOutput>
void FindStronglyConnectedComponents(const NodeIndex num_nodes,
const Graph& graph,
SccOutput* components) {
StronglyConnectedComponentsFinder<NodeIndex, Graph, SccOutput> helper;
return helper.FindStronglyConnectedComponents(num_nodes, graph, components);
}
#endif // UTIL_GRAPH_STRONGLY_CONNECTED_COMPONENTS_H_