forked from google/or-tools
-
Notifications
You must be signed in to change notification settings - Fork 9
/
set_cover_model.h
391 lines (320 loc) · 15.1 KB
/
set_cover_model.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
// Copyright 2010-2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
#define OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
#include <cstdint>
#include <string>
#include <vector>
#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
#include "ortools/algorithms/set_cover.pb.h"
#include "ortools/base/strong_int.h"
#include "ortools/base/strong_vector.h"
// Representation class for the weighted set-covering problem.
//
// Let E be a "universe" set, let (S_j) be a family (j in J) of subsets of E,
// and c_j costs associated to each S_j. Note that J = {j in 1..|S|}.
//
// The minimum-cost set-covering problem consists in finding K (for covering),
// a subset of J such that the union of all the S_j for k in K is equal to E
// (the subsets indexed by K "cover" E), while minimizing total cost sum c_k (k
// in K).
//
// In Mixed-Integer Programming and matrix terms, the goal is to find values
// of binary variables x_j, where x_j is 1 when subset S_j is in K, 0
// otherwise, that minimize the sum of c_j * x_j subject to M.x >= 1. Each row
// corresponds to an element in E.
//
// The matrix M for linear constraints is defined as follows:
// - it has as many rows as there are elements in E.
// - its columns are such that M(i, j) = 1 iff the i-th element of E is present
// in S_j.
//
// We also use m to denote |E|, the number of elements, and n to denote |S|, the
// number of subsets.
// Finally, NNZ denotes the numbers of non-zeros, i.e. the sum of the
// cardinalities of all the subsets.
namespace operations_research {
// Basic non-strict type for cost. The speed penalty for using double is ~2%.
using Cost = double;
// Base non-strict integer type for counting elements and subsets.
// Using ints makes it possible to represent problems with more than 2 billion
// (2e9) elements and subsets. If need arises one day, BaseInt can be split
// into SubsetBaseInt and ElementBaseInt.
// Quick testing has shown a slowdown of about 20-25% when using int64_t.
using BaseInt = int32_t;
// We make heavy use of strong typing to avoid obvious mistakes.
// Subset index.
DEFINE_STRONG_INT_TYPE(SubsetIndex, BaseInt);
// Element index.
DEFINE_STRONG_INT_TYPE(ElementIndex, BaseInt);
// Position in a vector. The vector may either represent a column, i.e. a
// subset with all its elements, or a row, i,e. the list of subsets which
// contain a given element.
DEFINE_STRONG_INT_TYPE(ColumnEntryIndex, BaseInt);
DEFINE_STRONG_INT_TYPE(RowEntryIndex, BaseInt);
using SubsetRange = util_intops::StrongIntRange<SubsetIndex>;
using ElementRange = util_intops::StrongIntRange<ElementIndex>;
using ColumnEntryRange = util_intops::StrongIntRange<ColumnEntryIndex>;
using SubsetCostVector = util_intops::StrongVector<SubsetIndex, Cost>;
using ElementCostVector = util_intops::StrongVector<ElementIndex, Cost>;
using SparseColumn = util_intops::StrongVector<ColumnEntryIndex, ElementIndex>;
using SparseRow = util_intops::StrongVector<RowEntryIndex, SubsetIndex>;
using ElementToIntVector = util_intops::StrongVector<ElementIndex, BaseInt>;
using SubsetToIntVector = util_intops::StrongVector<SubsetIndex, BaseInt>;
// Views of the sparse vectors. These need not be aligned as it's their contents
// that need to be aligned.
using SparseColumnView = util_intops::StrongVector<SubsetIndex, SparseColumn>;
using SparseRowView = util_intops::StrongVector<ElementIndex, SparseRow>;
using SubsetBoolVector = util_intops::StrongVector<SubsetIndex, bool>;
using ElementBoolVector = util_intops::StrongVector<ElementIndex, bool>;
// Useful for representing permutations,
using ElementToElementVector =
util_intops::StrongVector<ElementIndex, ElementIndex>;
using SubsetToSubsetVector =
util_intops::StrongVector<SubsetIndex, SubsetIndex>;
// Main class for describing a weighted set-covering problem.
class SetCoverModel {
public:
// Constructs an empty weighted set-covering problem.
SetCoverModel()
: num_elements_(0),
num_subsets_(0),
num_nonzeros_(0),
row_view_is_valid_(false),
subset_costs_(),
columns_(),
rows_(),
all_subsets_() {}
// Constructs a weighted set-covering problem from a seed model, with
// num_elements elements and num_subsets subsets.
// - The distributions of the degrees of the elements and the cardinalities of
// the subsets are based on those of the seed model. They are scaled
// affininely by row_scale and column_scale respectively.
// - By affine scaling, we mean that the minimum value of the distribution is
// not scaled, but the variation above this minimum value is.
// - For a given subset with a given cardinality in the generated model, its
// elements are sampled from the distribution of the degrees as computed
// above.
// - The costs of the subsets in the new model are sampled from the
// distribution of the costs of the subsets in the seed model, scaled by
// cost_scale.
// IMPORTANT NOTICE: The algorithm may not succeed in generating a model
// where all the elements can be covered. In that case, the model will be
// empty.
static SetCoverModel GenerateRandomModelFrom(const SetCoverModel& seed_model,
BaseInt num_elements,
BaseInt num_subsets,
double row_scale,
double column_scale,
double cost_scale);
// Returns true if the model is empty, i.e. has no elements, no subsets, and
// no nonzeros.
bool IsEmpty() const { return rows_.empty() || columns_.empty(); }
// Current number of elements to be covered in the model, i.e. the number of
// elements in S. In matrix terms, this is the number of rows.
BaseInt num_elements() const { return num_elements_; }
// Current number of subsets in the model. In matrix terms, this is the
// number of columns.
BaseInt num_subsets() const { return num_subsets_; }
// Current number of nonzeros in the matrix.
int64_t num_nonzeros() const { return num_nonzeros_; }
double FillRate() const {
return 1.0 * num_nonzeros() / (1.0 * num_elements() * num_subsets());
}
// Vector of costs for each subset.
const SubsetCostVector& subset_costs() const { return subset_costs_; }
// Column view of the set covering problem.
const SparseColumnView& columns() const { return columns_; }
// Row view of the set covering problem.
const SparseRowView& rows() const {
DCHECK(row_view_is_valid_);
return rows_;
}
// Returns true if rows_ and columns_ represent the same problem.
bool row_view_is_valid() const { return row_view_is_valid_; }
// Access to the ranges of subsets and elements.
util_intops::StrongIntRange<SubsetIndex> SubsetRange() const {
return util_intops::StrongIntRange<SubsetIndex>(SubsetIndex(num_subsets_));
}
util_intops::StrongIntRange<ElementIndex> ElementRange() const {
return util_intops::StrongIntRange<ElementIndex>(
ElementIndex(num_elements_));
}
// Returns the list of indices for all the subsets in the model.
std::vector<SubsetIndex> all_subsets() const { return all_subsets_; }
// Adds an empty subset with a cost to the problem. In matrix terms, this
// adds a column to the matrix.
void AddEmptySubset(Cost cost);
// Adds an element to the last subset created. In matrix terms, this adds a
// 1 on row 'element' of the current last column of the matrix.
void AddElementToLastSubset(BaseInt element);
void AddElementToLastSubset(ElementIndex element);
// Sets 'cost' to an already existing 'subset'.
// This will CHECK-fail if cost is infinite or a NaN.
void SetSubsetCost(BaseInt subset, Cost cost);
void SetSubsetCost(SubsetIndex subset, Cost cost);
// Adds 'element' to an already existing 'subset'.
// No check is done if element is already in the subset.
void AddElementToSubset(BaseInt element, BaseInt subset);
void AddElementToSubset(ElementIndex element, SubsetIndex subset);
// Creates the sparse ("dual") representation of the problem.
void CreateSparseRowView();
// Returns true if the problem is feasible, i.e. if the subsets cover all
// the elements.
bool ComputeFeasibility() const;
// Reserves num_subsets columns in the model.
void ReserveNumSubsets(BaseInt num_subsets);
void ReserveNumSubsets(SubsetIndex num_subsets);
// Reserves num_elements rows in the column indexed by subset.
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset);
void ReserveNumElementsInSubset(ElementIndex num_elements,
SubsetIndex subset);
// Returns the model as a SetCoverProto. The function is not const because
// the element indices in the columns need to be sorted for the representation
// as a protobuf to be canonical.
SetCoverProto ExportModelAsProto();
// Imports the model from a SetCoverProto.
void ImportModelFromProto(const SetCoverProto& message);
// A struct enabling to show basic statistics on rows and columns.
// The meaning of the fields is obvious.
struct Stats {
double min;
double max;
double median;
double mean;
double stddev;
std::string DebugString() const {
return absl::StrCat("min = ", min, ", max = ", max, ", mean = ", mean,
", median = ", median, ", stddev = ", stddev, ", ");
}
};
// Computes basic statistics on costs and returns a Stats structure.
Stats ComputeCostStats();
// Computes basic statistics on rows and returns a Stats structure.
Stats ComputeRowStats();
// Computes basic statistics on columns and returns a Stats structure.
Stats ComputeColumnStats();
// Computes deciles on rows and returns a vector of deciles.
std::vector<BaseInt> ComputeRowDeciles() const;
// Computes deciles on columns and returns a vector of deciles.
std::vector<BaseInt> ComputeColumnDeciles() const;
// Computes basic statistics on the deltas of the row and column elements and
// returns a Stats structure. The deltas are computed as the difference
// between two consecutive indices in rows or columns. The number of bytes
// computed is meant using a variable-length base-128 encoding.
// TODO(user): actually use this to compress the rows and columns.
Stats ComputeRowDeltaSizeStats() const;
Stats ComputeColumnDeltaSizeStats() const;
private:
// Updates the all_subsets_ vector so that it always contains 0 to
// columns.size() - 1
void UpdateAllSubsetsList();
// Number of elements.
BaseInt num_elements_;
// Number of subsets. Maintained for ease of access.
BaseInt num_subsets_;
// Number of nonzeros in the matrix. The value is an int64_t because there can
// be more than 1 << 31 nonzeros even with BaseInt = int32_t.
int64_t num_nonzeros_;
// True when the SparseRowView is up-to-date.
bool row_view_is_valid_;
// Costs for each subset.
SubsetCostVector subset_costs_;
// Vector of columns. Each column corresponds to a subset and contains the
// elements of the given subset.
// This takes NNZ (number of non-zeros) BaseInts, or |E| * |S| * fill_rate.
// On classical benchmarks, the fill rate is in the 2 to 5% range.
// Some synthetic benchmarks have fill rates of 20%, while benchmarks for
// rail rotations have a fill rate of 0.2 to 0.4%.
// TODO(user): try using a compressed representation like VarInt or LEB128,
// since the data is only iterated upon.
SparseColumnView columns_;
// Vector of rows. Each row corresponds to an element and contains the
// subsets containing the element.
// The size is exactly the same as for columns_.
SparseRowView rows_;
// Vector of indices from 0 to columns.size() - 1. (Like std::iota, but built
// incrementally.) Used to (un)focus optimization algorithms on the complete
// problem.
// This takes |S| BaseInts.
// TODO(user): use this to enable deletion and recycling of columns/subsets.
// TODO(user): replace this with an iterator?
std::vector<SubsetIndex> all_subsets_;
};
// The IntersectingSubsetsIterator is a forward iterator that returns the next
// intersecting subset for a fixed seed_subset.
// The iterator is initialized with a model and a seed_subset and
// allows a speedup in getting the intersecting subsets
// by not storing them in memory.
// The iterator is at the end when the last intersecting subset has been
// returned.
// TODO(user): Add the possibility for range-for loops.
class IntersectingSubsetsIterator {
public:
IntersectingSubsetsIterator(const SetCoverModel& model,
SubsetIndex seed_subset)
: intersecting_subset_(-1),
element_entry_(0),
subset_entry_(0),
seed_subset_(seed_subset),
model_(model),
subset_seen_(model_.columns().size(), false) {
CHECK(model_.row_view_is_valid());
subset_seen_[seed_subset] = true; // Avoid iterating on `seed_subset`.
++(*this); // Move to the first intersecting subset.
}
// Returns (true) whether the iterator is at the end.
bool at_end() const {
return element_entry_.value() == model_.columns()[seed_subset_].size();
}
// Returns the intersecting subset.
SubsetIndex operator*() const { return intersecting_subset_; }
// Move the iterator to the next intersecting subset.
IntersectingSubsetsIterator& operator++() {
DCHECK(model_.row_view_is_valid());
DCHECK(!at_end());
const SparseRowView& rows = model_.rows();
const SparseColumn& column = model_.columns()[seed_subset_];
for (; element_entry_ < ColumnEntryIndex(column.size()); ++element_entry_) {
const ElementIndex current_element = column[element_entry_];
const SparseRow& current_row = rows[current_element];
for (; subset_entry_ < RowEntryIndex(current_row.size());
++subset_entry_) {
intersecting_subset_ = current_row[subset_entry_];
if (!subset_seen_[intersecting_subset_]) {
subset_seen_[intersecting_subset_] = true;
return *this;
}
}
subset_entry_ = RowEntryIndex(0); // 'carriage-return'
}
return *this;
}
private:
// The intersecting subset.
SubsetIndex intersecting_subset_;
// The position of the entry in the column corresponding to `seed_subset_`.
ColumnEntryIndex element_entry_;
// The position of the entry in the row corresponding to `element_entry`.
RowEntryIndex subset_entry_;
// The seed subset.
SubsetIndex seed_subset_;
// The model to which the iterator is applying.
const SetCoverModel& model_;
// A vector of Booleans indicating whether the current subset has been
// already seen by the iterator.
SubsetBoolVector subset_seen_;
};
} // namespace operations_research
#endif // OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_