forked from google/or-tools
-
Notifications
You must be signed in to change notification settings - Fork 9
/
dense_set.h
151 lines (135 loc) · 4.96 KB
/
dense_set.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Copyright 2010-2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef OR_TOOLS_UTIL_DENSE_SET_H_
#define OR_TOOLS_UTIL_DENSE_SET_H_
#include <cstddef>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/types/span.h"
namespace operations_research {
// A set of dense non-negative integer values stored in a dense vector.
//
// This is useful when we want to iterate over a small subset of the possible
// values and reuse the memory, or if we want to randomly sample from the set.
//
// If the set is usually small but occasionally very large, iterating over a
// regular hash_set would be less efficient as you would (internal to the hash
// table iterator) have to iterate over all the buckets in the hash
// table even if empty. If you clear the set frequently to avoid this, you would
// grow and rehash when you have a larger set.
//
// If auto_resize=false, users *must* call reserve(K) where K > any key before
// calling any other method.
template <typename T, bool auto_resize = true>
class DenseSet {
 public:
  // Elements are only ever exposed read-only, so `iterator` is deliberately
  // the same type as `const_iterator`.
  using iterator = typename std::vector<T>::const_iterator;
  using const_iterator = typename std::vector<T>::const_iterator;
  using value_type = T;
  static constexpr bool kAutoResize = auto_resize;

  const_iterator begin() const { return values_.begin(); }
  const_iterator end() const { return values_.end(); }

  // Number of elements currently stored in the set.
  size_t size() const { return values_.size(); }
  bool empty() const { return values_.empty(); }

  // Reserves storage for `size` elements and grows the position table so that
  // it has at least `size` entries. The position table is never shrunk.
  void reserve(size_t size) {
    values_.reserve(size);
    if (size >= positions_.size()) positions_.resize(size, -1);
  }

  // Number of entries in the position table, i.e. keys in [0, capacity()) can
  // be looked up without growing it.
  size_t capacity() const { return positions_.size(); }

  // Inserts `value` if it is not already present. Returns an iterator to the
  // stored element and true if the insertion took place, false otherwise.
  // With kAutoResize the position table grows as needed to cover `value`.
  std::pair<iterator, bool> insert(T value) {
    const int pos = Position(value);
    if (pos >= 0) return {values_.begin() + pos, false};
    // New elements are always appended, so the new position is the old size.
    const int new_pos = static_cast<int>(values_.size());
    positions_[ToInt(value)] = new_pos;
    values_.push_back(value);
    return {values_.begin() + new_pos, true};
  }

  // Returns an iterator to `value`, or end() if it is not in the set.
  // This is a pure lookup: it never grows the position table, so it can be
  // called on a const set, consistently with contains().
  iterator find(T value) const {
    const int pos = Lookup(value);
    if (pos < 0) return values_.end();
    return values_.begin() + pos;
  }

  // Returns true if `value` is in the set. With kAutoResize, keys outside the
  // position table are reported as absent; without it the key must be in
  // range (checked in debug builds only).
  bool contains(T value) const {
    const size_t index = static_cast<size_t>(ToInt(value));
    if (kAutoResize && index >= positions_.size()) return false;
    DCHECK_GT(positions_.size(), index);
    return positions_[index] >= 0;
  }

  // Removes the element pointed to by `it`, which must be dereferenceable.
  // The last element is moved into the freed slot, so the iteration order
  // changes and iterators at or after the end of the new range are invalid.
  void erase(iterator it) {
    DCHECK(it != end());
    const T value = *it;
    DCHECK_GT(positions_.size(), static_cast<size_t>(ToInt(value)));
    // `it` is a const iterator; going through its index gives us mutable
    // access to the slot without a const_cast.
    const int index = static_cast<int>(it - begin());
    const T last = values_.back();
    // Order matters when `value` is the last element: the -1 must win.
    positions_[ToInt(last)] = index;
    positions_[ToInt(value)] = -1;
    values_[index] = last;
    values_.pop_back();
  }

  // Removes `value` if present. Returns the number of elements removed (0 or
  // 1). Erasing an absent key never grows the position table.
  int erase(T value) {
    const int pos = Lookup(value);
    if (pos < 0) return 0;
    const T last = values_.back();
    positions_[ToInt(last)] = pos;
    values_[pos] = last;
    values_.pop_back();
    // Written last so that it also wins when `value` was the last element.
    positions_[ToInt(value)] = -1;
    return 1;
  }

  // The ordering is deterministic given the same sequence of inserts and
  // erases but is arbitrary and should not be relied upon.
  absl::Span<const T> values() const { return values_; }

  // Removes all elements while keeping the position table at its current
  // size.
  void clear() {
    // We expect values_ to be much smaller than the total number of possible
    // values, so just clear entries in the set.
    for (const T value : values_) {
      DCHECK_GT(positions_.size(), static_cast<size_t>(ToInt(value)));
      positions_[ToInt(value)] = -1;
    }
    values_.clear();
  }

 private:
  // Maps an element to its non-negative dense integer key. Defined out of
  // class (generic version plus specializations for int).
  static int ToInt(T);

  // Read-only lookup: returns the index of `value` in values_, or -1 if it is
  // not in the set. With kAutoResize, keys beyond the position table are
  // simply absent; otherwise the key must be in range (debug-checked).
  int Lookup(T value) const {
    const int int_value = ToInt(value);
    DCHECK_GE(int_value, 0);
    const size_t index = static_cast<size_t>(int_value);
    if (kAutoResize && index >= positions_.size()) return -1;
    DCHECK_GT(positions_.size(), index);
    return positions_[index];
  }

  // Like Lookup(), but with kAutoResize first grows the position table so
  // that the entry for `value` exists and can be written by the caller.
  int Position(T value) {
    const int int_value = ToInt(value);
    DCHECK_GE(int_value, 0);
    const size_t index = static_cast<size_t>(int_value);
    // Automatic Resize increases the CPU time of microbenchmarks by ~30%, but
    // even with kAutoResize=true, DenseSet is still 25x faster than a
    // flat_hash_set<int>.
    if (kAutoResize && index >= positions_.size()) {
      // Computed in size_t so that a key of INT_MAX cannot overflow.
      positions_.resize(index + 1, -1);
    }
    DCHECK_GT(positions_.size(), index);
    return positions_[index];
  }

  // positions_[k] is the index in values_ of the element whose key is k, or
  // -1 if no such element is in the set.
  std::vector<int> positions_;
  // The elements of the set, densely packed in insertion/erase order.
  std::vector<T> values_;
};
// Like DenseSet, but does not automatically resize the internal position
// vector, which is ~30% faster.
// Shorthand for a DenseSet whose auto_resize template argument is false.
template <typename Value>
using UnsafeDenseSet = DenseSet<Value, /*auto_resize=*/false>;
// Generic key conversion: for element types that expose a value() accessor,
// the dense key is whatever value() yields, converted to int.
template <typename ValueT, bool kResize>
inline int DenseSet<ValueT, kResize>::ToInt(ValueT value) {
  const int as_int = value.value();
  return as_int;
}
// Plain int elements of the auto-resizing set are their own dense key.
template <>
inline int DenseSet<int, /*auto_resize=*/true>::ToInt(int key) {
  return key;
}
// Plain int elements of the non-resizing set are their own dense key.
template <>
inline int DenseSet<int, /*auto_resize=*/false>::ToInt(int key) {
  return key;
}
} // namespace operations_research
#endif // OR_TOOLS_UTIL_DENSE_SET_H_