Skip to content

Commit

Permalink
Use robin_hood hash map
Browse files Browse the repository at this point in the history
Provides ~10% speedup to tests
  • Loading branch information
mmore500 committed Oct 4, 2020
1 parent f6d5b79 commit 2316998
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 23 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@
[submodule "third-party/span-lite"]
path = third-party/span-lite
url = https://github.com/martinmoene/span-lite.git
[submodule "third-party/robin-hood-hashing"]
path = third-party/robin-hood-hashing
url = https://github.com/martinus/robin-hood-hashing
72 changes: 59 additions & 13 deletions source/tools/MatchBin.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@
#include <mutex>
#include <shared_mutex>
#include <string>
#include <unordered_map>
#include <atomic>

#include "../../third-party/robin-hood-hashing/src/include/robin_hood.h"

#include "../base/assert.h"
#include "../base/optional.h"
#include "../base/vector.h"
Expand Down Expand Up @@ -79,7 +80,7 @@ namespace emp::internal {
operator size_t() const { return emp::CombineHash(query, maybe_tag, buffer); }
};

using logbuffer_t = std::unordered_map<
using logbuffer_t = robin_hood::unordered_map<
LogEntry,
size_t,
TupleHash<
Expand Down Expand Up @@ -109,7 +110,7 @@ namespace emp::internal {
// in this case, we simply return the data from our logbuffer.
emp::ContainerDataFile<logbuffer_t> datafile;

using datapoint_t = std::pair<const LogEntry, size_t>;
using datapoint_t = robin_hood::pair<const LogEntry, size_t>;
// setup getter functions
std::function<query_t(const datapoint_t)> get_query_log = [](const datapoint_t datapoint){
return datapoint.first.query;
Expand Down Expand Up @@ -277,12 +278,12 @@ namespace emp::internal {

// caches
// cache of regulated scores
std::unordered_map<
robin_hood::unordered_map<
query_t,
cache_state_t
> cache_regulated;
// cache of raw scores
std::unordered_map<
robin_hood::unordered_map<
query_t,
cache_state_t
> cache_raw;
Expand Down Expand Up @@ -382,9 +383,9 @@ namespace emp {
using tag_t = Tag;
using uid_t = size_t;

std::unordered_map<uid_t, Val> values;
std::unordered_map<uid_t, Regulator> regulators;
std::unordered_map<uid_t, tag_t> tags;
robin_hood::unordered_map<uid_t, Val> values;
robin_hood::unordered_map<uid_t, Regulator> regulators;
robin_hood::unordered_map<uid_t, tag_t> tags;
emp::vector<uid_t> uids;

#ifdef CEREAL_NVP
Expand Down Expand Up @@ -543,15 +544,15 @@ namespace emp {
) override {
const auto makeResult = [&]() {
// compute distance between query and all stored tags
std::unordered_map<tag_t, double> matches;
robin_hood::unordered_map<tag_t, double> matches;
for (const auto &[uid, tag] : state.tags) {
if (matches.find(tag) == std::end(matches)) {
matches[tag] = metric(query, tag);
}
}

// apply regulation to generate match scores
std::unordered_map<uid_t, double> scores;
robin_hood::unordered_map<uid_t, double> scores;
for (const auto & uid : state.uids) {
scores[uid] = state.regulators.at(uid)(
matches.at( state.tags.at(uid) )
Expand Down Expand Up @@ -607,14 +608,14 @@ namespace emp {
) override {
const auto makeResult = [&]() {
// compute distance between query and all stored tags
std::unordered_map<tag_t, double> matches;
robin_hood::unordered_map<tag_t, double> matches;
for (const auto &[uid, tag] : state.tags) {
if (matches.find(tag) == std::end(matches)) {
matches[tag] = metric(query, tag);
}
}
// apply regulation to generate match scores
std::unordered_map<uid_t, double> scores;
robin_hood::unordered_map<uid_t, double> scores;
for (const auto & uid : state.uids) {
scores[uid] = matches[state.tags[uid]];
}
Expand Down Expand Up @@ -674,7 +675,7 @@ namespace emp {
cache.Clear();

state.values[uid] = v;
state.regulators.insert({{uid},{}});
state.regulators[uid] = {};
state.tags[uid] = t;
state.uids.push_back(uid);
return uid;
Expand Down Expand Up @@ -939,4 +940,49 @@ namespace emp {

}

namespace cereal {

/// cereal `save` overload for robin_hood::unordered_map keyed by size_t.
///
/// The entries of `m` are copied into a temporary std::unordered_map and that
/// temporary is what gets passed to the archive.
///
/// @param archive  archive object; invoked once with the staged std map.
/// @param m        map whose key/value pairs are written out (not modified).
template<
  class Archive,
  class T
>
void save(
  Archive & archive,
  robin_hood::unordered_map<size_t, T> const & m
) {

  // stage a key-for-key copy in a standard container
  std::unordered_map<size_t, T> staged;
  for (const auto& [key, value] : m) {
    staged.emplace( key, value );
  }

  archive( staged );
}

/// cereal `load` overload for robin_hood::unordered_map keyed by size_t.
///
/// A temporary std::unordered_map is filled by the archive and its entries are
/// then copied into `m`. Any contents `m` held beforehand are discarded first,
/// so after the call `m` holds exactly the deserialized entries.
///
/// @param archive  archive object; invoked once with the temporary std map.
/// @param m        destination map; cleared, then repopulated.
template<
  class Archive,
  class T
>
void load(
  Archive & archive,
  robin_hood::unordered_map<size_t, T> & m
) {

  std::unordered_map<size_t, T> t;
  archive( t );

  // drop stale entries: without this, keys already present in m but absent
  // from the archive would survive the load
  m.clear();
  m.reserve( t.size() );

  for (const auto& [k, v] : t) {
    m[k] = v;
  }

}

} // namespace cereal

#endif
6 changes: 4 additions & 2 deletions source/tools/matchbin_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <utility>
#include <queue>

#include "../../third-party/robin-hood-hashing/src/include/robin_hood.h"

#include "../base/assert.h"
#include "../base/array.h"
#include "../base/vector.h"
Expand Down Expand Up @@ -447,7 +449,7 @@ namespace emp {
template<size_t N>
struct ExactStreakDistribution {

std::unordered_map<
robin_hood::unordered_map<
std::tuple<size_t, size_t>, /* (min_heads, num_coins) */
double,
emp::TupleHash<size_t, size_t>
Expand Down Expand Up @@ -609,7 +611,7 @@ namespace emp {
using tag_t = typename Metric::tag_t;

Metric metric;
static inline std::unordered_map<
static inline robin_hood::unordered_map<
std::tuple<query_t, tag_t>,
double,
emp::TupleHash<query_t, tag_t>
Expand Down
13 changes: 7 additions & 6 deletions source/tools/matchbin_selectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#define EMP_MATCH_BIN_SELECTORS_H

#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <functional>
#include <algorithm>
Expand All @@ -26,6 +25,8 @@
#include <utility>
#include <limits>

#include "../../third-party/robin-hood-hashing/src/include/robin_hood.h"

#include "../base/assert.h"
#include "../base/array.h"
#include "../base/optional.h"
Expand Down Expand Up @@ -167,7 +168,7 @@ namespace emp {
virtual ~SelectorBase() {};
virtual CacheType operator()(
const emp::vector<size_t>& uids,
const std::unordered_map<size_t, double>& scores,
const robin_hood::unordered_map<size_t, double>& scores,
size_t n
) = 0;
virtual std::string name() const = 0;
Expand Down Expand Up @@ -202,7 +203,7 @@ namespace emp {

RankedCacheState operator()(
const emp::vector<size_t>& uids_,
const std::unordered_map<size_t, double>& scores,
const robin_hood::unordered_map<size_t, double>& scores,
size_t n
) override {

Expand Down Expand Up @@ -295,7 +296,7 @@ namespace emp {

RouletteCacheState operator()(
const emp::vector<size_t>& uids_,
const std::unordered_map<size_t, double>& scores,
const robin_hood::unordered_map<size_t, double>& scores,
size_t n
) override {

Expand Down Expand Up @@ -423,7 +424,7 @@ namespace emp {

RouletteCacheState operator()(
const emp::vector<size_t>& uids_,
const std::unordered_map<size_t, double>& scores,
const robin_hood::unordered_map<size_t, double>& scores,
size_t n
) override {

Expand Down Expand Up @@ -555,7 +556,7 @@ namespace emp {
// scores (post-regulation) are assumed to be between 0 and 1
SieveCacheState operator()(
const emp::vector<size_t>& uids_,
const std::unordered_map<size_t, double>& scores,
const robin_hood::unordered_map<size_t, double>& scores,
size_t n
) override {

Expand Down
4 changes: 2 additions & 2 deletions tests/tools/MatchBin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,7 @@ TEST_CASE("Test MatchBin", "[tools]")

emp::RankedCacheState operator()(
emp::vector<size_t>& uids,
std::unordered_map<size_t, double>& scores,
robin_hood::unordered_map<size_t, double>& scores,
size_t n
){
opCount+=1;
Expand Down Expand Up @@ -1899,7 +1899,7 @@ TEST_CASE("Test MatchBin", "[tools]")
size_t opCount = 0;
emp::RankedCacheState operator()(
emp::vector<size_t>& uids,
std::unordered_map<size_t, double>& scores,
robin_hood::unordered_map<size_t, double>& scores,
size_t n
){
opCount+=1;
Expand Down
1 change: 1 addition & 0 deletions third-party/robin-hood-hashing
Submodule robin-hood-hashing added at 8a8dc1

0 comments on commit 2316998

Please sign in to comment.