From 49ed6293d5d9ac4764b1cca669d01d0d9eaf6206 Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Tue, 6 Feb 2024 17:15:08 +0100
Subject: [PATCH] refactor(shm-fm): rename DenseGainCache to SparseGainCache

---
 kaminpar-shm/factories.cc                     |  4 +-
 .../refinement/balancer/greedy_balancer.h     |  6 +--
 kaminpar-shm/refinement/fm/fm_refiner.cc      |  9 ++--
 kaminpar-shm/refinement/fm/fm_refiner.h       | 49 ++++++++++++-------
 ...dense_gain_cache.h => sparse_gain_cache.h} | 27 +++++-----
 kaminpar-shm/refinement/jet/jet_refiner.cc    |  6 +--
 6 files changed, 55 insertions(+), 46 deletions(-)
 rename kaminpar-shm/refinement/gains/{dense_gain_cache.h => sparse_gain_cache.h} (90%)

diff --git a/kaminpar-shm/factories.cc b/kaminpar-shm/factories.cc
index 54ee4092..cd71482c 100644
--- a/kaminpar-shm/factories.cc
+++ b/kaminpar-shm/factories.cc
@@ -31,7 +31,7 @@
 #include "kaminpar-shm/refinement/multi_refiner.h"
 
 // Gain cache strategies for the FM algorithm
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/hybrid_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h"
 
@@ -89,7 +89,7 @@ std::unique_ptr<Refiner> create_refiner(const Context &ctx, const RefinementAlgo
 
   case RefinementAlgorithm::KWAY_FM: {
     if (ctx.refinement.kway_fm.gain_cache_strategy == GainCacheStrategy::DENSE) {
-      return std::make_unique<FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>>(ctx);
+      return std::make_unique<FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::SparseGainCache>>(ctx);
     } else if (ctx.refinement.kway_fm.gain_cache_strategy == GainCacheStrategy::ON_THE_FLY) {
       return std::make_unique<FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::OnTheFlyGainCache>>(
           ctx
diff --git a/kaminpar-shm/refinement/balancer/greedy_balancer.h b/kaminpar-shm/refinement/balancer/greedy_balancer.h
index 73286d41..5806ce7f 100644
--- a/kaminpar-shm/refinement/balancer/greedy_balancer.h
+++ b/kaminpar-shm/refinement/balancer/greedy_balancer.h
@@ -15,7 +15,7 @@
 #include "kaminpar-shm/datastructures/graph.h"
 #include "kaminpar-shm/datastructures/partitioned_graph.h"
 #include "kaminpar-shm/metrics.h"
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 #include "kaminpar-shm/refinement/refiner.h"
 
 #include "kaminpar-common/datastructures/binary_heap.h"
@@ -103,7 +103,7 @@ class GreedyBalancer : public Refiner {
   void initialize(const PartitionedGraph &p_graph) final;
   bool refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) final;
 
-  void track_moves(DenseGainCache<> *gain_cache) {
+  void track_moves(SparseGainCache<> *gain_cache) {
     _gain_cache = gain_cache;
   }
 
@@ -157,6 +157,6 @@ class GreedyBalancer : public Refiner {
 
   Statistics _stats;
 
-  DenseGainCache<> *_gain_cache = nullptr;
+  SparseGainCache<> *_gain_cache = nullptr;
 };
 } // namespace kaminpar::shm
diff --git a/kaminpar-shm/refinement/fm/fm_refiner.cc b/kaminpar-shm/refinement/fm/fm_refiner.cc
index f43eb535..f8ed70af 100644
--- a/kaminpar-shm/refinement/fm/fm_refiner.cc
+++ b/kaminpar-shm/refinement/fm/fm_refiner.cc
@@ -776,19 +776,16 @@ std::pair<EdgeWeight, EdgeWeight> LocalizedFMRefiner<DeltaPartitionedGraph, Gai
 }
 
 namespace fm {
-template class SharedData<DenseGainCache>;
+template class SharedData<SparseGainCache>;
 template class SharedData<OnTheFlyGainCache>;
 template class SharedData<HighDegreeGainCache>;
 } // namespace fm
 
-// template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
-// template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
-
 template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::OnTheFlyGainCache>;
 template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::OnTheFlyGainCache>;
 
-template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
-template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
+template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::SparseGainCache>;
+template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::SparseGainCache>;
 
 template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::HighDegreeGainCache>;
 template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::HighDegreeGainCache>;
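Note: create_refiner() maps the runtime GainCacheStrategy value onto a compile-time template argument, which is why the translation unit above must explicitly instantiate every refiner/gain-cache combination the factory can name. A minimal, self-contained sketch of that dispatch pattern; all names here (Refiner, CacheA, CacheB, FMLikeRefiner, Strategy) are illustrative stand-ins, not the real KaMinPar declarations:

#include <memory>

// Hypothetical stand-ins for the refiner interface and the gain caches; the
// point is the enum-to-template dispatch, not KaMinPar's real signatures.
class Refiner {
public:
  virtual ~Refiner() = default;
  virtual void refine() = 0;
};

struct CacheA {};
struct CacheB {};

template <typename GainCache> class FMLikeRefiner : public Refiner {
public:
  void refine() override { /* run FM rounds against a GainCache instance */ }
};

enum class Strategy { A, B };

// Runtime enum -> compile-time template argument, one branch per strategy.
std::unique_ptr<Refiner> create(const Strategy strategy) {
  switch (strategy) {
  case Strategy::A:
    return std::make_unique<FMLikeRefiner<CacheA>>();
  case Strategy::B:
    return std::make_unique<FMLikeRefiner<CacheB>>();
  }
  return nullptr;
}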
diff --git a/kaminpar-shm/refinement/fm/fm_refiner.h b/kaminpar-shm/refinement/fm/fm_refiner.h
index ae3c050a..7dafa242 100644
--- a/kaminpar-shm/refinement/fm/fm_refiner.h
+++ b/kaminpar-shm/refinement/fm/fm_refiner.h
@@ -16,9 +16,9 @@
 #include "kaminpar-shm/datastructures/graph.h"
 #include "kaminpar-shm/datastructures/partitioned_graph.h"
 #include "kaminpar-shm/refinement/fm/stopping_policies.h"
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/hybrid_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 #include "kaminpar-shm/refinement/refiner.h"
 
 #include "kaminpar-common/datastructures/binary_heap.h"
@@ -27,7 +27,7 @@ namespace kaminpar::shm {
 
 namespace fm {
 using DefaultDeltaPartitionedGraph = GenericDeltaPartitionedGraph<>;
-using DenseGainCache = DenseGainCache<>;
+using SparseGainCache = SparseGainCache<>;
 using OnTheFlyGainCache = OnTheFlyGainCache<>;
 using HighDegreeGainCache = HybridGainCache<>;
 
@@ -88,7 +88,7 @@
     );
   }
 
-  int owner(const NodeID u) const {
+  [[nodiscard]] int owner(const NodeID u) const {
     return __atomic_load_n(&_state[u], __ATOMIC_RELAXED);
   }
 
@@ -158,7 +158,7 @@ template <typename GainCache> class BorderNodes {
     return polled;
   }
 
-  NodeID get() const {
+  [[nodiscard]] NodeID get() const {
     return has_more() ? _border_nodes[_next_border_node] : kInvalidNodeID;
   }
 
@@ -182,7 +182,7 @@ template <typename GainCache> class BorderNodes {
   tbb::concurrent_vector<NodeID> _border_nodes;
 };
 
-template <typename GainCache = DenseGainCache> struct SharedData {
+template <typename GainCache = SparseGainCache> struct SharedData {
   SharedData(const Context &ctx, const NodeID max_n, const BlockID max_k)
       : node_tracker(max_n),
         gain_cache(ctx, max_n, max_k),
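Note: owner() above is a relaxed atomic read of per-node ownership; the claiming itself happens elsewhere in NodeTracker via compare-and-swap. A self-contained sketch of that lock-free idiom using the same GCC/Clang __atomic builtins; OwnerTable, try_claim() and kUnlocked are hypothetical names, not part of this patch:

#include <cstddef>
#include <vector>

// Sketch of lock-free node ownership: one int per node, 0 = unclaimed.
class OwnerTable {
public:
  constexpr static int kUnlocked = 0;

  explicit OwnerTable(const std::size_t n) : _state(n, kUnlocked) {}

  // Worker `id` (> 0) tries to claim node u; succeeds iff u is unclaimed.
  bool try_claim(const std::size_t u, const int id) {
    int expected = kUnlocked;
    return __atomic_compare_exchange_n(
        &_state[u], &expected, id, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED
    );
  }

  // A relaxed load suffices for an advisory ownership check, as in owner().
  int owner(const std::size_t u) const {
    return __atomic_load_n(&_state[u], __ATOMIC_RELAXED);
  }

private:
  std::vector<int> _state;
};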
@@ -228,11 +228,11 @@ struct AppliedMove {
 
 template <
     typename DeltaPartitionedGraph = fm::DefaultDeltaPartitionedGraph,
-    typename GainCache = fm::DenseGainCache>
+    typename GainCache = fm::SparseGainCache>
 class FMRefiner : public Refiner {
 public:
   FMRefiner(const Context &ctx);
-  ~FMRefiner(); // Required for the std::unique_ptr<> member.
+  ~FMRefiner() override; // Required for the std::unique_ptr<> member.
 
   FMRefiner(const FMRefiner &) = delete;
   FMRefiner &operator=(const FMRefiner &) = delete;
@@ -249,20 +249,20 @@ class FMRefiner : public Refiner {
   using MovesVec = std::vector<fm::AppliedMove>;
   using Batches = tbb::concurrent_vector<std::pair<SeedNodesVec, MovesVec>>;
 
-  std::vector<fm::BatchStats>
+  [[nodiscard]] std::vector<fm::BatchStats>
   dbg_compute_batch_stats(const PartitionedGraph &graph, Batches next_batches) const;
 
-  std::pair<PartitionedGraph, Batches>
+  [[nodiscard]] std::pair<PartitionedGraph, Batches>
   dbg_build_prev_p_graph(const PartitionedGraph &p_graph, Batches next_batches) const;
 
-  fm::BatchStats dbg_compute_single_batch_stats_in_sequence(
+  [[nodiscard]] fm::BatchStats dbg_compute_single_batch_stats_in_sequence(
       PartitionedGraph &p_graph,
       const std::vector<NodeID> &seeds,
       const std::vector<fm::AppliedMove> &moves,
       const std::vector<NodeID> &distances
   ) const;
 
-  std::vector<NodeID> dbg_compute_batch_distances(
+  [[nodiscard]] std::vector<NodeID> dbg_compute_batch_distances(
       const Graph &graph,
       const std::vector<NodeID> &seeds,
       const std::vector<fm::AppliedMove> &moves
@@ -276,7 +276,7 @@
 
 template <
     typename DeltaPartitionedGraph = fm::DefaultDeltaPartitionedGraph,
-    typename GainCache = fm::DenseGainCache>
+    typename GainCache = fm::SparseGainCache>
 class LocalizedFMRefiner {
 public:
   LocalizedFMRefiner(
@@ -313,23 +313,34 @@ class LocalizedFMRefiner {
   const PartitionContext &_p_ctx;
   const KwayFMRefinementContext &_fm_ctx;
 
-  // Graph to work on
+  // Shared: Graph to work on
   PartitionedGraph &_p_graph;
 
-  // Data shared among all workers
+  // Shared: Data shared among all workers
   fm::SharedData<GainCache> &_shared;
 
-  // Data local to this worker
-  DeltaPartitionedGraph _d_graph;                                // O(|Delta|) space
-  typename GainCache::template DeltaCache<DeltaPartitionedGraph> _d_gain_cache; // O(|Delta|) space
-  BinaryMaxHeap<EdgeWeight> _block_pq;                           // O(k) space
-  std::vector<SharedBinaryMaxHeap<EdgeWeight>> _node_pqs;        // O(k + |Touched|) space
+  // Thread-local: O(|Delta|) space
+  DeltaPartitionedGraph _d_graph;
+
+  // Thread-local: O(|Delta|) space
+  using DeltaGainCache = typename GainCache::template DeltaCache<DeltaPartitionedGraph>;
+  DeltaGainCache _d_gain_cache;
+
+  // Thread-local: O(k) space
+  BinaryMaxHeap<EdgeWeight> _block_pq;
+
+  // Thread-local: O(k + |Touched|) space
+  std::vector<SharedBinaryMaxHeap<EdgeWeight>> _node_pqs;
 
   AdaptiveStoppingPolicy _stopping_policy;
 
+  // Thread-local: O(|Touched|) space
   std::vector<NodeID> _touched_nodes;
+
+  // Thread-local: O(1) space
   std::vector<NodeID> _seed_nodes;
 
+  // Thread-local: O(|Touched|) space if move recording is enabled
   std::vector<fm::AppliedMove> _applied_moves;
   bool _record_applied_moves = false;
 };
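Note: the re-commented members above implement speculative, thread-local refinement: reads fall back to the shared partition unless the worker's private delta overrides them, so each worker needs only O(|Delta|) extra memory and can discard its delta on rollback. A minimal sketch of that overlay idea with simplified types; DeltaOverlay and its members are hypothetical, not KaMinPar's GenericDeltaPartitionedGraph:

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

// A worker records speculative changes in O(|Delta|) memory on top of shared
// state and commits or discards them later.
class DeltaOverlay {
public:
  explicit DeltaOverlay(const std::vector<std::uint32_t> &shared) : _shared(shared) {}

  // Reads fall through to the shared array unless locally overridden.
  std::uint32_t get(const std::size_t i) const {
    const auto it = _delta.find(i);
    return it != _delta.end() ? it->second : _shared[i];
  }

  // Writes stay thread-local; the shared array is never touched.
  void set(const std::size_t i, const std::uint32_t value) {
    _delta[i] = value;
  }

  // Rollback costs O(|Delta|), independent of the size of the shared state.
  void clear() {
    _delta.clear();
  }

private:
  const std::vector<std::uint32_t> &_shared;
  std::unordered_map<std::size_t, std::uint32_t> _delta;
};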
diff --git a/kaminpar-shm/refinement/gains/dense_gain_cache.h b/kaminpar-shm/refinement/gains/sparse_gain_cache.h
similarity index 90%
rename from kaminpar-shm/refinement/gains/dense_gain_cache.h
rename to kaminpar-shm/refinement/gains/sparse_gain_cache.h
index 8329fa8a..5c08f139 100644
--- a/kaminpar-shm/refinement/gains/dense_gain_cache.h
+++ b/kaminpar-shm/refinement/gains/sparse_gain_cache.h
@@ -2,7 +2,7 @@
  * Gain cache that caches one gain for each node and block, using a total of
  * O(|V| * k) memory.
  *
- * @file: dense_gain_cache.h
+ * @file: sparse_gain_cache.h
  * @author: Daniel Seemaier
  * @date: 15.03.2023
  ******************************************************************************/
@@ -24,15 +24,15 @@
 #include "kaminpar-common/timer.h"
 
 namespace kaminpar::shm {
-template <typename GainCache, typename DeltaPartitionedGraph> class DenseDeltaGainCache;
+template <typename GainCache, typename DeltaPartitionedGraph> class SparseDeltaGainCache;
 
-template <bool iterate_exact_gains = false> class DenseGainCache {
-  using Self = DenseGainCache<iterate_exact_gains>;
-  template <typename, typename> friend class DenseDeltaGainCache;
+template <bool iterate_exact_gains = false> class SparseGainCache {
+  using Self = SparseGainCache<iterate_exact_gains>;
+  template <typename, typename> friend class SparseDeltaGainCache;
 
 public:
   template <typename DeltaPartitionedGraph>
-  using DeltaCache = DenseDeltaGainCache<Self, DeltaPartitionedGraph>;
+  using DeltaCache = SparseDeltaGainCache<Self, DeltaPartitionedGraph>;
 
   // gains() will iterate over all blocks, including those not adjacent to the node.
   constexpr static bool kIteratesNonadjacentBlocks = true;
@@ -42,7 +42,7 @@ template <bool iterate_exact_gains = false> class DenseGainCache {
   // (more expensive, but saves a call to gain() if the exact gain for the best block is needed).
   constexpr static bool kIteratesExactGains = iterate_exact_gains;
 
-  DenseGainCache(const Context & /* ctx */, const NodeID max_n, const BlockID max_k)
+  SparseGainCache(const Context & /* ctx */, const NodeID max_n, const BlockID max_k)
       : _max_n(max_n),
         _max_k(max_k),
         _gain_cache(
@@ -160,7 +160,8 @@ template <bool iterate_exact_gains = false> class DenseGainCache {
     }
   }
 
-  bool check_cached_gain_for_node(const PartitionedGraph &p_graph, const NodeID u) const {
+  [[nodiscard]] bool
+  check_cached_gain_for_node(const PartitionedGraph &p_graph, const NodeID u) const {
     const BlockID block_u = p_graph.block(u);
     std::vector<EdgeWeight> actual_external_degrees(_k, 0);
     EdgeWeight actual_weighted_degree = 0;
@@ -200,19 +201,19 @@ template <bool iterate_exact_gains = false> class DenseGainCache {
   StaticArray<EdgeWeight> _weighted_degrees;
 };
 
-template <typename GainCache, typename DeltaPartitionedGraph> class DenseDeltaGainCache {
+template <typename GainCache, typename DeltaPartitionedGraph> class SparseDeltaGainCache {
 public:
   constexpr static bool kIteratesNonadjacentBlocks = GainCache::kIteratesNonadjacentBlocks;
   constexpr static bool kIteratesExactGains = GainCache::kIteratesExactGains;
 
-  DenseDeltaGainCache(const GainCache &gain_cache, const DeltaPartitionedGraph & /* d_graph */)
+  SparseDeltaGainCache(const GainCache &gain_cache, const DeltaPartitionedGraph & /* d_graph */)
       : _gain_cache(gain_cache) {}
 
-  EdgeWeight conn(const NodeID node, const BlockID block) const {
+  [[nodiscard]] EdgeWeight conn(const NodeID node, const BlockID block) const {
     return _gain_cache.conn(node, block) + conn_delta(node, block);
   }
 
-  EdgeWeight gain(const NodeID node, const BlockID from, const BlockID to) const {
+  [[nodiscard]] EdgeWeight gain(const NodeID node, const BlockID from, const BlockID to) const {
     return _gain_cache.gain(node, from, to) + conn_delta(node, to) - conn_delta(node, from);
   }
 
@@ -248,7 +249,7 @@ template <typename GainCache, typename DeltaPartitionedGraph> class DenseDeltaGa
   }
 
 private:
-  EdgeWeight conn_delta(const NodeID node, const BlockID block) const {
+  [[nodiscard]] EdgeWeight conn_delta(const NodeID node, const BlockID block) const {
     const auto it = _gain_cache_delta.get_if_contained(_gain_cache.index(node, block));
     return it != _gain_cache_delta.end() ? *it : 0;
   }
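Note: per its header comment, the renamed cache keeps one connectivity value per (node, block) pair in a flat |V| * k array, addressed through index(node, block), so gain(u, from, to) = conn(u, to) - conn(u, from) is answered in O(1). A simplified sketch of that layout; FlatGainCache and add_connection() are illustrative, and the real class additionally tracks weighted degrees and supports concurrent updates:

#include <cstddef>
#include <cstdint>
#include <vector>

using EdgeWeight = std::int64_t;

// One connectivity entry per (node, block) pair in a flat n * k array.
class FlatGainCache {
public:
  FlatGainCache(const std::size_t n, const std::size_t k) : _k(k), _conn(n * k, 0) {}

  // Called once per incident edge during initialization and after each move.
  void add_connection(const std::size_t u, const std::size_t block, const EdgeWeight w) {
    _conn[index(u, block)] += w;
  }

  EdgeWeight conn(const std::size_t u, const std::size_t block) const {
    return _conn[index(u, block)];
  }

  // Moving u from `from` to `to` changes the cut by conn(u, to) - conn(u, from).
  EdgeWeight gain(const std::size_t u, const std::size_t from, const std::size_t to) const {
    return conn(u, to) - conn(u, from);
  }

private:
  std::size_t index(const std::size_t u, const std::size_t block) const {
    return u * _k + block; // row-major (node, block) -> flat offset
  }

  std::size_t _k;
  std::vector<EdgeWeight> _conn;
};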
diff --git a/kaminpar-shm/refinement/jet/jet_refiner.cc b/kaminpar-shm/refinement/jet/jet_refiner.cc
index d7200aee..19d07e9f 100644
--- a/kaminpar-shm/refinement/jet/jet_refiner.cc
+++ b/kaminpar-shm/refinement/jet/jet_refiner.cc
@@ -12,7 +12,7 @@
 #include "kaminpar-shm/datastructures/partitioned_graph.h"
 #include "kaminpar-shm/metrics.h"
 #include "kaminpar-shm/refinement/balancer/greedy_balancer.h"
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 
 #include "kaminpar-common/datastructures/noinit_vector.h"
 #include "kaminpar-common/degree_buckets.h"
@@ -39,7 +39,7 @@ bool JetRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx
   SCOPED_TIMER("Jet Refiner");
 
   START_TIMER("Allocation");
-  DenseGainCache gain_cache(_ctx, p_graph.n(), p_graph.k());
+  SparseGainCache gain_cache(_ctx, p_graph.n(), p_graph.k());
   gain_cache.initialize(p_graph);
 
   NoinitVector<BlockID> next_partition(p_graph.n());
@@ -165,7 +165,7 @@ bool JetRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx
   TIMED_SCOPE("Update best partition") {
     if (final_cut <= best_cut) {
       p_graph.pfor_nodes([&](const NodeID u) { best_partition[u] = p_graph.block(u); });
-      last_iteration_is_best = true; 
+      last_iteration_is_best = true;
     } else {
       last_iteration_is_best = false;
     }
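Note: the final hunk touches Jet's best-partition bookkeeping: after every iteration the current assignment is snapshotted iff the resulting cut is no worse than the best cut seen so far. A compact sketch of that pattern; BestPartitionTracker and its members are hypothetical, not JetRefiner's actual fields:

#include <cstdint>
#include <limits>
#include <vector>

using BlockID = std::uint32_t;
using EdgeWeight = std::int64_t;

// Keep the best partition seen so far; snapshot iff the cut did not get worse.
struct BestPartitionTracker {
  std::vector<BlockID> best_partition;
  EdgeWeight best_cut = std::numeric_limits<EdgeWeight>::max();
  bool last_iteration_is_best = false;

  void update(const std::vector<BlockID> &current, const EdgeWeight final_cut) {
    if (final_cut <= best_cut) {
      best_partition = current; // snapshot the current block assignment
      best_cut = final_cut;
      last_iteration_is_best = true;
    } else {
      last_iteration_is_best = false;
    }
  }
};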