From 49ed6293d5d9ac4764b1cca669d01d0d9eaf6206 Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Tue, 6 Feb 2024 17:15:08 +0100
Subject: [PATCH] refactor(shm-fm): rename DenseGainCache to SparseGainCache

---
 kaminpar-shm/factories.cc                     |  4 +-
 .../refinement/balancer/greedy_balancer.h     |  6 +--
 kaminpar-shm/refinement/fm/fm_refiner.cc      |  9 ++--
 kaminpar-shm/refinement/fm/fm_refiner.h       | 49 ++++++++++++-------
 ...dense_gain_cache.h => sparse_gain_cache.h} | 27 +++++-----
 kaminpar-shm/refinement/jet/jet_refiner.cc    |  6 +--
 6 files changed, 55 insertions(+), 46 deletions(-)
 rename kaminpar-shm/refinement/gains/{dense_gain_cache.h => sparse_gain_cache.h} (90%)

diff --git a/kaminpar-shm/factories.cc b/kaminpar-shm/factories.cc
index 54ee4092..cd71482c 100644
--- a/kaminpar-shm/factories.cc
+++ b/kaminpar-shm/factories.cc
@@ -31,7 +31,7 @@
 #include "kaminpar-shm/refinement/multi_refiner.h"
 
 // Gain cache strategies for the FM algorithm
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/hybrid_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h"
 
@@ -89,7 +89,7 @@ std::unique_ptr<Refiner> create_refiner(const Context &ctx, const RefinementAlgo
 
   case RefinementAlgorithm::KWAY_FM: {
     if (ctx.refinement.kway_fm.gain_cache_strategy == GainCacheStrategy::DENSE) {
-      return std::make_unique<FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>>(ctx);
+      return std::make_unique<FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::SparseGainCache>>(ctx);
     } else if (ctx.refinement.kway_fm.gain_cache_strategy == GainCacheStrategy::ON_THE_FLY) {
       return std::make_unique<FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::OnTheFlyGainCache>>(
           ctx
diff --git a/kaminpar-shm/refinement/balancer/greedy_balancer.h b/kaminpar-shm/refinement/balancer/greedy_balancer.h
index 73286d41..5806ce7f 100644
--- a/kaminpar-shm/refinement/balancer/greedy_balancer.h
+++ b/kaminpar-shm/refinement/balancer/greedy_balancer.h
@@ -15,7 +15,7 @@
 #include "kaminpar-shm/datastructures/graph.h"
 #include "kaminpar-shm/datastructures/partitioned_graph.h"
 #include "kaminpar-shm/metrics.h"
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 #include "kaminpar-shm/refinement/refiner.h"
 
 #include "kaminpar-common/datastructures/binary_heap.h"
@@ -103,7 +103,7 @@ class GreedyBalancer : public Refiner {
   void initialize(const PartitionedGraph &p_graph) final;
   bool refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) final;
 
-  void track_moves(DenseGainCache<> *gain_cache) {
+  void track_moves(SparseGainCache<> *gain_cache) {
     _gain_cache = gain_cache;
   }
 
@@ -157,6 +157,6 @@ class GreedyBalancer : public Refiner {
 
   Statistics _stats;
 
-  DenseGainCache<> *_gain_cache = nullptr;
+  SparseGainCache<> *_gain_cache = nullptr;
 };
 } // namespace kaminpar::shm
diff --git a/kaminpar-shm/refinement/fm/fm_refiner.cc b/kaminpar-shm/refinement/fm/fm_refiner.cc
index f43eb535..f8ed70af 100644
--- a/kaminpar-shm/refinement/fm/fm_refiner.cc
+++ b/kaminpar-shm/refinement/fm/fm_refiner.cc
@@ -776,19 +776,16 @@ std::pair<EdgeWeight, EdgeWeight> LocalizedFMRefiner<DeltaPartitionedGraph, Gai
 }
 
 namespace fm {
-template class SharedData<DenseGainCache>;
+template class SharedData<SparseGainCache>;
 template class SharedData<OnTheFlyGainCache>;
 template class SharedData<HighDegreeGainCache>;
 } // namespace fm
 
-// template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
-// template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
-
 template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::OnTheFlyGainCache>;
 template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::OnTheFlyGainCache>;
 
-template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
-template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::DenseGainCache>;
+template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::SparseGainCache>;
+template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::SparseGainCache>;
 
 template class FMRefiner<fm::DefaultDeltaPartitionedGraph, fm::HighDegreeGainCache>;
 template class LocalizedFMRefiner<fm::DefaultDeltaPartitionedGraph, fm::HighDegreeGainCache>;
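Note: create_refiner() maps the runtime GainCacheStrategy value onto a compile-time template argument, which is why the translation unit above must explicitly instantiate every refiner/gain-cache combination the factory can name. A minimal, self-contained sketch of that dispatch pattern; all names here (Refiner, CacheA, CacheB, FMLikeRefiner, Strategy) are illustrative stand-ins, not the real KaMinPar declarations:

#include <memory>

// Hypothetical stand-ins for the refiner interface and the gain caches; the
// point is the enum-to-template dispatch, not KaMinPar's real signatures.
class Refiner {
public:
  virtual ~Refiner() = default;
  virtual void refine() = 0;
};

struct CacheA {};
struct CacheB {};

template <typename GainCache> class FMLikeRefiner : public Refiner {
public:
  void refine() override { /* run FM rounds against a GainCache instance */ }
};

enum class Strategy { A, B };

// Runtime enum -> compile-time template argument, one branch per strategy.
std::unique_ptr<Refiner> create(const Strategy strategy) {
  switch (strategy) {
  case Strategy::A:
    return std::make_unique<FMLikeRefiner<CacheA>>();
  case Strategy::B:
    return std::make_unique<FMLikeRefiner<CacheB>>();
  }
  return nullptr;
}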
diff --git a/kaminpar-shm/refinement/fm/fm_refiner.h b/kaminpar-shm/refinement/fm/fm_refiner.h
index ae3c050a..7dafa242 100644
--- a/kaminpar-shm/refinement/fm/fm_refiner.h
+++ b/kaminpar-shm/refinement/fm/fm_refiner.h
@@ -16,9 +16,9 @@
 #include "kaminpar-shm/datastructures/graph.h"
 #include "kaminpar-shm/datastructures/partitioned_graph.h"
 #include "kaminpar-shm/refinement/fm/stopping_policies.h"
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/hybrid_gain_cache.h"
 #include "kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 #include "kaminpar-shm/refinement/refiner.h"
 
 #include "kaminpar-common/datastructures/binary_heap.h"
@@ -27,7 +27,7 @@ namespace kaminpar::shm {
 
 namespace fm {
 using DefaultDeltaPartitionedGraph = GenericDeltaPartitionedGraph<>;
-using DenseGainCache = DenseGainCache<>;
+using SparseGainCache = SparseGainCache<>;
 using OnTheFlyGainCache = OnTheFlyGainCache<>;
 using HighDegreeGainCache = HybridGainCache<>;
 
@@ -88,7 +88,7 @@
     );
   }
 
-  int owner(const NodeID u) const {
+  [[nodiscard]] int owner(const NodeID u) const {
     return __atomic_load_n(&_state[u], __ATOMIC_RELAXED);
   }
 
@@ -158,7 +158,7 @@ template <typename GainCache> class BorderNodes {
     return polled;
   }
 
-  NodeID get() const {
+  [[nodiscard]] NodeID get() const {
     return has_more() ? _border_nodes[_next_border_node] : kInvalidNodeID;
   }
 
@@ -182,7 +182,7 @@ template <typename GainCache> class BorderNodes {
   tbb::concurrent_vector<NodeID> _border_nodes;
 };
 
-template <typename GainCache = DenseGainCache> struct SharedData {
+template <typename GainCache = SparseGainCache> struct SharedData {
   SharedData(const Context &ctx, const NodeID max_n, const BlockID max_k)
       : node_tracker(max_n),
         gain_cache(ctx, max_n, max_k),
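Note: owner() above is a relaxed atomic read of per-node ownership; the claiming itself happens elsewhere in NodeTracker via compare-and-swap. A self-contained sketch of that lock-free idiom using the same GCC/Clang __atomic builtins; OwnerTable, try_claim() and kUnlocked are hypothetical names, not part of this patch:

#include <cstddef>
#include <vector>

// Sketch of lock-free node ownership: one int per node, 0 = unclaimed.
class OwnerTable {
public:
  constexpr static int kUnlocked = 0;

  explicit OwnerTable(const std::size_t n) : _state(n, kUnlocked) {}

  // Worker `id` (> 0) tries to claim node u; succeeds iff u is unclaimed.
  bool try_claim(const std::size_t u, const int id) {
    int expected = kUnlocked;
    return __atomic_compare_exchange_n(
        &_state[u], &expected, id, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED
    );
  }

  // A relaxed load suffices for an advisory ownership check, as in owner().
  int owner(const std::size_t u) const {
    return __atomic_load_n(&_state[u], __ATOMIC_RELAXED);
  }

private:
  std::vector<int> _state;
};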
@@ -228,11 +228,11 @@ struct AppliedMove {
 
 template <
     typename DeltaPartitionedGraph = fm::DefaultDeltaPartitionedGraph,
-    typename GainCache = fm::DenseGainCache>
+    typename GainCache = fm::SparseGainCache>
 class FMRefiner : public Refiner {
 public:
   FMRefiner(const Context &ctx);
-  ~FMRefiner(); // Required for the std::unique_ptr<> member.
+  ~FMRefiner() override; // Required for the std::unique_ptr<> member.
 
   FMRefiner(const FMRefiner &) = delete;
   FMRefiner &operator=(const FMRefiner &) = delete;
@@ -249,20 +249,20 @@ class FMRefiner : public Refiner {
   using MovesVec = std::vector<fm::AppliedMove>;
   using Batches = tbb::concurrent_vector<std::pair<SeedNodesVec, MovesVec>>;
 
-  std::vector<fm::BatchStats>
+  [[nodiscard]] std::vector<fm::BatchStats>
   dbg_compute_batch_stats(const PartitionedGraph &graph, Batches next_batches) const;
 
-  std::pair<PartitionedGraph, Batches>
+  [[nodiscard]] std::pair<PartitionedGraph, Batches>
   dbg_build_prev_p_graph(const PartitionedGraph &p_graph, Batches next_batches) const;
 
-  fm::BatchStats dbg_compute_single_batch_stats_in_sequence(
+  [[nodiscard]] fm::BatchStats dbg_compute_single_batch_stats_in_sequence(
       PartitionedGraph &p_graph,
       const std::vector<NodeID> &seeds,
       const std::vector<fm::AppliedMove> &moves,
       const std::vector<NodeID> &distances
   ) const;
 
-  std::vector<NodeID> dbg_compute_batch_distances(
+  [[nodiscard]] std::vector<NodeID> dbg_compute_batch_distances(
       const Graph &graph,
       const std::vector<NodeID> &seeds,
       const std::vector<fm::AppliedMove> &moves
@@ -276,7 +276,7 @@
 
 template <
     typename DeltaPartitionedGraph = fm::DefaultDeltaPartitionedGraph,
-    typename GainCache = fm::DenseGainCache>
+    typename GainCache = fm::SparseGainCache>
 class LocalizedFMRefiner {
 public:
   LocalizedFMRefiner(
@@ -313,23 +313,34 @@ class LocalizedFMRefiner {
   const PartitionContext &_p_ctx;
   const KwayFMRefinementContext &_fm_ctx;
 
-  // Graph to work on
+  // Shared: Graph to work on
   PartitionedGraph &_p_graph;
 
-  // Data shared among all workers
+  // Shared: Data shared among all workers
   fm::SharedData<GainCache> &_shared;
 
-  // Data local to this worker
-  DeltaPartitionedGraph _d_graph;                                // O(|Delta|) space
-  typename GainCache::template DeltaCache<DeltaPartitionedGraph> _d_gain_cache; // O(|Delta|) space
-  BinaryMaxHeap<EdgeWeight> _block_pq;                           // O(k) space
-  std::vector<SharedBinaryMaxHeap<EdgeWeight>> _node_pqs;        // O(k + |Touched|) space
+  // Thread-local: O(|Delta|) space
+  DeltaPartitionedGraph _d_graph;
+
+  // Thread-local: O(|Delta|) space
+  using DeltaGainCache = typename GainCache::template DeltaCache<DeltaPartitionedGraph>;
+  DeltaGainCache _d_gain_cache;
+
+  // Thread-local: O(k) space
+  BinaryMaxHeap<EdgeWeight> _block_pq;
+
+  // Thread-local: O(k + |Touched|) space
+  std::vector<SharedBinaryMaxHeap<EdgeWeight>> _node_pqs;
 
   AdaptiveStoppingPolicy _stopping_policy;
 
+  // Thread-local: O(|Touched|) space
   std::vector<NodeID> _touched_nodes;
+
+  // Thread-local: O(1) space
   std::vector<NodeID> _seed_nodes;
 
+  // Thread-local: O(|Touched|) space if move recording is enabled
   std::vector<fm::AppliedMove> _applied_moves;
   bool _record_applied_moves = false;
 };
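Note: the re-commented members above implement speculative, thread-local refinement: reads fall back to the shared partition unless the worker's private delta overrides them, so each worker needs only O(|Delta|) extra memory and can discard its delta on rollback. A minimal sketch of that overlay idea with simplified types; DeltaOverlay and its members are hypothetical, not KaMinPar's GenericDeltaPartitionedGraph:

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

// A worker records speculative changes in O(|Delta|) memory on top of shared
// state and commits or discards them later.
class DeltaOverlay {
public:
  explicit DeltaOverlay(const std::vector<std::uint32_t> &shared) : _shared(shared) {}

  // Reads fall through to the shared array unless locally overridden.
  std::uint32_t get(const std::size_t i) const {
    const auto it = _delta.find(i);
    return it != _delta.end() ? it->second : _shared[i];
  }

  // Writes stay thread-local; the shared array is never touched.
  void set(const std::size_t i, const std::uint32_t value) {
    _delta[i] = value;
  }

  // Rollback costs O(|Delta|), independent of the size of the shared state.
  void clear() {
    _delta.clear();
  }

private:
  const std::vector<std::uint32_t> &_shared;
  std::unordered_map<std::size_t, std::uint32_t> _delta;
};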
diff --git a/kaminpar-shm/refinement/gains/dense_gain_cache.h b/kaminpar-shm/refinement/gains/sparse_gain_cache.h
similarity index 90%
rename from kaminpar-shm/refinement/gains/dense_gain_cache.h
rename to kaminpar-shm/refinement/gains/sparse_gain_cache.h
index 8329fa8a..5c08f139 100644
--- a/kaminpar-shm/refinement/gains/dense_gain_cache.h
+++ b/kaminpar-shm/refinement/gains/sparse_gain_cache.h
@@ -2,7 +2,7 @@
  * Gain cache that caches one gain for each node and block, using a total of
  * O(|V| * k) memory.
  *
- * @file: dense_gain_cache.h
+ * @file: sparse_gain_cache.h
  * @author: Daniel Seemaier
  * @date: 15.03.2023
  ******************************************************************************/
@@ -24,15 +24,15 @@
 #include "kaminpar-common/timer.h"
 
 namespace kaminpar::shm {
-template <typename GainCache, typename DeltaPartitionedGraph> class DenseDeltaGainCache;
+template <typename GainCache, typename DeltaPartitionedGraph> class SparseDeltaGainCache;
 
-template <bool iterate_exact_gains = false> class DenseGainCache {
-  using Self = DenseGainCache<iterate_exact_gains>;
-  template <typename, typename> friend class DenseDeltaGainCache;
+template <bool iterate_exact_gains = false> class SparseGainCache {
+  using Self = SparseGainCache<iterate_exact_gains>;
+  template <typename, typename> friend class SparseDeltaGainCache;
 
 public:
   template <typename DeltaPartitionedGraph>
-  using DeltaCache = DenseDeltaGainCache<Self, DeltaPartitionedGraph>;
+  using DeltaCache = SparseDeltaGainCache<Self, DeltaPartitionedGraph>;
 
   // gains() will iterate over all blocks, including those not adjacent to the node.
   constexpr static bool kIteratesNonadjacentBlocks = true;
@@ -42,7 +42,7 @@ template <bool iterate_exact_gains = false> class DenseGainCache {
   // (more expensive, but saves a call to gain() if the exact gain for the best block is needed).
   constexpr static bool kIteratesExactGains = iterate_exact_gains;
 
-  DenseGainCache(const Context & /* ctx */, const NodeID max_n, const BlockID max_k)
+  SparseGainCache(const Context & /* ctx */, const NodeID max_n, const BlockID max_k)
       : _max_n(max_n),
         _max_k(max_k),
         _gain_cache(
@@ -160,7 +160,8 @@ template <bool iterate_exact_gains = false> class DenseGainCache {
     }
   }
 
-  bool check_cached_gain_for_node(const PartitionedGraph &p_graph, const NodeID u) const {
+  [[nodiscard]] bool
+  check_cached_gain_for_node(const PartitionedGraph &p_graph, const NodeID u) const {
     const BlockID block_u = p_graph.block(u);
     std::vector<EdgeWeight> actual_external_degrees(_k, 0);
     EdgeWeight actual_weighted_degree = 0;
@@ -200,19 +201,19 @@ template <bool iterate_exact_gains = false> class DenseGainCache {
   StaticArray<EdgeWeight> _weighted_degrees;
 };
 
-template <typename GainCache, typename DeltaPartitionedGraph> class DenseDeltaGainCache {
+template <typename GainCache, typename DeltaPartitionedGraph> class SparseDeltaGainCache {
 public:
   constexpr static bool kIteratesNonadjacentBlocks = GainCache::kIteratesNonadjacentBlocks;
   constexpr static bool kIteratesExactGains = GainCache::kIteratesExactGains;
 
-  DenseDeltaGainCache(const GainCache &gain_cache, const DeltaPartitionedGraph & /* d_graph */)
+  SparseDeltaGainCache(const GainCache &gain_cache, const DeltaPartitionedGraph & /* d_graph */)
       : _gain_cache(gain_cache) {}
 
-  EdgeWeight conn(const NodeID node, const BlockID block) const {
+  [[nodiscard]] EdgeWeight conn(const NodeID node, const BlockID block) const {
     return _gain_cache.conn(node, block) + conn_delta(node, block);
   }
 
-  EdgeWeight gain(const NodeID node, const BlockID from, const BlockID to) const {
+  [[nodiscard]] EdgeWeight gain(const NodeID node, const BlockID from, const BlockID to) const {
     return _gain_cache.gain(node, from, to) + conn_delta(node, to) - conn_delta(node, from);
   }
 
@@ -248,7 +249,7 @@ template <typename GainCache, typename DeltaPartitionedGraph> class DenseDeltaGa
   }
 
 private:
-  EdgeWeight conn_delta(const NodeID node, const BlockID block) const {
+  [[nodiscard]] EdgeWeight conn_delta(const NodeID node, const BlockID block) const {
     const auto it = _gain_cache_delta.get_if_contained(_gain_cache.index(node, block));
     return it != _gain_cache_delta.end() ? *it : 0;
   }
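Note: per its header comment, the renamed cache keeps one connectivity value per (node, block) pair in a flat |V| * k array, addressed through index(node, block), so gain(u, from, to) = conn(u, to) - conn(u, from) is answered in O(1). A simplified sketch of that layout; FlatGainCache and add_connection() are illustrative, and the real class additionally tracks weighted degrees and supports concurrent updates:

#include <cstddef>
#include <cstdint>
#include <vector>

using EdgeWeight = std::int64_t;

// One connectivity entry per (node, block) pair in a flat n * k array.
class FlatGainCache {
public:
  FlatGainCache(const std::size_t n, const std::size_t k) : _k(k), _conn(n * k, 0) {}

  // Called once per incident edge during initialization and after each move.
  void add_connection(const std::size_t u, const std::size_t block, const EdgeWeight w) {
    _conn[index(u, block)] += w;
  }

  EdgeWeight conn(const std::size_t u, const std::size_t block) const {
    return _conn[index(u, block)];
  }

  // Moving u from `from` to `to` changes the cut by conn(u, to) - conn(u, from).
  EdgeWeight gain(const std::size_t u, const std::size_t from, const std::size_t to) const {
    return conn(u, to) - conn(u, from);
  }

private:
  std::size_t index(const std::size_t u, const std::size_t block) const {
    return u * _k + block; // row-major (node, block) -> flat offset
  }

  std::size_t _k;
  std::vector<EdgeWeight> _conn;
};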
diff --git a/kaminpar-shm/refinement/jet/jet_refiner.cc b/kaminpar-shm/refinement/jet/jet_refiner.cc
index d7200aee..19d07e9f 100644
--- a/kaminpar-shm/refinement/jet/jet_refiner.cc
+++ b/kaminpar-shm/refinement/jet/jet_refiner.cc
@@ -12,7 +12,7 @@
 #include "kaminpar-shm/datastructures/partitioned_graph.h"
 #include "kaminpar-shm/metrics.h"
 #include "kaminpar-shm/refinement/balancer/greedy_balancer.h"
-#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
+#include "kaminpar-shm/refinement/gains/sparse_gain_cache.h"
 
 #include "kaminpar-common/datastructures/noinit_vector.h"
 #include "kaminpar-common/degree_buckets.h"
@@ -39,7 +39,7 @@ bool JetRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx
   SCOPED_TIMER("Jet Refiner");
 
   START_TIMER("Allocation");
-  DenseGainCache gain_cache(_ctx, p_graph.n(), p_graph.k());
+  SparseGainCache gain_cache(_ctx, p_graph.n(), p_graph.k());
   gain_cache.initialize(p_graph);
 
   NoinitVector<BlockID> next_partition(p_graph.n());
@@ -165,7 +165,7 @@ bool JetRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx
   TIMED_SCOPE("Update best partition") {
     if (final_cut <= best_cut) {
       p_graph.pfor_nodes([&](const NodeID u) { best_partition[u] = p_graph.block(u); });
-      last_iteration_is_best = true; 
+      last_iteration_is_best = true;
     } else {
       last_iteration_is_best = false;
     }
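Note: the final hunk touches Jet's best-partition bookkeeping: after every iteration the current assignment is snapshotted iff the resulting cut is no worse than the best cut seen so far. A compact sketch of that pattern; BestPartitionTracker and its members are hypothetical, not JetRefiner's actual fields:

#include <cstdint>
#include <limits>
#include <vector>

using BlockID = std::uint32_t;
using EdgeWeight = std::int64_t;

// Keep the best partition seen so far; snapshot iff the cut did not get worse.
struct BestPartitionTracker {
  std::vector<BlockID> best_partition;
  EdgeWeight best_cut = std::numeric_limits<EdgeWeight>::max();
  bool last_iteration_is_best = false;

  void update(const std::vector<BlockID> &current, const EdgeWeight final_cut) {
    if (final_cut <= best_cut) {
      best_partition = current; // snapshot the current block assignment
      best_cut = final_cut;
      last_iteration_is_best = true;
    } else {
      last_iteration_is_best = false;
    }
  }
};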