Skip to content

Commit

Permalink
refactor: unify Global and Local clusterers
Browse files — browse the repository at this point in the history
  • Loading branch information
DanielSeemaier committed May 6, 2024
1 parent b4b6e71 commit b4707a2
Show file tree
Hide file tree
Showing 13 changed files with 146 additions and 191 deletions.
73 changes: 36 additions & 37 deletions kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,22 @@ SET_DEBUG(true);

// Constructs the clusterer from the global context: `_input_ctx` is initialized
// from the full context, `_ctx` from its `coarsening.hem` sub-context.
HEMClusterer::HEMClusterer(const Context &ctx)
    : _input_ctx(ctx),
      _ctx(ctx.coarsening.hem) {}

void HEMClusterer::initialize(const DistributedGraph &graph) {
mpi::barrier(graph.communicator());
_graph = &graph;
void HEMClusterer::initialize_coloring() {
SCOPED_TIMER("Initialize HEM clustering");

const auto coloring = [&] {
// Graph is already sorted by a coloring -> reconstruct this coloring
// @todo if we always want to do this, optimize this refiner
if (graph.color_sorted()) {
if (_graph->color_sorted()) {
LOG << "Graph sorted by colors: using precomputed coloring";

NoinitVector<ColorID> coloring(graph.n()
); // We do not actually need the colors for ghost nodes
// We do not actually need the colors for ghost nodes
NoinitVector<ColorID> coloring(_graph->n());

// @todo parallelize
NodeID pos = 0;
for (ColorID c = 0; c < graph.number_of_colors(); ++c) {
const std::size_t size = graph.color_size(c);
for (ColorID c = 0; c < _graph->number_of_colors(); ++c) {
const std::size_t size = _graph->color_size(c);
std::fill(coloring.begin() + pos, coloring.begin() + pos + size, c);
pos += size;
}
Expand All @@ -47,14 +45,14 @@ void HEMClusterer::initialize(const DistributedGraph &graph) {

// Otherwise, compute a coloring now
LOG << "Computing new coloring";
return compute_node_coloring_sequentially(graph, _ctx.chunks.compute(_input_ctx.parallel));
return compute_node_coloring_sequentially(*_graph, _ctx.chunks.compute(_input_ctx.parallel));
}();

const ColorID num_local_colors = *std::max_element(coloring.begin(), coloring.end()) + 1;
const ColorID num_colors = mpi::allreduce(num_local_colors, MPI_MAX, graph.communicator());
const ColorID num_colors = mpi::allreduce(num_local_colors, MPI_MAX, _graph->communicator());

TIMED_SCOPE("Allocation") {
_color_sorted_nodes.resize(graph.n());
_color_sorted_nodes.resize(_graph->n());
_color_sizes.resize(num_colors + 1);
_color_blacklist.resize(num_colors);
tbb::parallel_for<std::size_t>(0, _color_sorted_nodes.size(), [&](const std::size_t i) {
Expand All @@ -69,11 +67,11 @@ void HEMClusterer::initialize(const DistributedGraph &graph) {
};

TIMED_SCOPE("Count color sizes") {
if (graph.color_sorted()) {
const auto &color_sizes = graph.get_color_sizes();
if (_graph->color_sorted()) {
const auto &color_sizes = _graph->get_color_sizes();
_color_sizes.assign(color_sizes.begin(), color_sizes.end());
} else {
graph.pfor_nodes([&](const NodeID u) {
_graph->pfor_nodes([&](const NodeID u) {
const ColorID c = coloring[u];
KASSERT(c < num_colors);
__atomic_fetch_add(&_color_sizes[c], 1, __ATOMIC_RELAXED);
Expand All @@ -83,11 +81,11 @@ void HEMClusterer::initialize(const DistributedGraph &graph) {
};

TIMED_SCOPE("Sort nodes") {
if (graph.color_sorted()) {
if (_graph->color_sorted()) {
// @todo parallelize
std::iota(_color_sorted_nodes.begin(), _color_sorted_nodes.end(), 0);
} else {
graph.pfor_nodes([&](const NodeID u) {
_graph->pfor_nodes([&](const NodeID u) {
const ColorID c = coloring[u];
const std::size_t i = __atomic_sub_fetch(&_color_sizes[c], 1, __ATOMIC_SEQ_CST);
KASSERT(i < _color_sorted_nodes.size());
Expand All @@ -98,8 +96,8 @@ void HEMClusterer::initialize(const DistributedGraph &graph) {

TIMED_SCOPE("Compute color blacklist") {
if (_ctx.small_color_blacklist == 0 ||
(_ctx.only_blacklist_input_level && graph.global_n() != _input_ctx.partition.graph->global_n
)) {
(_ctx.only_blacklist_input_level &&
_graph->global_n() != _input_ctx.partition.graph->global_n)) {
return;
}

Expand All @@ -113,7 +111,7 @@ void HEMClusterer::initialize(const DistributedGraph &graph) {
asserting_cast<int>(num_colors),
mpi::type::get<GlobalNodeID>(),
MPI_SUM,
graph.communicator()
_graph->communicator()
);

// @todo parallelize the rest of this section
Expand All @@ -130,7 +128,7 @@ void HEMClusterer::initialize(const DistributedGraph &graph) {
GlobalNodeID excluded_so_far = 0;
for (const ColorID c : sorted_by_size) {
excluded_so_far += global_color_sizes[c];
const double percentage = 1.0 * excluded_so_far / graph.global_n();
const double percentage = 1.0 * excluded_so_far / _graph->global_n();
if (percentage <= _ctx.small_color_blacklist) {
_color_blacklist[c] = 1;
} else {
Expand All @@ -140,38 +138,39 @@ void HEMClusterer::initialize(const DistributedGraph &graph) {
};

KASSERT(_color_sizes.front() == 0u);
KASSERT(_color_sizes.back() == graph.n());
KASSERT(_color_sizes.back() == _graph->n());
}

TIMED_SCOPE("Allocation") {
_matching.clear();
_matching.resize(graph.total_n());
tbb::parallel_for<NodeID>(0, graph.total_n(), [&](const NodeID u) {
_matching[u] = kInvalidGlobalNodeID;
});
};
void HEMClusterer::set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) {
_max_cluster_weight = max_cluster_weight;
}

HEMClusterer::ClusterArray &
HEMClusterer::cluster(const DistributedGraph &graph, GlobalNodeWeight max_cluster_weight) {
KASSERT(_graph == &graph, "must call initialize() before cluster()", assert::always);
void HEMClusterer::cluster(StaticArray<GlobalNodeID> &matching, const DistributedGraph &graph) {
_matching = std::move(matching);
_graph = &graph;

initialize_coloring();

SCOPED_TIMER("Compute HEM clustering");

tbb::parallel_for<NodeID>(0, graph.total_n(), [&](const NodeID u) {
matching[u] = kInvalidGlobalNodeID;
});

for (ColorID c = 0; c + 1 < _color_sizes.size(); ++c) {
compute_local_matching(c, max_cluster_weight);
compute_local_matching(c, _max_cluster_weight);
resolve_global_conflicts(c);
}

// Unmatched nodes become singleton clusters
_graph->pfor_all_nodes([&](const NodeID u) {
if (_matching[u] == kInvalidGlobalNodeID) {
_matching[u] = _graph->local_to_global_node(u);
if (matching[u] == kInvalidGlobalNodeID) {
matching[u] = _graph->local_to_global_node(u);
}
});

// Validate our matching
KASSERT(validate_matching(), "matching in inconsistent state", assert::always);

return _matching;
matching = std::move(_matching);
}

bool HEMClusterer::validate_matching() {
Expand Down
15 changes: 9 additions & 6 deletions kaminpar-dist/coarsening/clustering/hem/hem_clusterer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
#pragma once

#include "kaminpar-dist/algorithms/greedy_node_coloring.h"
#include "kaminpar-dist/coarsening/clustering/clusterer.h"
#include "kaminpar-dist/coarsening/clusterer.h"
#include "kaminpar-dist/context.h"
#include "kaminpar-dist/dkaminpar.h"

namespace kaminpar::dist {
class HEMClusterer : public GlobalClusterer {
class HEMClusterer : public Clusterer {
public:
HEMClusterer(const Context &ctx);

Expand All @@ -23,11 +23,13 @@ class HEMClusterer : public GlobalClusterer {
HEMClusterer(HEMClusterer &&) noexcept = default;
HEMClusterer &operator=(HEMClusterer &&) = delete;

void initialize(const DistributedGraph &graph) final;
void set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) final;

ClusterArray &cluster(const DistributedGraph &graph, GlobalNodeWeight max_cluster_weight) final;
void cluster(StaticArray<GlobalNodeID> &matching, const DistributedGraph &graph) final;

private:
void initialize_coloring();

void compute_local_matching(ColorID c, GlobalNodeWeight max_cluster_weight);
void resolve_global_conflicts(ColorID c);

Expand All @@ -38,10 +40,11 @@ class HEMClusterer : public GlobalClusterer {

const DistributedGraph *_graph;

ClusterArray _matching;

NoinitVector<std::uint8_t> _color_blacklist;
NoinitVector<ColorID> _color_sizes;
NoinitVector<NodeID> _color_sorted_nodes;

GlobalNodeWeight _max_cluster_weight = 0;
StaticArray<GlobalNodeID> _matching;
};
} // namespace kaminpar::dist
27 changes: 13 additions & 14 deletions kaminpar-dist/coarsening/clustering/hem/hem_lp_clusterer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,31 @@ HEMLPClusterer::HEMLPClusterer(const Context &ctx)
: _lp(std::make_unique<GlobalLPClusterer>(ctx)),
_hem(std::make_unique<HEMClusterer>(ctx)) {}

void HEMLPClusterer::initialize(const DistributedGraph &graph) {
_lp->initialize(graph);
_hem->initialize(graph);
void HEMLPClusterer::set_max_cluster_weight(const GlobalNodeWeight weight) {
_lp->set_max_cluster_weight(weight);
_hem->set_max_cluster_weight(weight);
}

HEMLPClusterer::ClusterArray &
HEMLPClusterer::cluster(const DistributedGraph &graph, const GlobalNodeWeight max_cluster_weight) {
void HEMLPClusterer::cluster(StaticArray<GlobalNodeID> &clustering, const DistributedGraph &graph) {
_graph = &graph;

if (_fallback) {
return _lp->cluster(graph, max_cluster_weight);
_lp->cluster(clustering, graph);
} else {
auto &matching = _hem->cluster(graph, max_cluster_weight);
const GlobalNodeID new_size = compute_size_after_matching_contraction(matching);
_hem->cluster(clustering, graph);

// If the matching shrinks the graph by less than 10%, switch to label propagation
// @todo make this configurable
if (1.0 * new_size / graph.global_n() <= 0.9) { // Shrink by at least 10%
return matching;
const GlobalNodeID new_size = compute_size_after_matching_contraction(clustering);
if (1.0 * new_size / graph.global_n() > 0.9) {
_fallback = true;
cluster(clustering, graph);
}

_fallback = true;
return cluster(graph, max_cluster_weight);
}
}

GlobalNodeID HEMLPClusterer::compute_size_after_matching_contraction(const ClusterArray &clustering
GlobalNodeID
HEMLPClusterer::compute_size_after_matching_contraction(const StaticArray<GlobalNodeID> &clustering
) {
tbb::enumerable_thread_specific<NodeID> num_matched_edges_ets;
_graph->pfor_nodes([&](const NodeID u) {
Expand Down
16 changes: 8 additions & 8 deletions kaminpar-dist/coarsening/clustering/hem/hem_lp_clusterer.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
******************************************************************************/
#pragma once

#include "kaminpar-dist/algorithms/greedy_node_coloring.h"
#include "kaminpar-dist/coarsening/clustering/clusterer.h"
#include "kaminpar-dist/coarsening/clusterer.h"
#include "kaminpar-dist/context.h"
#include "kaminpar-dist/dkaminpar.h"

namespace kaminpar::dist {
class HEMLPClusterer : public GlobalClusterer {
class HEMLPClusterer : public Clusterer {
public:
HEMLPClusterer(const Context &ctx);

Expand All @@ -23,17 +22,18 @@ class HEMLPClusterer : public GlobalClusterer {
HEMLPClusterer(HEMLPClusterer &&) noexcept = default;
HEMLPClusterer &operator=(HEMLPClusterer &&) = delete;

void initialize(const DistributedGraph &graph) final;
void set_max_cluster_weight(GlobalNodeWeight weight) final;

ClusterArray &cluster(const DistributedGraph &graph, GlobalNodeWeight max_cluster_weight) final;
void cluster(StaticArray<GlobalNodeID> &clustering, const DistributedGraph &graph) final;

private:
GlobalNodeID compute_size_after_matching_contraction(const ClusterArray &clustering);
GlobalNodeID compute_size_after_matching_contraction(const StaticArray<GlobalNodeID> &clustering);

const DistributedGraph *_graph;

bool _fallback = false;

std::unique_ptr<GlobalClusterer> _lp;
std::unique_ptr<GlobalClusterer> _hem;
std::unique_ptr<Clusterer> _lp;
std::unique_ptr<Clusterer> _hem;
};
} // namespace kaminpar::dist
58 changes: 25 additions & 33 deletions kaminpar-dist/coarsening/global_cluster_coarsener.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
******************************************************************************/
#include "kaminpar-dist/coarsening/global_cluster_coarsener.h"

#include "kaminpar-dist/coarsening/contraction/cluster_contraction.h"
#include "kaminpar-dist/coarsening/contraction/global_cluster_contraction.h"
#include "kaminpar-dist/datastructures/distributed_graph.h"
#include "kaminpar-dist/datastructures/distributed_partitioned_graph.h"
#include "kaminpar-dist/factories.h"
Expand Down Expand Up @@ -35,59 +35,51 @@ bool GlobalClusterCoarsener::coarsen() {
const DistributedGraph &graph = current();

StaticArray<GlobalNodeID> clustering(graph.total_n(), static_array::noinit);

_clusterer->set_max_cluster_weight(max_cluster_weight());
_clusterer->cluster(clustering, graph);

auto result = contract_clustering(graph, clustering, _input_ctx.coarsening);

auto coarse_graph = contract_clustering(graph, clustering, _input_ctx.coarsening);
KASSERT(
debug::validate_graph(result.graph),
debug::validate_graph(coarse_graph->get()),
"invalid graph after global cluster contraction",
assert::heavy
);
DBG << "Reduced number of nodes from " << graph.global_n() << " to " << result.graph.global_n();

if (!has_converged(graph, result.graph)) {
DBG << "... success";

_graph_hierarchy.push_back(std::move(result.graph));
_global_mapping_hierarchy.push_back(std::move(result.mapping));
_node_migration_history.push_back(std::move(result.migration));
if (!has_converged(graph, coarse_graph->get())) {
DBG << "... accepted coarsened graph";

_graph_hierarchy.push_back(std::move(coarse_graph));
return true;
}

DBG << "... converged due to insufficient shrinkage";
DBG << "... converged due to insufficient shrinkage, discarding last coarsening step";
return false;
}

DistributedPartitionedGraph GlobalClusterCoarsener::uncoarsen(DistributedPartitionedGraph &&p_graph
) {
const DistributedGraph *new_coarsest = nth_coarsest(1);

p_graph = project_partition(
*new_coarsest,
std::move(p_graph),
_global_mapping_hierarchy.back(),
_node_migration_history.back()
DistributedPartitionedGraph
GlobalClusterCoarsener::uncoarsen(DistributedPartitionedGraph &&p_c_graph) {
std::unique_ptr<CoarseGraph> c_graph = std::move(_graph_hierarchy.back());
KASSERT(
&c_graph->get() == &p_c_graph.graph(),
"given graph partition does not belong to the coarse graph"
);

_graph_hierarchy.pop_back();
const DistributedGraph &f_graph = current();

StaticArray<BlockID> f_partition(f_graph.total_n(), static_array::noinit);
c_graph->project(p_c_graph.partition(), f_partition);

DistributedPartitionedGraph p_f_graph(
&f_graph, p_c_graph.k(), std::move(f_partition), p_c_graph.take_block_weights()
);
KASSERT(
debug::validate_partition(p_graph),
debug::validate_partition(p_f_graph),
"invalid partition after projection to finer graph",
assert::heavy
);

_graph_hierarchy.pop_back();
_global_mapping_hierarchy.pop_back();
_node_migration_history.pop_back();

// if pop_back() on _graph_hierarchy caused a reallocation, the graph pointer
// in p_graph dangles
p_graph.UNSAFE_set_graph(coarsest());

return std::move(p_graph);
return p_f_graph;
}

bool GlobalClusterCoarsener::has_converged(
Expand All @@ -97,7 +89,7 @@ bool GlobalClusterCoarsener::has_converged(
}

const DistributedGraph &GlobalClusterCoarsener::current() const {
return _graph_hierarchy.empty() ? *_input_graph : _graph_hierarchy.back();
return _graph_hierarchy.empty() ? *_input_graph : _graph_hierarchy.back()->get();
}

std::size_t GlobalClusterCoarsener::level() const {
Expand Down
Loading

0 comments on commit b4707a2

Please sign in to comment.