diff --git a/README.MD b/README.MD index 66f64de6..de2c18ef 100644 --- a/README.MD +++ b/README.MD @@ -95,7 +95,9 @@ KaMinPar shm(int num_threads, shm::create_default_context()); // KaMinPar::reseed(int seed); shm.borrow_and_mutate_graph(NodeID n, EdgeID *xadj, NodeID *adjncy, NodeWeight *vwgt = nullptr, EdgeWeight *adjwgt = nullptr); // alternatively: shm.copy_graph(n, xadj, adjncy, vwgt, adjwgt); will work on a copy of the graph -shm.compute_partition(BlockID number_of_blocks, BlockID *out_partition); +shm.compute_partition(BlockID number_of_blocks, double epsilon, BlockID *out_partition); +// alternatively: shm.compute_partition(std::vector max_block_weights, BlockID *out_partition); +// Note: you must ensure that the total max block weight is larger than the total node weight of the graph // Call the distributed partitioner: dKaMinPar dist(MPI_Comm comm, int num_threads, dist::create_default_context()); diff --git a/apps/KaMinPar.cc b/apps/KaMinPar.cc index 7ac92e3d..40ecb5a3 100644 --- a/apps/KaMinPar.cc +++ b/apps/KaMinPar.cc @@ -56,6 +56,9 @@ struct ApplicationContext { float heap_profiler_min_struct_size = 10; BlockID k = 0; + double epsilon = 0.03; + std::vector max_block_weights = {}; + std::vector max_block_weight_factors = {}; bool quiet = false; bool experiment = false; @@ -103,15 +106,45 @@ The output should be stored in a file and can be used by the -C,--config option. // Mandatory -> ... 
or partition a graph auto *gp_group = mandatory->add_option_group("Partitioning")->silent(); - gp_group->add_option("-k,--k", app.k, "Number of blocks in the partition.") - ->configurable(false) - ->check(CLI::Range(static_cast(2), std::numeric_limits::max())) - ->required(); gp_group->add_option("-G,--graph", app.graph_filename, "Input graph in METIS format.") ->check(CLI::ExistingFile) ->configurable(false); + auto *partition_group = gp_group->add_option_group("Partition settings")->require_option(1); + partition_group + ->add_option( + "-k,--k", + app.k, + "Number of blocks in the partition. This option will be ignored if explicit block " + "weights are specified via --block-weights or --block-weight-factors." + ) + ->check(CLI::Range(static_cast(2), std::numeric_limits::max())); + partition_group + ->add_option( + "-B,--block-weights", + app.max_block_weights, + "Absolute max block weights, one weight for each block of the partition. If this " + "option is set, --epsilon will be ignored." + ) + ->check(CLI::NonNegativeNumber) + ->capture_default_str(); + partition_group->add_option( + "-b,--block-weight-factors", + app.max_block_weight_factors, + "Max block weights relative to the total node weight of the input graph, one factor for each " + "block of the partition. If this option is set, --epsilon will be ignored." + ); + // Application options + cli.add_option( + "-e,--epsilon", + app.epsilon, + "Maximum allowed imbalance, e.g. 0.03 for 3%. Must be greater than 0%. If maximum block " + "weights are specified explicitly via the --block-weights, this option will be ignored." 
+ ) + ->check(CLI::NonNegativeNumber) + ->capture_default_str(); + cli.add_option("-s,--seed", app.seed, "Seed for random number generation.") ->default_val(app.seed); cli.add_flag("-q,--quiet", app.quiet, "Suppress all console output."); @@ -414,7 +447,30 @@ int main(int argc, char *argv[]) { // Compute partition partitioner.set_graph(std::move(graph)); - partitioner.compute_partition(app.k, partition.data()); + if (!app.max_block_weight_factors.empty()) { + const NodeWeight total_node_weight = partitioner.graph()->total_node_weight(); + app.max_block_weights.reserve(app.max_block_weight_factors.size()); + for (const double &factor : app.max_block_weight_factors) { + app.max_block_weights.push_back(std::ceil(factor * total_node_weight)); + } + } + + if (!app.max_block_weights.empty()) { + const BlockWeight total_block_weight = std::accumulate( + app.max_block_weights.begin(), app.max_block_weights.end(), static_cast(0) + ); + const NodeWeight total_node_weight = partitioner.graph()->total_node_weight(); + if (total_node_weight > total_block_weight) { + LOG_ERROR << "Total max block weights (" << total_block_weight + << ") is smaller than the total node weight (" << total_node_weight + << ") of the graph. This does not work. 
Please increase your max block weights."; + std::exit(1); + } + + partitioner.compute_partition(std::move(app.max_block_weights), partition.data()); + } else { + partitioner.compute_partition(app.k, app.epsilon, partition.data()); + } // Save graph partition if (!app.partition_filename.empty()) { diff --git a/apps/benchmarks/shm_label_propagation_benchmark.cc b/apps/benchmarks/shm_label_propagation_benchmark.cc index 8772838c..f0d7dc94 100644 --- a/apps/benchmarks/shm_label_propagation_benchmark.cc +++ b/apps/benchmarks/shm_label_propagation_benchmark.cc @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) { STOP_TIMER(); if (graph.sorted()) { - graph::integrate_isolated_nodes(graph, original_epsilon, ctx); + graph.integrate_isolated_nodes(); } cio::print_delimiter("Input Summary", '#'); diff --git a/apps/tools/shm_graph_rearrangement_tool.cc b/apps/tools/shm_graph_rearrangement_tool.cc index 5c1fe135..16ffd2dc 100644 --- a/apps/tools/shm_graph_rearrangement_tool.cc +++ b/apps/tools/shm_graph_rearrangement_tool.cc @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) { LOG << "Rearranging graph..."; if (ctx.node_ordering == NodeOrdering::DEGREE_BUCKETS) { graph = graph::rearrange_by_degree_buckets(graph.csr_graph()); - graph::integrate_isolated_nodes(graph, ctx.partition.epsilon, ctx); + graph.integrate_isolated_nodes(); } if (ctx.edge_ordering == EdgeOrdering::COMPRESSION) { diff --git a/kaminpar-cli/kaminpar_arguments.cc b/kaminpar-cli/kaminpar_arguments.cc index deac8f70..0eeb9bcb 100644 --- a/kaminpar-cli/kaminpar_arguments.cc +++ b/kaminpar-cli/kaminpar_arguments.cc @@ -34,16 +34,6 @@ CLI::Option_group *create_graph_compression_options(CLI::App *app, Context &ctx) CLI::Option_group *create_partitioning_options(CLI::App *app, Context &ctx) { auto *partitioning = app->add_option_group("Partitioning"); - partitioning - ->add_option( - "-e,--epsilon", - ctx.partition.epsilon, - "Maximum allowed imbalance, e.g. 0.03 for 3%. Must be strictly " - "positive." 
- ) - ->check(CLI::NonNegativeNumber) - ->capture_default_str(); - // Partitioning options partitioning->add_option("-m,--p-mode", ctx.partitioning.mode) ->transform(CLI::CheckedTransformer(get_partitioning_modes()).description("")) @@ -341,6 +331,12 @@ CLI::Option_group *create_initial_partitioning_options(CLI::App *app, Context &c "--i-r-disable", ctx.initial_partitioning.refinement.disabled, "Disable initial refinement." ) ->capture_default_str(); + ip->add_flag( + "--i-adaptive-epsilon", + ctx.initial_partitioning.use_adaptive_epsilon, + "Use adaptive epsilon." + ) + ->capture_default_str(); return ip; } diff --git a/kaminpar-dist/dkaminpar.cc b/kaminpar-dist/dkaminpar.cc index a71be15e..e7c6fbf8 100644 --- a/kaminpar-dist/dkaminpar.cc +++ b/kaminpar-dist/dkaminpar.cc @@ -26,7 +26,7 @@ #include "kaminpar-dist/metrics.h" #include "kaminpar-dist/timer.h" -#include "kaminpar-shm/context.h" +#include "kaminpar-shm/kaminpar.h" #include "kaminpar-common/console_io.h" #include "kaminpar-common/environment.h" diff --git a/kaminpar-dist/dkaminpar.h b/kaminpar-dist/dkaminpar.h index d34c6050..52912efe 100644 --- a/kaminpar-dist/dkaminpar.h +++ b/kaminpar-dist/dkaminpar.h @@ -351,6 +351,10 @@ struct PartitionContext { ~PartitionContext(); + [[nodiscard]] double inferred_epsilon() const { + return epsilon; + } + BlockID k = kInvalidBlockID; BlockID K = kInvalidBlockID; double epsilon; diff --git a/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc b/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc index 09755c2d..4a3620b9 100644 --- a/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc +++ b/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc @@ -16,6 +16,7 @@ #include "kaminpar-common/timer.h" namespace kaminpar::dist { + shm::PartitionedGraph KaMinParInitialPartitioner::initial_partition( const shm::Graph &graph, const PartitionContext &p_ctx ) { @@ -25,9 +26,8 @@ shm::PartitionedGraph 
KaMinParInitialPartitioner::initial_partition( auto shm_ctx = _ctx.initial_partitioning.kaminpar; shm_ctx.refinement.lp.num_iterations = 1; - shm_ctx.partition.k = p_ctx.k; - shm_ctx.partition.epsilon = p_ctx.epsilon; - shm_ctx.setup(graph); + shm_ctx.partition.setup(graph, p_ctx.k, p_ctx.epsilon); + shm_ctx.compression.setup(graph); DISABLE_TIMERS(); START_HEAP_PROFILER("KaMinPar"); @@ -40,4 +40,5 @@ shm::PartitionedGraph KaMinParInitialPartitioner::initial_partition( return p_graph; } + } // namespace kaminpar::dist diff --git a/kaminpar-shm/coarsening/cluster_coarsener.cc b/kaminpar-shm/coarsening/cluster_coarsener.cc index 197d775f..09152deb 100644 --- a/kaminpar-shm/coarsening/cluster_coarsener.cc +++ b/kaminpar-shm/coarsening/cluster_coarsener.cc @@ -21,7 +21,9 @@ namespace kaminpar::shm { namespace { + SET_DEBUG(false); + } ClusteringCoarsener::ClusteringCoarsener(const Context &ctx, const PartitionContext &p_ctx) diff --git a/kaminpar-shm/coarsening/max_cluster_weights.h b/kaminpar-shm/coarsening/max_cluster_weights.h index 3c834a71..5e2683b6 100644 --- a/kaminpar-shm/coarsening/max_cluster_weights.h +++ b/kaminpar-shm/coarsening/max_cluster_weights.h @@ -13,6 +13,7 @@ #include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { + template NodeWeight compute_max_cluster_weight( const CoarseningContext &c_ctx, @@ -24,12 +25,12 @@ NodeWeight compute_max_cluster_weight( switch (c_ctx.clustering.cluster_weight_limit) { case ClusterWeightLimit::EPSILON_BLOCK_WEIGHT: - max_cluster_weight = (p_ctx.epsilon * total_node_weight) / + max_cluster_weight = (p_ctx.infer_epsilon(total_node_weight) * total_node_weight) / std::clamp(n / c_ctx.contraction_limit, 2, p_ctx.k); break; case ClusterWeightLimit::BLOCK_WEIGHT: - max_cluster_weight = (1.0 + p_ctx.epsilon) * total_node_weight / p_ctx.k; + max_cluster_weight = (1.0 + p_ctx.epsilon()) * total_node_weight / p_ctx.k; break; case ClusterWeightLimit::ONE: @@ -55,12 +56,16 @@ NodeWeight compute_max_cluster_weight( switch 
(c_ctx.cluster_weight_limit) { case ClusterWeightLimit::EPSILON_BLOCK_WEIGHT: - max_cluster_weight = (p_ctx.epsilon * total_node_weight) / + max_cluster_weight = (p_ctx.inferred_epsilon() * total_node_weight) / std::clamp(n / c_ctx.contraction_limit, 2, p_ctx.k); break; case ClusterWeightLimit::BLOCK_WEIGHT: - max_cluster_weight = (1.0 + p_ctx.epsilon) * total_node_weight / p_ctx.k; + if constexpr (requires { p_ctx.epsilon(); }) { + max_cluster_weight = (1.0 + p_ctx.epsilon()) * total_node_weight / p_ctx.k; + } else { + max_cluster_weight = (1.0 + p_ctx.epsilon) * total_node_weight / p_ctx.k; + } break; case ClusterWeightLimit::ONE: @@ -74,4 +79,5 @@ NodeWeight compute_max_cluster_weight( return static_cast(max_cluster_weight * c_ctx.cluster_weight_multiplier); } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/context.cc b/kaminpar-shm/context.cc deleted file mode 100644 index 0b57da2c..00000000 --- a/kaminpar-shm/context.cc +++ /dev/null @@ -1,159 +0,0 @@ -/******************************************************************************* - * Context struct for KaMinPar. 
- * - * @file: context.cc - * @author: Daniel Seemaier - * @date: 21.09.2021 - ******************************************************************************/ -#include "kaminpar-shm/context.h" - -#include "kaminpar-shm/datastructures/graph.h" -#include "kaminpar-shm/partitioning/partition_utils.h" - -#include "kaminpar-common/assert.h" - -namespace kaminpar::shm { - -void GraphCompressionContext::setup(const Graph &graph) { - high_degree_encoding = CompressedGraph::kHighDegreeEncoding; - high_degree_threshold = CompressedGraph::kHighDegreeThreshold; - high_degree_part_length = CompressedGraph::kHighDegreePartLength; - interval_encoding = CompressedGraph::kIntervalEncoding; - interval_length_treshold = CompressedGraph::kIntervalLengthTreshold; - streamvbyte_encoding = CompressedGraph::kStreamVByteEncoding; - - if (enabled) { - const auto &compressed_graph = graph.compressed_graph(); - compression_ratio = compressed_graph.compression_ratio(); - size_reduction = compressed_graph.size_reduction(); - num_high_degree_nodes = compressed_graph.num_high_degree_nodes(); - num_high_degree_parts = compressed_graph.num_high_degree_parts(); - num_interval_nodes = compressed_graph.num_interval_nodes(); - num_intervals = compressed_graph.num_intervals(); - } -} - -// -// PartitionContext -// - -void PartitionContext::setup(const AbstractGraph &graph, const bool setup_block_weights) { - n = graph.n(); - m = graph.m(); - total_node_weight = graph.total_node_weight(); - total_edge_weight = graph.total_edge_weight(); - max_node_weight = graph.max_node_weight(); - - if (setup_block_weights) { - PartitionContext::setup_block_weights(); - } -} - -void PartitionContext::setup_block_weights() { - block_weights.setup(*this); -} - -// -// BlockWeightsContext -// - -void BlockWeightsContext::setup(const PartitionContext &p_ctx, const bool parallel) { - KASSERT(p_ctx.k != kInvalidBlockID, "PartitionContext::k not initialized"); - KASSERT(p_ctx.k != 0u, "PartitionContext::k not initialized"); 
- KASSERT( - p_ctx.total_node_weight != kInvalidNodeWeight, - "PartitionContext::total_node_weight not initialized" - ); - KASSERT( - p_ctx.max_node_weight != kInvalidNodeWeight, - "PartitionContext::max_node_weight not initialized" - ); - - const auto perfectly_balanced_block_weight = - static_cast(std::ceil(1.0 * p_ctx.total_node_weight / p_ctx.k)); - const auto max_block_weight = - static_cast((1.0 + p_ctx.epsilon) * perfectly_balanced_block_weight); - - _max_block_weights.resize(p_ctx.k); - _perfectly_balanced_block_weights.resize(p_ctx.k); - - const auto setup_block_weight = [&](const BlockID b) { - _perfectly_balanced_block_weights[b] = perfectly_balanced_block_weight; - - // relax balance constraint by max_node_weight on coarse levels only - if (p_ctx.max_node_weight == 1) { - _max_block_weights[b] = max_block_weight; - } else { - _max_block_weights[b] = std::max( - max_block_weight, perfectly_balanced_block_weight + p_ctx.max_node_weight - ); - } - }; - - if (parallel) { - tbb::parallel_for(0, p_ctx.k, setup_block_weight); - } else { - for (BlockID b = 0; b < p_ctx.k; ++b) { - setup_block_weight(b); - } - } -} - -void BlockWeightsContext::setup( - const PartitionContext &p_ctx, const BlockID input_k, const bool parallel -) { - KASSERT(p_ctx.k != kInvalidBlockID, "PartitionContext::k not initialized"); - KASSERT( - p_ctx.total_node_weight != kInvalidNodeWeight, - "PartitionContext::total_node_weight not initialized" - ); - KASSERT( - p_ctx.max_node_weight != kInvalidNodeWeight, - "PartitionContext::max_node_weight not initialized" - ); - - const double block_weight = 1.0 * p_ctx.total_node_weight / input_k; - - _max_block_weights.resize(p_ctx.k); - _perfectly_balanced_block_weights.resize(p_ctx.k); - - const auto setup_block_weight = [&](const BlockID b) { - const BlockID final_k = partitioning::compute_final_k(b, p_ctx.k, input_k); - - _perfectly_balanced_block_weights[b] = std::ceil(final_k * block_weight); - - const auto max_block_weight = - 
static_cast((1.0 + p_ctx.epsilon) * _perfectly_balanced_block_weights[b]); - - // Relax balance constraint by max_node_weight on coarse levels only - if (p_ctx.max_node_weight == 1) { - _max_block_weights[b] = max_block_weight; - } else { - _max_block_weights[b] = std::max( - max_block_weight, _perfectly_balanced_block_weights[b] + p_ctx.max_node_weight - ); - } - }; - - if (parallel) { - tbb::parallel_for(0, p_ctx.k, setup_block_weight); - } else { - for (BlockID b = 0; b < p_ctx.k; ++b) { - setup_block_weight(b); - } - } -} - -[[nodiscard]] const std::vector &BlockWeightsContext::all_max() const { - return _max_block_weights; -} - -[[nodiscard]] const std::vector &BlockWeightsContext::all_perfectly_balanced() const { - return _perfectly_balanced_block_weights; -} - -void Context::setup(const Graph &graph) { - compression.setup(graph); - partition.setup(graph); -} -} // namespace kaminpar::shm diff --git a/kaminpar-shm/context.h b/kaminpar-shm/context.h deleted file mode 100644 index 9ede816e..00000000 --- a/kaminpar-shm/context.h +++ /dev/null @@ -1,11 +0,0 @@ -/******************************************************************************* - * Context struct for KaMinPar. 
- * - * @file: context.h - * @author: Daniel Seemaier - * @date: 21.09.2021 - ******************************************************************************/ -#pragma once - -// Part of this header is defined in the public library header: -#include "kaminpar-shm/kaminpar.h" // IWYU pragma: export diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc index 88e11e4f..f8db0519 100644 --- a/kaminpar-shm/context_io.cc +++ b/kaminpar-shm/context_io.cc @@ -505,7 +505,8 @@ void print(const RefinementContext &r_ctx, std::ostream &out) { } void print(const PartitionContext &p_ctx, std::ostream &out) { - const auto max_block_weight = static_cast(p_ctx.block_weights.max(0)); + // @todo rework block weights output + const auto max_block_weight = static_cast(p_ctx.max_block_weight(0)); const auto size = std::max( {static_cast(p_ctx.n), static_cast(p_ctx.m), max_block_weight} ); @@ -524,8 +525,9 @@ void print(const PartitionContext &p_ctx, std::ostream &out) { out << " (total weight: " << p_ctx.total_edge_weight << ")\n"; } out << "Number of blocks: " << p_ctx.k << "\n"; - out << "Maximum block weight: " << p_ctx.block_weights.max(0) << " (" - << p_ctx.block_weights.perfectly_balanced(0) << " + " << 100 * p_ctx.epsilon << "%)\n"; + out << "Maximum block weight: " << p_ctx.max_block_weight(0) << " (" + << p_ctx.perfectly_balanced_block_weight(0) << " + " << 100 * p_ctx.epsilon() << "% / " + << 100 * p_ctx.inferred_epsilon() << "%)\n"; } void print(const PartitioningContext &p_ctx, std::ostream &out) { @@ -556,4 +558,5 @@ void print(const Context &ctx, std::ostream &out) { cio::print_delimiter("Refinement", '-'); print(ctx.refinement, out); } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/datastructures/abstract_graph.h b/kaminpar-shm/datastructures/abstract_graph.h index 2b9c592b..5f6686d3 100644 --- a/kaminpar-shm/datastructures/abstract_graph.h +++ b/kaminpar-shm/datastructures/abstract_graph.h @@ -66,6 +66,9 @@ class AbstractGraph { [[nodiscard]] 
virtual std::size_t bucket_size(const std::size_t bucket) const = 0; [[nodiscard]] virtual NodeID first_node_in_bucket(const std::size_t bucket) const = 0; [[nodiscard]] virtual NodeID first_invalid_node_in_bucket(const std::size_t bucket) const = 0; + + virtual void remove_isolated_nodes(const NodeID num_isolated_nodes) = 0; + virtual NodeID integrate_isolated_nodes() = 0; }; } // namespace kaminpar::shm diff --git a/kaminpar-shm/datastructures/compressed_graph.cc b/kaminpar-shm/datastructures/compressed_graph.cc index c7a82f44..d0738432 100644 --- a/kaminpar-shm/datastructures/compressed_graph.cc +++ b/kaminpar-shm/datastructures/compressed_graph.cc @@ -105,7 +105,7 @@ void CompressedGraph::remove_isolated_nodes(const NodeID num_isolated_nodes) { } } -void CompressedGraph::integrate_isolated_nodes() { +NodeID CompressedGraph::integrate_isolated_nodes() { KASSERT(sorted()); const NodeID nonisolated_nodes = n(); @@ -124,6 +124,8 @@ void CompressedGraph::integrate_isolated_nodes() { if (_number_of_buckets == 0) { _number_of_buckets = 1; } + + return nonisolated_nodes; } } // namespace kaminpar::shm diff --git a/kaminpar-shm/datastructures/compressed_graph.h b/kaminpar-shm/datastructures/compressed_graph.h index e66a6a5c..06e6ffde 100644 --- a/kaminpar-shm/datastructures/compressed_graph.h +++ b/kaminpar-shm/datastructures/compressed_graph.h @@ -192,12 +192,16 @@ class CompressedGraph : public AbstractGraph { // Parallel iteration // + template inline void pfor_nodes_range(Lambda &&l) const { + tbb::parallel_for(tbb::blocked_range(0, n()), std::forward(l)); + } + template inline void pfor_nodes(Lambda &&l) const { - tbb::parallel_for(static_cast(0), n(), std::forward(l)); + tbb::parallel_for(0, n(), std::forward(l)); } template inline void pfor_edges(Lambda &&l) const { - tbb::parallel_for(static_cast(0), m(), std::forward(l)); + tbb::parallel_for(0, m(), std::forward(l)); } template @@ -260,9 +264,9 @@ class CompressedGraph : public AbstractGraph { // Isolated nodes 
// - void remove_isolated_nodes(const NodeID num_isolated_nodes); + void remove_isolated_nodes(NodeID num_isolated_nodes) final; - void integrate_isolated_nodes(); + NodeID integrate_isolated_nodes() final; // // Compressions statistics diff --git a/kaminpar-shm/datastructures/csr_graph.cc b/kaminpar-shm/datastructures/csr_graph.cc index 4f4fd535..4700bd1e 100644 --- a/kaminpar-shm/datastructures/csr_graph.cc +++ b/kaminpar-shm/datastructures/csr_graph.cc @@ -154,7 +154,7 @@ void CSRGraph::remove_isolated_nodes(const NodeID num_isolated_nodes) { } } -void CSRGraph::integrate_isolated_nodes() { +NodeID CSRGraph::integrate_isolated_nodes() { KASSERT(sorted()); const NodeID nonisolated_nodes = n(); @@ -173,6 +173,8 @@ void CSRGraph::integrate_isolated_nodes() { if (_number_of_buckets == 0) { _number_of_buckets = 1; } + + return isolated_nodes; } void CSRGraph::init_degree_buckets() { diff --git a/kaminpar-shm/datastructures/csr_graph.h b/kaminpar-shm/datastructures/csr_graph.h index 67a9f08c..6123c5d6 100644 --- a/kaminpar-shm/datastructures/csr_graph.h +++ b/kaminpar-shm/datastructures/csr_graph.h @@ -302,12 +302,16 @@ class CSRGraph : public AbstractGraph { // Parallel iteration // + template inline void pfor_nodes_range(Lambda &&l) const { + tbb::parallel_for(tbb::blocked_range(0, n()), std::forward(l)); + } + template inline void pfor_nodes(Lambda &&l) const { - tbb::parallel_for(static_cast(0), n(), std::forward(l)); + tbb::parallel_for(0, n(), std::forward(l)); } template inline void pfor_edges(Lambda &&l) const { - tbb::parallel_for(static_cast(0), m(), std::forward(l)); + tbb::parallel_for(0, m(), std::forward(l)); } template @@ -389,9 +393,9 @@ class CSRGraph : public AbstractGraph { // Isolated nodes // - void remove_isolated_nodes(const NodeID num_isolated_nodes); + void remove_isolated_nodes(NodeID num_isolated_nodes) final; - void integrate_isolated_nodes(); + NodeID integrate_isolated_nodes() final; // // Direct member access -- used for some "low level" 
operations diff --git a/kaminpar-shm/datastructures/graph.h b/kaminpar-shm/datastructures/graph.h index c7b9615b..fadfe9e9 100644 --- a/kaminpar-shm/datastructures/graph.h +++ b/kaminpar-shm/datastructures/graph.h @@ -131,6 +131,10 @@ class Graph : public AbstractGraph { // Parallel iteration // + template inline void pfor_nodes_range(Lambda &&l) const { + reified([&](auto &graph) { graph.pfor_nodes_range(std::forward(l)); }); + } + template inline void pfor_nodes(Lambda &&l) const { reified([&](auto &graph) { graph.pfor_nodes(std::forward(l)); }); } @@ -192,6 +196,14 @@ class Graph : public AbstractGraph { return _underlying_graph->first_invalid_node_in_bucket(bucket); } + void remove_isolated_nodes(const NodeID num_isolated_nodes) final { + _underlying_graph->remove_isolated_nodes(num_isolated_nodes); + } + + NodeID integrate_isolated_nodes() final { + return _underlying_graph->integrate_isolated_nodes(); + } + // // Access to the underlying graph // diff --git a/kaminpar-shm/datastructures/partitioned_graph.h b/kaminpar-shm/datastructures/partitioned_graph.h index 29c17429..40f60686 100644 --- a/kaminpar-shm/datastructures/partitioned_graph.h +++ b/kaminpar-shm/datastructures/partitioned_graph.h @@ -271,6 +271,13 @@ template class GenericPartitionedGraph : public GraphDelega private: void init_block_weights_par() { + if (k() >= 65536) { + this->pfor_nodes([&](const NodeID u) { + __atomic_fetch_add(&_block_weights[block(u)], this->node_weight(u), __ATOMIC_RELAXED); + }); + return; + } + tbb::enumerable_thread_specific> block_weights_ets([&] { return StaticArray(k()); }); diff --git a/kaminpar-shm/factories.cc b/kaminpar-shm/factories.cc index 304f4f47..7529e4d3 100644 --- a/kaminpar-shm/factories.cc +++ b/kaminpar-shm/factories.cc @@ -13,7 +13,7 @@ // Partitioning schemes #include "kaminpar-shm/partitioning/deep/deep_multilevel.h" #include "kaminpar-shm/partitioning/kway/kway_multilevel.h" -#include "kaminpar-shm/partitioning/rb/rb_multilevel.h" +// #include 
"kaminpar-shm/partitioning/rb/rb_multilevel.h" // Clusterings #include "kaminpar-shm/coarsening/clustering/lp_clusterer.h" @@ -41,7 +41,8 @@ std::unique_ptr create_partitioner(const Graph &graph, const Contex return std::make_unique(graph, ctx); case PartitioningMode::RB: - return std::make_unique(graph, ctx); + return nullptr; + //return std::make_unique(graph, ctx); case PartitioningMode::KWAY: return std::make_unique(graph, ctx); diff --git a/kaminpar-shm/graphutils/permutator.cc b/kaminpar-shm/graphutils/permutator.cc index fed1a74c..3a29577c 100644 --- a/kaminpar-shm/graphutils/permutator.cc +++ b/kaminpar-shm/graphutils/permutator.cc @@ -238,84 +238,6 @@ void reorder_edges_by_compression(CSRGraph &graph) { }); } -template -std::pair -find_isolated_nodes_info(const NodeContainer &nodes, const NodeWeightContainer &node_weights) { - KASSERT((node_weights.empty() || node_weights.size() + 1 == nodes.size())); - - tbb::enumerable_thread_specific isolated_nodes; - tbb::enumerable_thread_specific isolated_nodes_weights; - const bool is_weighted = !node_weights.empty(); - - const NodeID n = nodes.size() - 1; - tbb::parallel_for(tbb::blocked_range(0, n), [&](const tbb::blocked_range &r) { - NodeID &local_isolated_nodes = isolated_nodes.local(); - NodeWeight &local_isolated_weights = isolated_nodes_weights.local(); - - for (NodeID u = r.begin(); u != r.end(); ++u) { - if (nodes[u] == nodes[u + 1]) { - ++local_isolated_nodes; - local_isolated_weights += is_weighted ? node_weights[u] : 1; - } - } - }); - - return {isolated_nodes.combine(std::plus{}), isolated_nodes_weights.combine(std::plus{})}; -} - -template -void remove_isolated_nodes_generic_graph(Graph &graph, PartitionContext &p_ctx) { - auto &nodes = graph.raw_nodes(); - auto &node_weights = graph.raw_node_weights(); - - const NodeWeight total_node_weight = - node_weights.empty() ? 
nodes.size() - 1 : parallel::accumulate(node_weights, 0); - const auto [isolated_nodes, isolated_nodes_weight] = - find_isolated_nodes_info(nodes, node_weights); - - const NodeID old_n = nodes.size() - 1; - const NodeID new_n = old_n - isolated_nodes; - const NodeWeight new_weight = total_node_weight - isolated_nodes_weight; - - const BlockID k = p_ctx.k; - const double old_max_block_weight = (1 + p_ctx.epsilon) * std::ceil(1.0 * total_node_weight / k); - const double new_epsilon = - new_weight > 0 ? old_max_block_weight / std::ceil(1.0 * new_weight / k) - 1 : 0.0; - p_ctx.epsilon = new_epsilon; - p_ctx.n = new_n; - p_ctx.total_node_weight = new_weight; - - graph.remove_isolated_nodes(isolated_nodes); -} - -void remove_isolated_nodes(Graph &graph, PartitionContext &p_ctx) { - SCOPED_TIMER("Remove isolated nodes"); - graph.reified([&](auto &graph) { remove_isolated_nodes_generic_graph(graph, p_ctx); }); -} - -template -NodeID integrate_isolated_nodes_generic_graph(Graph &graph, const double epsilon, Context &ctx) { - const NodeID num_nonisolated_nodes = graph.n(); // this becomes the first isolated node - - graph.integrate_isolated_nodes(); - - const NodeID num_isolated_nodes = graph.n() - num_nonisolated_nodes; - - // note: max block weights should not change - ctx.partition.epsilon = epsilon; - - return num_isolated_nodes; -} - -NodeID integrate_isolated_nodes(Graph &graph, double epsilon, Context &ctx) { - NodeID num_isolated_nodes = graph.reified([&](auto &graph) { - return integrate_isolated_nodes_generic_graph(graph, epsilon, ctx); - }); - - ctx.setup(graph); - return num_isolated_nodes; -} - PartitionedGraph assign_isolated_nodes( PartitionedGraph p_graph, const NodeID num_isolated_nodes, const PartitionContext &p_ctx ) { @@ -326,19 +248,18 @@ PartitionedGraph assign_isolated_nodes( RECORD("partition") StaticArray partition(graph.n(), static_array::noinit); - // copy partition of non-isolated nodes + // Copy partition of non-isolated nodes 
tbb::parallel_for(0, num_nonisolated_nodes, [&](const NodeID u) { partition[u] = p_graph.block(u); }); - // now append the isolated ones + // Now append the isolated ones const BlockID k = p_graph.k(); auto block_weights = p_graph.take_raw_block_weights(); BlockID b = 0; - // TODO parallelize this for (NodeID u = num_nonisolated_nodes; u < num_nonisolated_nodes + num_isolated_nodes; ++u) { - while (b + 1 < k && block_weights[b] + graph.node_weight(u) > p_ctx.block_weights.max(b)) { + while (b + 1 < k && block_weights[b] + graph.node_weight(u) > p_ctx.max_block_weight(b)) { ++b; } partition[u] = b; @@ -348,4 +269,20 @@ PartitionedGraph assign_isolated_nodes( return {graph, k, std::move(partition)}; } +NodeID count_isolated_nodes(const Graph &graph) { + tbb::enumerable_thread_specific isolated_nodes_ets; + + graph.pfor_nodes_range([&](const auto &range) { + auto &isolated_nodes = isolated_nodes_ets.local(); + + for (NodeID u = range.begin(); u != range.end(); ++u) { + if (graph.degree(u) == 0) { + ++isolated_nodes; + } + } + }); + + return isolated_nodes_ets.combine(std::plus{}); +} + } // namespace kaminpar::shm::graph diff --git a/kaminpar-shm/graphutils/permutator.h b/kaminpar-shm/graphutils/permutator.h index 07e13768..9d39f5ed 100644 --- a/kaminpar-shm/graphutils/permutator.h +++ b/kaminpar-shm/graphutils/permutator.h @@ -240,24 +240,6 @@ Graph rearrange_by_degree_buckets(CSRGraph &graph); */ void reorder_edges_by_compression(CSRGraph &graph); -/*! - * Removes the isolated nodes of a graph which are located at the back of the graph. - * - * @param graph The graph whose isolated nodes to remove. - * @param p_ctx The parition context to update. - */ -void remove_isolated_nodes(Graph &graph, PartitionContext &p_ctx); - -/*! - * Integrates the isolated nodes of a graph that have been removed. - * - * @param graph The graph whose isolated nodes to integrate. - * @param epsilon The epsilon value before removing the integrated nodes. 
- * @param ctx The context to update. - * @return The number of isolated nodes integrated. - */ -NodeID integrate_isolated_nodes(Graph &graph, double epsilon, Context &ctx); - /*! * Assignes isolated nodes to a partition. * @@ -270,4 +252,6 @@ PartitionedGraph assign_isolated_nodes( PartitionedGraph p_graph, const NodeID num_isolated_nodes, const PartitionContext &p_ctx ); +NodeID count_isolated_nodes(const Graph &graph); + } // namespace kaminpar::shm::graph diff --git a/kaminpar-shm/graphutils/subgraph_extractor.cc b/kaminpar-shm/graphutils/subgraph_extractor.cc index 2f8ad6ca..4eeb09a3 100644 --- a/kaminpar-shm/graphutils/subgraph_extractor.cc +++ b/kaminpar-shm/graphutils/subgraph_extractor.cc @@ -21,7 +21,9 @@ #include "kaminpar-common/timer.h" namespace kaminpar::shm::graph { + namespace { + SET_DEBUG(false); template @@ -160,6 +162,7 @@ SequentialSubgraphExtractionResult extract_subgraphs_sequential_generic_graph( return {std::move(subgraphs), std::move(subgraph_positions)}; } + } // namespace SequentialSubgraphExtractionResult extract_subgraphs_sequential( @@ -253,6 +256,7 @@ lazy_extract_subgraphs_preprocessing(const PartitionedGraph &p_graph) { } namespace { + template shm::Graph extract_subgraph_generic_graph( const PartitionedGraph &p_graph, @@ -303,6 +307,7 @@ shm::Graph extract_subgraph_generic_graph( : StaticArray() )); } + } // namespace Graph extract_subgraph( @@ -320,6 +325,7 @@ Graph extract_subgraph( } namespace { + /* * Builds a block-induced subgraph for each block of a partitioned graph. 
Return * type contains a mapping that maps nodes from p_graph to nodes in the @@ -471,6 +477,7 @@ SubgraphExtractionResult extract_subgraphs_generic_graph( return {std::move(subgraphs), std::move(mapping), std::move(start_positions)}; } + } // namespace SubgraphExtractionResult extract_subgraphs( @@ -506,10 +513,10 @@ PartitionedGraph copy_subgraph_partitions( parallel::prefix_sum(k0.begin(), k0.end(), k0.begin()); DBG << "Copying partition after recursive bipartitioning: extended " << p_graph.k() - << "-way partition to " << k_prime << "-way, goal: " << input_k - << " with block offsets: " << k0; + << "-way partition to " << k_prime << "-way, goal: " << input_k; StaticArray partition = p_graph.take_raw_partition(); + p_graph.pfor_nodes([&](const NodeID u) { const BlockID b = partition[u]; const NodeID s_u = mapping[u]; @@ -518,10 +525,10 @@ PartitionedGraph copy_subgraph_partitions( PartitionedGraph new_p_graph(p_graph.graph(), k_prime, std::move(partition)); DBG << "Statistics after copying the subgraph partitions:"; - DBG << " Block weights: " << new_p_graph.raw_block_weights(); DBG << " Cut: " << metrics::edge_cut(new_p_graph); DBG << " Imbalance: " << metrics::imbalance(new_p_graph); return new_p_graph; } + } // namespace kaminpar::shm::graph diff --git a/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.cc b/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.cc index 343ed66e..b72d73ed 100644 --- a/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.cc +++ b/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.cc @@ -15,6 +15,7 @@ #include "kaminpar-common/datastructures/queue.h" namespace kaminpar::shm { + using Queues = std::array, 2>; namespace bfs { @@ -38,7 +39,7 @@ struct lighter { struct sequential { BlockID operator()(const BlockID, const std::array &block_weights, const PartitionContext &context, const Queues &) { - return (block_weights[0] < context.block_weights.perfectly_balanced(0)) ? 
0 : 1; + return (block_weights[0] < context.perfectly_balanced_block_weight(0)) ? 0 : 1; } }; @@ -146,7 +147,7 @@ void InitialBFSBipartitioner::fill_bipartition() { // than this version: const NodeWeight weight = _block_weights[active]; const bool assignment_allowed = - (weight + _graph->node_weight(u) <= _p_ctx->block_weights.max(active)); + (weight + _graph->node_weight(u) <= _p_ctx->max_block_weight(active)); active = assignment_allowed * active + (1 - assignment_allowed) * (1 - active); set_block(u, active); @@ -171,4 +172,5 @@ template class InitialBFSBipartitioner; template class InitialBFSBipartitioner; template class InitialBFSBipartitioner; template class InitialBFSBipartitioner; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.h b/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.h index a31c1588..7c727a04 100644 --- a/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.h +++ b/kaminpar-shm/initial_partitioning/initial_bfs_bipartitioner.h @@ -17,12 +17,15 @@ #include "kaminpar-common/datastructures/queue.h" namespace kaminpar::shm { + namespace bfs { + struct alternating; // Switch between queues after each node struct lighter; // Use lighter queue next struct sequential; // Only use the first queue struct longer_queue; // Use longer queue next struct shorter_queue; // Use shorter queue next + } // namespace bfs /*! 
@@ -59,4 +62,5 @@ using LighterBlockBfsBipartitioner = InitialBFSBipartitioner; using SequentialBfsBipartitioner = InitialBFSBipartitioner; using LongerQueueBfsBipartitioner = InitialBFSBipartitioner; using ShorterQueueBfsBipartitioner = InitialBFSBipartitioner; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h b/kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h index e5bd4360..a0f96539 100644 --- a/kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h +++ b/kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h @@ -14,11 +14,88 @@ #include "kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.h" #include "kaminpar-shm/kaminpar.h" +#include "kaminpar-common/logger.h" + namespace kaminpar::shm { + +struct BipartitionTimingInfo { + std::uint64_t bipartitioner_init_ms = 0; + std::uint64_t bipartitioner_ms = 0; + std::uint64_t graph_init_ms = 0; + std::uint64_t extract_ms = 0; + std::uint64_t copy_ms = 0; + std::uint64_t misc_ms = 0; + InitialPartitionerTimings ip_timings{}; + + BipartitionTimingInfo &operator+=(const BipartitionTimingInfo &other) { + bipartitioner_init_ms += other.bipartitioner_init_ms; + bipartitioner_ms += other.bipartitioner_ms; + graph_init_ms += other.graph_init_ms; + extract_ms += other.extract_ms; + copy_ms += other.copy_ms; + misc_ms += other.misc_ms; + ip_timings += other.ip_timings; + return *this; + } +}; + class InitialBipartitionerWorkerPool { + SET_DEBUG(true); + public: explicit InitialBipartitionerWorkerPool(const Context &ctx) : _ctx(ctx) {} + PartitionedGraph bipartition( + const Graph *graph, + const BlockID current_block, + const BlockID current_k, + const bool partition_lifespan + ) { + const CSRGraph *csr = dynamic_cast(graph->underlying_graph()); + + // If we work with something other than a CSRGraph, construct a CSR copy to call the initial + // partitioning code. 
This is only necessary if the graph is too small for coarsening *and* we + // are using graph compression. + std::unique_ptr csr_copy; + if (csr == nullptr) { + DBG << "Bipartitioning a non-CSR graph is not supported by the initial partitioning code: " + "constructing a CSR-graph copy of the given graph with n=" + << graph->n() << ", m=" << graph->m(); + csr_copy = std::make_unique(*graph); + csr = csr_copy.get(); + } + + auto bipartition = [&] { + if (graph->n() == 0) { + return StaticArray{}; + } + + InitialMultilevelBipartitioner bipartitioner = get(); + bipartitioner.initialize(*csr, current_block, current_k); + auto bipartition = bipartitioner.partition(nullptr).take_raw_partition(); + + if (partition_lifespan) { + StaticArray owned_bipartition(bipartition.size(), static_array::noinit); + std::copy(bipartition.begin(), bipartition.end(), owned_bipartition.begin()); + + put(std::move(bipartitioner)); + + return owned_bipartition; + } else { + put(std::move(bipartitioner)); + return bipartition; + } + }(); + + PartitionedGraph p_graph(PartitionedGraph::seq{}, *graph, 2, std::move(bipartition)); + return p_graph; + } + + void free() { + _pool_ets.clear(); + } + +private: InitialMultilevelBipartitioner get() { auto &pool = _pool_ets.local(); @@ -36,12 +113,8 @@ class InitialBipartitionerWorkerPool { pool.push_back(std::move(initial_partitioner)); } - void free() { - _pool_ets.clear(); - } - -private: const Context &_ctx; tbb::enumerable_thread_specific> _pool_ets; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_coarsener.cc b/kaminpar-shm/initial_partitioning/initial_coarsener.cc index 9d667b27..8f158773 100644 --- a/kaminpar-shm/initial_partitioning/initial_coarsener.cc +++ b/kaminpar-shm/initial_partitioning/initial_coarsener.cc @@ -12,8 +12,11 @@ #include "kaminpar-common/timer.h" namespace kaminpar::shm { + namespace { + constexpr static bool kRandomizeNodeOrder = true; + } InitialCoarsener::InitialCoarsener(const 
InitialCoarseningContext &c_ctx) : _c_ctx(c_ctx) {} @@ -449,4 +452,5 @@ void InitialCoarsener::interleaved_visit_neighbor( _rating_map[_clustering[c_v].leader] += weight; } } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_coarsener.h b/kaminpar-shm/initial_partitioning/initial_coarsener.h index 7c66be02..05dacf7e 100644 --- a/kaminpar-shm/initial_partitioning/initial_coarsener.h +++ b/kaminpar-shm/initial_partitioning/initial_coarsener.h @@ -20,6 +20,7 @@ #include "kaminpar-common/random.h" namespace kaminpar::shm { + struct InitialCoarsenerTimings { std::uint64_t contract_ms = 0; std::uint64_t alloc_ms = 0; @@ -143,4 +144,5 @@ class InitialCoarsener { InitialCoarsenerTimings _timings{}; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.cc b/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.cc index 0c55eeff..d724d05c 100644 --- a/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.cc +++ b/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.cc @@ -15,6 +15,7 @@ #include "kaminpar-common/datastructures/static_array.h" namespace kaminpar::shm { + void InitialFlatBipartitioner::init(const CSRGraph &graph, const PartitionContext &p_ctx) { KASSERT(p_ctx.k == 2u, "must be initialized with a 2-way partition context"); @@ -55,4 +56,5 @@ PartitionedCSRGraph InitialFlatBipartitioner::bipartition( return {*_graph, 2, std::move(_partition), std::move(_final_block_weights)}; } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.h b/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.h index 72dd6ba0..9e6cf39a 100644 --- a/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.h +++ b/kaminpar-shm/initial_partitioning/initial_flat_bipartitioner.h @@ -17,6 +17,7 @@ #include "kaminpar-common/datastructures/static_array.h" namespace kaminpar::shm { + class InitialFlatBipartitioner { public: 
InitialFlatBipartitioner(const InitialFlatBipartitioner &) = delete; @@ -45,8 +46,8 @@ class InitialFlatBipartitioner { // inline void add_to_smaller_block(const NodeID u) { - const NodeWeight delta1 = _block_weights[0] - _p_ctx->block_weights.perfectly_balanced(0); - const NodeWeight delta2 = _block_weights[1] - _p_ctx->block_weights.perfectly_balanced(1); + const NodeWeight delta1 = _block_weights[0] - _p_ctx->perfectly_balanced_block_weight(0); + const NodeWeight delta2 = _block_weights[1] - _p_ctx->perfectly_balanced_block_weight(1); const BlockID block = delta1 < delta2 ? V1 : V2; set_block(u, block); } @@ -81,4 +82,5 @@ class InitialFlatBipartitioner { StaticArray _partition; StaticArray _final_block_weights; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc b/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc index 675c0273..730ceccc 100644 --- a/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc +++ b/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc @@ -16,13 +16,17 @@ #include "kaminpar-common/logger.h" namespace kaminpar::shm { + namespace { + SET_DEBUG(false); + } using Queues = std::array, 2>; namespace fm { + void SimpleStoppingPolicy::init(const CSRGraph *) { reset(); } @@ -83,8 +87,8 @@ struct MaxWeightSelectionPolicy { const Queues &, Random &rand ) { - const auto weight0 = p_graph.block_weight(0) - context.block_weights.perfectly_balanced(0); - const auto weight1 = p_graph.block_weight(1) - context.block_weights.perfectly_balanced(1); + const auto weight0 = p_graph.block_weight(0) - context.perfectly_balanced_block_weight(0); + const auto weight1 = p_graph.block_weight(1) - context.perfectly_balanced_block_weight(1); return weight1 > weight0 || (weight0 == weight1 && rand.random_bool()); } }; @@ -118,9 +122,9 @@ struct MaxOverloadSelectionPolicy { Random &rand ) { const NodeWeight overload0 = - std::max(0, p_graph.block_weight(0) - context.block_weights.max(0)); + std::max(0, 
p_graph.block_weight(0) - context.max_block_weight(0)); const NodeWeight overload1 = - std::max(0, p_graph.block_weight(1) - context.block_weights.max(1)); + std::max(0, p_graph.block_weight(1) - context.max_block_weight(1)); if (overload0 == 0 && overload1 == 0) { return MaxGainSelectionPolicy()(p_graph, context, queues, rand); @@ -458,4 +462,5 @@ template class InitialFMRefiner< fm::MaxOverloadSelectionPolicy, fm::BalancedMinCutAcceptancePolicy, fm::AdaptiveStoppingPolicy>; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_fm_refiner.h b/kaminpar-shm/initial_partitioning/initial_fm_refiner.h index 8392e9d4..81709792 100644 --- a/kaminpar-shm/initial_partitioning/initial_fm_refiner.h +++ b/kaminpar-shm/initial_partitioning/initial_fm_refiner.h @@ -19,6 +19,7 @@ namespace kaminpar::shm { namespace fm { + struct SimpleStoppingPolicy { void init(const CSRGraph *graph); [[nodiscard]] bool should_stop(const InitialRefinementContext &fm_ctx); @@ -53,6 +54,7 @@ struct MaxGainSelectionPolicy; struct MaxOverloadSelectionPolicy; struct BalancedMinCutAcceptancePolicy; + } // namespace fm /*! 
@@ -105,6 +107,7 @@ class InitialFMRefiner : public InitialRefiner { StoppingPolicy _stopping_policy{}; Random &_rand = Random::instance(); + RandomPermutations _permutations; std::vector _chunks; }; @@ -118,4 +121,5 @@ using InitialAdaptive2WayFM = InitialFMRefiner< fm::MaxOverloadSelectionPolicy, fm::BalancedMinCutAcceptancePolicy, fm::AdaptiveStoppingPolicy>; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.cc b/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.cc index fa51fa72..b6760916 100644 --- a/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.cc +++ b/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.cc @@ -16,6 +16,7 @@ #include "kaminpar-common/random.h" namespace kaminpar::shm { + void InitialGGGBipartitioner::init(const CSRGraph &graph, const PartitionContext &p_ctx) { InitialFlatBipartitioner::init(graph, p_ctx); @@ -64,7 +65,7 @@ void InitialGGGBipartitioner::fill_bipartition() { KASSERT(_queue.peek_key() == compute_gain(u), "invalid gain in queue", assert::heavy); _queue.pop(); change_block(u, V2); - if (_block_weights[V2] >= _p_ctx->block_weights.perfectly_balanced(V2)) { + if (_block_weights[V2] >= _p_ctx->perfectly_balanced_block_weight(V2)) { break; } @@ -89,7 +90,7 @@ void InitialGGGBipartitioner::fill_bipartition() { } }); } - } while (_block_weights[V2] < _p_ctx->block_weights.perfectly_balanced(V2)); + } while (_block_weights[V2] < _p_ctx->perfectly_balanced_block_weight(V2)); } [[nodiscard]] EdgeWeight InitialGGGBipartitioner::compute_gain(const NodeID u) const { @@ -105,4 +106,5 @@ void InitialGGGBipartitioner::fill_bipartition() { return gain; } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.h b/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.h index fffd94b1..ba1f48e7 100644 --- a/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.h +++ 
b/kaminpar-shm/initial_partitioning/initial_ggg_bipartitioner.h @@ -14,6 +14,7 @@ #include "kaminpar-common/datastructures/marker.h" namespace kaminpar::shm { + class InitialGGGBipartitioner : public InitialFlatBipartitioner { public: explicit InitialGGGBipartitioner(const InitialPoolPartitionerContext &pool_ctx) @@ -30,4 +31,5 @@ class InitialGGGBipartitioner : public InitialFlatBipartitioner { BinaryMinHeap _queue{0}; Marker<> _marker{0}; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.cc b/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.cc index 44c08ee2..8e09c65d 100644 --- a/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.cc +++ b/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.cc @@ -35,8 +35,11 @@ #include "kaminpar-common/timer.h" namespace kaminpar::shm { + namespace { + SET_DEBUG(false); + } InitialMultilevelBipartitioner::InitialMultilevelBipartitioner(const Context &ctx) @@ -46,22 +49,97 @@ InitialMultilevelBipartitioner::InitialMultilevelBipartitioner(const Context &ct _bipartitioner(std::make_unique(_i_ctx.pool)), _refiner(create_initial_refiner(_i_ctx.refinement)) {} -void InitialMultilevelBipartitioner::initialize(const CSRGraph &graph, const BlockID final_k) { - KASSERT(final_k > 0u); +// Note: `graph` is the `current_block`-th block-induced subgraph of some graph which is already +// partitioned into `current_k` blocks. 
+void InitialMultilevelBipartitioner::initialize( + const CSRGraph &graph, const BlockID current_block, const BlockID current_k +) { KASSERT(graph.n() > 0u); - _graph = &graph; - const auto [final_k1, final_k2] = math::split_integral(final_k); - _p_ctx = - partitioning::create_bipartition_context(graph, final_k1, final_k2, _ctx.partition, false); + // Through recursive bipartitioning, `current_block` (i.e., `graph`) will be subdivided further + // into a range of sub-blocks: R = [first_sub_block, first_invalid_sub_block). + const BlockID first_sub_block = + partitioning::compute_first_sub_block(current_block, current_k, _ctx.partition.k); + const BlockID first_invalid_sub_block = + partitioning::compute_first_invalid_sub_block(current_block, current_k, _ctx.partition.k); + const BlockID num_sub_blocks = + partitioning::compute_final_k(current_block, current_k, _ctx.partition.k); + + // The first `num_sub_blocks_b0` of `R` will be descendants of the first block of the bipartition + // that we are about to compute; the remaining ones will be descendants of the second block. + const auto [num_sub_blocks_b0, num_sub_blocks_b1] = math::split_integral(num_sub_blocks); + + // Based on this information, we can compute the maximum block weights by summing all maximum + // block weights of the corresponding sub-blocks. 
+ std::vector max_block_weights{ + _ctx.partition.total_max_block_weights(first_sub_block, first_sub_block + num_sub_blocks_b0), + _ctx.partition.total_max_block_weights( + first_sub_block + num_sub_blocks_b0, first_invalid_sub_block + ) + }; + + DBG << "[" << current_block << "/" << current_k << "] Current weight " + << graph.total_node_weight() << ", spans sub-blocks [" << first_sub_block << ", " + << first_invalid_sub_block << "), split max weight " + << _ctx.partition.total_max_block_weights(first_sub_block, first_invalid_sub_block) + << " into " << max_block_weights[0] << " and " << max_block_weights[1]; + + // @todo: how to adapt the inferred epsilon when dealing with arbitrary block weights? + if (_ctx.partition.has_uniform_block_weights() && _i_ctx.use_adaptive_epsilon) { + // It can be beneficial to artificially "restrict" the maximum block weights of *this* + // bipartition, ensuring that there is enough wiggle room for further bipartitioning of the + // sub-blocks: this is based on the "adapted epsilon" strategy of KaHyPar. 
+ const double base = (1.0 + _ctx.partition.inferred_epsilon()) * num_sub_blocks * + _ctx.partition.total_node_weight / _ctx.partition.k / + graph.total_node_weight(); + const double exponent = 1.0 / math::ceil_log2(num_sub_blocks); + const double epsilon_prime = std::pow(base, exponent) - 1.0; + const double adapted_eps = std::max(epsilon_prime, 0.0001); + + const BlockWeight total_max_weight = max_block_weights[0] + max_block_weights[1]; + std::array max_weight_ratios = { + 1.0 * max_block_weights[0] / total_max_weight, 1.0 * max_block_weights[1] / total_max_weight + }; + + for (const BlockID b : {0, 1}) { + max_block_weights[b] = (1.0 + adapted_eps) * graph.total_node_weight() * max_weight_ratios[b]; + } + + DBG << "[" << current_block << "/" << current_k << "]-> adapted epsilon from " + << _ctx.partition.epsilon() << " to " << adapted_eps << ", changing max block weights to " + << max_block_weights[0] << " + " << max_block_weights[1] + << ", will be relaxed with parameters max node weight " << graph.max_node_weight(); + + _p_ctx.setup(graph, std::move(max_block_weights), true); + + // @todo: we need this for the max cluster weight computation, where inferred epsilon might give + // slightly different values otherwise + // For now, only for testing, but keep in mind to update max_cluster_weight() to use + // inferred_epsilon() before removing this! 
+ _p_ctx.set_epsilon(adapted_eps); + } else { + DBG << "[" << current_block << "/" << current_k + << "]-> using original epsilon: " << _ctx.partition.epsilon() + << ", inferred from max block weights " << max_block_weights[0] << " and " + << max_block_weights[1]; + + _p_ctx.setup(graph, std::move(max_block_weights), true); + } _coarsener->init(graph); _refiner->init(graph); - const std::size_t num_bipartition_repetitions = - std::ceil(_i_ctx.pool.repetition_multiplier * final_k / math::ceil_log2(_ctx.partition.k)); + const int num_bipartition_repetitions = std::ceil( + _i_ctx.pool.repetition_multiplier * num_sub_blocks / math::ceil_log2(_ctx.partition.k) + ); _bipartitioner->set_num_repetitions(num_bipartition_repetitions); + + DBG << "[" << current_block << "/" << current_k + << "]--> max block weights: " << _p_ctx.max_block_weight(0) << " + " + << _p_ctx.max_block_weight(1) + << ", perfect block weights: " << _p_ctx.perfectly_balanced_block_weight(0) << " + " + << _p_ctx.perfectly_balanced_block_weight(1) << ", reps: " << num_bipartition_repetitions; } PartitionedCSRGraph InitialMultilevelBipartitioner::partition(InitialPartitionerTimings *timings) { @@ -107,7 +185,7 @@ const CSRGraph *InitialMultilevelBipartitioner::coarsen(InitialPartitionerTiming const CSRGraph *c_graph = _graph; bool shrunk = true; - DBG << "Coarsen: n=" << c_graph->n() << " m=" << c_graph->m(); + DBG << "Initial coarsening: n=" << c_graph->n() << " m=" << c_graph->m(); if (timings) { timings->coarsening_misc_ms += timer.elapsed(); } @@ -140,7 +218,7 @@ const CSRGraph *InitialMultilevelBipartitioner::coarsen(InitialPartitionerTiming } PartitionedCSRGraph InitialMultilevelBipartitioner::uncoarsen(PartitionedCSRGraph p_graph) { - DBG << "Uncoarsen: n=" << p_graph.n() << " m=" << p_graph.m(); + DBG << "Initial uncoarsening: n=" << p_graph.n() << " m=" << p_graph.m(); while (!_coarsener->empty()) { p_graph = _coarsener->uncoarsen(std::move(p_graph)); @@ -158,4 +236,5 @@ PartitionedCSRGraph 
InitialMultilevelBipartitioner::uncoarsen(PartitionedCSRGrap return p_graph; } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.h b/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.h index 5f0d304d..1c161f78 100644 --- a/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.h +++ b/kaminpar-shm/initial_partitioning/initial_multilevel_bipartitioner.h @@ -11,7 +11,7 @@ * * Constructing an object of InitialPartitioner is relatively expensive; * especially if one wants to compute *many* bipartitions (i.e., if k is large). - * Thus, objects should be kept in (thread-local!) memory and be re-used to + * Thus, objects should be kept in (thread-local) memory and be re-used to * compute multiple bipartitions (call init() for each new graph). * * Data structures are re-allocated to a larger size whenever necessary and never @@ -28,8 +28,10 @@ #include "kaminpar-shm/initial_partitioning/initial_coarsener.h" #include "kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.h" #include "kaminpar-shm/initial_partitioning/initial_refiner.h" +#include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { + struct InitialPartitionerTimings { std::uint64_t coarsening_ms = 0; std::uint64_t coarsening_misc_ms = 0; @@ -57,8 +59,30 @@ class InitialMultilevelBipartitioner { public: explicit InitialMultilevelBipartitioner(const Context &ctx); - void initialize(const CSRGraph &graph, BlockID final_k); + /** + * Initializes the bipartitioner for bipartitioning a block-induced subgraph. + * + * This function prepares the bipartitioner to bipartition a subgraph extracted from a specific + * block `current_block` of a graph which is already partitioned into `current_k` blocks, where + * `current_k < ctx.partition.k`. The maximum block weights for the bipartition are computed based + * on the `ctx.partition` context. 
+ * + * After initialization, the `partition()` method can be called to perform the bipartitioning. + * + * @param graph Subgraph extracted from a specific block of an already partitioned graph. + * @param current_block Block ID of the block from which the subgraph was extracted. + * @param current_k Number of blocks in the already partitioned graph. + */ + void initialize(const CSRGraph &graph, BlockID current_block, BlockID current_k); + /** + * Bipartitions the graph initialized by `initialize()`. + * + * The maximum block weights of the bipartition are computed based on the `ctx.partition` context + * and the information passed to `initialize()`. + * + * @return A partitioned graph with two blocks. + */ PartitionedCSRGraph partition(InitialPartitionerTimings *timings = nullptr); private: @@ -75,4 +99,5 @@ class InitialMultilevelBipartitioner { std::unique_ptr _bipartitioner; std::unique_ptr _refiner; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_noop_refiner.cc b/kaminpar-shm/initial_partitioning/initial_noop_refiner.cc index 4775321e..cd388981 100644 --- a/kaminpar-shm/initial_partitioning/initial_noop_refiner.cc +++ b/kaminpar-shm/initial_partitioning/initial_noop_refiner.cc @@ -12,9 +12,11 @@ #include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { + void InitialNoopRefiner::init(const CSRGraph &) {} bool InitialNoopRefiner::refine(PartitionedCSRGraph &, const PartitionContext &) { return false; } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_noop_refiner.h b/kaminpar-shm/initial_partitioning/initial_noop_refiner.h index 6cf05491..e531a006 100644 --- a/kaminpar-shm/initial_partitioning/initial_noop_refiner.h +++ b/kaminpar-shm/initial_partitioning/initial_noop_refiner.h @@ -13,10 +13,12 @@ #include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { + class InitialNoopRefiner : public InitialRefiner { public: void init(const CSRGraph &graph) final; bool 
refine(PartitionedCSRGraph &p_graph, const PartitionContext &p_ctx) final; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.cc b/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.cc index ed9739fb..bae0c097 100644 --- a/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.cc +++ b/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.cc @@ -19,8 +19,11 @@ #include "kaminpar-common/logger.h" namespace kaminpar::shm { + namespace { + SET_DEBUG(false); + } std::pair InitialPoolBipartitioner::RunningVariance::get() const { @@ -228,4 +231,5 @@ void InitialPoolBipartitioner::run_bipartitioner(const std::size_t i) { std::swap(_current_block_weights, _best_block_weights); } } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.h b/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.h index b30c4420..5003a38a 100644 --- a/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.h +++ b/kaminpar-shm/initial_partitioning/initial_pool_bipartitioner.h @@ -20,6 +20,7 @@ #include "kaminpar-common/assert.h" namespace kaminpar::shm { + class InitialPoolBipartitioner { public: struct RunningVariance { @@ -108,4 +109,5 @@ class InitialPoolBipartitioner { std::vector _running_statistics{}; Statistics _statistics{}; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.cc b/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.cc index 9613b40a..be8b85ee 100644 --- a/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.cc +++ b/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.cc @@ -10,6 +10,7 @@ #include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { + InitialRandomBipartitioner::InitialRandomBipartitioner(const InitialPoolPartitionerContext &pool_ctx ) : InitialFlatBipartitioner(pool_ctx) {} @@ -19,11 +20,12 @@ void 
InitialRandomBipartitioner::fill_bipartition() { const std::size_t block = _rand.random_index(0, 2); if (_block_weights[block] + _graph->node_weight(u) < - _p_ctx->block_weights.perfectly_balanced(block)) { + _p_ctx->perfectly_balanced_block_weight(block)) { set_block(u, block); } else { add_to_smaller_block(u); } } } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.h b/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.h index e95d211f..95de1563 100644 --- a/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.h +++ b/kaminpar-shm/initial_partitioning/initial_random_bipartitioner.h @@ -12,6 +12,7 @@ #include "kaminpar-common/random.h" namespace kaminpar::shm { + class InitialRandomBipartitioner : public InitialFlatBipartitioner { public: explicit InitialRandomBipartitioner(const InitialPoolPartitionerContext &pool_ctx); @@ -21,4 +22,5 @@ class InitialRandomBipartitioner : public InitialFlatBipartitioner { Random &_rand = Random::instance(); }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_refiner.cc b/kaminpar-shm/initial_partitioning/initial_refiner.cc index 2559b1ea..4c689942 100644 --- a/kaminpar-shm/initial_partitioning/initial_refiner.cc +++ b/kaminpar-shm/initial_partitioning/initial_refiner.cc @@ -12,6 +12,7 @@ #include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { + std::unique_ptr create_initial_refiner(const InitialRefinementContext &r_ctx) { if (r_ctx.disabled) { return std::make_unique(); @@ -27,4 +28,5 @@ std::unique_ptr create_initial_refiner(const InitialRefinementCo __builtin_unreachable(); } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/initial_refiner.h b/kaminpar-shm/initial_partitioning/initial_refiner.h index df73cfc9..33f17ce1 100644 --- a/kaminpar-shm/initial_partitioning/initial_refiner.h +++ b/kaminpar-shm/initial_partitioning/initial_refiner.h @@ -12,6 +12,7 @@ #include 
"kaminpar-shm/kaminpar.h" namespace kaminpar::shm { + class InitialRefiner { public: virtual ~InitialRefiner() = default; @@ -21,4 +22,5 @@ class InitialRefiner { }; std::unique_ptr create_initial_refiner(const InitialRefinementContext &r_ctx); + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/seed_node_utils.cc b/kaminpar-shm/initial_partitioning/seed_node_utils.cc index 9c27aa1d..9aad71bd 100644 --- a/kaminpar-shm/initial_partitioning/seed_node_utils.cc +++ b/kaminpar-shm/initial_partitioning/seed_node_utils.cc @@ -15,7 +15,9 @@ #include "kaminpar-common/random.h" namespace kaminpar::shm { + namespace { + std::pair find_furthest_away_node( const CSRGraph &graph, const NodeID start_node, Queue &queue, Marker<> &marker ) { @@ -64,6 +66,7 @@ std::pair find_furthest_away_node( queue.clear(); return {last_node, current_distance}; } + } // namespace std::pair find_far_away_nodes(const CSRGraph &graph, const int num_iterations) { @@ -96,4 +99,5 @@ std::pair find_far_away_nodes( return best_pair; } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/seed_node_utils.h b/kaminpar-shm/initial_partitioning/seed_node_utils.h index e287ce7d..e4370560 100644 --- a/kaminpar-shm/initial_partitioning/seed_node_utils.h +++ b/kaminpar-shm/initial_partitioning/seed_node_utils.h @@ -16,6 +16,7 @@ #include "kaminpar-common/datastructures/queue.h" namespace kaminpar::shm { + /*! * Heuristic to find "far away" nodes for BFS initialization. 
Starts at a random seed * node and performs a BFS to find the furthest away node; repeats the process multiple @@ -48,4 +49,5 @@ std::pair find_far_away_nodes(const CSRGraph &graph, int num_ite std::pair find_far_away_nodes( const CSRGraph &graph, int num_iterations, Queue &queue, Marker<> &marker ); + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.cc b/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.cc index 7f982872..bef5e8b4 100644 --- a/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.cc +++ b/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.cc @@ -14,6 +14,7 @@ #include "kaminpar-common/datastructures/scalable_vector.h" namespace kaminpar::shm { + void SequentialGraphHierarchy::init(const CSRGraph &graph) { _finest_graph = &graph; @@ -189,4 +190,5 @@ CSRGraphMemory SequentialGraphHierarchy::alloc_graph_memory() { return memory; } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.h b/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.h index 6a540523..e49c37cb 100644 --- a/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.h +++ b/kaminpar-shm/initial_partitioning/sequential_graph_hierarchy.h @@ -16,6 +16,7 @@ #include "kaminpar-common/datastructures/static_array.h" namespace kaminpar::shm { + class SequentialGraphHierarchy { public: SequentialGraphHierarchy() = default; @@ -65,4 +66,5 @@ class SequentialGraphHierarchy { ScalableVector> _partition_memory_cache; ScalableVector> _block_weights_memory_cache; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/kaminpar.cc b/kaminpar-shm/kaminpar.cc index e9894f33..8326b04f 100644 --- a/kaminpar-shm/kaminpar.cc +++ b/kaminpar-shm/kaminpar.cc @@ -7,6 +7,8 @@ ******************************************************************************/ #include "kaminpar-shm/kaminpar.h" +#include + #include "kaminpar-shm/context_io.h" #include 
"kaminpar-shm/datastructures/graph.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" @@ -22,7 +24,74 @@ namespace kaminpar { -using namespace shm; +namespace shm { + +void PartitionContext::setup( + const AbstractGraph &graph, + const BlockID k, + const double epsilon, + const bool relax_max_block_weights +) { + _epsilon = epsilon; + + // this->total_node_weight not yet initialized: use graph.total_node_weight instead + const BlockWeight perfectly_balanced_block_weight = + std::ceil(1.0 * graph.total_node_weight() / k); + std::vector max_block_weights(k, (1.0 + epsilon) * perfectly_balanced_block_weight); + setup(graph, std::move(max_block_weights), relax_max_block_weights); + + _uniform_block_weights = true; +} + +void PartitionContext::setup( + const AbstractGraph &graph, + std::vector max_block_weights, + const bool relax_max_block_weights +) { + original_n = graph.n(); + n = graph.n(); + m = graph.m(); + original_total_node_weight = graph.total_node_weight(); + total_node_weight = graph.total_node_weight(); + total_edge_weight = graph.total_edge_weight(); + max_node_weight = graph.max_node_weight(); + + k = static_cast(max_block_weights.size()); + _max_block_weights = std::move(max_block_weights); + _unrelaxed_max_block_weights = _max_block_weights; + _total_max_block_weights = std::accumulate( + _max_block_weights.begin(), _max_block_weights.end(), static_cast(0) + ); + _uniform_block_weights = false; + + if (relax_max_block_weights) { + const double eps = inferred_epsilon(); + for (BlockWeight &max_block_weight : _max_block_weights) { + max_block_weight = std::max( + max_block_weight, std::ceil(1.0 * max_block_weight / (1.0 + eps)) + max_node_weight + ); + } + } +} + +void GraphCompressionContext::setup(const Graph &graph) { + high_degree_encoding = CompressedGraph::kHighDegreeEncoding; + high_degree_threshold = CompressedGraph::kHighDegreeThreshold; + high_degree_part_length = CompressedGraph::kHighDegreePartLength; + interval_encoding = 
CompressedGraph::kIntervalEncoding; + interval_length_treshold = CompressedGraph::kIntervalLengthTreshold; + streamvbyte_encoding = CompressedGraph::kStreamVByteEncoding; + + if (enabled) { + const auto &compressed_graph = graph.compressed_graph(); + compression_ratio = compressed_graph.compression_ratio(); + size_reduction = compressed_graph.size_reduction(); + num_high_degree_nodes = compressed_graph.num_high_degree_nodes(); + num_high_degree_parts = compressed_graph.num_high_degree_parts(); + num_interval_nodes = compressed_graph.num_interval_nodes(); + num_intervals = compressed_graph.num_intervals(); + } +} namespace { @@ -38,7 +107,7 @@ void print_statistics( cio::print_delimiter("Result Summary"); - // statistics output that is easy to parse + // Statistics output that is easy to parse if (parseable) { LOG << "RESULT cut=" << cut << " imbalance=" << imbalance << " feasible=" << feasible << " k=" << p_graph.k(); @@ -76,10 +145,40 @@ void print_statistics( } else { LOG << logger::RED << " Feasible: no"; } + + LOG; + LOG << "Block weights:"; + + constexpr BlockID max_displayed_weights = 128; + + const int block_id_width = std::log10(std::min(max_displayed_weights, p_graph.k())) + 1; + const int block_weight_width = std::log10(ctx.partition.original_total_node_weight) + 1; + + for (BlockID b = 0; b < std::min(p_graph.k(), max_displayed_weights); ++b) { + std::stringstream ss; + ss << " w(" << std::left << std::setw(block_id_width) << b + << ") = " << std::setw(block_weight_width) << p_graph.block_weight(b); + if (p_graph.block_weight(b) > ctx.partition.max_block_weight(b)) { + LLOG << logger::RED << ss.str() << " "; + } else { + LLOG << ss.str() << " "; + } + if ((b % 4) == 3) { + LOG; + } + } + if (p_graph.k() > max_displayed_weights) { + LOG << "(only showing the first " << max_displayed_weights << " of " << p_graph.k() + << " blocks)"; + } } } // namespace +} // namespace shm + +using namespace shm; + KaMinPar::KaMinPar(const int num_threads, Context ctx) : 
_num_threads(num_threads), _ctx(std::move(ctx)), @@ -176,9 +275,22 @@ void KaMinPar::reseed(int seed) { Random::reseed(seed); } -EdgeWeight KaMinPar::compute_partition( - const BlockID k, BlockID *partition, const bool use_initial_node_ordering -) { +EdgeWeight KaMinPar::compute_partition(const BlockID k, BlockID *partition) { + return compute_partition(k, 0.03, partition); +} + +EdgeWeight KaMinPar::compute_partition(const BlockID k, const double epsilon, BlockID *partition) { + _ctx.partition.setup(*_graph_ptr, k, epsilon); + return compute_partition(partition); +} + +EdgeWeight +KaMinPar::compute_partition(std::vector max_block_weights, BlockID *partition) { + _ctx.partition.setup(*_graph_ptr, std::move(max_block_weights)); + return compute_partition(partition); +} + +EdgeWeight KaMinPar::compute_partition(BlockID *partition) { if (_output_level == OutputLevel::QUIET) { Logger::set_quiet_mode(true); } @@ -188,12 +300,8 @@ EdgeWeight KaMinPar::compute_partition( cio::print_build_datatypes(); cio::print_delimiter("Input Summary", '#'); - const double original_epsilon = _ctx.partition.epsilon; + _ctx.compression.setup(*_graph_ptr); _ctx.parallel.num_threads = _num_threads; - _ctx.partition.k = k; - - // Setup graph dependent context parameters - _ctx.setup(*_graph_ptr); // Initialize console output if (_output_level >= OutputLevel::APPLICATION) { @@ -226,7 +334,15 @@ EdgeWeight KaMinPar::compute_partition( // Cut off isolated nodes if the graph has been rearranged such that the isolated nodes are placed // at the end. 
if (_graph_ptr->sorted()) { - graph::remove_isolated_nodes(*_graph_ptr, _ctx.partition); + const NodeID num_isolated_nodes = graph::count_isolated_nodes(*_graph_ptr); + _graph_ptr->remove_isolated_nodes(num_isolated_nodes); + _ctx.partition.n = _graph_ptr->n(); + _ctx.partition.total_node_weight = _graph_ptr->total_node_weight(); + + cio::print_delimiter("Preprocessing"); + LOG << "Removed " << num_isolated_nodes << " isolated nodes"; + LOG << " Remaining nodes: " << _graph_ptr->n(); + LOG << " Remaining total node weight: " << _graph_ptr->total_node_weight(); } // Perform actual partitioning @@ -249,8 +365,7 @@ EdgeWeight KaMinPar::compute_partition( SCOPED_HEAP_PROFILER("Re-integrate isolated nodes"); SCOPED_TIMER("Re-integrate isolated nodes"); - const NodeID num_isolated_nodes = - graph::integrate_isolated_nodes(*_graph_ptr, original_epsilon, _ctx); + const NodeID num_isolated_nodes = _graph_ptr->integrate_isolated_nodes(); p_graph = graph::assign_isolated_nodes(std::move(p_graph), num_isolated_nodes, _ctx.partition); } @@ -258,7 +373,7 @@ EdgeWeight KaMinPar::compute_partition( STOP_HEAP_PROFILER(); START_TIMER("IO"); - if (_graph_ptr->permuted() && use_initial_node_ordering) { + if (_graph_ptr->permuted()) { tbb::parallel_for(0, p_graph.n(), [&](const NodeID u) { partition[u] = p_graph.block(_graph_ptr->map_original_node(u)); }); diff --git a/kaminpar-shm/kaminpar.h b/kaminpar-shm/kaminpar.h index 81cf4a69..e1d6f1bd 100644 --- a/kaminpar-shm/kaminpar.h +++ b/kaminpar-shm/kaminpar.h @@ -8,9 +8,11 @@ #pragma once #include +#include #include #include #include +#include #include #include #include @@ -338,51 +340,110 @@ struct InitialPartitioningContext { InitialRefinementContext refinement; bool refine_pool_partition; + bool use_adaptive_epsilon; }; // // Application level // -class AbstractGraph; -class Graph; -struct PartitionContext; +struct PartitionContext { + NodeID original_n = kInvalidNodeID; + NodeID n = kInvalidNodeID; + EdgeID m = kInvalidEdgeID; + 
NodeWeight original_total_node_weight = kInvalidNodeWeight; + NodeWeight total_node_weight = kInvalidNodeWeight; + EdgeWeight total_edge_weight = kInvalidEdgeWeight; + NodeWeight max_node_weight = kInvalidNodeWeight; -struct BlockWeightsContext { - void setup(const PartitionContext &ctx, const bool parallel = true); - void setup(const PartitionContext &ctx, const BlockID input_k, const bool parallel = true); + BlockID k; + + [[nodiscard]] BlockWeight perfectly_balanced_block_weight(const BlockID block) const { + return std::ceil(1.0 * _unrelaxed_max_block_weights[block] / (1 + inferred_epsilon())); + } - [[nodiscard]] BlockWeight max(BlockID b) const { - return _max_block_weights[b]; + [[nodiscard]] BlockWeight max_block_weight(const BlockID block) const { + return _max_block_weights[block]; } - [[nodiscard]] const std::vector &all_max() const; + [[nodiscard]] BlockWeight total_max_block_weights(const BlockID begin, const BlockID end) const { + if (_uniform_block_weights) { + return _max_block_weights[begin] * (end - begin); + } - [[nodiscard]] BlockWeight perfectly_balanced(BlockID b) const { - return _perfectly_balanced_block_weights[b]; + return std::accumulate( + _max_block_weights.begin() + begin, + _max_block_weights.begin() + end, + static_cast(0) + ); } - [[nodiscard]] const std::vector &all_perfectly_balanced() const; + [[nodiscard]] BlockWeight + total_unrelaxed_max_block_weights(const BlockID begin, const BlockID end) const { + if (_uniform_block_weights) { + const double max = + (1.0 + inferred_epsilon()) * std::ceil(1.0 * (end - begin) * total_node_weight / k); + return max; + // return _unrelaxed_max_block_weights[begin] * (end - begin); + } + + return std::accumulate( + _unrelaxed_max_block_weights.begin() + begin, + _unrelaxed_max_block_weights.begin() + end, + static_cast(0) + ); + } -private: - std::vector _perfectly_balanced_block_weights; - std::vector _max_block_weights; -}; + [[nodiscard]] double epsilon() const { + return _epsilon < 0.0 ? 
inferred_epsilon() : _epsilon; + } -struct PartitionContext { - double epsilon; - BlockID k; + [[nodiscard]] double infer_epsilon(const NodeWeight actual_total_node_weight) const { + if (_uniform_block_weights) { + const double max = (1.0 + _epsilon) * std::ceil(1.0 * original_total_node_weight / k); + return max / std::ceil(1.0 * actual_total_node_weight / k) - 1.0; + } - BlockWeightsContext block_weights{}; - void setup_block_weights(); + return 1.0 * _total_max_block_weights / actual_total_node_weight - 1.0; + } - NodeID n = kInvalidNodeID; - EdgeID m = kInvalidEdgeID; - NodeWeight total_node_weight = kInvalidNodeWeight; - EdgeWeight total_edge_weight = kInvalidEdgeWeight; - NodeWeight max_node_weight = kInvalidNodeWeight; + [[nodiscard]] double inferred_epsilon() const { + return infer_epsilon(total_node_weight); + // return 1.0 * _total_max_block_weights / total_node_weight - 1.0; + } + + void set_epsilon(const double eps) { + _epsilon = eps; + } + + [[nodiscard]] bool has_epsilon() const { + return _epsilon > 0.0; + } + + [[nodiscard]] bool has_uniform_block_weights() const { + return _uniform_block_weights; + } + + void setup( + const class AbstractGraph &graph, + BlockID k, + double epsilon, + bool relax_max_block_weights = false + ); + + void setup( + const class AbstractGraph &graph, + std::vector max_block_weights, + bool relax_max_block_weights = false + ); - void setup(const AbstractGraph &graph, const bool setup_block_weights = true); +private: + std::vector _max_block_weights{}; + std::vector _unrelaxed_max_block_weights{}; + + BlockWeight _total_max_block_weights = 0; + double _epsilon = -1.0; + bool _uniform_block_weights = false; }; struct ParallelContext { @@ -437,7 +498,7 @@ struct GraphCompressionContext { std::size_t num_interval_nodes = std::numeric_limits::max(); std::size_t num_intervals = std::numeric_limits::max(); - void setup(const Graph &graph); + void setup(const class Graph &graph); }; struct Context { @@ -452,9 +513,8 @@ struct 
Context { RefinementContext refinement; ParallelContext parallel; DebugContext debug; - - void setup(const Graph &graph); }; + } // namespace kaminpar::shm // @@ -490,6 +550,12 @@ Context create_noref_context(); namespace kaminpar { +namespace shm { + +class Graph; + +} // namespace shm + class KaMinPar { public: KaMinPar(int num_threads, shm::Context ctx); @@ -575,20 +641,46 @@ class KaMinPar { void set_graph(shm::Graph graph); /*! - * Partitions the graph set by `borrow_and_mutate_graph()` or `copy_graph()` into `k` blocks. + * Partitions the graph set by `borrow_and_mutate_graph()` or `copy_graph()` into `k` blocks with + * a maximum imbalance of 3%. + * + * @param k Number of blocks. + * @param[out] partition Span of length `n` to store the partitioning. + * + * @return Expected edge cut of the partition. + */ + shm::EdgeWeight compute_partition(shm::BlockID k, shm::BlockID *partition); + + /*! + * Partitions the graph set by `borrow_and_mutate_graph()` or `copy_graph()` into `k` blocks with + * a maximum imbalance of `epsilon`. * - * @param k The number of blocks to partition the graph into. - * @param partition Array of length `n` for storing the partition. The caller is reponsible for - * allocating and freeing the memory. + * @param k Number of blocks. + * @param epsilon Balance constraint (e.g., 0.03 for max 3% imbalance). + * @param[out] partition Span of length `n` to store the partitioning. * - * @return The edge-cut of the partition. + * @return Expected edge cut of the partition. + */ + shm::EdgeWeight compute_partition(shm::BlockID k, double epsilon, shm::BlockID *partition); + + /*! + * Partitions the graph set by `borrow_and_mutate_graph()` or `copy_graph()` such that the + * weight of each block is upper bounded by `max_block_weights`. The number of blocks is given + * implicitly by the size of `max_block_weights`. + * + * @param max_block_weights Maximum weight for each block of the partition. 
+ * @param[out] partition Span of length `n` to store the partitioning. + * + * @return Expected edge cut of the partition. */ shm::EdgeWeight - compute_partition(shm::BlockID k, shm::BlockID *partition, bool use_initial_node_ordering = true); + compute_partition(std::vector max_block_weights, shm::BlockID *partition); const shm::Graph *graph(); private: + shm::EdgeWeight compute_partition(shm::BlockID *partition); + int _num_threads; int _max_timer_depth = std::numeric_limits::max(); diff --git a/kaminpar-shm/metrics.h b/kaminpar-shm/metrics.h index 6b21604b..dada315e 100644 --- a/kaminpar-shm/metrics.h +++ b/kaminpar-shm/metrics.h @@ -73,12 +73,11 @@ template double imbalance(const PartitionedGraph &p_ } template -NodeWeight total_overload(const PartitionedGraph &p_graph, const PartitionContext &context) { +NodeWeight total_overload(const PartitionedGraph &p_graph, const PartitionContext &p_ctx) { NodeWeight total_overload = 0; for (const BlockID b : p_graph.blocks()) { - total_overload += - std::max(0, p_graph.block_weight(b) - context.block_weights.max(b)); + total_overload += std::max(0, p_graph.block_weight(b) - p_ctx.max_block_weight(b)); } return total_overload; @@ -87,7 +86,7 @@ NodeWeight total_overload(const PartitionedGraph &p_graph, const PartitionContex template bool is_balanced(const PartitionedGraph &p_graph, const PartitionContext &p_ctx) { return std::all_of(p_graph.blocks().begin(), p_graph.blocks().end(), [&](const BlockID b) { - return p_graph.block_weight(b) <= p_ctx.block_weights.max(b); + return p_graph.block_weight(b) <= p_ctx.max_block_weight(b); }); } @@ -105,4 +104,5 @@ template bool is_feasible(const PartitionedGraph &p_graph, const PartitionContext &p_ctx) { return is_balanced(p_graph, p_ctx); } + } // namespace kaminpar::shm::metrics diff --git a/kaminpar-shm/partitioning/debug.cc b/kaminpar-shm/partitioning/debug.cc index 50435c72..79a888c5 100644 --- a/kaminpar-shm/partitioning/debug.cc +++ b/kaminpar-shm/partitioning/debug.cc @@ 
-8,18 +8,20 @@ #include "kaminpar-shm/partitioning/debug.h" #include -#include +#include #include #include -#include "kaminpar-shm/context.h" #include "kaminpar-shm/datastructures/graph.h" +#include "kaminpar-shm/kaminpar.h" #include "kaminpar-common/random.h" #include "kaminpar-common/strutils.h" namespace kaminpar::shm::debug { + namespace { + std::string generate_filename(const std::string &pattern, const Graph &graph, const Context &ctx) { std::string filename = pattern; return str::replace_all( @@ -29,7 +31,7 @@ std::string generate_filename(const std::string &pattern, const Graph &graph, co {"%n", std::to_string(graph.n())}, {"%m", std::to_string(graph.m())}, {"%k", std::to_string(ctx.partition.k)}, - {"%epsilon", std::to_string(ctx.partition.epsilon)}, + {"%epsilon", std::to_string(ctx.partition.inferred_epsilon())}, {"%P", std::to_string(ctx.parallel.num_threads)}, {"%seed", std::to_string(Random::get_seed())}, } @@ -45,6 +47,7 @@ std::string generate_partition_filename(const std::string &suffix, const Graph &graph, const Context &ctx) { return generate_filename(ctx.debug.dump_partition_filename + "." 
+ suffix, graph, ctx); } + } // namespace void dump_coarsest_graph(const Graph &graph, const Context &ctx) { @@ -120,4 +123,69 @@ void dump_partition(const PartitionedGraph &p_graph, const std::string &filename out << p_graph.block(u) << "\n"; } } + +std::string describe_partition_context(const PartitionContext &p_ctx) { + std::stringstream ss; + + ss << p_ctx.k << "-way context (inferred epsilon = " << p_ctx.inferred_epsilon() << "):\n"; + ss << " Total node weight: " << p_ctx.total_node_weight << " (ctx)\n"; + ss << " Number of nodes: " << p_ctx.n << " (ctx)\n"; + ss << " Number of edges: " << p_ctx.m << " (ctx)\n"; + ss << " Max block weights: ["; + for (BlockID block = 0; block < p_ctx.k; ++block) { + ss << p_ctx.max_block_weight(block) << ", "; + } + ss << "\b\b]\n"; + ss << " PB block weights: ["; + for (BlockID block = 0; block < p_ctx.k; ++block) { + ss << p_ctx.perfectly_balanced_block_weight(block) << ", "; + } + ss << "\b\b]\n"; + return ss.str(); +} + +std::string +describe_partition_state(const PartitionedGraph &p_graph, const PartitionContext &p_ctx) { + std::stringstream ss; + + ss << p_graph.k() << "-way partition with " << p_ctx.k + << "-way context (inferred epsilon = " << p_ctx.inferred_epsilon() << "):\n"; + ss << " Total node weight: " << p_graph.total_node_weight() << " (graph) <-> " + << p_ctx.total_node_weight << " (ctx)\n"; + ss << " Number of nodes: " << p_graph.n() << " (graph) <-> " << p_ctx.n << " (ctx)\n"; + ss << " Number of edges: " << p_graph.m() << " (graph) <-> " << p_ctx.m << " (ctx)\n"; + if (p_graph.k() == p_ctx.k) { + ss << " Block weights: ["; + for (BlockID block : p_graph.blocks()) { + ss << p_graph.block_weight(block); + if (p_graph.block_weight(block) < p_ctx.max_block_weight(block)) { + ss << " < "; + } else if (p_graph.block_weight(block) > p_ctx.max_block_weight(block)) { + ss << " > "; + } else { + ss << " = "; + } + ss << p_ctx.max_block_weight(block) << ", "; + } + ss << "\b\b]\n"; + } else { + ss << " Block 
weights: ["; + for (BlockID block : p_graph.blocks()) { + ss << p_graph.block_weight(block) << ", "; + } + ss << "\b\b]\n"; + ss << " Max block weights: ["; + for (BlockID block : p_graph.blocks()) { + ss << p_ctx.max_block_weight(block) << ", "; + } + ss << "\b\b]\n"; + } + ss << " PB block weights: ["; + for (BlockID block : p_graph.blocks()) { + ss << p_ctx.perfectly_balanced_block_weight(block) << ", "; + } + ss << "\b\b]\n"; + return ss.str(); +} + } // namespace kaminpar::shm::debug diff --git a/kaminpar-shm/partitioning/debug.h b/kaminpar-shm/partitioning/debug.h index 50070ec5..6e8daeb5 100644 --- a/kaminpar-shm/partitioning/debug.h +++ b/kaminpar-shm/partitioning/debug.h @@ -9,11 +9,12 @@ #include -#include "kaminpar-shm/context.h" #include "kaminpar-shm/datastructures/graph.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" +#include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm::debug { + void dump_coarsest_graph(const Graph &graph, const Context &ctx); void dump_graph_hierarchy(const Graph &graph, int level, const Context &ctx); @@ -27,4 +28,10 @@ void dump_partition_hierarchy( ); void dump_partition(const PartitionedGraph &p_graph, const std::string &filename); + +std::string describe_partition_context(const PartitionContext &p_ctx); + +std::string +describe_partition_state(const PartitionedGraph &p_graph, const PartitionContext &p_ctx); + } // namespace kaminpar::shm::debug diff --git a/kaminpar-shm/partitioning/deep/async_initial_partitioning.cc b/kaminpar-shm/partitioning/deep/async_initial_partitioning.cc index 6791e8bf..fc0d49c6 100644 --- a/kaminpar-shm/partitioning/deep/async_initial_partitioning.cc +++ b/kaminpar-shm/partitioning/deep/async_initial_partitioning.cc @@ -14,13 +14,17 @@ #include #include +#include "kaminpar-shm/coarsening/coarsener.h" #include "kaminpar-shm/factories.h" #include "kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h" #include "kaminpar-shm/partitioning/partition_utils.h" namespace 
kaminpar::shm::partitioning { + namespace { + SET_DEBUG(true); + } AsyncInitialPartitioner::AsyncInitialPartitioner( @@ -45,16 +49,16 @@ PartitionedGraph AsyncInitialPartitioner::partition_recursive( // Base case: only one thread left <=> compute bipartition if (num_threads == 1) { - return bipartition(graph, _input_ctx.partition.k, _bipartitioner_pool, true); + return _bipartitioner_pool.bipartition(graph, 0, 1, true); } // Otherwise, coarsen further and proceed recursively auto coarsener = factory::create_coarsener(_input_ctx); coarsener->initialize(graph); - const bool shrunk = coarsen_once(coarsener.get(), graph, p_ctx); + const bool shrunk = coarsener->coarsen(); PartitionedGraph p_graph = split_and_join(coarsener.get(), p_ctx, !shrunk, num_threads); - p_graph = uncoarsen_once(coarsener.get(), std::move(p_graph), p_ctx, _input_ctx.partition); + p_graph = coarsener->uncoarsen(std::move(p_graph)); // The Context object is used to pre-allocate memory for the finest graph of the input hierarchy // Since this refiner is never used for the finest graph, we need to adjust the context to @@ -62,8 +66,11 @@ PartitionedGraph AsyncInitialPartitioner::partition_recursive( Context small_ctx = _input_ctx; small_ctx.partition.n = p_graph.n(); small_ctx.partition.m = p_graph.m(); + + p_ctx = create_kway_context(_input_ctx, p_graph); auto refiner = factory::create_refiner(small_ctx); - refine(refiner.get(), p_graph, p_ctx); + refiner->initialize(p_graph); + refiner->refine(p_graph, p_ctx); const BlockID k_prime = std::min( _input_ctx.partition.k, @@ -75,11 +82,11 @@ PartitionedGraph AsyncInitialPartitioner::partition_recursive( p_graph, k_prime, _input_ctx, - p_ctx, _tmp_extraction_mem_pool_ets, _bipartitioner_pool, num_threads ); + p_ctx = create_kway_context(_input_ctx, p_graph); } return p_graph; @@ -102,18 +109,19 @@ PartitionedGraph AsyncInitialPartitioner::split_and_join( for (std::size_t copy = 0; copy < num_copies; ++copy) { tg.run([this, - copy, + copy, // `copy` must 
be captured by value coarsener, threads_per_copy, &p_graphs, - &p_ctx_copies] { // must capture copy by value! + &p_ctx_copies] { p_graphs[copy] = partition_recursive(coarsener, p_ctx_copies[copy], threads_per_copy); }); } tg.wait(); - // select best result + // Select best result const std::size_t best = select_best(p_graphs, p_ctx); return std::move(p_graphs[best]); } + } // namespace kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/deep/async_initial_partitioning.h b/kaminpar-shm/partitioning/deep/async_initial_partitioning.h index 7a880f28..bd5a9abb 100644 --- a/kaminpar-shm/partitioning/deep/async_initial_partitioning.h +++ b/kaminpar-shm/partitioning/deep/async_initial_partitioning.h @@ -8,11 +8,13 @@ ******************************************************************************/ #pragma once +#include "kaminpar-shm/coarsening/coarsener.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" #include "kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h" #include "kaminpar-shm/partitioning/helper.h" namespace kaminpar::shm::partitioning { + class AsyncInitialPartitioner { public: AsyncInitialPartitioner( @@ -40,4 +42,5 @@ class AsyncInitialPartitioner { InitialBipartitionerWorkerPool &_bipartitioner_pool; TemporarySubgraphMemoryEts &_tmp_extraction_mem_pool_ets; }; + } // namespace kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/deep/deep_multilevel.cc b/kaminpar-shm/partitioning/deep/deep_multilevel.cc index e15605bb..dda2d0af 100644 --- a/kaminpar-shm/partitioning/deep/deep_multilevel.cc +++ b/kaminpar-shm/partitioning/deep/deep_multilevel.cc @@ -21,8 +21,12 @@ #include "kaminpar-common/timer.h" namespace kaminpar::shm { + namespace { + SET_DEBUG(false); +constexpr static bool kDebugBlockWeights = false; + } // namespace using namespace partitioning; @@ -49,132 +53,15 @@ PartitionedGraph DeepMultilevelPartitioner::partition() { _refiner->set_output_prefix(" "); } - const Graph *c_graph = coarsen(); 
- PartitionedGraph p_graph = initial_partition(c_graph); - - SCOPED_HEAP_PROFILER("Uncoarsening"); - p_graph = uncoarsen(std::move(p_graph)); - - return p_graph; -} - -PartitionedGraph DeepMultilevelPartitioner::uncoarsen_once(PartitionedGraph p_graph) { - return partitioning::uncoarsen_once( - _coarsener.get(), std::move(p_graph), _current_p_ctx, _input_ctx.partition - ); -} - -void DeepMultilevelPartitioner::refine(PartitionedGraph &p_graph) { - SCOPED_HEAP_PROFILER("Refinement"); - - // If requested, dump the current partition to disk before refinement ... - debug::dump_partition_hierarchy(p_graph, _coarsener->level(), "pre-refinement", _input_ctx); - - LOG << " Running refinement on " << p_graph.k() << " blocks"; - partitioning::refine(_refiner.get(), p_graph, _current_p_ctx); - - if (_print_metrics) { - SCOPED_TIMER("Partition metrics"); - LOG << " Cut: " << metrics::edge_cut(p_graph); - LOG << " Imbalance: " << metrics::imbalance(p_graph); - LOG << " Feasible: " << metrics::is_feasible(p_graph, _current_p_ctx); - } - - // ... and dump it after refinement. 
- debug::dump_partition_hierarchy(p_graph, _coarsener->level(), "post-refinement", _input_ctx); -} - -void DeepMultilevelPartitioner::extend_partition(PartitionedGraph &p_graph, const BlockID k_prime) { - SCOPED_HEAP_PROFILER("Extending partition"); - LOG << " Extending partition from " << p_graph.k() << " blocks to " << k_prime << " blocks"; - - if (_input_ctx.partitioning.use_lazy_subgraph_memory) { - partitioning::extend_partition_lazy_extraction( - p_graph, - k_prime, - _input_ctx, - _current_p_ctx, - _extraction_mem_pool_ets, - _tmp_extraction_mem_pool_ets, - _bipartitioner_pool, - _input_ctx.parallel.num_threads - ); - } else { - partitioning::extend_partition( - p_graph, - k_prime, - _input_ctx, - _current_p_ctx, - _subgraph_memory, - _tmp_extraction_mem_pool_ets, - _bipartitioner_pool, - _input_ctx.parallel.num_threads - ); - } - - if (_last_initial_partitioning_level == _coarsener->level()) { - SCOPED_TIMER("Deallocation"); - _subgraph_memory.free(); - _extraction_mem_pool_ets.clear(); - _tmp_extraction_mem_pool_ets.clear(); - _bipartitioner_pool.free(); - } - - if (_print_metrics) { - SCOPED_TIMER("Partition metrics"); - LOG << " Cut: " << metrics::edge_cut(p_graph); - LOG << " Imbalance: " << metrics::imbalance(p_graph); - } -} + DBGC(kDebugBlockWeights) << "Initial partition context:"; + DBGC(kDebugBlockWeights) << debug::describe_partition_context(_input_ctx.partition); -PartitionedGraph DeepMultilevelPartitioner::uncoarsen(PartitionedGraph p_graph) { - bool refined = false; - while (!_coarsener->empty()) { - SCOPED_HEAP_PROFILER("Level", std::to_string(_coarsener->level() - 1)); - - LOG; - LOG << "Uncoarsening -> Level " << (_coarsener->level() - 1); - - p_graph = uncoarsen_once(std::move(p_graph)); - - LOG << " Number of nodes: " << p_graph.n() << " | Number of edges: " << p_graph.m(); - - refine(p_graph); - refined = true; - - const BlockID desired_k = partitioning::compute_k_for_n(p_graph.n(), _input_ctx); - if (p_graph.k() < desired_k) { - 
extend_partition(p_graph, desired_k); - refined = false; - - if (_input_ctx.partitioning.refine_after_extending_partition) { - refine(p_graph); - refined = true; - } - } - } - - if (!refined || p_graph.k() < _input_ctx.partition.k) { - SCOPED_HEAP_PROFILER("Toplevel"); - - LOG; - LOG << "Toplevel:"; - LOG << " Number of nodes: " << p_graph.n() << " | Number of edges: " << p_graph.m(); - - if (!refined) { - refine(p_graph); - } - if (p_graph.k() < _input_ctx.partition.k) { - extend_partition(p_graph, _input_ctx.partition.k); - refine(p_graph); - } - } - - return p_graph; + return uncoarsen(initial_partition(coarsen())); } const Graph *DeepMultilevelPartitioner::coarsen() { SCOPED_HEAP_PROFILER("Coarsening"); + SCOPED_TIMER("Coarsening"); const Graph *c_graph = &_input_graph; NodeID prev_c_graph_n = c_graph->n(); @@ -198,7 +85,7 @@ const Graph *DeepMultilevelPartitioner::coarsen() { prev_c_graph_total_node_weight = c_graph->total_node_weight(); // Build next coarse graph - shrunk = partitioning::coarsen_once(_coarsener.get(), c_graph, _current_p_ctx); + shrunk = _coarsener->coarsen(); c_graph = &_coarsener->current(); // _subgraph_memory stores the block-induced subgraphs of the partitioned graph during recursive @@ -229,17 +116,18 @@ const Graph *DeepMultilevelPartitioner::coarsen() { } // Print some metrics for the coarse graphs + DBG << "Using inferred epsilon: " + << _current_p_ctx.infer_epsilon(prev_c_graph_total_node_weight); + LOG << "Coarsening -> Level " << _coarsener->level(); LOG << " Number of nodes: " << c_graph->n() << " | Number of edges: " << c_graph->m(); - LLOG << " Maximum node weight: " << c_graph->max_node_weight() << " "; - LLOG << "<= " - << compute_max_cluster_weight( - _input_ctx.coarsening, - _input_ctx.partition, - prev_c_graph_n, - prev_c_graph_total_node_weight - ); - LOG; + LOG << " Maximum node weight: " << c_graph->max_node_weight() << " <= " + << compute_max_cluster_weight( + _input_ctx.coarsening, + _input_ctx.partition, + 
prev_c_graph_n, + prev_c_graph_total_node_weight + ); LOG; } @@ -248,16 +136,14 @@ const Graph *DeepMultilevelPartitioner::coarsen() { _subgraph_memory_m = _subgraph_memory_m_weights = prev_c_graph_m; } - TIMED_SCOPE("Coarsening") { - _coarsener->release_allocated_memory(); - }; + _coarsener->release_allocated_memory(); if (shrunk) { LOG << "==> Coarsening terminated with less than " << initial_partitioning_threshold() - << " nodes."; + << " nodes"; LOG; } else { - LOG << "==> Coarsening converged."; + LOG << "==> Coarsening converged"; LOG; } @@ -265,10 +151,15 @@ const Graph *DeepMultilevelPartitioner::coarsen() { } NodeID DeepMultilevelPartitioner::initial_partitioning_threshold() { - if (partitioning::parallel_ip_mode(_input_ctx.partitioning.deep_initial_partitioning_mode)) { - return _input_ctx.parallel.num_threads * _input_ctx.coarsening.contraction_limit; // p * C - } else { - return 2 * _input_ctx.coarsening.contraction_limit; // 2 * C + const auto mode = _input_ctx.partitioning.deep_initial_partitioning_mode; + const bool is_parallel_mode = + (mode == InitialPartitioningMode::SYNCHRONOUS_PARALLEL || + mode == InitialPartitioningMode::ASYNCHRONOUS_PARALLEL); + + if (is_parallel_mode) { // Parallel: copy for each thread once n <= p * C + return _input_ctx.parallel.num_threads * _input_ctx.coarsening.contraction_limit; + } else { // Sequential: coarsen until until n <= 2 * C + return 2 * _input_ctx.coarsening.contraction_limit; } } @@ -304,7 +195,7 @@ PartitionedGraph DeepMultilevelPartitioner::initial_partition(const Graph *graph PartitionedGraph p_graph = [&] { switch (_input_ctx.partitioning.deep_initial_partitioning_mode) { case InitialPartitioningMode::SEQUENTIAL: - return partitioning::bipartition(graph, _input_ctx.partition.k, _bipartitioner_pool, true); + return _bipartitioner_pool.bipartition(graph, 0, 1, true); case InitialPartitioningMode::SYNCHRONOUS_PARALLEL: return SyncInitialPartitioner(_input_ctx, _bipartitioner_pool, 
_tmp_extraction_mem_pool_ets) @@ -318,7 +209,11 @@ PartitionedGraph DeepMultilevelPartitioner::initial_partition(const Graph *graph __builtin_unreachable(); }(); ENABLE_TIMERS(); - partitioning::update_partition_context(_current_p_ctx, p_graph, _input_ctx.partition.k); + + _current_p_ctx = create_kway_context(_input_ctx, p_graph); + + DBGC(kDebugBlockWeights) << "Initial partition context:"; + DBGC(kDebugBlockWeights) << debug::describe_partition_state(p_graph, _current_p_ctx); // Print some metrics for the initial partition. LOG << " Number of blocks: " << p_graph.k(); @@ -336,4 +231,124 @@ PartitionedGraph DeepMultilevelPartitioner::initial_partition(const Graph *graph return p_graph; } + +PartitionedGraph DeepMultilevelPartitioner::uncoarsen(PartitionedGraph p_graph) { + SCOPED_HEAP_PROFILER("Uncoarsening"); + + bool refined = false; + while (!_coarsener->empty()) { + SCOPED_HEAP_PROFILER("Level", std::to_string(_coarsener->level() - 1)); + + LOG; + LOG << "Uncoarsening -> Level " << (_coarsener->level() - 1); + + p_graph = _coarsener->uncoarsen(std::move(p_graph)); + _current_p_ctx = create_kway_context(_input_ctx, p_graph); + + LOG << " Number of nodes: " << p_graph.n() << " | Number of edges: " << p_graph.m(); + + refine(p_graph); + refined = true; + + const BlockID desired_k = partitioning::compute_k_for_n(p_graph.n(), _input_ctx); + if (p_graph.k() < desired_k) { + extend_partition(p_graph, desired_k); + _current_p_ctx = create_kway_context(_input_ctx, p_graph); + refined = false; + + if (_input_ctx.partitioning.refine_after_extending_partition) { + refine(p_graph); + refined = true; + } + } + } + + _current_p_ctx = create_kway_context(_input_ctx, p_graph); + + if (!refined || p_graph.k() < _input_ctx.partition.k) { + SCOPED_HEAP_PROFILER("Toplevel"); + + LOG; + LOG << "Toplevel:"; + LOG << " Number of nodes: " << p_graph.n() << " | Number of edges: " << p_graph.m(); + + if (!refined) { + refine(p_graph); + } + if (p_graph.k() < _input_ctx.partition.k) { + 
extend_partition(p_graph, _input_ctx.partition.k); + _current_p_ctx = create_kway_context(_input_ctx, p_graph); + refine(p_graph); + } + } + + return p_graph; +} + +void DeepMultilevelPartitioner::refine(PartitionedGraph &p_graph) { + SCOPED_HEAP_PROFILER("Refinement"); + SCOPED_TIMER("Refinement"); + + DBGC(kDebugBlockWeights) << "Partition context for refinement:"; + DBGC(kDebugBlockWeights) << debug::describe_partition_state(p_graph, _current_p_ctx); + + // If requested, dump the current partition to disk before refinement ... + debug::dump_partition_hierarchy(p_graph, _coarsener->level(), "pre-refinement", _input_ctx); + + LOG << " Running refinement on " << p_graph.k() << " blocks"; + _refiner->initialize(p_graph); + _refiner->refine(p_graph, _current_p_ctx); + + if (_print_metrics) { + SCOPED_TIMER("Partition metrics"); + LOG << " Cut: " << metrics::edge_cut(p_graph); + LOG << " Imbalance: " << metrics::imbalance(p_graph); + LOG << " Feasible: " << metrics::is_feasible(p_graph, _current_p_ctx); + } + + // ... and dump it after refinement. 
+ debug::dump_partition_hierarchy(p_graph, _coarsener->level(), "post-refinement", _input_ctx); +} + +void DeepMultilevelPartitioner::extend_partition(PartitionedGraph &p_graph, const BlockID k_prime) { + SCOPED_HEAP_PROFILER("Extending partition"); + LOG << " Extending partition from " << p_graph.k() << " blocks to " << k_prime << " blocks"; + + if (_input_ctx.partitioning.use_lazy_subgraph_memory) { + partitioning::extend_partition_lazy_extraction( + p_graph, + k_prime, + _input_ctx, + _extraction_mem_pool_ets, + _tmp_extraction_mem_pool_ets, + _bipartitioner_pool, + _input_ctx.parallel.num_threads + ); + } else { + partitioning::extend_partition( + p_graph, + k_prime, + _input_ctx, + _subgraph_memory, + _tmp_extraction_mem_pool_ets, + _bipartitioner_pool, + _input_ctx.parallel.num_threads + ); + } + + if (_last_initial_partitioning_level == _coarsener->level()) { + SCOPED_TIMER("Deallocation"); + _subgraph_memory.free(); + _extraction_mem_pool_ets.clear(); + _tmp_extraction_mem_pool_ets.clear(); + _bipartitioner_pool.free(); + } + + if (_print_metrics) { + SCOPED_TIMER("Partition metrics"); + LOG << " Cut: " << metrics::edge_cut(p_graph); + LOG << " Imbalance: " << metrics::imbalance(p_graph); + } +} + } // namespace kaminpar::shm diff --git a/kaminpar-shm/partitioning/deep/deep_multilevel.h b/kaminpar-shm/partitioning/deep/deep_multilevel.h index 9e0bb8b5..bf1d5a24 100644 --- a/kaminpar-shm/partitioning/deep/deep_multilevel.h +++ b/kaminpar-shm/partitioning/deep/deep_multilevel.h @@ -19,6 +19,7 @@ #include "kaminpar-shm/refinement/refiner.h" namespace kaminpar::shm { + class DeepMultilevelPartitioner : public Partitioner { public: DeepMultilevelPartitioner(const Graph &input_graph, const Context &input_ctx); @@ -32,36 +33,38 @@ class DeepMultilevelPartitioner : public Partitioner { PartitionedGraph partition() final; private: - PartitionedGraph uncoarsen(PartitionedGraph p_graph); - - inline PartitionedGraph uncoarsen_once(PartitionedGraph p_graph); - - void 
refine(PartitionedGraph &p_graph); - - inline void extend_partition(PartitionedGraph &p_graph, BlockID k_prime); - const Graph *coarsen(); NodeID initial_partitioning_threshold(); PartitionedGraph initial_partition(const Graph *graph); + PartitionedGraph uncoarsen(PartitionedGraph p_graph); + + void refine(PartitionedGraph &p_graph); + + inline void extend_partition(PartitionedGraph &p_graph, BlockID k_prime); + void print_statistics(); const Graph &_input_graph; const Context &_input_ctx; + PartitionContext _current_p_ctx; - std::unique_ptr _coarsener; - std::unique_ptr _refiner; + std::unique_ptr _coarsener = nullptr; + std::unique_ptr _refiner = nullptr; - std::size_t _last_initial_partitioning_level; - NodeID _subgraph_memory_n, _subgraph_memory_n_weights; - EdgeID _subgraph_memory_m, _subgraph_memory_m_weights; + std::size_t _last_initial_partitioning_level = 0; + NodeID _subgraph_memory_n = 0; + NodeID _subgraph_memory_n_weights = 0; + EdgeID _subgraph_memory_m = 0; + EdgeID _subgraph_memory_m_weights = 0; graph::SubgraphMemory _subgraph_memory; partitioning::SubgraphMemoryEts _extraction_mem_pool_ets; partitioning::TemporarySubgraphMemoryEts _tmp_extraction_mem_pool_ets; InitialBipartitionerWorkerPool _bipartitioner_pool; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/partitioning/deep/sync_initial_partitioning.cc b/kaminpar-shm/partitioning/deep/sync_initial_partitioning.cc index e582890e..536f460c 100644 --- a/kaminpar-shm/partitioning/deep/sync_initial_partitioning.cc +++ b/kaminpar-shm/partitioning/deep/sync_initial_partitioning.cc @@ -14,13 +14,17 @@ #include #include +#include "kaminpar-shm/coarsening/coarsener.h" #include "kaminpar-shm/factories.h" #include "kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h" #include "kaminpar-shm/partitioning/partition_utils.h" namespace kaminpar::shm::partitioning { + namespace { + SET_DEBUG(false); + } SyncInitialPartitioner::SyncInitialPartitioner( @@ -53,8 +57,6 @@ 
SyncInitialPartitioner::partition(const Coarsener *coarsener, const PartitionCon compute_num_copies(_input_ctx, n, converged, num_current_threads); num_local_copies_record.push_back(num_local_copies); - DBG << V(num_current_copies) << V(num_threads) << V(num_current_threads) << V(num_local_copies); - // Create coarseners and partition contexts for next coarsening iteration coarseners.emplace_back(num_current_copies * num_local_copies); auto &next_coarseners = coarseners.back(); @@ -77,8 +79,7 @@ SyncInitialPartitioner::partition(const Coarsener *coarsener, const PartitionCon // Perform coarsening iteration, converge if all coarseners converged converged = true; tbb::parallel_for(static_cast(0), num_current_copies, [&](const std::size_t i) { - const bool shrunk = - coarsen_once(next_coarseners[i].get(), &next_coarseners[i]->current(), current_p_ctxs[i]); + const bool shrunk = next_coarseners[i]->coarsen(); if (shrunk) { converged = false; } @@ -90,7 +91,7 @@ SyncInitialPartitioner::partition(const Coarsener *coarsener, const PartitionCon tbb::parallel_for(static_cast(0), num_threads, [&](const std::size_t i) { auto ¤t_coarseners = coarseners.back(); const Graph *graph = ¤t_coarseners[i]->current(); - current_p_graphs[i] = bipartition(graph, _input_ctx.partition.k, _bipartitioner_pool, true); + current_p_graphs[i] = _bipartitioner_pool.bipartition(graph, 0, 1, true); }); // Uncoarsen and join graphs @@ -100,12 +101,13 @@ SyncInitialPartitioner::partition(const Coarsener *coarsener, const PartitionCon auto ¤t_coarseners = coarseners.back(); - // uncoarsen and refine + // Uncoarsen and refine tbb::parallel_for(static_cast(0), num_current_copies, [&](const std::size_t i) { auto &p_graph = current_p_graphs[i]; auto &coarsener = current_coarseners[i]; auto &p_ctx = current_p_ctxs[i]; - p_graph = uncoarsen_once(coarsener.get(), std::move(p_graph), p_ctx, _input_ctx.partition); + p_graph = coarsener->uncoarsen(std::move(p_graph)); + p_ctx = create_kway_context(_input_ctx, 
p_graph); // The Context object is used to pre-allocate memory for the finest graph of the input // hierarchy Since this refiner is never used for the finest graph, we need to adjust the @@ -113,8 +115,10 @@ SyncInitialPartitioner::partition(const Coarsener *coarsener, const PartitionCon Context small_ctx = _input_ctx; small_ctx.partition.n = p_graph.n(); small_ctx.partition.m = p_graph.m(); + auto refiner = factory::create_refiner(small_ctx); - refine(refiner.get(), p_graph, p_ctx); + refiner->initialize(p_graph); + refiner->refine(p_graph, p_ctx); // extend partition const BlockID k_prime = compute_k_for_n(p_graph.n(), _input_ctx); @@ -123,11 +127,11 @@ SyncInitialPartitioner::partition(const Coarsener *coarsener, const PartitionCon p_graph, k_prime, _input_ctx, - p_ctx, _tmp_extraction_mem_pool_ets, _bipartitioner_pool, num_threads ); + p_ctx = create_kway_context(_input_ctx, p_graph); } }); @@ -168,4 +172,5 @@ std::unique_ptr SyncInitialPartitioner::duplicate_coarsener(const Coa duplication->initialize(&coarsener->current()); return duplication; } + } // namespace kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/deep/sync_initial_partitioning.h b/kaminpar-shm/partitioning/deep/sync_initial_partitioning.h index ba1b36fe..6275f78c 100644 --- a/kaminpar-shm/partitioning/deep/sync_initial_partitioning.h +++ b/kaminpar-shm/partitioning/deep/sync_initial_partitioning.h @@ -8,10 +8,12 @@ ******************************************************************************/ #pragma once +#include "kaminpar-shm/coarsening/coarsener.h" #include "kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h" #include "kaminpar-shm/partitioning/helper.h" namespace kaminpar::shm::partitioning { + class SyncInitialPartitioner { public: SyncInitialPartitioner( @@ -29,4 +31,5 @@ class SyncInitialPartitioner { InitialBipartitionerWorkerPool &_bipartitioner_pool; TemporarySubgraphMemoryEts &_tmp_extraction_mem_pool_ets; }; + } // namespace 
kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/helper.cc b/kaminpar-shm/partitioning/helper.cc index 201087ad..c09a091e 100644 --- a/kaminpar-shm/partitioning/helper.cc +++ b/kaminpar-shm/partitioning/helper.cc @@ -14,112 +14,68 @@ #include "kaminpar-common/timer.h" namespace kaminpar::shm::partitioning { + namespace { + SET_DEBUG(false); SET_STATISTICS_FROM_GLOBAL(); -} // namespace - -void update_partition_context( - PartitionContext ¤t_p_ctx, const PartitionedGraph &p_graph, const BlockID input_k -) { - current_p_ctx.setup(p_graph.graph()); - current_p_ctx.k = p_graph.k(); - current_p_ctx.block_weights.setup(current_p_ctx, input_k); -} - -PartitionedGraph uncoarsen_once( - Coarsener *coarsener, - PartitionedGraph p_graph, - PartitionContext ¤t_p_ctx, - const PartitionContext &input_p_ctx -) { - SCOPED_HEAP_PROFILER("Uncoarsen"); - SCOPED_TIMER("Uncoarsening"); - - if (!coarsener->empty()) { - p_graph = coarsener->uncoarsen(std::move(p_graph)); - update_partition_context(current_p_ctx, p_graph, input_p_ctx.k); - } - - return p_graph; -} -void refine(Refiner *refiner, PartitionedGraph &p_graph, const PartitionContext ¤t_p_ctx) { - SCOPED_TIMER("Refinement"); - refiner->initialize(p_graph); - refiner->refine(p_graph, current_p_ctx); -} +} // namespace -PartitionedGraph bipartition( - const Graph *graph, - const BlockID final_k, - InitialBipartitionerWorkerPool &initial_bipartitioner_pool, - const bool partition_lifespan, - BipartitionTimingInfo *timings -) { - timer::LocalTimer timer; - - const CSRGraph *csr = dynamic_cast(graph->underlying_graph()); - - // If we work with something other than a CSRGraph, construct a CSR copy to call the initial - // partitioning code - // This should only be necessary if the graph is too small for coarsening *and* we are using the - // compressed mode - std::unique_ptr csr_cpy; - if (csr == nullptr) { - DBG << "Bipartitioning a non-CSR graph is not supported by the initial partitioning code: " - "constructing a 
CSR-graph copy of the given graph with n=" - << graph->n() << ", m=" << graph->m(); - DBG << "Note: this should only happen when partitioning a very small graph using the " - "compressed mode"; - - csr_cpy = std::make_unique(*graph); - csr = csr_cpy.get(); +PartitionContext create_kway_context(const Context &input_ctx, const PartitionedGraph &p_graph) { + //if (p_graph.k() == input_ctx.partition.k && p_graph.n() == input_ctx.partition.n) { + // return input_ctx.partition; + //} + + const BlockID input_k = input_ctx.partition.k; + const BlockID current_k = p_graph.k(); + + std::vector max_block_weights(p_graph.k()); + BlockID cur_fine_block = 0; + for (const BlockID coarse_block : p_graph.blocks()) { + const BlockID num = compute_final_k(coarse_block, current_k, input_k); + const BlockID begin = cur_fine_block; + const BlockID end = cur_fine_block + num; + cur_fine_block += num; + + max_block_weights[coarse_block] = + input_ctx.partition.total_unrelaxed_max_block_weights(begin, end); + // LOG << "Block " << coarse_block << ": max weight " << max_block_weights[coarse_block] + // << " with inferred_eps()=" << input_ctx.partition.inferred_epsilon(); + + // if (p_graph.k() != input_ctx.partition.k) { // @todo + // max_block_weights[coarse_block] += end - begin; + //} } - timer.reset(); - auto bipartition = [&] { - if (graph->n() == 0) { - return StaticArray{}; - } - - InitialMultilevelBipartitioner bipartitioner = initial_bipartitioner_pool.get(); - bipartitioner.initialize(*csr, final_k); - auto bipartition = - bipartitioner.partition(timings ? 
&(timings->ip_timings) : nullptr).take_raw_partition(); + const bool is_toplevel_ctx = (p_graph.n() == input_ctx.partition.n); + const bool relax_max_block_weights = !is_toplevel_ctx; - if (partition_lifespan) { - StaticArray owned_bipartition(bipartition.size(), static_array::noinit); - std::copy(bipartition.begin(), bipartition.end(), owned_bipartition.begin()); + PartitionContext new_p_ctx; + new_p_ctx.setup(p_graph.graph(), std::move(max_block_weights), relax_max_block_weights); - initial_bipartitioner_pool.put(std::move(bipartitioner)); - - return owned_bipartition; - } else { - initial_bipartitioner_pool.put(std::move(bipartitioner)); - return bipartition; - } - }(); - - if (timings != nullptr) { - timings->bipartitioner_ms += timer.elapsed(); + // @todo + if (input_ctx.partition.has_epsilon()) { + new_p_ctx.set_epsilon(input_ctx.partition.epsilon()); } - timer.reset(); - PartitionedGraph p_graph(PartitionedGraph::seq{}, *graph, 2, std::move(bipartition)); - if (timings != nullptr) { - timings->graph_init_ms += timer.elapsed(); - } + // for (const BlockID coarse_block : p_graph.blocks()) { + // LOG << "Block " << coarse_block << ": max weight " << + // new_p_ctx.max_block_weight(coarse_block) + // << ", perfectly balanced weight: " + // << new_p_ctx.perfectly_balanced_block_weight(coarse_block); + // } - return p_graph; + return new_p_ctx; } void extend_partition_recursive( const Graph &graph, StaticArray &partition, - const BlockID b0, - const BlockID k, - const BlockID final_k, + const BlockID current_rel_block, + const BlockID current_abs_block, + const BlockID num_subblocks, + const BlockID current_k, const Context &input_ctx, const graph::SubgraphMemoryStartPosition position, graph::SubgraphMemory &subgraph_memory, @@ -127,52 +83,45 @@ void extend_partition_recursive( InitialBipartitionerWorkerPool &bipartitioner_pool, BipartitionTimingInfo *timings = nullptr ) { - KASSERT(k > 1u); - - PartitionedGraph p_graph = bipartition(&graph, final_k, 
bipartitioner_pool, false, timings); + KASSERT(num_subblocks > 1u); - timer::LocalTimer timer; + PartitionedGraph p_graph = + bipartitioner_pool.bipartition(&graph, current_abs_block, current_k, false); - timer.reset(); - std::array final_ks{0, 0}; std::array ks{0, 0}; - std::tie(final_ks[0], final_ks[1]) = math::split_integral(final_k); - std::tie(ks[0], ks[1]) = math::split_integral(k); - std::array b{b0, b0 + ks[0]}; - if (timings != nullptr) - timings->misc_ms += timer.elapsed(); - - DBG << "bipartitioning graph with weight " << graph.total_node_weight() << " = " - << p_graph.block_weight(0) << " + " << p_graph.block_weight(1) << " for final k " << final_k - << " = " << final_ks[0] << " + " << final_ks[1] << ", for total of " << k << " = " << ks[0] - << " + " << ks[1] << " blocks"; - - KASSERT(ks[0] >= 1u); - KASSERT(ks[1] >= 1u); - KASSERT(final_ks[0] >= ks[0]); - KASSERT(final_ks[1] >= ks[1]); - KASSERT(b[0] < input_ctx.partition.k); - KASSERT(b[1] < input_ctx.partition.k); - - // Copy p_graph to partition -> replace b0 with b0 or b1 - { - timer.reset(); + std::tie(ks[0], ks[1]) = math::split_integral(num_subblocks); + std::array rel_b{current_rel_block, current_rel_block + ks[0]}; + + // @todo should be correct, but needs clean ups + std::array abs_b; + if (2 * current_k >= input_ctx.partition.k) { + abs_b = { + compute_first_sub_block(current_abs_block, current_k, input_ctx.partition.k), + compute_first_sub_block(current_abs_block, current_k, input_ctx.partition.k) + 1 + }; + } else { + abs_b = {2 * current_abs_block, 2 * current_abs_block + 1}; + } + + DBG << "[k=" << current_k << "] Apply partition of block abs/" << current_abs_block << "-rel/" + << current_rel_block << " into blocks abs/" << abs_b[0] << "-rel/" << rel_b[0] << " and abs/" + << abs_b[1] << "-rel/" << rel_b[1] << ", num sub-blocks: " << num_subblocks; + + { // Copy p_graph to partition NodeID node = 0; for (BlockID &block : partition) { - block = (block == b0) ? 
b[p_graph.block(node++)] : block; + block = (block == current_rel_block) ? rel_b[p_graph.block(node++)] : block; } - KASSERT(node == p_graph.n()); - if (timings != nullptr) - timings->copy_ms += timer.elapsed(); } - if (k > 2) { - timer.reset(); + const BlockID final_k = compute_final_k(current_abs_block, current_k, input_ctx.partition.k); + std::array final_ks{0, 0}; + std::tie(final_ks[0], final_ks[1]) = math::split_integral(final_k); + + if (num_subblocks > 2) { auto [subgraphs, positions] = extract_subgraphs_sequential( p_graph, final_ks, position, subgraph_memory, tmp_extraction_mem_pool ); - if (timings != nullptr) - timings->extract_ms += timer.elapsed(); for (const std::size_t i : {0, 1}) { if (ks[i] <= 1) { @@ -182,9 +131,10 @@ void extend_partition_recursive( extend_partition_recursive( subgraphs[i], partition, - b[i], + rel_b[i], + abs_b[i], ks[i], - final_ks[i], + partitioning::compute_next_k(current_k, input_ctx), input_ctx, positions[i], subgraph_memory, @@ -198,13 +148,12 @@ void extend_partition_recursive( void extend_partition_lazy_extraction( PartitionedGraph &p_graph, // stores current k - const BlockID k_prime, // extend to this many blocks + const BlockID desired_k, // extend to this many blocks const Context &input_ctx, // stores input k - PartitionContext ¤t_p_ctx, SubgraphMemoryEts &extraction_mem_pool_ets, TemporarySubgraphMemoryEts &tmp_extraction_mem_pool_ets, InitialBipartitionerWorkerPool &bipartitioner_pool, - std::size_t num_active_threads + const int num_active_threads ) { if (input_ctx.partitioning.min_consecutive_seq_bipartitioning_levels > 0) { // Depending on the coarsening level and the deep multilevel implementation, it can occur that @@ -217,12 +166,12 @@ void extend_partition_lazy_extraction( // more parallel compute resources. // @todo change async_initial_partitioning.{cc, h} to make this obsolete ... 
const int factor = 2 << (input_ctx.partitioning.min_consecutive_seq_bipartitioning_levels - 1); - while (k_prime > factor * p_graph.k() && num_active_threads > p_graph.k()) { + while (desired_k > factor * p_graph.k() && + static_cast(num_active_threads) > p_graph.k()) { extend_partition_lazy_extraction( p_graph, factor * p_graph.k(), input_ctx, - current_p_ctx, extraction_mem_pool_ets, tmp_extraction_mem_pool_ets, bipartitioner_pool, @@ -257,7 +206,7 @@ void extend_partition_lazy_extraction( tbb::parallel_for(0, k, [&](const BlockID b) { const BlockID final_kb = compute_final_k(b, k, input_ctx.partition.k); - const BlockID subgraph_k = (k_prime == input_ctx.partition.k) ? final_kb : k_prime / k; + const BlockID subgraph_k = (desired_k == input_ctx.partition.k) ? final_kb : desired_k / k; if (subgraph_k <= 1) { return; } @@ -303,8 +252,9 @@ void extend_partition_lazy_extraction( subgraph, subgraph_partitions[b], 0, + b, subgraph_k, - final_kb, + p_graph.k(), input_ctx, {.nodes_start_pos = 0, .edges_start_pos = 0}, subgraph_memory, @@ -313,54 +263,26 @@ void extend_partition_lazy_extraction( &timing ); }); - - if constexpr (kDebug) { - const auto timings = dbg_timings_ets.combine([](auto &a, const auto &b) { return a += b; }); - const auto to_ms = [](const auto ns) { - return static_cast(ns / 1e6); - }; - - LOG << "bipartitioner_init_ms: " << to_ms(timings.bipartitioner_init_ms); - LOG << "bipartitioner_ms: " << to_ms(timings.bipartitioner_ms); - LOG << " total_ms: " << to_ms(timings.ip_timings.total_ms); - LOG << " misc_ms: " << to_ms(timings.ip_timings.misc_ms); - LOG << " coarsening_ms: " << to_ms(timings.ip_timings.coarsening_ms); - LOG << " misc_ms: " << to_ms(timings.ip_timings.coarsening_misc_ms); - LOG << " call_ms: " << to_ms(timings.ip_timings.coarsening_call_ms); - LOG << " alloc_ms: " << to_ms(timings.ip_timings.coarsening.alloc_ms); - LOG << " contract_ms: " << to_ms(timings.ip_timings.coarsening.contract_ms); - LOG << " lp_ms: " << 
to_ms(timings.ip_timings.coarsening.lp_ms); - LOG << " interleaved1: " << to_ms(timings.ip_timings.coarsening.interleaved1_ms); - LOG << " interleaved2: " << to_ms(timings.ip_timings.coarsening.interleaved2_ms); - LOG << " bipartitioning_ms: " << to_ms(timings.ip_timings.bipartitioning_ms); - LOG << " uncoarsening_ms: " << to_ms(timings.ip_timings.uncoarsening_ms); - LOG << "graph_init_ms: " << to_ms(timings.graph_init_ms); - LOG << "extract_ms: " << to_ms(timings.extract_ms); - LOG << "copy_ms: " << to_ms(timings.copy_ms); - LOG << "misc_ms: " << to_ms(timings.misc_ms); - } }; TIMED_SCOPE("Copy subgraph partitions") { SCOPED_HEAP_PROFILER("Copy subgraph partitions"); p_graph = graph::copy_subgraph_partitions( - std::move(p_graph), subgraph_partitions, k_prime, input_ctx.partition.k, mapping + std::move(p_graph), subgraph_partitions, desired_k, input_ctx.partition.k, mapping ); }; - update_partition_context(current_p_ctx, p_graph, input_ctx.partition.k); - KASSERT(p_graph.k() == k_prime); + KASSERT(p_graph.k() == desired_k); } void extend_partition( PartitionedGraph &p_graph, // stores current k - const BlockID k_prime, // extend to this many blocks + const BlockID desired_k, // extend to this many blocks const Context &input_ctx, // stores input k - PartitionContext ¤t_p_ctx, graph::SubgraphMemory &subgraph_memory, TemporarySubgraphMemoryEts &tmp_extraction_mem_pool_ets, InitialBipartitionerWorkerPool &bipartitioner_pool, - std::size_t num_active_threads + const int num_active_threads ) { if (input_ctx.partitioning.min_consecutive_seq_bipartitioning_levels > 0) { // Depending on the coarsening level and the deep multilevel implementation, it can occur that @@ -373,12 +295,12 @@ void extend_partition( // more parallel compute resources. // @todo change async_initial_partitioning.{cc, h} to make this obsolete ... 
const int factor = 2 << (input_ctx.partitioning.min_consecutive_seq_bipartitioning_levels - 1); - while (k_prime > factor * p_graph.k() && num_active_threads > p_graph.k()) { + while (desired_k > factor * p_graph.k() && + static_cast(num_active_threads) > p_graph.k()) { extend_partition( p_graph, factor * p_graph.k(), input_ctx, - current_p_ctx, subgraph_memory, tmp_extraction_mem_pool_ets, bipartitioner_pool, @@ -416,7 +338,7 @@ void extend_partition( const BlockID final_kb = compute_final_k(b, p_graph.k(), input_ctx.partition.k); const BlockID subgraph_k = - (k_prime == input_ctx.partition.k) ? final_kb : k_prime / p_graph.k(); + (desired_k == input_ctx.partition.k) ? final_kb : desired_k / p_graph.k(); if (subgraph_k > 1) { DBG << "initial extend_partition_recursive() for block " << b << ", final k " << final_kb @@ -427,8 +349,9 @@ void extend_partition( subgraph, subgraph_partitions[b], 0, + b, subgraph_k, - final_kb, + p_graph.k(), input_ctx, positions[b], subgraph_memory, @@ -443,49 +366,21 @@ void extend_partition( TIMED_SCOPE("Copy subgraph partitions") { SCOPED_HEAP_PROFILER("Copy subgraph partitions"); p_graph = graph::copy_subgraph_partitions( - std::move(p_graph), subgraph_partitions, k_prime, input_ctx.partition.k, mapping + std::move(p_graph), subgraph_partitions, desired_k, input_ctx.partition.k, mapping ); }; - if constexpr (kDebug) { - const auto timings = timings_ets.combine([](auto &a, const auto &b) { return a += b; }); - const auto to_ms = [](const auto ns) { - return static_cast(ns / 1e6); - }; - - LOG << "bipartitioner_init_ms: " << to_ms(timings.bipartitioner_init_ms); - LOG << "bipartitioner_ms: " << to_ms(timings.bipartitioner_ms); - LOG << " total_ms: " << to_ms(timings.ip_timings.total_ms); - LOG << " misc_ms: " << to_ms(timings.ip_timings.misc_ms); - LOG << " coarsening_ms: " << to_ms(timings.ip_timings.coarsening_ms); - LOG << " misc_ms: " << to_ms(timings.ip_timings.coarsening_misc_ms); - LOG << " call_ms: " << 
to_ms(timings.ip_timings.coarsening_call_ms); - LOG << " alloc_ms: " << to_ms(timings.ip_timings.coarsening.alloc_ms); - LOG << " contract_ms: " << to_ms(timings.ip_timings.coarsening.contract_ms); - LOG << " lp_ms: " << to_ms(timings.ip_timings.coarsening.lp_ms); - LOG << " interleaved1: " << to_ms(timings.ip_timings.coarsening.interleaved1_ms); - LOG << " interleaved2: " << to_ms(timings.ip_timings.coarsening.interleaved2_ms); - LOG << " bipartitioning_ms: " << to_ms(timings.ip_timings.bipartitioning_ms); - LOG << " uncoarsening_ms: " << to_ms(timings.ip_timings.uncoarsening_ms); - LOG << "graph_init_ms: " << to_ms(timings.graph_init_ms); - LOG << "extract_ms: " << to_ms(timings.extract_ms); - LOG << "copy_ms: " << to_ms(timings.copy_ms); - LOG << "misc_ms: " << to_ms(timings.misc_ms); - } - - update_partition_context(current_p_ctx, p_graph, input_ctx.partition.k); - KASSERT(p_graph.k() == k_prime); + KASSERT(p_graph.k() == desired_k); } // extend_partition with local memory allocation for subgraphs void extend_partition( PartitionedGraph &p_graph, - const BlockID k_prime, + const BlockID desired_k, const Context &input_ctx, - PartitionContext ¤t_p_ctx, TemporarySubgraphMemoryEts &tmp_extraction_mem_pool_ets, InitialBipartitionerWorkerPool &bipartitioner_pool, - std::size_t num_active_threads + const int num_active_threads ) { graph::SubgraphMemory memory; @@ -499,9 +394,8 @@ void extend_partition( extend_partition( p_graph, - k_prime, + desired_k, input_ctx, - current_p_ctx, memory, tmp_extraction_mem_pool_ets, bipartitioner_pool, @@ -509,23 +403,4 @@ void extend_partition( ); } -bool coarsen_once( - Coarsener *coarsener, [[maybe_unused]] const Graph *graph, PartitionContext ¤t_p_ctx -) { - SCOPED_TIMER("Coarsening"); - - const auto shrunk = coarsener->coarsen(); - const auto &c_graph = coarsener->current(); - - // @todo always do this? 
- if (shrunk) { - current_p_ctx.setup(c_graph); - } - - return shrunk; -} -std::size_t -select_best(const ScalableVector &p_graphs, const PartitionContext &p_ctx) { - return select_best(p_graphs.begin(), p_graphs.end(), p_ctx); -} } // namespace kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/helper.h b/kaminpar-shm/partitioning/helper.h index c5934980..e1391082 100644 --- a/kaminpar-shm/partitioning/helper.h +++ b/kaminpar-shm/partitioning/helper.h @@ -7,101 +7,74 @@ ******************************************************************************/ #pragma once -#include +#include -#include "kaminpar-shm/coarsening/coarsener.h" -#include "kaminpar-shm/datastructures/graph.h" #include "kaminpar-shm/graphutils/subgraph_extractor.h" #include "kaminpar-shm/initial_partitioning/initial_bipartitioner_worker_pool.h" #include "kaminpar-shm/kaminpar.h" #include "kaminpar-shm/metrics.h" -#include "kaminpar-shm/refinement/refiner.h" #include "kaminpar-common/assert.h" namespace kaminpar::shm::partitioning { -using SubgraphMemoryEts = tbb::enumerable_thread_specific; -using TemporarySubgraphMemoryEts = tbb::enumerable_thread_specific; - -void update_partition_context( - PartitionContext &p_ctx, const PartitionedGraph &p_graph, BlockID input_k -); -PartitionedGraph uncoarsen_once( - Coarsener *coarsener, - PartitionedGraph p_graph, - PartitionContext ¤t_p_ctx, - const PartitionContext &input_p_ctx -); - -struct BipartitionTimingInfo { - std::uint64_t bipartitioner_init_ms = 0; - std::uint64_t bipartitioner_ms = 0; - std::uint64_t graph_init_ms = 0; - std::uint64_t extract_ms = 0; - std::uint64_t copy_ms = 0; - std::uint64_t misc_ms = 0; - InitialPartitionerTimings ip_timings{}; - - BipartitionTimingInfo &operator+=(const BipartitionTimingInfo &other) { - bipartitioner_init_ms += other.bipartitioner_init_ms; - bipartitioner_ms += other.bipartitioner_ms; - graph_init_ms += other.graph_init_ms; - extract_ms += other.extract_ms; - copy_ms += other.copy_ms; - 
misc_ms += other.misc_ms; - ip_timings += other.ip_timings; - return *this; - } -}; - -PartitionedGraph bipartition( - const Graph *graph, - BlockID final_k, - InitialBipartitionerWorkerPool &bipartitioner_pool_ets, - bool partition_lifespan, - BipartitionTimingInfo *timing_info = nullptr -); +PartitionContext create_kway_context(const Context &input_ctx, const PartitionedGraph &p_graph); -void refine(Refiner *refiner, PartitionedGraph &p_graph, const PartitionContext &current_p_ctx); +using SubgraphMemoryEts = tbb::enumerable_thread_specific; +using TemporarySubgraphMemoryEts = tbb::enumerable_thread_specific; +/** + * Performs recursive bipartitioning on the blocks of `p_graph` to obtain a partition with + * `desired_k` blocks. + * + * In contrast to the non-lazy version, this function does not extract all block-induced subgraphs + * of `p_graph` in advance. Instead, it extracts the blocks one-by-one and immediately partitions + * them. + * + * @param p_graph The partitioned graph of which the blocks will be recursively bipartitioned. + * @param desired_k The number of blocks in the final partition. + * @param input_ctx The input context, used to compute max block weights. + * @param extraction_mem_pool_ets Thread-local memory used to extract block-induced subgraphs. + * @param tmp_extraction_mem_pool_ets Thread-local temporary buffers used during subgraph extraction. + * @param bipartitioner_pool The worker pool used to compute the bipartitions. + * @param num_active_threads The number of currently active threads (in this replication branch of + * deep multilevel). + */ void extend_partition_lazy_extraction( PartitionedGraph &p_graph, - BlockID k_prime, + BlockID desired_k, const Context &input_ctx, - PartitionContext &current_p_ctx, SubgraphMemoryEts &extraction_mem_pool_ets, TemporarySubgraphMemoryEts &tmp_extraction_mem_pool_ets, InitialBipartitionerWorkerPool &bipartitioner_pool, - std::size_t num_active_threads + int num_active_threads ); +/** + * @deprecated Use `extend_partition_lazy_extraction` instead. 
+ */ void extend_partition( PartitionedGraph &p_graph, - BlockID k_prime, + BlockID desired_k, const Context &input_ctx, - PartitionContext ¤t_p_ctx, graph::SubgraphMemory &subgraph_memory, TemporarySubgraphMemoryEts &tmp_extraction_mem_pool_ets, InitialBipartitionerWorkerPool &bipartitioner_pool, - std::size_t num_active_threads + int num_active_threads ); +/** + * @deprecated Use `extend_partition_lazy_extraction` instead. + */ void extend_partition( PartitionedGraph &p_graph, - BlockID k_prime, + BlockID desired_k, const Context &input_ctx, - PartitionContext ¤t_p_ctx, TemporarySubgraphMemoryEts &tmp_extraction_mem_pool_ets, InitialBipartitionerWorkerPool &bipartitioner_pool, - std::size_t num_active_threads + int num_active_threads ); -bool coarsen_once(Coarsener *coarsener, const Graph *graph, PartitionContext ¤t_p_ctx); - -std::size_t -select_best(const ScalableVector &p_graphs, const PartitionContext &p_ctx); - template std::size_t select_best( const Iterator p_graphs_begin, const Iterator p_graphs_end, const PartitionContext &p_ctx @@ -131,8 +104,9 @@ std::size_t select_best( return best_index; } -inline bool parallel_ip_mode(const InitialPartitioningMode &mode) { - return mode == InitialPartitioningMode::ASYNCHRONOUS_PARALLEL || - mode == InitialPartitioningMode::SYNCHRONOUS_PARALLEL; +inline std::size_t +select_best(const ScalableVector &p_graphs, const PartitionContext &p_ctx) { + return select_best(p_graphs.begin(), p_graphs.end(), p_ctx); } + } // namespace kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/kway/kway_multilevel.cc b/kaminpar-shm/partitioning/kway/kway_multilevel.cc index 0b3710f8..d3cd0657 100644 --- a/kaminpar-shm/partitioning/kway/kway_multilevel.cc +++ b/kaminpar-shm/partitioning/kway/kway_multilevel.cc @@ -17,9 +17,12 @@ #include "kaminpar-common/timer.h" namespace kaminpar::shm { + namespace { + SET_DEBUG(false); SET_STATISTICS_FROM_GLOBAL(); + } // namespace KWayMultilevelPartitioner::KWayMultilevelPartitioner( @@ 
-45,7 +48,8 @@ void KWayMultilevelPartitioner::refine(PartitionedGraph &p_graph) { // If requested, dump the current partition to disk before refinement ... debug::dump_partition_hierarchy(p_graph, _coarsener->level(), "pre-refinement", _input_ctx); - partitioning::refine(_refiner.get(), p_graph, _current_p_ctx); + _refiner->initialize(p_graph); + _refiner->refine(p_graph, _current_p_ctx); if (_print_metrics) { SCOPED_TIMER("Partition metrics"); LOG << " Cut: " << metrics::edge_cut(p_graph); @@ -66,9 +70,7 @@ PartitionedGraph KWayMultilevelPartitioner::uncoarsen(PartitionedGraph p_graph) LOG; LOG << "Uncoarsening -> Level " << _coarsener->level(); - p_graph = partitioning::uncoarsen_once( - _coarsener.get(), std::move(p_graph), _current_p_ctx, _input_ctx.partition - ); + p_graph = _coarsener->uncoarsen(std::move(p_graph)); refine(p_graph); } @@ -89,7 +91,7 @@ const Graph *KWayMultilevelPartitioner::coarsen() { debug::dump_graph_hierarchy(*c_graph, _coarsener->level(), _input_ctx); // Build next coarse graph - shrunk = partitioning::coarsen_once(_coarsener.get(), c_graph, _current_p_ctx); + shrunk = _coarsener->coarsen(); c_graph = &_coarsener->current(); // Print some metrics for the coarse graphs @@ -139,9 +141,7 @@ PartitionedGraph KWayMultilevelPartitioner::initial_partition(const Graph *graph // Since timers are not multi-threaded, we disable them during (parallel) // initial partitioning. 
DISABLE_TIMERS(); - PartitionedGraph p_graph = - partitioning::bipartition(graph, _input_ctx.partition.k, _bipartitioner_pool, true); - partitioning::update_partition_context(_current_p_ctx, p_graph, _input_ctx.partition.k); + PartitionedGraph p_graph = _bipartitioner_pool.bipartition(graph, 0, 1, true); graph::SubgraphMemory subgraph_memory(p_graph.n(), _input_ctx.partition.k, p_graph.m()); partitioning::TemporarySubgraphMemoryEts ip_extraction_pool_ets; @@ -150,14 +150,12 @@ PartitionedGraph KWayMultilevelPartitioner::initial_partition(const Graph *graph p_graph, _input_ctx.partition.k, _input_ctx, - _current_p_ctx, subgraph_memory, ip_extraction_pool_ets, _bipartitioner_pool, _input_ctx.parallel.num_threads ); - partitioning::update_partition_context(_current_p_ctx, p_graph, _input_ctx.partition.k); ENABLE_TIMERS(); // Print some metrics for the initial partition. @@ -175,4 +173,5 @@ PartitionedGraph KWayMultilevelPartitioner::initial_partition(const Graph *graph return p_graph; } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/partitioning/kway/kway_multilevel.h b/kaminpar-shm/partitioning/kway/kway_multilevel.h index 27fa4825..ad31e88d 100644 --- a/kaminpar-shm/partitioning/kway/kway_multilevel.h +++ b/kaminpar-shm/partitioning/kway/kway_multilevel.h @@ -15,6 +15,7 @@ #include "kaminpar-shm/refinement/refiner.h" namespace kaminpar::shm { + class KWayMultilevelPartitioner : public Partitioner { public: KWayMultilevelPartitioner(const Graph &input_graph, const Context &input_ctx); @@ -47,4 +48,5 @@ class KWayMultilevelPartitioner : public Partitioner { InitialBipartitionerWorkerPool _bipartitioner_pool; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/partitioning/partition_utils.cc b/kaminpar-shm/partitioning/partition_utils.cc index 558e34e6..3f50d36c 100644 --- a/kaminpar-shm/partitioning/partition_utils.cc +++ b/kaminpar-shm/partitioning/partition_utils.cc @@ -17,36 +17,6 @@ #include "kaminpar-common/math.h" namespace 
kaminpar::shm::partitioning { -double compute_2way_adaptive_epsilon( - const NodeWeight total_node_weight, const BlockID k, const PartitionContext &p_ctx -) { - KASSERT(p_ctx.k > 0u); - KASSERT(total_node_weight > 0); - - const double base = - (1.0 + p_ctx.epsilon) * k * p_ctx.total_node_weight / p_ctx.k / total_node_weight; - const double exponent = 1.0 / math::ceil_log2(k); - const double epsilon_prime = std::pow(base, exponent) - 1.0; - const double adaptive_epsilon = std::max(epsilon_prime, 0.0001); - - return adaptive_epsilon; -} - -PartitionContext create_bipartition_context( - const AbstractGraph &subgraph, - const BlockID k1, - const BlockID k2, - const PartitionContext &kway_p_ctx, - const bool parallel -) { - PartitionContext twoway_p_ctx; - twoway_p_ctx.k = 2; - twoway_p_ctx.setup(subgraph, false); - twoway_p_ctx.epsilon = - compute_2way_adaptive_epsilon(subgraph.total_node_weight(), k1 + k2, kway_p_ctx); - twoway_p_ctx.block_weights.setup(twoway_p_ctx, k1 + k2, parallel); - return twoway_p_ctx; -} BlockID compute_final_k(const BlockID block, const BlockID current_k, const BlockID input_k) { if (current_k == input_k) { @@ -79,6 +49,46 @@ BlockID compute_final_k(const BlockID block, const BlockID current_k, const Bloc return base + (reversed_block < num_plus_one_blocks); } +// @todo optimize +BlockID +compute_first_sub_block(const BlockID block, const BlockID current_k, const BlockID input_k) { + if (current_k == 1) { + return 0; + } + + int level = math::ceil_log2(current_k); + int mask = 1 << (level - 1); + + BlockID width = 1; + BlockID current_value = input_k; + BlockID ans = 0; + while (width <= current_k) { + width *= 2; + auto [lhs, rhs] = math::split_integral(current_value); + if (block & mask) { + current_value = rhs; + ans += lhs; + } else { + current_value = lhs; + } + mask >>= 1; + } + return ans; + + // BlockID first_sub_block = 0; + // for (BlockID b = 0; b < block; ++b) { + // first_sub_block += compute_final_k(b, current_k, input_k); + //} 
+ // return first_sub_block; +} + +BlockID compute_first_invalid_sub_block( + const BlockID block, const BlockID current_k, const BlockID input_k +) { + return compute_first_sub_block(block, current_k, input_k) + + compute_final_k(block, current_k, input_k); +} + BlockID compute_k_for_n(const NodeID n, const Context &input_ctx) { // Catch special case where log is negative: if (n < 2 * input_ctx.coarsening.contraction_limit) { @@ -117,4 +127,9 @@ int compute_num_threads_for_parallel_ip(const Context &input_ctx) { 1.0 * input_ctx.parallel.num_threads * input_ctx.partitioning.deep_initial_partitioning_load )); } + +BlockID compute_next_k(const BlockID current_k, const Context &input_ctx) { + return std::min(current_k * 2, input_ctx.partition.k); +} + } // namespace kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/partition_utils.h b/kaminpar-shm/partitioning/partition_utils.h index c228488f..a47daa79 100644 --- a/kaminpar-shm/partitioning/partition_utils.h +++ b/kaminpar-shm/partitioning/partition_utils.h @@ -7,21 +7,9 @@ ******************************************************************************/ #pragma once -#include "kaminpar-shm/datastructures/abstract_graph.h" #include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm::partitioning { -double compute_2way_adaptive_epsilon( - NodeWeight total_node_weight, BlockID k, const PartitionContext &p_ctx -); - -PartitionContext create_bipartition_context( - const AbstractGraph &subgraph, - const BlockID k1, - const BlockID k2, - const PartitionContext &kway_p_ctx, - const bool parallel = true -); /** * Given a block $0 <= B < k'$ of an intermediate partition with $k' < k$ blocks, this function @@ -48,11 +36,17 @@ PartitionContext create_bipartition_context( */ BlockID compute_final_k(BlockID block, BlockID current_k, BlockID input_k); -// compute smallest k_prime such that it is a power of 2 and n / k_prime <= C +BlockID compute_first_sub_block(BlockID block, BlockID current_k, BlockID input_k); 
+BlockID compute_first_invalid_sub_block(BlockID block, BlockID current_k, BlockID input_k); + +// Compute smallest k_prime such that it is a power of 2 and n / k_prime <= C BlockID compute_k_for_n(NodeID n, const Context &input_ctx); std::size_t compute_num_copies(const Context &input_ctx, NodeID n, bool converged, std::size_t num_threads); int compute_num_threads_for_parallel_ip(const Context &input_ctx); + +BlockID compute_next_k(BlockID current_k, const Context &input_ctx); + } // namespace kaminpar::shm::partitioning diff --git a/kaminpar-shm/partitioning/partitioner.h b/kaminpar-shm/partitioning/partitioner.h index a94618fe..968519cd 100644 --- a/kaminpar-shm/partitioning/partitioner.h +++ b/kaminpar-shm/partitioning/partitioner.h @@ -10,6 +10,7 @@ #include "kaminpar-shm/datastructures/partitioned_graph.h" namespace kaminpar::shm { + class Partitioner { public: virtual ~Partitioner() = default; @@ -22,4 +23,5 @@ class Partitioner { protected: bool _print_metrics = false; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/partitioning/rb/rb_multilevel.h b/kaminpar-shm/partitioning/rb/rb_multilevel.h index 49449d33..88f7dbe3 100644 --- a/kaminpar-shm/partitioning/rb/rb_multilevel.h +++ b/kaminpar-shm/partitioning/rb/rb_multilevel.h @@ -19,6 +19,7 @@ #include "kaminpar-common/timer.h" namespace kaminpar::shm { + class RBMultilevelPartitioner : public Partitioner { public: RBMultilevelPartitioner(const Graph &input_graph, const Context &input_ctx) @@ -84,25 +85,25 @@ class RBMultilevelPartitioner : public Partitioner { create_bipartition_context(graph, final_k / 2, final_k / 2, _input_ctx.partition); bool shrunk = true; while (shrunk && c_graph->n() > 2 * _input_ctx.coarsening.contraction_limit) { - shrunk = partitioning::coarsen_once(coarsener.get(), c_graph, p_ctx); + shrunk = coarsener->coarsen(); c_graph = &coarsener->current(); } // initial bipartitioning PartitionedGraph p_graph = partitioning::bipartition(c_graph, final_k, _bipartitioner_pool, 
true); - partitioning::update_partition_context(p_ctx, p_graph, _input_ctx.partition.k); // refine auto refiner = factory::create_refiner(_input_ctx); while (!coarsener->empty()) { - partitioning::refine(refiner.get(), p_graph, p_ctx); - p_graph = partitioning::uncoarsen_once( - coarsener.get(), std::move(p_graph), p_ctx, _input_ctx.partition - ); + refiner->initialize(p_graph); + refiner->refine(p_graph, p_ctx); + p_graph = coarsener->uncoarsen(std::move(p_graph)); } - partitioning::refine(refiner.get(), p_graph, p_ctx); + + refiner->initialize(p_graph); + refiner->refine(p_graph, p_ctx); return p_graph; } @@ -113,4 +114,5 @@ class RBMultilevelPartitioner : public Partitioner { InitialBipartitionerWorkerPool _bipartitioner_pool; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/presets.cc b/kaminpar-shm/presets.cc index bf597542..908861ab 100644 --- a/kaminpar-shm/presets.cc +++ b/kaminpar-shm/presets.cc @@ -11,7 +11,7 @@ #include #include -#include "kaminpar-shm/context.h" +#include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { @@ -86,12 +86,7 @@ Context create_default_context() { .refine_after_extending_partition = false, .use_lazy_subgraph_memory = true, }, - .partition = - { - // Context -> Partition - .epsilon = 0.03, - .k = kInvalidBlockID /* must be set */, - }, + .partition = {}, .coarsening = { // Context -> Coarsening @@ -178,6 +173,7 @@ Context create_default_context() { .improvement_abortion_threshold = 0.0001, }, .refine_pool_partition = false, + .use_adaptive_epsilon = true, }, .refinement = { diff --git a/kaminpar-shm/refinement/adapters/mtkahypar_refiner.cc b/kaminpar-shm/refinement/adapters/mtkahypar_refiner.cc index 28dc56c5..8b2711c8 100644 --- a/kaminpar-shm/refinement/adapters/mtkahypar_refiner.cc +++ b/kaminpar-shm/refinement/adapters/mtkahypar_refiner.cc @@ -7,8 +7,8 @@ ******************************************************************************/ #include "kaminpar-shm/refinement/adapters/mtkahypar_refiner.h" -#include 
"kaminpar-shm/context.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" +#include "kaminpar-shm/kaminpar.h" #include "kaminpar-common/logger.h" diff --git a/kaminpar-shm/refinement/adapters/mtkahypar_refiner.h b/kaminpar-shm/refinement/adapters/mtkahypar_refiner.h index ce718d95..7ea1b896 100644 --- a/kaminpar-shm/refinement/adapters/mtkahypar_refiner.h +++ b/kaminpar-shm/refinement/adapters/mtkahypar_refiner.h @@ -7,8 +7,8 @@ ******************************************************************************/ #pragma once -#include "kaminpar-shm/context.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" +#include "kaminpar-shm/kaminpar.h" #include "kaminpar-shm/refinement/refiner.h" namespace kaminpar::shm { diff --git a/kaminpar-shm/refinement/balancer/greedy_balancer.cc b/kaminpar-shm/refinement/balancer/greedy_balancer.cc index dcd8c020..02c1a7b0 100644 --- a/kaminpar-shm/refinement/balancer/greedy_balancer.cc +++ b/kaminpar-shm/refinement/balancer/greedy_balancer.cc @@ -156,7 +156,7 @@ template class GreedyBalancerImpl { while (current_overload > 0 && !_pq.empty(from)) { KASSERT( current_overload == - std::max(0, _p_graph->block_weight(from) - _p_ctx->block_weights.max(from)) + std::max(0, _p_graph->block_weight(from) - _p_ctx->max_block_weight(from)) ); const NodeID u = _pq.peek_max_id(from); @@ -212,7 +212,7 @@ template class GreedyBalancerImpl { KASSERT( current_overload == - std::max(0, _p_graph->block_weight(from) - _p_ctx->block_weights.max(from)) + std::max(0, _p_graph->block_weight(from) - _p_ctx->max_block_weight(from)) ); }); STOP_TIMER(); @@ -340,7 +340,7 @@ template class GreedyBalancerImpl { _graph->adjacent_nodes(u, [&](const NodeID v, const EdgeID w) { const BlockID v_block = _p_graph->block(v); if (u_block != v_block && - _p_graph->block_weight(v_block) + u_weight <= _p_ctx->block_weights.max(v_block)) { + _p_graph->block_weight(v_block) + u_weight <= _p_ctx->max_block_weight(v_block)) { map[v_block] += w; } else if (u_block 
== v_block) { internal_degree += w; @@ -367,7 +367,7 @@ template class GreedyBalancerImpl { } bool move_node_if_possible(const NodeID u, const BlockID from, const BlockID to) { - if (_p_graph->move(u, from, to, _p_ctx->block_weights.max(to))) { + if (_p_graph->move(u, from, to, _p_ctx->max_block_weight(to))) { if (_gain_cache != nullptr) { _gain_cache->move(u, from, to); } @@ -409,7 +409,7 @@ template class GreedyBalancerImpl { auto &blocks = _feasible_target_blocks.local(); blocks.clear(); for (const BlockID b : _p_graph->blocks()) { - if (_p_graph->block_weight(b) < _p_ctx->block_weights.perfectly_balanced(b)) { + if (_p_graph->block_weight(b) < _p_ctx->perfectly_balanced_block_weight(b)) { blocks.push_back(b); } } @@ -422,7 +422,7 @@ template class GreedyBalancerImpl { "block weights!" ); - return std::max(0, _p_graph->block_weight(b) - _p_ctx->block_weights.max(b)); + return std::max(0, _p_graph->block_weight(b) - _p_ctx->max_block_weight(b)); } [[nodiscard]] static inline double diff --git a/kaminpar-shm/refinement/fm/fm_refiner.cc b/kaminpar-shm/refinement/fm/fm_refiner.cc index 795d2fdd..568c0b7d 100644 --- a/kaminpar-shm/refinement/fm/fm_refiner.cc +++ b/kaminpar-shm/refinement/fm/fm_refiner.cc @@ -165,7 +165,7 @@ template class LocalizedFMRefiner { // Accept the move if the target block does not get overloaded const NodeWeight node_weight = _graph.node_weight(node); - if (_d_graph.block_weight(block_to) + node_weight <= _p_ctx.block_weights.max(block_to)) { + if (_d_graph.block_weight(block_to) + node_weight <= _p_ctx.max_block_weight(block_to)) { current_total_gain += actual_gain; // If we found a new local minimum, apply the moves to the global @@ -321,7 +321,7 @@ template class LocalizedFMRefiner { // In this case, old_target_block got even better // We only need to consider other blocks if old_target_block is full now if (_d_graph.block_weight(old_target_block) + _d_graph.node_weight(node) <= - _p_ctx.block_weights.max(old_target_block)) { + 
_p_ctx.max_block_weight(old_target_block)) { _node_pqs[old_block].change_priority( node, _d_gain_cache.gain(node, old_block, old_target_block) ); @@ -350,7 +350,7 @@ template class LocalizedFMRefiner { if (gain_moved_to > gain_old_target_block && _d_graph.block_weight(moved_to) + _d_graph.node_weight(node) <= - _p_ctx.block_weights.max(moved_to)) { + _p_ctx.max_block_weight(moved_to)) { _shared.target_blocks[node] = moved_to; _node_pqs[old_block].change_priority(node, gain_moved_to); } else { @@ -371,11 +371,11 @@ template class LocalizedFMRefiner { EdgeWeight best_gain = std::numeric_limits::min(); BlockID best_target_block = from; NodeWeight best_target_block_weight_gap = - _p_ctx.block_weights.max(from) - p_graph.block_weight(from); + _p_ctx.max_block_weight(from) - p_graph.block_weight(from); gain_cache.gains(u, from, [&](const BlockID to, auto &&compute_gain) { const NodeWeight target_block_weight = p_graph.block_weight(to) + weight; - const NodeWeight max_block_weight = _p_ctx.block_weights.max(to); + const NodeWeight max_block_weight = _p_ctx.max_block_weight(to); const NodeWeight block_weight_gap = max_block_weight - target_block_weight; if (block_weight_gap < std::min(best_target_block_weight_gap, 0)) { return; diff --git a/kaminpar-shm/refinement/jet/jet_refiner.h b/kaminpar-shm/refinement/jet/jet_refiner.h index a78b3f93..d27f90c4 100644 --- a/kaminpar-shm/refinement/jet/jet_refiner.h +++ b/kaminpar-shm/refinement/jet/jet_refiner.h @@ -8,8 +8,8 @@ ******************************************************************************/ #pragma once -#include "kaminpar-shm/context.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" +#include "kaminpar-shm/kaminpar.h" #include "kaminpar-shm/refinement/refiner.h" namespace kaminpar::shm { diff --git a/kaminpar-shm/refinement/lp/lp_refiner.cc b/kaminpar-shm/refinement/lp/lp_refiner.cc index fcd749b4..8d11c0a9 100644 --- a/kaminpar-shm/refinement/lp/lp_refiner.cc +++ b/kaminpar-shm/refinement/lp/lp_refiner.cc 
@@ -126,7 +126,7 @@ class LPRefinerImpl final return _p_graph->k(); } [[nodiscard]] BlockWeight max_cluster_weight(const BlockID block) { - return _p_ctx->block_weights.max(block); + return _p_ctx->max_block_weight(block); } template diff --git a/kaminpar-shm/refinement/multi_refiner.cc b/kaminpar-shm/refinement/multi_refiner.cc index 1673697e..9b448aff 100644 --- a/kaminpar-shm/refinement/multi_refiner.cc +++ b/kaminpar-shm/refinement/multi_refiner.cc @@ -7,7 +7,7 @@ ******************************************************************************/ #include "kaminpar-shm/refinement/multi_refiner.h" -#include "kaminpar-shm/context.h" +#include "kaminpar-shm/kaminpar.h" #include "kaminpar-shm/metrics.h" #include "kaminpar-shm/refinement/refiner.h" @@ -60,6 +60,7 @@ bool MultiRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_c imbalance_before = imbalance_after; cut_before = cut_after; + feasible_before = feasible_after; } else if (_output_level >= OutputLevel::INFO) { LOG; } diff --git a/kaminpar-shm/refinement/multi_refiner.h b/kaminpar-shm/refinement/multi_refiner.h index d414e292..764b82db 100644 --- a/kaminpar-shm/refinement/multi_refiner.h +++ b/kaminpar-shm/refinement/multi_refiner.h @@ -11,7 +11,7 @@ #include #include -#include "kaminpar-shm/context.h" +#include "kaminpar-shm/kaminpar.h" #include "kaminpar-shm/refinement/refiner.h" namespace kaminpar::shm { diff --git a/kaminpar-shm/refinement/refiner.h b/kaminpar-shm/refinement/refiner.h index 14feaebf..5d082e10 100644 --- a/kaminpar-shm/refinement/refiner.h +++ b/kaminpar-shm/refinement/refiner.h @@ -9,8 +9,8 @@ #include -#include "kaminpar-shm/context.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" +#include "kaminpar-shm/kaminpar.h" namespace kaminpar::shm { diff --git a/tests/shm/coarsening/cluster_contraction_test.cc b/tests/shm/coarsening/cluster_contraction_test.cc index 9d2f893f..3f4e8b6f 100644 --- a/tests/shm/coarsening/cluster_contraction_test.cc +++ 
b/tests/shm/coarsening/cluster_contraction_test.cc @@ -227,23 +227,14 @@ TEST( * 10 11 */ Graph graph = make_graph({0, 0, 1, 3, 4, 5, 5, 5, 7, 8, 8, 8, 8}, {2, 1, 3, 2, 7, 4, 8, 7}); - - PartitionContext p_ctx; - p_ctx.k = 2; - p_ctx.epsilon = 0.17; // max block weight 7 - graph = graph::rearrange_by_degree_buckets(*dynamic_cast(graph.underlying_graph())); - graph::remove_isolated_nodes(graph, p_ctx); + graph.remove_isolated_nodes(graph::count_isolated_nodes(graph)); EXPECT_EQ(graph.n(), 6); EXPECT_EQ(graph.m(), 8); for (const NodeID v : (*dynamic_cast(graph.underlying_graph())).raw_edges()) { EXPECT_LT(v, 7); - } // edges are valid - - // total weight of new graph: 6, perfectly balanced block weight: 3 - // hence eps' should be 1.3333.... - EXPECT_THAT(p_ctx.epsilon, AllOf(Gt(1.33), Lt(1.34))); + } } // diff --git a/tests/shm/initial_partitioning/initial_coarsener_test.cc b/tests/shm/initial_partitioning/initial_coarsener_test.cc index 94cc8453..397b5ffa 100644 --- a/tests/shm/initial_partitioning/initial_coarsener_test.cc +++ b/tests/shm/initial_partitioning/initial_coarsener_test.cc @@ -10,8 +10,8 @@ #include "tests.h" #include "tests/shm/matcher.h" -#include "kaminpar-shm/context.h" #include "kaminpar-shm/initial_partitioning/initial_coarsener.h" +#include "kaminpar-shm/kaminpar.h" using ::testing::UnorderedElementsAre; diff --git a/tests/shm/metrics_test.cc b/tests/shm/metrics_test.cc index d4cfc2e3..e4c4c75b 100644 --- a/tests/shm/metrics_test.cc +++ b/tests/shm/metrics_test.cc @@ -6,6 +6,7 @@ #include "kaminpar-shm/metrics.h" namespace kaminpar::shm::testing { + class MetricsTestFixture : public ::testing::Test { public: Graph graph = make_graph( @@ -68,12 +69,14 @@ TEST_F(MetricsTestFixture, imbalanced_bipartition_balance) { EXPECT_DOUBLE_EQ(metrics::imbalance(p_graph), 0.5); } -inline Context -create_testing_context(const Graph &graph, const BlockID k = 2, const double epsilon = 0.03) { +inline Context create_testing_context( + const Graph &graph, + const 
BlockID k = 2, + const double epsilon = 0.03, + const bool relax_block_weights = true +) { Context context = create_default_context(); - context.partition.k = k; - context.partition.epsilon = epsilon; - context.setup(graph); + context.partition.setup(graph, k, epsilon, relax_block_weights); return context; } @@ -95,4 +98,5 @@ TEST(MetricsTest, is_feasible_with_multiple_nodes) { p_graph.set_block(2, 0); EXPECT_FALSE(metrics::is_feasible(p_graph, ctx.partition)); } + } // namespace kaminpar::shm::testing diff --git a/tests/shm/refinement/gain_cache_test.cc b/tests/shm/refinement/gain_cache_test.cc index 3ef9b8bc..be92db39 100644 --- a/tests/shm/refinement/gain_cache_test.cc +++ b/tests/shm/refinement/gain_cache_test.cc @@ -24,8 +24,7 @@ namespace { template class GainCacheTest : public ::testing::Test { public: void init(const PartitionedGraph &p_graph) { - _ctx.partition.k = p_graph.k(); - _ctx.setup(p_graph.graph()); + _ctx.partition.setup(p_graph.graph(), p_graph.k(), 0.03); this->_gain_cache = std::make_unique(this->_ctx, p_graph.n(), p_graph.k()); this->_gain_cache->initialize(p_graph.graph(), p_graph);