From 0d86738be070ac5e0b7ec1d9d40dbf3f37165619 Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Wed, 30 Oct 2024 13:43:49 +0100
Subject: [PATCH 1/6] refactor(dist): remove OpenMP dependency in sparse
 alltoall

---
 kaminpar-dist/graphutils/communication.h | 312 ++++++++++++-----------
 1 file changed, 158 insertions(+), 154 deletions(-)

diff --git a/kaminpar-dist/graphutils/communication.h b/kaminpar-dist/graphutils/communication.h
index 7e78e4b8..445fbf30 100644
--- a/kaminpar-dist/graphutils/communication.h
+++ b/kaminpar-dist/graphutils/communication.h
@@ -9,7 +9,7 @@
 
 #include
 
-#include
+#include
 
 #include "kaminpar-mpi/sparse_alltoall.h"
 #include "kaminpar-mpi/utils.h"
@@ -21,6 +21,7 @@
 #include "kaminpar-common/datastructures/marker.h"
 #include "kaminpar-common/datastructures/noinit_vector.h"
 #include "kaminpar-common/logger.h"
+#include "kaminpar-common/parallel/loops.h"
 
 #define SPARSE_ALLTOALL_NOFILTER \
   [](NodeID) { \
   }
 
 namespace kaminpar::mpi::graph {
+
 using namespace kaminpar::dist;
 
 SET_DEBUG(false);
 
 namespace internal {
+
 template void inclusive_col_prefix_sum(Data &data) {
   if (data.empty()) {
     return;
   }
@@ -46,6 +49,7 @@ template void inclusive_col_prefix_sum(Data &data) {
     }
   }
 }
+
 } // namespace internal
 
 /**
@@ -144,40 +148,41 @@ void sparse_alltoall_interface_to_ghost_custom_range(
   const auto [size, rank] = mpi::get_comm_info(graph.communicator());
 
-  // START_TIMER("Message construction");
-
   // Allocate message counters
-  const PEID num_threads = omp_get_max_threads();
+  const PEID num_threads = tbb::this_task_arena::max_concurrency();
   std::vector> num_messages(
       num_threads, CacheAlignedVector(size)
   );
 
   // Count messages to each PE for each thread
-#pragma omp parallel for default(none) shared(graph, from, to, mapper, num_messages, filter)
-  for (NodeID seq_u = from; seq_u < to; ++seq_u) {
-    const NodeID u = mapper(seq_u);
-
-    if constexpr (filter_invocable_with_node) {
-      if (!filter(u)) {
-        continue;
-      }
-    }
-
-    const PEID thread = omp_get_thread_num();
-
-    graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) {
-      if (graph.is_ghost_node(v)) {
-        if constexpr (filter_invocable_with_edge) {
-          if (!filter(u, e, v, w)) {
-            return;
-          }
-        }
-
-        const PEID owner = graph.ghost_owner(v);
-        ++num_messages[thread][owner];
-      }
-    });
-  }
+  parallel::deterministic_for(
+      from,
+      to,
+      [&](const NodeID range_from, const NodeID range_to, const PEID thread) {
+        for (NodeID seq_u = range_from; seq_u < range_to; ++seq_u) {
+          const NodeID u = mapper(seq_u);
+
+          if constexpr (filter_invocable_with_node) {
+            if (!filter(u)) {
+              continue;
+            }
+          }
+
+          graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) {
+            if (graph.is_ghost_node(v)) {
+              if constexpr (filter_invocable_with_edge) {
+                if (!filter(u, e, v, w)) {
+                  return;
+                }
+              }
+
+              const PEID owner = graph.ghost_owner(v);
+              ++num_messages[thread][owner];
+            }
+          });
+        }
+      }
+  );
 
   // Offset messages for each thread
   internal::inclusive_col_prefix_sum(num_messages);
@@ -188,38 +193,39 @@ void sparse_alltoall_interface_to_ghost_custom_range(
     send_buffers[pe].resize(num_messages.back()[pe]);
   });
 
-#pragma omp parallel for default(none) \
-    shared(send_buffers, from, to, mapper, filter, graph, builder, num_messages)
-  for (NodeID seq_u = from; seq_u < to; ++seq_u) {
-    const NodeID u = mapper(seq_u);
-
-    if constexpr (filter_invocable_with_node) {
-      if (!filter(u)) {
-        continue;
-      }
-    }
-
-    const PEID thread = omp_get_thread_num();
-    graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) {
-      if (graph.is_ghost_node(v)) {
-        if constexpr (filter_invocable_with_edge) {
-          if (!filter(u, e, v, w)) {
-            return;
-          }
-        }
-
-        const PEID pe = graph.ghost_owner(v);
-        const std::size_t slot = --num_messages[thread][pe];
-        if constexpr (builder_invocable_with_pe) {
-          send_buffers[pe][slot] = builder(u, e, v, w, pe);
-        } else /* if (builder_invocable_without_pe) */ {
-          send_buffers[pe][slot] = builder(u, e, v, w);
-        }
-      }
-    });
-  }
-
-  // STOP_TIMER();
+  parallel::deterministic_for(
+      from,
+      to,
+      [&](const NodeID range_from, const NodeID range_to, const PEID thread) {
+        for (NodeID seq_u = range_from; seq_u < range_to; ++seq_u) {
+          const NodeID u = mapper(seq_u);
+
+          if constexpr (filter_invocable_with_node) {
+            if (!filter(u)) {
+              continue;
+            }
+          }
+
+          graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) {
+            if (graph.is_ghost_node(v)) {
+              if constexpr (filter_invocable_with_edge) {
+                if (!filter(u, e, v, w)) {
+                  return;
+                }
+              }
+
+              const PEID pe = graph.ghost_owner(v);
+              const std::size_t slot = --num_messages[thread][pe];
+              if constexpr (builder_invocable_with_pe) {
+                send_buffers[pe][slot] = builder(u, e, v, w, pe);
+              } else /* if (builder_invocable_without_pe) */ {
+                send_buffers[pe][slot] = builder(u, e, v, w);
+              }
+            }
+          });
+        }
+      }
+  );
 
   sparse_alltoall(
       std::move(send_buffers), std::forward(receiver), graph.communicator()
@@ -470,48 +476,49 @@ void sparse_alltoall_interface_to_pe_custom_range(
 
   // START_TIMER("Message construction");
 
   // Allocate message counters
-  const PEID num_threads = omp_get_max_threads();
+  const PEID num_threads = tbb::this_task_arena::max_concurrency();
   std::vector> num_messages(
       num_threads, CacheAlignedVector(size)
   );
 
-#pragma omp parallel default(none) shared(size, from, to, mapper, filter, graph, num_messages)
-  {
-    Marker<> created_message_for_pe(static_cast(size));
-    const PEID thread = omp_get_thread_num();
-
-#pragma omp for
-    for (NodeID seq_u = from; seq_u < to; ++seq_u) {
-      const NodeID u = mapper(seq_u);
-
-      if constexpr (filter_invocable_with_unmapped_node) {
-        if (!filter(seq_u, u)) {
-          continue;
-        }
-      } else {
-        if (!filter(u)) {
-          continue;
-        }
-      }
-
-      graph.adjacent_nodes(u, [&](const NodeID v) {
-        if (!graph.is_ghost_node(v)) {
-          return;
-        }
-
-        const PEID pe = graph.ghost_owner(v);
-
-        if (created_message_for_pe.get(pe)) {
-          return;
-        }
-        created_message_for_pe.set(pe);
-
-        ++num_messages[thread][pe];
-      });
-
-      created_message_for_pe.reset();
-    }
-  }
+  parallel::deterministic_for(
+      from,
+      to,
+      [&](const NodeID range_from, const NodeID range_to, const PEID thread) {
+        Marker<> created_message_for_pe(static_cast(size));
+
+        for (NodeID seq_u = range_from; seq_u < range_to; ++seq_u) {
+          const NodeID u = mapper(seq_u);
+
+          if constexpr (filter_invocable_with_unmapped_node) {
+            if (!filter(seq_u, u)) {
+              continue;
+            }
+          } else {
+            if (!filter(u)) {
+              continue;
+            }
+          }
+
+          graph.adjacent_nodes(u, [&](const NodeID v) {
+            if (!graph.is_ghost_node(v)) {
+              return;
+            }
+
+            const PEID pe = graph.ghost_owner(v);
+
+            if (created_message_for_pe.get(pe)) {
+              return;
+            }
+            created_message_for_pe.set(pe);
+
+            ++num_messages[thread][pe];
+          });
+
+          created_message_for_pe.reset();
+        }
+      }
+  );
 
   // Offset messages for each thread
   internal::inclusive_col_prefix_sum(num_messages);
@@ -522,55 +529,52 @@ void sparse_alltoall_interface_to_pe_custom_range(
     send_buffers[pe].resize(num_messages.back()[pe]);
   });
 
-  // Fill buffers
-#pragma omp parallel default(none) \
-    shared(send_buffers, size, from, to, mapper, builder, filter, graph, num_messages)
-  {
-    Marker<> created_message_for_pe(static_cast(size));
-    const PEID thread = omp_get_thread_num();
-
-#pragma omp for
-    for (NodeID seq_u = from; seq_u < to; ++seq_u) {
-      const NodeID u = mapper(seq_u);
-
-      if constexpr (filter_invocable_with_unmapped_node) {
-        if (!filter(seq_u, u)) {
-          continue;
-        }
-      } else {
-        if (!filter(u)) {
-          continue;
-        }
-      }
-
-      graph.adjacent_nodes(u, [&](const NodeID v) {
-        if (!graph.is_ghost_node(v)) {
-          return;
-        }
-
-        const PEID pe = graph.ghost_owner(v);
-
-        if (created_message_for_pe.get(pe)) {
-          return;
-        }
-        created_message_for_pe.set(pe);
-
-        const auto slot = --num_messages[thread][pe];
-
-        if constexpr (builder_invocable_with_pe) {
-          send_buffers[pe][slot] = builder(u, pe);
-        } else if constexpr (builder_invocable_with_pe_and_unmapped_node) {
-          send_buffers[pe][slot] = builder(seq_u, u, pe);
-        } else {
-          send_buffers[pe][slot] = builder(u);
-        }
-      });
-
-      created_message_for_pe.reset();
-    }
-  }
-
-  // STOP_TIMER();
+  parallel::deterministic_for(
+      from,
+      to,
+      [&](const NodeID range_from, const NodeID range_to, const PEID thread) {
+        Marker<> created_message_for_pe(static_cast(size));
+
+        for (NodeID seq_u = range_from; seq_u < range_to; ++seq_u) {
+          const NodeID u = mapper(seq_u);
+
+          if constexpr (filter_invocable_with_unmapped_node) {
+            if (!filter(seq_u, u)) {
+              continue;
+            }
+          } else {
+            if (!filter(u)) {
+              continue;
+            }
+          }
+
+          graph.adjacent_nodes(u, [&](const NodeID v) {
+            if (!graph.is_ghost_node(v)) {
+              return;
+            }
+
+            const PEID pe = graph.ghost_owner(v);
+
+            if (created_message_for_pe.get(pe)) {
+              return;
+            }
+            created_message_for_pe.set(pe);
+
+            const auto slot = --num_messages[thread][pe];
+
+            if constexpr (builder_invocable_with_pe) {
+              send_buffers[pe][slot] = builder(u, pe);
+            } else if constexpr (builder_invocable_with_pe_and_unmapped_node) {
+              send_buffers[pe][slot] = builder(seq_u, u, pe);
+            } else {
+              send_buffers[pe][slot] = builder(u);
+            }
+          });
+
+          created_message_for_pe.reset();
+        }
+      }
+  );
 
   sparse_alltoall(
       std::move(send_buffers), std::forward(receiver), graph.communicator()
@@ -752,22 +756,23 @@ void sparse_alltoall_custom(
 
   // START_TIMER("Message construction");
 
   // Allocate message counters
-  const PEID num_threads = omp_get_max_threads();
+  const PEID num_threads = tbb::this_task_arena::max_concurrency();
   std::vector> num_messages(
       num_threads, CacheAlignedVector(size)
   );
 
   // Count messages to each PE for each thread
-#pragma omp parallel default(none) shared(pe_getter, size, from, to, filter, graph, num_messages)
-  {
-    const PEID thread = omp_get_thread_num();
-#pragma omp for
-    for (NodeID u = from; u < to; ++u) {
-      if (filter(u)) {
-        ++num_messages[thread][pe_getter(u)];
-      }
-    }
-  }
+  parallel::deterministic_for(
+      from,
+      to,
+      [&](const NodeID range_from, const NodeID range_to, const PEID thread) {
+        for (NodeID u = range_from; u < range_to; ++u) {
+          if (filter(u)) {
+            ++num_messages[thread][pe_getter(u)];
+          }
+        }
+      }
+  );
 
   // Offset messages for each thread
   internal::inclusive_col_prefix_sum(num_messages);
@@ -778,22 +783,20 @@ void sparse_alltoall_custom(
     send_buffers[pe].resize(num_messages.back()[pe]);
   });
 
-  // fill buffers
-#pragma omp parallel default(none) \
-    shared(pe_getter, send_buffers, size, from, to, builder, filter, graph, num_messages)
-  {
-    const PEID thread = omp_get_thread_num();
-#pragma omp for
-    for (NodeID u = from; u < to; ++u) {
-      if (filter(u)) {
-        const PEID pe = pe_getter(u);
-        const auto slot = --num_messages[thread][pe];
-        send_buffers[pe][slot] = builder(u);
-      }
-    }
-  }
-
-  // STOP_TIMER();
+  // Fill buffers
+  parallel::deterministic_for(
+      from,
+      to,
+      [&](const NodeID range_from, const NodeID range_to, const PEID thread) {
+        for (NodeID u = range_from; u < range_to; ++u) {
+          if (filter(u)) {
+            const PEID pe = pe_getter(u);
+            const auto slot = --num_messages[thread][pe];
+            send_buffers[pe][slot] = builder(u);
+          }
+        }
+      }
+  );
 
   sparse_alltoall(
       std::move(send_buffers), std::forward(receiver), graph.communicator()
@@ -828,4 +831,5 @@ std::vector sparse_alltoall_custom(
   );
   return recv_buffers;
 }
+
 } // namespace kaminpar::mpi::graph

From 71c49d8325ce8ade821819523858a3fc85a22898 Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Wed, 30 Oct 2024 13:52:12 +0100
Subject: [PATCH 2/6] fix(dist): crash in distributed contraction code when
 executing in hybrid mode due to non-parallelized section

---
 .../coarsening/contraction/global_cluster_contraction.cc | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc
index b4f0182d..556f040a 100644
--- a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc
+++ b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc
@@ -311,11 +311,13 @@ find_nonlocal_nodes(const Graph &graph, const StaticArray &lnode_t
   std::unordered_map nonlocal_nodes;
   std::atomic size = 0;
 
-  graph.pfor_all_nodes([&](const NodeID lnode) {
+  for (NodeID lnode : graph.all_nodes()) {
+    // graph.pfor_all_nodes([&](const NodeID lnode) {
     const GlobalNodeID gcluster = lnode_to_gcluster[lnode];
 
     if (graph.is_owned_global_node(gcluster)) {
-      return;
+      // return;
+      continue;
     }
 
     const NodeWeight weight = graph.is_owned_node(lnode) ? graph.node_weight(lnode) : 0;
@@ -333,7 +335,8 @@ find_nonlocal_nodes(const Graph &graph, const StaticArray &lnode_t
     size.fetch_add(1, std::memory_order_relaxed);
     nonlocal_nodes[gcluster + 1] = weight;
   }
-  });
+  //});
+  }
 
   RECORD("nonlocal_nodes") StaticArray dense_nonlocal_nodes(size);
   std::size_t i = 0;

From 288ed7ee9e7e07d96033e3756d11741a2a1e2c46 Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Wed, 30 Oct 2024 14:03:53 +0100
Subject: [PATCH 3/6] tests(dist): add hybrid endtoend test

---
 tests/endtoend/dist_endtoend_test.cc | 33 ++++++++--------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/tests/endtoend/dist_endtoend_test.cc b/tests/endtoend/dist_endtoend_test.cc
index 8b6670a4..1bceebfd 100644
--- a/tests/endtoend/dist_endtoend_test.cc
+++ b/tests/endtoend/dist_endtoend_test.cc
@@ -144,17 +144,14 @@ TEST(DistEndToEndTest, partitions_unweighted_walshaw_data_graph) {
   EXPECT_EQ(reported_cut, actual_cut / 2);
 }
 
-// Disabled: can fail since we offset the PRNG seed by the thread ID, and not all calls are made by
-// the same threads across multiple runs
-/*
-TEST(DistEndToEndTest, partitions_unweighted_walshaw_data_graph_multiple_times_with_same_seed) {
-  const PEID size = mpi::get_comm_size(MPI_COMM_WORLD);
+TEST(
+    DistEndToEndTest, partitions_unweighted_walshaw_data_graph_multiple_times_with_different_seeds
+) {
   const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD);
 
   auto vtxdist = data::create_vtxdist();
   auto xadj = data::create_xadj();
 
-  const GlobalNodeID global_n = data::global_xadj.size() - 1;
   const NodeID n = xadj.size() - 1;
 
   GlobalNodeID *vtxdist_ptr = vtxdist.data();
@@ -166,23 +163,20 @@ TEST(DistEndToEndTest, partitions_unweighted_walshaw_data_graph_multiple_times_w
   dKaMinPar dist(MPI_COMM_WORLD, 1, create_default_context()); // 1 thread: deterministic
   dist.set_output_level(OutputLevel::QUIET);
   dist.import_graph(vtxdist_ptr, xadj_ptr, adjncy_ptr, nullptr, nullptr);
-  const EdgeWeight reported_cut = dist.compute_partition(16, seed0_partition.data());
+  dist.compute_partition(16, seed0_partition.data());
 
-  for (const int seed : {0, 0, 0}) {
+  for (const int seed : {1, 2, 3}) {
     std::vector partition(n);
     dKaMinPar::reseed(seed);
     dKaMinPar dist(MPI_COMM_WORLD, 1, create_default_context()); // 1 thread: deterministic
     dist.set_output_level(OutputLevel::QUIET);
     dist.import_graph(vtxdist_ptr, xadj_ptr, adjncy_ptr, nullptr, nullptr);
     dist.compute_partition(16, partition.data());
-    EXPECT_EQ(partition, seed0_partition);
+    EXPECT_NE(partition, seed0_partition);
   }
 }
-*/
 
-TEST(
-    DistEndToEndTest, partitions_unweighted_walshaw_data_graph_multiple_times_with_different_seeds
-) {
+TEST(DistEndToEndTest, partitions_unweighted_walshaw_data_graph_with_three_threads_per_mpi) {
   const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD);
 
   auto vtxdist = data::create_vtxdist();
@@ -196,18 +190,9 @@ TEST(
   std::vector seed0_partition(n);
   dKaMinPar::reseed(0);
-  dKaMinPar dist(MPI_COMM_WORLD, 1, create_default_context()); // 1 thread: deterministic
+  dKaMinPar dist(MPI_COMM_WORLD, 3, create_default_context());
   dist.set_output_level(OutputLevel::QUIET);
   dist.import_graph(vtxdist_ptr, xadj_ptr, adjncy_ptr, nullptr, nullptr);
-
-  for (const int seed : {1, 2, 3}) {
-    std::vector partition(n);
-    dKaMinPar::reseed(seed);
-    dKaMinPar dist(MPI_COMM_WORLD, 1, create_default_context()); // 1 thread: deterministic
-    dist.set_output_level(OutputLevel::QUIET);
-    dist.import_graph(vtxdist_ptr, xadj_ptr, adjncy_ptr, nullptr, nullptr);
-    dist.compute_partition(16, partition.data());
-    EXPECT_NE(partition, seed0_partition);
-  }
+  dist.compute_partition(16, seed0_partition.data());
 }
 } // namespace kaminpar::dist

From e5c53fff6ae242fc922a2a695304b1a5547e056f Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Wed, 30 Oct 2024 14:05:00 +0100
Subject: [PATCH 4/6] style(tests): add empty lines after namespace

---
 tests/endtoend/dist_endtoend_test.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/endtoend/dist_endtoend_test.cc b/tests/endtoend/dist_endtoend_test.cc
index 1bceebfd..9f7cfb23 100644
--- a/tests/endtoend/dist_endtoend_test.cc
+++ b/tests/endtoend/dist_endtoend_test.cc
@@ -17,7 +17,9 @@
 #include "kaminpar-common/math.h"
 
 namespace kaminpar::dist {
+
 namespace data {
+
 static std::vector global_xadj = {
 #include "data.graph.xadj"
 };
@@ -54,6 +56,7 @@ std::vector create_xadj() {
 
   return xadj;
 }
+
 } // namespace data
 
 TEST(DistEndToEndTest, partitions_empty_unweighted_graph) {
@@ -195,4 +198,5 @@ TEST(DistEndToEndTest, partitions_unweighted_walshaw_data_graph_with_three_threa
   dist.import_graph(vtxdist_ptr, xadj_ptr, adjncy_ptr, nullptr, nullptr);
   dist.compute_partition(16, seed0_partition.data());
 }
+
 } // namespace kaminpar::dist

From 7142ff424e9ef51e3d51ba401c294cb6e83ff9ae Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Wed, 30 Oct 2024 14:11:58 +0100
Subject: [PATCH 5/6] refactor(dist): remove remaining OpenMP dependencies

---
 apps/benchmarks/dist_block_clustering_benchmark.cc |  2 --
 apps/benchmarks/dist_coarsening_benchmark.cc       |  2 --
 apps/benchmarks/dist_coloring_benchmark.cc         |  2 --
 apps/benchmarks/dist_contraction_benchmark.cc      |  2 --
 apps/benchmarks/dist_refinement_benchmark.cc       |  2 --
 kaminpar-dist/datastructures/growt.h               | 12 ++++++------
 kaminpar-dist/dkaminpar.cc                         |  2 --
 kaminpar-mpi/CMakeLists.txt                        |  3 +--
 8 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/apps/benchmarks/dist_block_clustering_benchmark.cc b/apps/benchmarks/dist_block_clustering_benchmark.cc
index bffda636..9e399c3c 100644
--- a/apps/benchmarks/dist_block_clustering_benchmark.cc
+++ b/apps/benchmarks/dist_block_clustering_benchmark.cc
@@ -12,7 +12,6 @@
 #include
 #include
-#include
 
 #include "kaminpar-dist/context.h"
 #include "kaminpar-dist/context_io.h"
@@ -56,7 +55,6 @@ int main(int argc, char *argv[]) {
   CLI11_PARSE(app, argc, argv);
 
   tbb::global_control gc(tbb::global_control::max_allowed_parallelism, ctx.parallel.num_threads);
-  omp_set_num_threads(ctx.parallel.num_threads);
 
   auto wrapper = load_partitioned_graph(graph_filename, partition_filename);
   auto &graph = *wrapper.graph;
diff --git a/apps/benchmarks/dist_coarsening_benchmark.cc b/apps/benchmarks/dist_coarsening_benchmark.cc
index 9a175253..e1f19e81 100644
--- a/apps/benchmarks/dist_coarsening_benchmark.cc
+++ b/apps/benchmarks/dist_coarsening_benchmark.cc
@@ -10,7 +10,6 @@
 // clang-format on
 
 #include
-#include
 
 #include "kaminpar-dist/coarsening/coarsener.h"
 #include "kaminpar-dist/context.h"
@@ -49,7 +48,6 @@ int main(int argc, char *argv[]) {
   CLI11_PARSE(app, argc, argv);
 
   tbb::global_control gc(tbb::global_control::max_allowed_parallelism, ctx.parallel.num_threads);
-  omp_set_num_threads(ctx.parallel.num_threads);
 
   auto wrapper = load_graph(graph_filename);
   auto &graph = *wrapper.graph;
diff --git a/apps/benchmarks/dist_coloring_benchmark.cc b/apps/benchmarks/dist_coloring_benchmark.cc
index 8ac731c3..e47b826f 100644
--- a/apps/benchmarks/dist_coloring_benchmark.cc
+++ b/apps/benchmarks/dist_coloring_benchmark.cc
@@ -10,7 +10,6 @@
 // clang-format on
 
 #include
-#include
 #include
 #include
 
@@ -38,7 +37,6 @@ int main(int argc, char *argv[]) {
   CLI11_PARSE(app, argc, argv);
 
   tbb::global_control gc(tbb::global_control::max_allowed_parallelism, ctx.parallel.num_threads);
-  omp_set_num_threads(ctx.parallel.num_threads);
 
   auto wrapper = load_graph(graph_filename);
   auto &graph = *wrapper.graph;
diff --git a/apps/benchmarks/dist_contraction_benchmark.cc b/apps/benchmarks/dist_contraction_benchmark.cc
index 1ba08532..9a4776ae 100644
--- a/apps/benchmarks/dist_contraction_benchmark.cc
+++ b/apps/benchmarks/dist_contraction_benchmark.cc
@@ -10,7 +10,6 @@
 // clang-format on
 
 #include
-#include
 
 #include "kaminpar-dist/coarsening/contraction/global_cluster_contraction.h"
 #include "kaminpar-dist/context.h"
@@ -42,7 +41,6 @@ int main(int argc, char *argv[]) {
   CLI11_PARSE(app, argc, argv);
 
   tbb::global_control gc(tbb::global_control::max_allowed_parallelism, ctx.parallel.num_threads);
-  omp_set_num_threads(ctx.parallel.num_threads);
 
   auto wrapper = load_graph(graph_filename);
   auto &graph = *wrapper.graph;
diff --git a/apps/benchmarks/dist_refinement_benchmark.cc b/apps/benchmarks/dist_refinement_benchmark.cc
index 993bc3e9..0eca225e 100644
--- a/apps/benchmarks/dist_refinement_benchmark.cc
+++ b/apps/benchmarks/dist_refinement_benchmark.cc
@@ -12,7 +12,6 @@
 #include
 #include
-#include
 
 #include "kaminpar-dist/context.h"
 #include "kaminpar-dist/context_io.h"
@@ -62,7 +61,6 @@ int main(int argc, char *argv[]) {
   }
 
   tbb::global_control gc(tbb::global_control::max_allowed_parallelism, ctx.parallel.num_threads);
-  omp_set_num_threads(ctx.parallel.num_threads);
 
   auto wrapper = load_partitioned_graph(graph_filename, partition_filename);
   auto &graph = *wrapper.graph;
diff --git a/kaminpar-dist/datastructures/growt.h b/kaminpar-dist/datastructures/growt.h
index 069625c2..e504fb8e 100644
--- a/kaminpar-dist/datastructures/growt.h
+++ b/kaminpar-dist/datastructures/growt.h
@@ -10,6 +10,8 @@
 #include
 #include
+#include
+#include
 
 #include "kaminpar-dist/dkaminpar.h"
 
@@ -59,8 +61,7 @@ using StaticGhostNodeMapping = typename ::growt::
 template void pfor_map(Map &map, Lambda &&lambda) {
   std::atomic_size_t counter = 0;
 
-#pragma omp parallel default(none) shared(map, counter, lambda)
-  {
+  tbb::parallel_for(0, tbb::this_task_arena::max_concurrency(), [&](const int) {
     const std::size_t capacity = map.capacity();
     std::size_t cur_block = counter.fetch_add(4096);
 
@@ -71,14 +72,13 @@ template void pfor_map(Map &map, Lambda &&lambda
       }
       cur_block = counter.fetch_add(4096);
     }
-  }
+  });
 }
 
 template void pfor_handles(Handles &handles, Lambda &&lambda) {
   std::atomic_size_t counter = 0;
 
-#pragma omp parallel default(none) shared(handles, counter, lambda)
-  {
+  tbb::parallel_for(0, tbb::this_task_arena::max_concurrency(), [&](const int) {
     auto &handle = handles.local();
     const std::size_t capacity = handle.capacity();
     std::size_t cur_block = counter.fetch_add(4096);
@@ -90,6 +90,6 @@ template void pfor_handles(Handles &handles,
       }
       cur_block = counter.fetch_add(4096);
     }
-  }
+  });
 }
 } // namespace kaminpar::dist::growt
diff --git a/kaminpar-dist/dkaminpar.cc b/kaminpar-dist/dkaminpar.cc
index 965bafe4..5aa967b1 100644
--- a/kaminpar-dist/dkaminpar.cc
+++ b/kaminpar-dist/dkaminpar.cc
@@ -11,7 +11,6 @@
 #include
 #include
-#include
 #include
 #include
 
@@ -157,7 +156,6 @@ dKaMinPar::dKaMinPar(MPI_Comm comm, const int num_threads, const Context ctx)
       _num_threads(num_threads),
       _ctx(ctx),
       _gc(tbb::global_control::max_allowed_parallelism, num_threads) {
-  omp_set_num_threads(num_threads);
 #ifdef KAMINPAR_ENABLE_TIMERS
   GLOBAL_TIMER.reset();
 #endif // KAMINPAR_ENABLE_TIMERS
diff --git a/kaminpar-mpi/CMakeLists.txt b/kaminpar-mpi/CMakeLists.txt
index 8ad648aa..d3c782dd 100644
--- a/kaminpar-mpi/CMakeLists.txt
+++ b/kaminpar-mpi/CMakeLists.txt
@@ -8,5 +8,4 @@ target_compile_options(kaminpar_mpi PRIVATE ${KAMINPAR_WARNING_FLAGS})
 
 find_library(NUMA_LIB numa) # optional
 
-find_package(OpenMP REQUIRED)
-target_link_libraries(kaminpar_mpi PUBLIC kaminpar_common MPI::MPI_CXX OpenMP::OpenMP_CXX)
+target_link_libraries(kaminpar_mpi PUBLIC kaminpar_common MPI::MPI_CXX)

From 6940b69f306eb32b5b1a6db1fa1ca8730c61fdb7 Mon Sep 17 00:00:00 2001
From: Daniel Seemaier
Date: Wed, 30 Oct 2024 14:14:05 +0100
Subject: [PATCH 6/6] ci: no longer install OpenMP

---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index aff26c47..dd92f39e 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -19,7 +19,7 @@ jobs:
         build-mode: [Release]
     steps:
       - name: Install dependencies
-        run: brew install llvm libomp tbb open-mpi google-sparsehash
+        run: brew install llvm tbb open-mpi google-sparsehash
      - name: Checkout HEAD
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
@@ -63,7 +63,7 @@ jobs:
        build-mode: [Release]
    steps:
      - name: Install dependencies
-        run: sudo apt-get install -y libtbb-dev libhwloc-dev mpi libopenmpi-dev libomp-dev libsparsehash-dev
+        run: sudo apt-get install -y libtbb-dev libhwloc-dev mpi libopenmpi-dev libsparsehash-dev
      - name: Checkout HEAD
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
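
Editor's note on the series: every patch above replaces an OpenMP worksharing construct with `parallel::deterministic_for` from `kaminpar-common/parallel/loops.h`, but that helper itself is not part of the series, so its contract has to be inferred from the call sites. The following is a minimal, hypothetical sketch of that assumed contract only — split `[from, to)` into one contiguous chunk per TBB concurrency slot and pass the chunk index where the old code used `omp_get_thread_num()` — not the actual kaminpar-common implementation; the name `sketch::deterministic_for` and the chunking policy are assumptions.

```cpp
// Hypothetical sketch of the deterministic_for contract assumed by the patches above.
// This is NOT the kaminpar-common implementation; it only illustrates the shape of the API.
#include <cstdint>
#include <tbb/parallel_for.h>
#include <tbb/task_arena.h>

namespace sketch {

template <typename Index, typename Lambda>
void deterministic_for(const Index from, const Index to, Lambda &&lambda) {
  // One contiguous chunk per concurrency slot: the chunk -> "thread" mapping (and thus the
  // contents of per-thread counters indexed by it) does not depend on TBB scheduling decisions.
  const int num_chunks = tbb::this_task_arena::max_concurrency();
  const std::uint64_t total = static_cast<std::uint64_t>(to - from);

  tbb::parallel_for(0, num_chunks, [&](const int chunk) {
    const Index chunk_from = from + static_cast<Index>(total * chunk / num_chunks);
    const Index chunk_to = from + static_cast<Index>(total * (chunk + 1) / num_chunks);
    // The chunk index plays the role of the former omp_get_thread_num().
    lambda(chunk_from, chunk_to, chunk);
  });
}

} // namespace sketch
```

Under this reading, the `num_messages[thread][pe]` counters in patch 1 stay reproducible across runs as long as the arena size is fixed, which is presumably why the per-thread indexing scheme carried over from the OpenMP version unchanged.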