Skip to content

Commit

Permalink
[KaMinPar] Rework settings for max core usage during deep initial partitioning
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielSeemaier committed Sep 18, 2023
1 parent 49f59a2 commit f157df3
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 33 deletions.
11 changes: 9 additions & 2 deletions kaminpar/context_io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ void print(const LabelPropagationCoarseningContext &lp_ctx, std::ostream &out) {
}

void print(const InitialPartitioningContext &i_ctx, std::ostream &out) {
out << "Initial partitioning mode: " << i_ctx.mode << "\n";
out << "Adaptive algorithm selection: "
<< (i_ctx.use_adaptive_bipartitioner_selection ? "yes" : "no") << "\n";
}
Expand Down Expand Up @@ -206,12 +205,20 @@ void print(const PartitionContext &p_ctx, std::ostream &out) {
<< p_ctx.block_weights.perfectly_balanced(0) << " + " << 100 * p_ctx.epsilon << "%)\n";
}

// Writes the selected partitioning scheme to `out`. The two deep-multilevel initial partitioning
// settings (mode and load) are only listed when the scheme is PartitioningMode::DEEP.
void print(const PartitioningContext &p_ctx, std::ostream &out) {
  out << "Partitioning mode: " << p_ctx.mode << "\n";

  // Nothing else to report unless the deep scheme is selected.
  const bool is_deep_scheme = (p_ctx.mode == PartitioningMode::DEEP);
  if (!is_deep_scheme) {
    return;
  }

  out << " Deep initial part. mode: " << p_ctx.deep_initial_partitioning_mode << "\n"
      << " Deep initial part. load: " << p_ctx.deep_initial_partitioning_load << "\n";
}

void print(const Context &ctx, std::ostream &out) {
out << "Execution mode: " << ctx.parallel.num_threads << "\n";
out << "Graph: " << ctx.debug.graph_name << "\n";
print(ctx.partition, out);
cio::print_delimiter("Partitioning Scheme", '-');
out << "Partitioning mode: " << ctx.mode << "\n";
print(ctx.partitioning, out);
cio::print_delimiter("Coarsening", '-');
print(ctx.coarsening, out);
cio::print_delimiter("Initial Partitioning", '-');
Expand Down
1 change: 1 addition & 0 deletions kaminpar/context_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ std::ostream &operator<<(std::ostream &out, InitialPartitioningMode mode);
std::unordered_map<std::string, InitialPartitioningMode> get_initial_partitioning_modes();

void print(const Context &ctx, std::ostream &out);
void print(const PartitioningContext &p_ctx, std::ostream &out);
void print(const PartitionContext &p_ctx, std::ostream &out);
void print(const RefinementContext &r_ctx, std::ostream &out);
void print(const CoarseningContext &c_ctx, std::ostream &out);
Expand Down
2 changes: 1 addition & 1 deletion kaminpar/factories.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

namespace kaminpar::shm::factory {
std::unique_ptr<Partitioner> create_partitioner(const Graph &graph, const Context &ctx) {
switch (ctx.mode) {
switch (ctx.partitioning.mode) {
case PartitioningMode::DEEP: {
return std::make_unique<DeepMultilevelPartitioner>(graph, ctx);
}
Expand Down
11 changes: 7 additions & 4 deletions kaminpar/kaminpar.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,6 @@ struct InitialRefinementContext {
};

struct InitialPartitioningContext {
InitialPartitioningMode mode;

InitialCoarseningContext coarsening;
InitialRefinementContext refinement;

Expand All @@ -203,7 +201,6 @@ struct InitialPartitioningContext {
std::size_t max_num_repetitions;
std::size_t num_seed_iterations;
bool use_adaptive_bipartitioner_selection;
std::size_t multiplier_exponent;
};

//
Expand Down Expand Up @@ -260,9 +257,15 @@ enum class PartitioningMode {
RB,
};

struct Context {
// Settings that select the overall partitioning scheme and configure the initial partitioning
// phase of the deep multilevel scheme.
struct PartitioningContext {
// Partitioning scheme to run (deep multilevel or recursive multilevel bipartitioning).
PartitioningMode mode;

// Initial partitioning mode used by deep multilevel (sequential, async-parallel or sync-parallel
// according to the CLI help text).
InitialPartitioningMode deep_initial_partitioning_mode;
// Fraction of cores used for the coarse graph replication phase of deep multilevel; it is
// multiplied with the thread count when computing the number of threads for parallel initial
// partitioning.
double deep_initial_partitioning_load;
};

struct Context {
PartitioningContext partitioning;
PartitionContext partition;
CoarseningContext coarsening;
InitialPartitioningContext initial_partitioning;
Expand Down
7 changes: 4 additions & 3 deletions kaminpar/partitioning/deep/deep_multilevel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ const Graph *DeepMultilevelPartitioner::coarsen() {
}

NodeID DeepMultilevelPartitioner::initial_partitioning_threshold() {
if (helper::parallel_ip_mode(_input_ctx.initial_partitioning.mode)) {
if (helper::parallel_ip_mode(_input_ctx.partitioning.deep_initial_partitioning_mode)) {
return _input_ctx.parallel.num_threads * _input_ctx.coarsening.contraction_limit; // p * C
} else {
return 2 * _input_ctx.coarsening.contraction_limit; // 2 * C
Expand All @@ -149,15 +149,16 @@ PartitionedGraph DeepMultilevelPartitioner::initial_partition(const Graph *graph
// If requested, dump the coarsest graph to disk. Note that in the context of
// deep multilevel, this is not actually the coarsest graph, but rather the
// coarsest graph before splitting PEs and duplicating the graph.
// Disable worker splitting with --i-mode=sequential to obtain coarser graphs.
// Disable worker splitting with --p-deep-initial-partitioning-mode=sequential to obtain coarser
// graphs.
debug::dump_coarsest_graph(*graph, _input_ctx.debug);
debug::dump_graph_hierarchy(*graph, _coarsener->size(), _input_ctx.debug);

// Since timers are not multi-threaded, we disable them during (parallel)
// initial partitioning.
DISABLE_TIMERS();
PartitionedGraph p_graph = [&] {
switch (_input_ctx.initial_partitioning.mode) {
switch (_input_ctx.partitioning.deep_initial_partitioning_mode) {
case InitialPartitioningMode::SEQUENTIAL:
return helper::bipartition(graph, _input_ctx.partition.k, _input_ctx, _ip_m_ctx_pool);

Expand Down
4 changes: 4 additions & 0 deletions kaminpar/partitioning/deep/sync_initial_partitioning.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,16 @@ SyncInitialPartitioner::partition(const Coarsener *coarsener, const PartitionCon
std::atomic<bool> converged = false;

std::vector<std::size_t> num_local_copies_record;

while (num_current_copies < num_threads) {
const NodeID n = coarseners.back()[0]->coarsest_graph()->n();
const std::size_t num_local_copies =
helper::compute_num_copies(_input_ctx, n, converged, num_current_threads);
num_local_copies_record.push_back(num_local_copies);

DBG << V(num_current_copies) << V(num_threads) << V(num_current_threads) << V(num_local_copies);


// Create coarseners and partition contexts for next coarsening iteration
coarseners.emplace_back(num_current_copies * num_local_copies);
auto &next_coarseners = coarseners.back();
Expand Down
5 changes: 3 additions & 2 deletions kaminpar/partitioning/helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ select_best(const scalable_vector<PartitionedGraph> &p_graphs, const PartitionCo
}

std::size_t compute_num_threads_for_parallel_ip(const Context &input_ctx) {
return math::floor2(static_cast<unsigned int>(input_ctx.parallel.num_threads)) *
(1 << input_ctx.initial_partitioning.multiplier_exponent);
return math::floor2(static_cast<unsigned int>(
1.0 * input_ctx.parallel.num_threads * input_ctx.partitioning.deep_initial_partitioning_load
));
}
} // namespace kaminpar::shm::partitioning::helper
12 changes: 7 additions & 5 deletions kaminpar/presets.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ std::unordered_set<std::string> get_preset_names() {

Context create_default_context() {
return {
.mode = PartitioningMode::DEEP,
// Context
.partitioning =
{
.mode = PartitioningMode::DEEP,
.deep_initial_partitioning_mode = InitialPartitioningMode::SYNCHRONOUS_PARALLEL,
.deep_initial_partitioning_load = 1.0,
},
.partition =
{
// Context -> Partition
Expand All @@ -73,7 +77,6 @@ Context create_default_context() {
},
.initial_partitioning =
{
.mode = InitialPartitioningMode::SYNCHRONOUS_PARALLEL,
.coarsening =
{
.contraction_limit = 20,
Expand All @@ -97,7 +100,6 @@ Context create_default_context() {
.max_num_repetitions = 50,
.num_seed_iterations = 1,
.use_adaptive_bipartitioner_selection = true,
.multiplier_exponent = 0,
},
.refinement =
{
Expand Down Expand Up @@ -153,11 +155,11 @@ Context create_default_context() {

Context create_fast_context() {
Context ctx = create_default_context();
ctx.partitioning.deep_initial_partitioning_mode = InitialPartitioningMode::SEQUENTIAL;
ctx.coarsening.lp.num_iterations = 1;
ctx.initial_partitioning.min_num_repetitions = 1;
ctx.initial_partitioning.min_num_non_adaptive_repetitions = 1;
ctx.initial_partitioning.max_num_repetitions = 1;
ctx.initial_partitioning.mode = InitialPartitioningMode::SEQUENTIAL;
return ctx;
}

Expand Down
41 changes: 25 additions & 16 deletions kaminpar_cli/kaminpar_arguments.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,30 @@ CLI::Option_group *create_partitioning_options(CLI::App *app, Context &ctx) {
->check(CLI::NonNegativeNumber)
->capture_default_str();

partitioning->add_option("-m,--mode", ctx.mode)
// Partitioning options
partitioning->add_option("-m,--p-mode", ctx.partitioning.mode)
->transform(CLI::CheckedTransformer(get_partitioning_modes()).description(""))
->description(R"(Partitioning scheme:
- deep: deep multilevel
- rb: recursive multilevel bipartitioning)")
->capture_default_str();
partitioning
->add_option(
"--p-deep-initial-partitioning-mode", ctx.partitioning.deep_initial_partitioning_mode
)
->transform(CLI::CheckedTransformer(get_initial_partitioning_modes()).description(""))
->description(R"(Chooses the initial partitioning mode:
- sequential: do not diversify initial partitioning by replicating coarse graphs
- async-parallel: diversify initial partitioning by replicating coarse graphs each branch of the replication tree asynchronously
- sync-parallel: same as async-parallel, but process branches synchronously)")
->capture_default_str();
partitioning->add_option(
"--p-deep-initial-partitioning-load",
ctx.partitioning.deep_initial_partitioning_load,
"Fraction of cores that should be used for the coarse graph replication phase of deep MGP. A "
"value of '1' will replicate the graph once for every PE, whereas smaller values lead to "
"fewer replications."
);

return partitioning;
}
Expand Down Expand Up @@ -132,14 +150,6 @@ CLI::Option_group *create_lp_coarsening_options(CLI::App *app, Context &ctx) {
CLI::Option_group *create_initial_partitioning_options(CLI::App *app, Context &ctx) {
auto *ip = app->add_option_group("Initial Partitioning");

ip->add_option("--i-mode", ctx.initial_partitioning.mode)
->transform(CLI::CheckedTransformer(get_initial_partitioning_modes()).description(""))
->description(R"(Chooses the initial partitioning mode:
- sequential: do not diversify initial partitioning by replicating coarse graphs
- async-parallel: diversify initial partitioning by replicating coarse graphs each branch of the replication tree asynchronously
- sync-parallel: same as async-parallel, but process branches synchronously)")
->capture_default_str();

/*
ip->add_option(
"--i-c-contraction-limit",
Expand All @@ -152,10 +162,11 @@ CLI::Option_group *create_initial_partitioning_options(CLI::App *app, Context &c
)
->transform(CLI::CheckedTransformer(get_cluster_weight_limits()).description(""))
->description(
R"(This option selects the formula used to compute the weight limit for nodes in coarse graphs.
The weight limit can additionally be scaled by a constant multiplier set by the --c-cluster-weight-multiplier option.
Options are:
- epsilon-block-weight: Cmax = eps * c(V) * min{n' / C, k}, where n' is the number of nodes in the current (coarse) graph
R"(This option selects the formula used to compute the weight limit for nodes in coarse
graphs. The weight limit can additionally be scaled by a constant multiplier set by the
--c-cluster-weight-multiplier option. Options are:
- epsilon-block-weight: Cmax = eps * c(V) * min{n' / C, k}, where n' is the number of nodes in the
current (coarse) graph
- static-block-weight: Cmax = c(V) / k
- one: Cmax = 1
- zero: Cmax = 0 (disable coarsening))"
Expand All @@ -177,8 +188,6 @@ Options are:
*/

/*
ip->add_option("--i-rep-exp", ctx.initial_partitioning.multiplier_exponent)
->capture_default_str();
ip->add_option("--i-rep-multiplier", ctx.initial_partitioning.repetition_multiplier)
->capture_default_str();
ip->add_option("--i-min-reps", ctx.initial_partitioning.min_num_repetitions)
Expand All @@ -200,7 +209,7 @@ Options are:
"--i-r-disable", ctx.initial_partitioning.refinement.disabled, "Disable initial refinement."
)
->capture_default_str();

/*
ip->add_option("--i-r-stopping-rule", ctx.initial_partitioning.refinement.stopping_rule)
->transform(CLI::CheckedTransformer(get_fm_stopping_rules()).description(""))
Expand Down

0 comments on commit f157df3

Please sign in to comment.