Skip to content

Commit

Permalink
Enable binding internal PRRTE progress thread
Browse files Browse the repository at this point in the history
In some scenarios, it is desirable to bind progress threads
to one or more specific cores - e.g., a core designated for
utility usage and not assigned application processes. Provide
an attribute by which the user can specify the core(s) to
which the internal PRRTE progress thread is to be bound, and
to indicate if the binding is mandatory (i.e., return an
error if the thread cannot be bound).

Signed-off-by: Ralph Castain <[email protected]>
(cherry picked from commit 0828725)
  • Loading branch information
rhc54 committed Jan 24, 2022
1 parent bd34aee commit 413c681
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 4 deletions.
3 changes: 3 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,9 @@ AC_PROG_LN_S
AC_PROG_GREP
AC_PROG_EGREP

# This check must come after PRTE_CONFIG_THREADS
AC_CHECK_FUNCS([pthread_setaffinity_np])

#
# We need as and lex
#
Expand Down
11 changes: 10 additions & 1 deletion src/prted/pmix/pmix_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* All rights reserved.
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -694,6 +694,15 @@ int pmix_server_init(void)
}
#endif

#ifdef PMIX_BIND_PROGRESS_THREAD
/* Only compiled when PMIX_BIND_PROGRESS_THREAD is defined.
 * If the user specified CPUs for the progress thread, add both
 * the CPU list and the "binding required" flag to the info list. */
if (NULL != prte_progress_thread_cpus) {
PMIX_INFO_LIST_ADD(prc, ilist, PMIX_BIND_PROGRESS_THREAD,
prte_progress_thread_cpus, PMIX_STRING);
/* NOTE(review): prc from the previous add is overwritten here without
 * being checked - confirm whether a failure should be handled */
PMIX_INFO_LIST_ADD(prc, ilist, PMIX_BIND_REQUIRED,
&prte_bind_progress_thread_reqd, PMIX_BOOL);
}
#endif

/* if we are the MASTER, then we are the scheduler
* as well as a gateway */
if (PRTE_PROC_IS_MASTER) {
Expand Down
4 changes: 3 additions & 1 deletion src/runtime/prte_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* Copyright (c) 2017-2020 IBM Corporation. All rights reserved.
* Copyright (c) 2017-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -70,6 +70,8 @@ PRTE_EXPORT extern bool prte_event_base_active; /* instantiated in src/runtim
PRTE_EXPORT extern bool prte_proc_is_bound; /* instantiated in src/runtime/prte_init.c */
PRTE_EXPORT extern int prte_progress_thread_debug; /* instantiated in src/runtime/prte_init.c */
PRTE_EXPORT extern char *prte_tool_basename; // argv[0] of prun or one of its symlinks
PRTE_EXPORT extern char *prte_progress_thread_cpus; /* instantiated in src/runtime/prte_mca_params.c */
PRTE_EXPORT extern bool prte_bind_progress_thread_reqd; /* instantiated in src/runtime/prte_mca_params.c */

/**
* Global indicating where this process was bound to at launch (will
Expand Down
17 changes: 16 additions & 1 deletion src/runtime/prte_mca_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -64,6 +64,8 @@ char *prte_if_include = NULL;
char *prte_if_exclude = NULL;
char *prte_set_max_sys_limits = NULL;
int prte_pmix_verbose_output = 0;
/* Comma-delimited list of CPU ranges to which the internal PRRTE progress
 * thread is to be bound; NULL (the default) means no binding was requested */
char *prte_progress_thread_cpus = NULL;
/* Whether binding of the internal PRRTE progress thread is required */
bool prte_bind_progress_thread_reqd = false;

int prte_max_thread_in_progress = 1;

Expand Down Expand Up @@ -666,6 +668,19 @@ int prte_register_params(void)
NULL, 0, PRTE_MCA_BASE_VAR_FLAG_NONE, PRTE_INFO_LVL_5,
PRTE_MCA_BASE_VAR_SCOPE_READONLY, &prte_pmix_verbose_output);

/* CPUs to which the internal progress thread should be bound. Note that
 * adjacent string literals are concatenated verbatim, so the first
 * fragment must end with a space to keep the help text readable. */
(void) prte_mca_base_var_register("prte", "prte", NULL, "progress_thread_cpus",
                                  "Comma-delimited list of ranges of CPUs to which "
                                  "the internal PRRTE progress thread is to be bound",
                                  PRTE_MCA_BASE_VAR_TYPE_STRING, NULL, 0,
                                  PRTE_MCA_BASE_VAR_FLAG_NONE, PRTE_INFO_LVL_9,
                                  PRTE_MCA_BASE_VAR_SCOPE_ALL, &prte_progress_thread_cpus);

/* Whether a failure to bind the progress thread is to be treated as an error */
(void) prte_mca_base_var_register("prte", "prte", NULL, "bind_progress_thread_reqd",
                                  "Whether binding of internal PRRTE progress thread is required",
                                  PRTE_MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
                                  PRTE_MCA_BASE_VAR_FLAG_NONE, PRTE_INFO_LVL_9,
                                  PRTE_MCA_BASE_VAR_SCOPE_ALL, &prte_bind_progress_thread_reqd);

#if PRTE_ENABLE_FT
prte_mca_base_var_register("prte", "prte", NULL, "enable_ft", "Enable/disable fault tolerance",
PRTE_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, PRTE_MCA_BASE_VAR_FLAG_NONE,
Expand Down
37 changes: 36 additions & 1 deletion src/runtime/prte_progress_threads.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -19,6 +19,7 @@

#include "src/class/prte_list.h"
#include "src/event/event-internal.h"
#include "src/runtime/prte_globals.h"
#include "src/threads/threads.h"
#include "src/util/error.h"
#include "src/util/fd.h"
Expand Down Expand Up @@ -239,6 +240,12 @@ static void stop_progress_engine(prte_progress_tracker_t *trk)

static int start_progress_engine(prte_progress_tracker_t *trk)
{
#ifdef HAVE_PTHREAD_SETAFFINITY_NP
cpu_set_t cpuset;
char **ranges, *dash;
int k, n, start, end;
#endif

assert(!trk->ev_active);
trk->ev_active = true;

Expand All @@ -251,6 +258,34 @@ static int start_progress_engine(prte_progress_tracker_t *trk)
PRTE_ERROR_LOG(rc);
}

#ifdef HAVE_PTHREAD_SETAFFINITY_NP
    /* Bind the progress thread to the CPUs named in prte_progress_thread_cpus.
     * Only attempt this while rc is still PRTE_SUCCESS so that an error
     * recorded earlier in this function is returned to the caller instead
     * of being overwritten by the outcome of the binding attempt. */
    if (PRTE_SUCCESS == rc && NULL != prte_progress_thread_cpus) {
        CPU_ZERO(&cpuset);
        // comma-delimited list of cpu ranges, e.g. "0,2-4"
        ranges = prte_argv_split(prte_progress_thread_cpus, ',');
        for (n = 0; NULL != ranges && NULL != ranges[n]; n++) {
            /* strtoul never sets the end pointer to NULL - it leaves it
             * at the first unparsed character, so test that character
             * to tell a single CPU ("3") from a range ("2-4") */
            start = (int) strtoul(ranges[n], &dash, 10);
            if ('-' == *dash) {
                end = (int) strtoul(dash + 1, NULL, 10); // skip over the '-'
            } else {
                end = start;
            }
            /* ignore entries that cannot describe a valid range */
            if (start < 0 || end < start) {
                continue;
            }
            /* never index past the end of the fixed-size cpu set */
            if (end >= CPU_SETSIZE) {
                end = CPU_SETSIZE - 1;
            }
            /* ranges are inclusive: "2-4" means CPUs 2, 3 and 4 */
            for (k = start; k <= end; k++) {
                CPU_SET(k, &cpuset);
            }
        }
        /* the split array is only needed while parsing - release it */
        if (NULL != ranges) {
            prte_argv_free(ranges);
            ranges = NULL;
        }
        /* an empty set (nothing valid was given) is rejected by
         * pthread_setaffinity_np and is handled as a bind failure */
        rc = pthread_setaffinity_np(trk->engine.t_handle, sizeof(cpu_set_t), &cpuset);
        if (0 != rc && prte_bind_progress_thread_reqd) {
            prte_output(0, "Failed to bind progress thread %s",
                        (NULL == trk->name) ? "NULL" : trk->name);
            rc = PRTE_ERR_NOT_SUPPORTED;
        } else {
            /* either the binding succeeded or it was only best-effort */
            rc = PRTE_SUCCESS;
        }
    }
#endif
return rc;
}

Expand Down

0 comments on commit 413c681

Please sign in to comment.