diff --git a/config/Makefile.am b/config/Makefile.am index af7e00d8fa..81fcb37674 100644 --- a/config/Makefile.am +++ b/config/Makefile.am @@ -15,7 +15,7 @@ # Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2021 Nanook Consulting. All rights reserved. +# Copyright (c) 2021-2022 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -33,4 +33,57 @@ EXTRA_DIST = \ find_common_syms \ getdate.sh \ from-savannah/upstream-config.guess \ - from-savannah/upstream-config.sub + from-savannah/upstream-config.sub \ + c_weak_symbols.m4 \ + pkg.m4 \ + prte_case_sensitive_fs_setup.m4 \ + prte_check_alps.m4 \ + prte_check_attributes.m4 \ + prte_check_cflags.m4 \ + prte_check_compiler_version.m4 \ + prte_check_compiler_works.m4 \ + prte_check_ident.m4 \ + prte_check_libnl.m4 \ + prte_check_lock.m4 \ + prte_check_lsf.m4 \ + prte_check_moab.m4 \ + prte_check_offsetof.m4 \ + prte_check_os_flavors.m4 \ + prte_check_package.m4 \ + prte_check_ps.m4 \ + prte_check_pthread_pids.m4 \ + prte_check_ptrace.m4 \ + prte_check_sge.m4 \ + prte_check_singularity.m4 \ + prte_check_slurm.m4 \ + prte_check_tm.m4 \ + prte_check_vendor.m4 \ + prte_check_version.m4 \ + prte_check_visibility.m4 \ + prte_check_withdir.m4 \ + prte_config_asm.m4 \ + prte_config_files.m4 \ + prte_config_pthreads.m4 \ + prte_config_subdir.m4 \ + prte_config_subdir_args.m4 \ + prte_config_threads.m4 \ + prte_configure_options.m4 \ + prte_ensure_contains_optflags.m4 \ + prte_find_type.m4 \ + prte_functions.m4 \ + prte_get_version.m4 \ + prte_load_platform.m4 \ + prte_mca.m4 \ + prte_save_version.m4 \ + prte_search_libs.m4 \ + prte_set_mca_prefix.m4 \ + prte_setup_cc.m4 \ + prte_setup_cli.m4 \ + prte_setup_component_package.m4 \ + prte_setup_hwloc.m4 \ + prte_setup_libev.m4 \ + prte_setup_libevent.m4 \ + prte_setup_pmix.m4 \ + prte_strip_optflags.m4 \ + 
prte_summary.m4 \ + prte_try_assemble.m4 diff --git a/config/prte_check_icc.m4 b/config/prte_check_icc.m4 deleted file mode 100644 index 1e85d3c406..0000000000 --- a/config/prte_check_icc.m4 +++ /dev/null @@ -1,66 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -dnl University Research and Technology -dnl Corporation. All rights reserved. -dnl Copyright (c) 2004-2005 The University of Tennessee and The University -dnl of Tennessee Research Foundation. All rights -dnl reserved. -dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -dnl University of Stuttgart. All rights reserved. -dnl Copyright (c) 2004-2005 The Regents of the University of California. -dnl All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2016-2019 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2021 Nanook Consulting. All rights reserved. -dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -AC_DEFUN([PRTE_CHECK_ICC_VARARGS],[ -dnl -dnl On EM64T, icc-8.1 before version 8.1.027 segfaulted, since -dnl va_start was miscompiled... -dnl -AC_MSG_CHECKING([whether icc-8.1 for EM64T works with variable arguments]) -AC_RUN_IFELSE([AC_LANG_SOURCE([ -#include -#include -#include - -void func (int c, char * f, ...) 
-{ - va_list arglist; - va_start (arglist, f); - /* vprintf (f, arglist); */ - va_end (arglist); -} - -int main () -{ - FILE *f; - func (4711, "Help %d [%s]\n", 10, "ten"); - f=fopen ("conftestval", "w"); - if (!f) exit (1); - fclose(f); - return 0; -} - -])], -[prte_ac_icc_varargs=`test -f conftestval`], -[prte_ac_icc_varargs=1],[prte_ac_icc_varargs=1]) - -if test "$prte_ac_icc_varargs" = "1"; then - AC_MSG_WARN([*** Problem running configure test!]) - AC_MSG_WARN([*** Your icc-8.1 compiler seems to miscompile va_start!]) - AC_MSG_WARN([*** Please upgrade compiler to at least version 8.1.027]) - AC_MSG_ERROR([*** Cannot continue.]) -fi - -AC_MSG_RESULT([yes]) - -rm -rf conftest*])dnl diff --git a/config/prte_check_lsf.m4 b/config/prte_check_lsf.m4 index 73d9f6858b..d411ca62ef 100644 --- a/config/prte_check_lsf.m4 +++ b/config/prte_check_lsf.m4 @@ -17,7 +17,7 @@ dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2017-2021 IBM Corporation. All rights reserved. dnl Copyright (c) 2017-2020 Intel, Inc. All rights reserved. -dnl Copyright (c) 2021 Nanook Consulting. All rights reserved. +dnl Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -79,10 +79,12 @@ AC_DEFUN([PRTE_CHECK_LSF],[ # on AIX it should be in libbsd # on HP-UX it should be in libBSD # on IRIX < 6 it should be in libsun (IRIX 6 and later it is in libc) + # on RHEL: libnsl, libnsl2 AND libnsl2-devel are required to link libnsl to get yp_all. AS_IF([test "$prte_check_lsf_happy" = "yes"], [PRTE_SEARCH_LIBS_COMPONENT([yp_all_nsl], [yp_all], [nsl bsd BSD sun], [prte_check_lsf_happy="yes"], - [prte_check_lsf_happy="no"])]) + [AC_MSG_WARN([[Could not find yp_all. 
Please see https://github.com/openpmix/prrte/wiki/Building-LSF-support for more details.]]) + prte_check_lsf_happy="no"])]) # liblsf requires shm_open, shm_unlink, which are in librt AS_IF([test "$prte_check_lsf_happy" = "yes"], @@ -154,18 +156,18 @@ AC_DEFUN([PRTE_CHECK_LSF],[ # (3) Check to see if the -levent is from Libevent (check for a symbol it has) AC_CHECK_LIB([event], [evthread_set_condition_callbacks], [AC_MSG_CHECKING([for libevent conflict]) - AC_MSG_RESULT([No. The correct libevent.so was linked.]) + AC_MSG_RESULT([No conflict found. The correct libevent.so was linked.]) prte_check_lsf_event_conflict=no], [# (4) The libevent.so is not from Libevent. Warn the user. AC_MSG_CHECKING([for libevent conflict]) - AC_MSG_RESULT([Yes. Detected a libevent.so that is not from Libevent.]) + AC_MSG_RESULT([Conflict found. Detected a libevent.so that is not from Libevent.]) prte_check_lsf_event_conflict=yes]) ], [AC_MSG_CHECKING([for libevent conflict]) - AC_MSG_RESULT([No. -levent is not being explicitly used.]) + AC_MSG_RESULT([No conflict found. -levent is not being explicitly used.]) prte_check_lsf_event_conflict=na])], [AC_MSG_CHECKING([for libevent conflict]) - AC_MSG_RESULT([No. LSF checks passed.]) + AC_MSG_RESULT([No conflict found. LSF checks passed.]) prte_check_lsf_event_conflict=na]) AS_IF([test "$prte_check_lsf_event_conflict" = "yes"], diff --git a/config/prte_check_ptrace.m4 b/config/prte_check_ptrace.m4 index 893c471018..13fdcf3864 100644 --- a/config/prte_check_ptrace.m4 +++ b/config/prte_check_ptrace.m4 @@ -2,6 +2,7 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2020 Intel, Inc. All rights reserved. dnl Copyright (c) 2020 Cisco Systems, Inc. All rights reserved +dnl Copyright (c) 2022 Nanook Consulting. All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -14,7 +15,7 @@ dnl AC_DEFUN([PRTE_CHECK_PTRACE],[ - PRTE_VAR_SCOPE_PUSH(prte_have_ptrace_traceme prte_have_ptrace_detach prte_have_ptrace_header prte_have_ptrace prte_want_stop_on_exec prte_traceme_cmd prte_detach_cmd prte_ptrace_linux_sig) + PRTE_VAR_SCOPE_PUSH(prte_have_ptrace_traceme prte_have_ptrace_detach prte_have_ptrace_header prte_have_ptrace prte_want_stop_on_exec prte_traceme_cmd prte_detach_cmd prte_ptrace_linux_sig prte_ptrace_CFLAGS_save) prte_have_ptrace_traceme=no prte_have_ptrace_detach=no @@ -22,10 +23,10 @@ AC_DEFUN([PRTE_CHECK_PTRACE],[ prte_detach_cmd= AC_CHECK_HEADER([sys/ptrace.h], - [prte_have_ptrace_header=1], + [prte_have_ptrace_header=1 + # must manually define the header protection since check_header doesn't do it + AC_DEFINE_UNQUOTED([HAVE_SYS_PTRACE_H], [1], [Whether or not we have the ptrace header])], [prte_have_ptrace_header=0]) - # must manually define the header protection since check_header doesn't know it - AC_DEFINE_UNQUOTED([HAVE_SYS_PTRACE_H], [$prte_have_ptrace_header], [Whether or not we have the ptrace header]) AC_CHECK_FUNC([ptrace], [prte_have_ptrace=yes], @@ -84,6 +85,9 @@ AC_DEFUN([PRTE_CHECK_PTRACE],[ AC_MSG_CHECKING([Linux ptrace function signature]) AC_LANG_PUSH(C) + # must have -Werror set here + prte_ptrace_CFLAGS_save=$CFLAGS + CFLAGS="$CFLAGS -Werror" AC_COMPILE_IFELSE( [AC_LANG_PROGRAM( [[#include "sys/ptrace.h"]], @@ -97,6 +101,7 @@ AC_DEFUN([PRTE_CHECK_PTRACE],[ prte_ptrace_linux_sig=0 ]) AC_LANG_POP(C) + CFLAGS=$prte_ptrace_CFLAGS_save fi diff --git a/config/prte_configure_options.m4 b/config/prte_configure_options.m4 index f98f1040ab..021464a3a6 100644 --- a/config/prte_configure_options.m4 +++ b/config/prte_configure_options.m4 @@ -18,7 +18,7 @@ dnl reserved. dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. dnl dnl Copyright (c) 2016-2020 Intel, Inc. All rights reserved. 
-dnl Copyright (c) 2021 Nanook Consulting All rights reserved. +dnl Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -284,7 +284,7 @@ if test -n "$with_proxy_bugreport"; then PRTE_PROXY_BUGREPORT=$with_proxy_bugreport else AC_MSG_RESULT([no]) - PRTE_PROXY_BUGREPORT=https://github.com/openpmix/prte/ + PRTE_PROXY_BUGREPORT=https://github.com/openpmix/prrte/ fi AC_DEFINE_UNQUOTED(PRTE_PROXY_BUGREPORT, "$PRTE_PROXY_BUGREPORT", [Bugreport string to be returned by prte when in proxy mode]) diff --git a/configure.ac b/configure.ac index 6f632c875a..b3e51a7d85 100644 --- a/configure.ac +++ b/configure.ac @@ -25,7 +25,7 @@ # Copyright (c) 2016-2017 IBM Corporation. All rights reserved. # Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. # All Rights reserved. -# Copyright (c) 2021 Nanook Consulting All rights reserved. +# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. # Copyright (c) 2021 FUJITSU LIMITED. All rights reserved. # $COPYRIGHT$ # @@ -49,7 +49,7 @@ m4_include([config/autogen_found_items.m4]) AC_INIT([prte], [m4_normalize(esyscmd([config/prte_get_version.sh VERSION --tarball]))], - [https://github.com/openpmix/prte/], [prte]) + [https://github.com/openpmix/prrte/], [prrte]) AC_PREREQ(2.69) AC_CONFIG_AUX_DIR(config) @@ -458,10 +458,6 @@ PRTE_C_WEAK_SYMBOLS PRTE_C_MACRO_WEAK_SYMBOLS -if test "x$CC" = "xicc"; then - PRTE_CHECK_ICC_VARARGS -fi - # Check if we support the offsetof compiler directive PRTE_CHECK_OFFSETOF @@ -923,6 +919,9 @@ AC_PROG_LN_S AC_PROG_GREP AC_PROG_EGREP +# This check must come after PRTE_CONFIG_THREADS +AC_CHECK_FUNCS([pthread_setaffinity_np]) + # # We need as and lex # diff --git a/src/hwloc/hwloc-internal.h b/src/hwloc/hwloc-internal.h index 5e5a9d721d..6fdd7c64d1 100644 --- a/src/hwloc/hwloc-internal.h +++ b/src/hwloc/hwloc-internal.h @@ -7,7 +7,7 @@ * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. * - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,10 +31,20 @@ # include #endif -#if HWLOC_API_VERSION < 0x00010b00 +#if HWLOC_API_VERSION < 0x10b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET #endif +#if HWLOC_API_VERSION < 0x10a00 +static inline hwloc_obj_t hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) +{ + hwloc_obj_t obj = NULL; + while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, obj)) != NULL) + if (obj->os_index == os_index) + return obj; + return NULL; +} +#endif #include "src/class/prte_list.h" #include "src/class/prte_value_array.h" diff --git a/src/mca/oob/tcp/oob_tcp_connection.c b/src/mca/oob/tcp/oob_tcp_connection.c index 18dfe9d44d..f256b556b6 100644 --- a/src/mca/oob/tcp/oob_tcp_connection.c +++ b/src/mca/oob/tcp/oob_tcp_connection.c @@ -19,7 +19,7 @@ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights * reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -160,7 +160,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) prte_oob_tcp_peer_t *peer; prte_oob_tcp_addr_t *addr; bool connected = false; - prte_if_t *interface; + prte_if_t *intf; char *host; remote_list = PRTE_NEW(prte_list_t); @@ -177,21 +177,21 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) /* Construct a list of remote prte_if_t from peer */ PRTE_LIST_FOREACH(addr, &peer->addrs, prte_oob_tcp_addr_t) { - interface = PRTE_NEW(prte_if_t); - if (NULL == interface) { + intf = PRTE_NEW(prte_if_t); + if (NULL == intf) { prte_output(0, "%s CANNOT CREATE SOCKET, OUT OF MEMORY", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); PRTE_ACTIVATE_JOB_STATE(NULL, PRTE_JOB_STATE_COMM_FAILED); goto cleanup; } - interface->af_family = addr->addr.ss_family; - memcpy(&interface->if_addr, &addr->addr, sizeof(struct sockaddr_storage)); - interface->if_mask = addr->if_mask; + intf->af_family = addr->addr.ss_family; + memcpy(&intf->if_addr, &addr->addr, sizeof(struct sockaddr_storage)); + intf->if_mask = addr->if_mask; /* We do not pass along bandwidth information, setting as arbitrary non * zero value */ - interface->if_bandwidth = 1; - prte_list_append(remote_list, &(interface->super)); + intf->if_bandwidth = 1; + prte_list_append(remote_list, &(intf->super)); } local_if_count = prte_list_get_size(local_list); remote_if_count = prte_list_get_size(remote_list); @@ -251,7 +251,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) for (i = 0; i < best_i; i++) { ptr = ptr->prte_list_next; } - interface = (prte_if_t *) ptr; + intf = (prte_if_t *) ptr; prte_output_verbose(OOB_TCP_DEBUG_CONNECT, prte_oob_base_framework.framework_output, "%s prte_tcp_peer_try_connect: " "attempting to connect to proc %s on %s:%d - %d retries", @@ -303,7 +303,7 @@ void prte_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) } /* Bind the socket manually to selected address */ - if 
(bind(peer->sd, (struct sockaddr *) &interface->if_addr, addrlen) < 0) { + if (bind(peer->sd, (struct sockaddr *) &intf->if_addr, addrlen) < 0) { /* If we cannot bind to this address, set remaining entries * for this address from the reachable table to no connection * and try a new connection. diff --git a/src/mca/prteif/prteif.h b/src/mca/prteif/prteif.h index 589e2ea81f..8615ee8486 100644 --- a/src/mca/prteif/prteif.h +++ b/src/mca/prteif/prteif.h @@ -6,7 +6,7 @@ * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights * reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -71,7 +71,7 @@ BEGIN_C_DECLS #endif #define DEFAULT_NUMBER_INTERFACES 10 -#define MAX_IFCONF_SIZE 10 * 1024 * 1024 +#define MAX_IFCONF_SIZE 10485760 typedef struct prte_if_t { prte_list_item_t super; diff --git a/src/prted/pmix/pmix_server.c b/src/prted/pmix/pmix_server.c index 3c3fc4723b..eae1340f33 100644 --- a/src/prted/pmix/pmix_server.c +++ b/src/prted/pmix/pmix_server.c @@ -18,7 +18,7 @@ * All rights reserved. * Copyright (c) 2014-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -694,6 +694,15 @@ int pmix_server_init(void) } #endif +#ifdef PMIX_BIND_PROGRESS_THREAD + if (NULL != prte_progress_thread_cpus) { + PMIX_INFO_LIST_ADD(prc, ilist, PMIX_BIND_PROGRESS_THREAD, + prte_progress_thread_cpus, PMIX_STRING); + PMIX_INFO_LIST_ADD(prc, ilist, PMIX_BIND_REQUIRED, + &prte_bind_progress_thread_reqd, PMIX_BOOL); + } +#endif + /* if we are the MASTER, then we are the scheduler * as well as a gateway */ if (PRTE_PROC_IS_MASTER) { diff --git a/src/runtime/prte_globals.h b/src/runtime/prte_globals.h index 8b1feb57c9..920cacb3e6 100644 --- a/src/runtime/prte_globals.h +++ b/src/runtime/prte_globals.h @@ -17,7 +17,7 @@ * Copyright (c) 2017-2020 IBM Corporation. All rights reserved. * Copyright (c) 2017-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2022 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,6 +70,8 @@ PRTE_EXPORT extern bool prte_event_base_active; /* instantiated in src/runtim PRTE_EXPORT extern bool prte_proc_is_bound; /* instantiated in src/runtime/prte_init.c */ PRTE_EXPORT extern int prte_progress_thread_debug; /* instantiated in src/runtime/prte_init.c */ PRTE_EXPORT extern char *prte_tool_basename; // argv[0] of prun or one of its symlinks +PRTE_EXPORT extern char *prte_progress_thread_cpus; +PRTE_EXPORT extern bool prte_bind_progress_thread_reqd; /** * Global indicating where this process was bound to at launch (will diff --git a/src/runtime/prte_mca_params.c b/src/runtime/prte_mca_params.c index ee745c1e0a..69fb00b3ee 100644 --- a/src/runtime/prte_mca_params.c +++ b/src/runtime/prte_mca_params.c @@ -17,7 +17,7 @@ * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
- * Copyright (c) 2021 Nanook Consulting. All rights reserved.
+ * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -64,6 +64,8 @@ char *prte_if_include = NULL;
 char *prte_if_exclude = NULL;
 char *prte_set_max_sys_limits = NULL;
 int prte_pmix_verbose_output = 0;
+char *prte_progress_thread_cpus = NULL;
+bool prte_bind_progress_thread_reqd = false;
 
 int prte_max_thread_in_progress = 1;
 
@@ -666,6 +668,19 @@ int prte_register_params(void)
                                       NULL, 0, PRTE_MCA_BASE_VAR_FLAG_NONE, PRTE_INFO_LVL_5,
                                       PRTE_MCA_BASE_VAR_SCOPE_READONLY, &prte_pmix_verbose_output);
 
+    (void) prte_mca_base_var_register("prte", "prte", NULL, "progress_thread_cpus",
+                                      "Comma-delimited list of ranges of CPUs to which "
+                                      "the internal PRRTE progress thread is to be bound",
+                                      PRTE_MCA_BASE_VAR_TYPE_STRING, NULL, 0,
+                                      PRTE_MCA_BASE_VAR_FLAG_NONE, PRTE_INFO_LVL_9,
+                                      PRTE_MCA_BASE_VAR_SCOPE_ALL, &prte_progress_thread_cpus);
+
+    (void) prte_mca_base_var_register("prte", "prte", NULL, "bind_progress_thread_reqd",
+                                      "Whether binding of internal PRRTE progress thread is required",
+                                      PRTE_MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
+                                      PRTE_MCA_BASE_VAR_FLAG_NONE, PRTE_INFO_LVL_9,
+                                      PRTE_MCA_BASE_VAR_SCOPE_ALL, &prte_bind_progress_thread_reqd);
+
 #if PRTE_ENABLE_FT
     prte_mca_base_var_register("prte", "prte", NULL, "enable_ft", "Enable/disable fault tolerance",
                                PRTE_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, PRTE_MCA_BASE_VAR_FLAG_NONE,
diff --git a/src/runtime/prte_progress_threads.c b/src/runtime/prte_progress_threads.c
index 55a63cdd3e..83060fae80 100644
--- a/src/runtime/prte_progress_threads.c
+++ b/src/runtime/prte_progress_threads.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
  * Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved
- * Copyright (c) 2021 Nanook Consulting. All rights reserved.
+ * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -19,6 +19,7 @@
 
 #include "src/class/prte_list.h"
 #include "src/event/event-internal.h"
+#include "src/runtime/prte_globals.h"
 #include "src/threads/threads.h"
 #include "src/util/error.h"
 #include "src/util/fd.h"
@@ -239,6 +240,13 @@ static void stop_progress_engine(prte_progress_tracker_t *trk)
 
 static int start_progress_engine(prte_progress_tracker_t *trk)
 {
+#ifdef HAVE_PTHREAD_SETAFFINITY_NP
+    cpu_set_t cpuset;
+    char **ranges, *endp;
+    unsigned long k, first, last;
+    int n, bind_rc;
+#endif
+
     assert(!trk->ev_active);
     trk->ev_active = true;
 
@@ -251,6 +259,37 @@ static int start_progress_engine(prte_progress_tracker_t *trk)
         PRTE_ERROR_LOG(rc);
     }
 
+#ifdef HAVE_PTHREAD_SETAFFINITY_NP
+    /* Attempt binding only while rc still reports success, and never reset
+     * rc to success afterwards, so an earlier error reaches the caller. */
+    if (PRTE_SUCCESS == rc && NULL != prte_progress_thread_cpus) {
+        CPU_ZERO(&cpuset);
+        /* comma-delimited list of CPU ranges, e.g. "0-3,8" */
+        ranges = prte_argv_split(prte_progress_thread_cpus, ',');
+        for (n = 0; NULL != ranges && NULL != ranges[n]; n++) {
+            first = strtoul(ranges[n], &endp, 10);
+            if (endp == ranges[n]) {
+                continue; /* no leading digits: ignore the malformed entry */
+            }
+            /* strtoul always stores a non-NULL end pointer, so a range is
+             * detected by the character parsing stopped on */
+            last = ('-' == *endp) ? strtoul(endp + 1, NULL, 10) : first;
+            /* ranges are inclusive; never index past the fixed-size set */
+            for (k = first; k <= last && k < CPU_SETSIZE; k++) {
+                CPU_SET(k, &cpuset);
+            }
+        }
+        if (NULL != ranges) {
+            prte_argv_free(ranges);
+        }
+        bind_rc = pthread_setaffinity_np(trk->engine.t_handle, sizeof(cpu_set_t), &cpuset);
+        if (0 != bind_rc && prte_bind_progress_thread_reqd) {
+            prte_output(0, "Failed to bind progress thread %s",
+                        (NULL == trk->name) ? "NULL" : trk->name);
+            rc = PRTE_ERR_NOT_SUPPORTED;
+        }
+    }
+#endif
     return rc;
 }
 