Skip to content

Commit

Permalink
Merge pull request #1187 from rhc54/cmr20/up
Browse files Browse the repository at this point in the history
Update v2.0 release branch
  • Loading branch information
rhc54 authored Feb 1, 2022
2 parents b5bc499 + 417d6f7 commit eba9dc3
Show file tree
Hide file tree
Showing 19 changed files with 251 additions and 152 deletions.
16 changes: 14 additions & 2 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights
reserved.
Copyright (c) 2021 Nanook Consulting. All rights reserved.
Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
$COPYRIGHT$

Additional copyrights may follow
Expand All @@ -35,11 +35,23 @@ This file contains the main features as well as overviews of specific
bug fixes (and other actions) for each version of PRRTE since
version 1.0.

2.0.1: TBD
2.0.1: 1 Feb 2022
------------------------------------
**** NOTE: As of v2.0.1, PRRTE no longer has a dependency on "pandoc"
**** for building and installing man pages.

**** NOTE: PRRTE has identified a bug in HWLOC versions 2.5.0 thru
**** 2.7.0 (inclusive) that causes PMIx to segfault during certain
**** operations. We have worked with the HWLOC developers to fix
**** that problem, and the fix will be included beginning in HWLOC
**** version 2.7.1. Accordingly, PRRTE now checks for the problem
**** HWLOC versions and will error out of configure if they are
**** encountered.

PR #1185 and 1186: Update HWLOC version detection
- Reject versions 2.5.0-2.7.0, inclusive
PR #1183: Always build the PBS scheduler support
PR #1182: Cleanup handling of allocated node names
PR #1169: Updates for rc5
- Enable support for PMIX_IOF_OUTPUT_RAW attribute
- Update NEWS
Expand Down
44 changes: 39 additions & 5 deletions config/prte_setup_hwloc.m4
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
# Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
# Copyright (c) 2021 Nanook Consulting. All rights reserved.
# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
# Copyright (c) 2021 Amazon.com, Inc. or its affiliates.
# All Rights reserved.
# $COPYRIGHT$
Expand Down Expand Up @@ -94,12 +94,33 @@ AC_DEFUN([PRTE_SETUP_HWLOC],[
PRTE_FLAGS_PREPEND_UNIQ([LDFLAGS], [$prte_hwloc_LDFLAGS])
PRTE_FLAGS_PREPEND_UNIQ([LIBS], [$prte_hwloc_LIBS])

AC_MSG_CHECKING([if hwloc version is in 2.5.0-2.7.0 range])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([#include <hwloc.h>],
[[
#if HWLOC_VERSION_MAJOR == 2
#if (HWLOC_VERSION_MINOR == 7 && HWLOC_VERSION_RELEASE == 0) || HWLOC_VERSION_MINOR == 6 || HWLOC_VERSION_MINOR == 5
#error "hwloc version is in blocklist range"
#endif
#endif
]])],
[AC_MSG_RESULT([no])],
[AC_MSG_RESULT([yes])
AC_MSG_WARN([***********************************************************])
AC_MSG_WARN([PRRTE is not compatible with HWLOC versions 2.5.0-2.7.0 (inclusive)])
AC_MSG_WARN([due to a bug in HWLOC's setting of environmental variables.])
AC_MSG_WARN([Please switch the HWLOC installation to a version outside])
AC_MSG_WARN([of that range.])
AC_MSG_WARN([***********************************************************])
AC_MSG_ERROR([Cannot continue])])
AC_MSG_CHECKING([if hwloc version is 1.5 or greater])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <hwloc.h>]],
[AC_LANG_PROGRAM([#include <hwloc.h>],
[[
#if HWLOC_API_VERSION < 0x00010500
#error "hwloc API version is less than 0x00010500"
#error "hwloc version is less than 0x00010500"
#endif
]])],
[AC_MSG_RESULT([yes])],
Expand All @@ -108,16 +129,29 @@ AC_DEFUN([PRTE_SETUP_HWLOC],[
AC_MSG_CHECKING([if hwloc version is 1.8 or greater])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <hwloc.h>]],
[AC_LANG_PROGRAM([#include <hwloc.h>],
[[
#if HWLOC_API_VERSION < 0x00010800
#error "hwloc API version is less than 0x00010800"
#error "hwloc version is less than 0x00010800"
#endif
]])],
[AC_MSG_RESULT([yes])
prte_have_topology_dup=1],
[AC_MSG_RESULT([no])])
AC_MSG_CHECKING([if hwloc version is 2.0 or greater])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([#include <hwloc.h>],
[[
#if HWLOC_VERSION_MAJOR < 2
#error "hwloc version is less than 2.x"
#endif
]])],
[AC_MSG_RESULT([yes])
prte_version_high=1],
[AC_MSG_RESULT([no])
prte_version_high=0])
CPPFLAGS=$prte_check_hwloc_save_CPPFLAGS
LDFLAGS=$prte_check_hwloc_save_LDFLAGS
LIBS=$prte_check_hwloc_save_LIBS
Expand Down
8 changes: 4 additions & 4 deletions src/mca/plm/base/plm_base_launch_support.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2020 IBM Corporation. All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -2340,11 +2340,11 @@ int prte_plm_base_setup_virtual_machine(prte_job_t *jdata)
PRTE_OUTPUT_VERBOSE((5, prte_plm_base_framework.framework_output, "%s checking node %s",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), nptr->name));
for (i = 0; i < prte_node_pool->size; i++) {
if (NULL
== (node = (prte_node_t *) prte_pointer_array_get_item(prte_node_pool, i))) {
node = (prte_node_t *) prte_pointer_array_get_item(prte_node_pool, i);
if (NULL == node) {
continue;
}
if (0 != strcmp(node->name, nptr->name)) {
if (!prte_nptr_match(node, nptr)) {
continue;
}
/* have a match - now see if we want this node */
Expand Down
15 changes: 10 additions & 5 deletions src/mca/plm/ssh/plm_ssh_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -998,7 +998,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
prte_state_caddy_t *state = (prte_state_caddy_t *) cbdata;
prte_plm_ssh_caddy_t *caddy;
prte_list_t coll;
char *username;
char *username, *nname;
int port, *portptr;
prte_namelist_t *child;

Expand Down Expand Up @@ -1117,7 +1117,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
* prefer to find some other node so we can tell what the remote
* shell is, if necessary
*/
if (0 != strcmp(node->name, prte_process_info.nodename)) {
if (!prte_check_host_is_local(node->name)) {
break;
}
}
Expand Down Expand Up @@ -1190,13 +1190,18 @@ static void launch_daemons(int fd, short args, void *cbdata)

/* setup node name */
free(argv[node_name_index1]);
if (NULL == node->rawname) {
nname = node->name;
} else {
nname = node->rawname;
}
username = NULL;
if (prte_get_attribute(&node->attributes, PRTE_NODE_USERNAME, (void **) &username,
PMIX_STRING)) {
prte_asprintf(&argv[node_name_index1], "%s@%s", username, node->name);
prte_asprintf(&argv[node_name_index1], "%s@%s", username, nname);
free(username);
} else {
argv[node_name_index1] = strdup(node->name);
argv[node_name_index1] = strdup(nname);
}

/* pass the vpid */
Expand Down
3 changes: 2 additions & 1 deletion src/mca/ras/base/ras_base_allocate.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -282,6 +282,7 @@ void prte_ras_base_allocate(int fd, short args, void *cbdata)
PRTE_LIST_FOREACH(node, &nodes, prte_node_t) {
if (!prte_net_isaddr(node->name) &&
NULL != (ptr = strchr(node->name, '.'))) {
node->rawname = strdup(node->name);
if (prte_keep_fqdn_hostnames) {
/* retain the non-fqdn name as an alias */
*ptr = '\0';
Expand Down
8 changes: 7 additions & 1 deletion src/mca/ras/base/ras_base_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -149,6 +149,12 @@ int prte_ras_base_node_insert(prte_list_t *nodes, prte_job_t *jdata)
}
/* if the node name is different, store it as an alias */
prte_argv_append_unique_nosize(&hnp_node->aliases, node->name);
if (NULL != node->rawname) {
if (NULL != hnp_node->rawname) {
free(hnp_node->rawname);
}
hnp_node->rawname = strdup(node->rawname);
}
/* don't keep duplicate copy */
PRTE_RELEASE(node);
/* create copies, if required */
Expand Down
28 changes: 14 additions & 14 deletions src/mca/ras/tm/Makefile.am → src/mca/ras/pbs/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,49 +12,49 @@
# Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2017-2020 Intel, Inc. All rights reserved.
# Copyright (c) 2021 Nanook Consulting. All rights reserved.
# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# The TM plugin does not call any Torque/PBS functions - it therefore
# The PBS plugin does not call any Torque/PBS functions - it therefore
# does NOT need to link against any Torque/PBS libraries

# Use the top-level Makefile.options

dist_prtedata_DATA = help-ras-tm.txt
dist_prtedata_DATA = help-ras-pbs.txt


sources = \
ras_tm.h \
ras_tm_component.c \
ras_tm_module.c
ras_pbs.h \
ras_pbs_component.c \
ras_pbs_module.c


# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if MCA_BUILD_prte_ras_tm_DSO
if MCA_BUILD_prte_ras_pbs_DSO
lib =
lib_sources =
component = mca_ras_tm.la
component = mca_ras_pbs.la
component_sources = $(sources)
else
lib = libmca_ras_tm.la
lib = libmca_ras_pbs.la
lib_sources = $(sources)
component =
component_sources =
endif

mcacomponentdir = $(prtelibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_ras_tm_la_SOURCES = $(component_sources)
mca_ras_tm_la_LDFLAGS = -module -avoid-version
mca_ras_tm_la_LIBADD = $(top_builddir)/src/libprrte.la
mca_ras_pbs_la_SOURCES = $(component_sources)
mca_ras_pbs_la_LDFLAGS = -module -avoid-version
mca_ras_pbs_la_LIBADD = $(top_builddir)/src/libprrte.la

noinst_LTLIBRARIES = $(lib)
libmca_ras_tm_la_SOURCES = $(lib_sources)
libmca_ras_tm_la_LDFLAGS = -module -avoid-version
libmca_ras_pbs_la_SOURCES = $(lib_sources)
libmca_ras_pbs_la_LDFLAGS = -module -avoid-version
65 changes: 65 additions & 0 deletions src/mca/ras/pbs/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_prte_ras_pbs_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------------
# Decide whether the PBS scheduler (ras) component is to be built:
#   --with-pbs=no          : do not build
#   --with-pbs / =<value>  : build
#   option not given       : build on Linux, AIX and OS X hosts; on any
#                            other host build only when "pbsdash" is
#                            found in PATH
# The first argument is executed when the component is to be built,
# the second argument otherwise.
AC_DEFUN([MCA_prte_ras_pbs_CONFIG],[
    AC_CONFIG_FILES([src/mca/ras/pbs/Makefile])

    AC_ARG_WITH([pbs],
                [AS_HELP_STRING([--with-pbs],
                                [Build PBS scheduler component (default: yes)])])

    if test "$with_pbs" = "no" ; then
        prte_check_pbs_happy="no"
    elif test "$with_pbs" = "" ; then
        # unless user asked, only build pbs component on linux, AIX,
        # and OS X systems (these are the platforms that PBS
        # supports)
        case $host in
            *-linux*|*-aix*|*-apple-darwin*)
                prte_check_pbs_happy="yes"
                ;;
            *)
                # unknown platform: fall back to probing for a PBS tool
                AC_MSG_CHECKING([for pbsdash in PATH])
                PRTE_WHICH([pbsdash], [PRTE_CHECK_PBSDASH])
                if test "$PRTE_CHECK_PBSDASH" = ""; then
                    prte_check_pbs_happy="no"
                else
                    prte_check_pbs_happy="yes"
                fi
                AC_MSG_RESULT([$prte_check_pbs_happy])
                ;;
        esac
    else
        # any other explicit --with-pbs value is treated as a request to build
        prte_check_pbs_happy="yes"
    fi

    # NOTE(review): the third argument used to receive the action-if-found
    # code, which is not a summary field; it is now left empty. Confirm
    # that PRTE_SUMMARY_ADD does not require a value there.
    PRTE_SUMMARY_ADD([[Resource Managers]],[[PBS]],[],[$prte_check_pbs_happy (scheduler)])

    # Dispatch on the documented two-argument interface: found / not-found.
    AS_IF([test "$prte_check_pbs_happy" = "yes"],
          [$1],
          [$2])

])dnl
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@
# Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[no-nodefile]
The PBS_NODEFILE environmental variable was
not found - no nodes can be allocated
[no-nodes-found]
No nodes were found in the PBS_NODEFILE:

Expand Down
File renamed without changes.
Loading

0 comments on commit eba9dc3

Please sign in to comment.