From d386b88e062483f97df4435edca0efda792f8066 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Mon, 20 Feb 2023 14:15:30 -0600 Subject: [PATCH 1/2] Runtime Testing harness Signed-off-by: Joshua Hursey --- runtime/.ci-configure | 15 ++++++ runtime/.ci-tests | 2 + runtime/README.md | 41 ++++++++++++++ runtime/bin/cleanup-scrub-local.sh | 30 +++++++++++ runtime/bin/cleanup.sh | 62 +++++++++++++++++++++ runtime/hello_world/build.sh | 19 +++++++ runtime/hello_world/run.sh | 87 ++++++++++++++++++++++++++++++ 7 files changed, 256 insertions(+) create mode 100644 runtime/.ci-configure create mode 100644 runtime/.ci-tests create mode 100644 runtime/README.md create mode 100755 runtime/bin/cleanup-scrub-local.sh create mode 100755 runtime/bin/cleanup.sh create mode 100755 runtime/hello_world/build.sh create mode 100755 runtime/hello_world/run.sh diff --git a/runtime/.ci-configure b/runtime/.ci-configure new file mode 100644 index 0000000..41f3b20 --- /dev/null +++ b/runtime/.ci-configure @@ -0,0 +1,15 @@ +# +# Open MPI is built with the following options: +# ./configure --prefix=/opt/ci/support/exports/ompi --disable-cuda --disable-nvml --with-cuda=no --without-hcoll +# +# Additional options can be added by listing them. +# Options can be listed as either: +# - One option per line +# - Multiple options per line +# Note: Line continutions are not supported +# +# Enable Debug +--enable-debug +# With Python Bindings +# Need to install Cython on the CI machine +#--enable-python-bindings diff --git a/runtime/.ci-tests b/runtime/.ci-tests new file mode 100644 index 0000000..70822a6 --- /dev/null +++ b/runtime/.ci-tests @@ -0,0 +1,2 @@ +# Start with the basics +hello_world diff --git a/runtime/README.md b/runtime/README.md new file mode 100644 index 0000000..5cf648e --- /dev/null +++ b/runtime/README.md @@ -0,0 +1,41 @@ +# Test suite for Open MPI runtime + +This test suite is meant to be able to be run stand-alone or under CI. 
+ +All of the tests that are intended for CI must be listed in the `.ci-tests` file. + +If the Open MPI build needs additional `configure` options those can be added to the `.ci-configure` file. + +## Running tests stand alone + + 1. Make sure that Open MPI and other required libraries are in your `PATH`/`LD_LIBRARY_PATH` + 2. Drop into a directory: + - Use the `build.sh` script to build any test articles + - Use the `run.sh` script to run the test program + + +## CI Environment Variables + +The CI infrastructure defines the following environment variables to be used in the test programs. These are defined during the `run.sh` phase and not the `build.sh` phase. + + * `CI_HOSTFILE` : Absolute path to the hostfile for this run. + * `CI_NUM_NODES` : Number of nodes in this cluster. + * `CI_OMPI_SRC` : top level directory of the Open MPI repository checkout. + * `CI_OMPI_TESTS_PUBLIC_DIR` : Top level directory of the [Open MPI Public Test](https://github.com/open-mpi/ompi-tests-public) repository checkout + * `OMPI_ROOT` : Open MPI install directory. + + +### Adding a new test for CI + + 1. Create a directory with your test. + - **Note**: Please make your test scripts such that they can be easily run with or without the CI environment variables. + 2. Create a build script named `build.sh` + - CI will call this exactly one time (with a timeout in case it hangs). + - If the script returns `0` then it is considered successful. Otherwise it is considered failed. + 3. Create a run script named `run.sh` + - The script is responsible for running your test including any runtime setup/shutdown and test result inspection. + - CI will call this exactly one time (with a timeout in case it hangs). + - If the script returns `0` then it is considered successful. Otherwise it is considered failed. + 4. Add your directory name to the `.ci-tests` file in this directory in the order that they should be executed. 
+ - Note that adding the directory is not sufficient to have CI run the test, it must be in the file. + - Comments (starting with `#`) are allowed. diff --git a/runtime/bin/cleanup-scrub-local.sh b/runtime/bin/cleanup-scrub-local.sh new file mode 100755 index 0000000..533a3e2 --- /dev/null +++ b/runtime/bin/cleanup-scrub-local.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +PROGS="prte prted prun mpirun timeout" + +clean_files() +{ + FILES=("pmix-*" "core*" "openmpi-sessions-*" "pmix_dstor_*" "ompi.*" "prrte.*" ) + + for fn in ${FILES[@]}; do + find /tmp/ -maxdepth 1 \ + -user $USER -a \ + -name $fn \ + -exec rm -rf {} \; + + if [ -n "$TMPDIR" ] ; then + find $TMPDIR -maxdepth 1 \ + -user $USER -a \ + -name $fn \ + -exec rm -rf {} \; + fi + done +} + +killall -q ${PROGS} > /dev/null +clean_files +killall -q -9 ${PROGS} > /dev/null + +exit 0 + + diff --git a/runtime/bin/cleanup.sh b/runtime/bin/cleanup.sh new file mode 100755 index 0000000..d5d634c --- /dev/null +++ b/runtime/bin/cleanup.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +clean_server() +{ + SERVER=$1 + ITER=$2 + MAX=$3 + QUIET=$4 + + SCRIPTDIR=$PWD/`dirname $0`/ + + if [[ $QUIET == 0 ]] ; then + echo "Cleaning server ($ITER / $MAX): $SERVER" + fi + ssh -oBatchMode=yes ${SERVER} ${SCRIPTDIR}/cleanup-scrub-local.sh +} + +if [[ "x" != "x$CI_HOSTFILE" && -f "$CI_HOSTFILE" ]] ; then + ALLHOSTS=(`cat $CI_HOSTFILE | sort | uniq`) +else + ALLHOSTS=(`hostname`) +fi +LEN=${#ALLHOSTS[@]} + +# Use a background mode if running at scale +USE_BG=0 +if [ $LEN -gt 10 ] ; then + USE_BG=1 +fi + +for (( i=0; i<${LEN}; i++ )); +do + if [ $USE_BG == 1 ] ; then + if [ $(($i % 100)) == 0 ] ; then + echo "| $i" + else + if [ $(($i % 10)) == 0 ] ; then + echo -n "|" + else + echo -n "." 
+ fi + fi + fi + + if [ $USE_BG == 1 ] ; then + clean_server ${ALLHOSTS[$i]} $i $LEN $USE_BG & + sleep 0.25 + else + clean_server ${ALLHOSTS[$i]} $i $LEN $USE_BG + echo "-------------------------" + fi +done + +if [ $USE_BG == 1 ] ; then + echo "" + echo "------------------------- Waiting" + wait +fi + +echo "------------------------- Done" + +exit 0 diff --git a/runtime/hello_world/build.sh b/runtime/hello_world/build.sh new file mode 100755 index 0000000..a9faece --- /dev/null +++ b/runtime/hello_world/build.sh @@ -0,0 +1,19 @@ +#!/bin/bash -e + +# Wrapper compiler +_MPICC=mpicc +echo "==========================" +echo "Wrapper compiler: $_MPICC" +echo "==========================" +${_MPICC} --showme + +echo "==========================" +echo "Building MPI Hello World" +echo "==========================" +cp ${CI_OMPI_SRC}/examples/hello_c.c . +${_MPICC} hello_c.c -o hello + +echo "==========================" +echo "Success" +echo "==========================" +exit 0 diff --git a/runtime/hello_world/run.sh b/runtime/hello_world/run.sh new file mode 100755 index 0000000..678b55a --- /dev/null +++ b/runtime/hello_world/run.sh @@ -0,0 +1,87 @@ +#!/bin/bash -xe + +# Final return value +FINAL_RTN=0 + +# Number of nodes - for accounting/verification purposes +# Default: 1 +NUM_NODES=${CI_NUM_NODES:-1} + +if [ "x" != "x${CI_HOSTFILE}" ] ; then + ARG_HOSTFILE="--hostfile ${CI_HOSTFILE}" +else + ARG_HOSTFILE="" +fi + +_shutdown() +{ + # --------------------------------------- + # Cleanup + # --------------------------------------- + + exit $FINAL_RTN +} + +# --------------------------------------- +# Run the test - Hostname +# --------------------------------------- +echo "==========================" +echo "Test: hostname" +echo "==========================" +mpirun ${ARG_HOSTFILE} --map-by ppr:5:node hostname 2>&1 | tee output-hn.txt + +# --------------------------------------- +# Verify the results +# --------------------------------------- +ERRORS=`grep ERROR 
output-hn.txt | wc -l` +if [[ $ERRORS -ne 0 ]] ; then + echo "ERROR: Error string detected in the output" + FINAL_RTN=1 + _shutdown +fi + +LINES=`wc -l output-hn.txt | awk '{print $1}'` +if [[ $LINES -ne $(( 5 * $NUM_NODES )) ]] ; then + echo "ERROR: Incorrect number of lines of output" + FINAL_RTN=2 + _shutdown +fi + +if [ $FINAL_RTN == 0 ] ; then + echo "Success - hostname" +fi + + +# --------------------------------------- +# Run the test - Hello World +# --------------------------------------- +echo "==========================" +echo "Test: Hello World" +echo "==========================" +mpirun ${ARG_HOSTFILE} --map-by ppr:5:node ./hello 2>&1 | tee output.txt + +# --------------------------------------- +# Verify the results +# --------------------------------------- +ERRORS=`grep ERROR output.txt | wc -l` +if [[ $ERRORS -ne 0 ]] ; then + echo "ERROR: Error string detected in the output" + FINAL_RTN=1 + _shutdown +fi + +LINES=`wc -l output.txt | awk '{print $1}'` +if [[ $LINES -ne $(( 5 * $NUM_NODES )) ]] ; then + echo "ERROR: Incorrect number of lines of output" + FINAL_RTN=2 + _shutdown +fi + +if [ $FINAL_RTN == 0 ] ; then + echo "Success - hello world" +fi + +echo "==========================" +echo "Success" +echo "==========================" +_shutdown From f4bdde1ddf12a1c7a280b8d7318665ec580c4e10 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Wed, 22 Feb 2023 12:27:54 -0500 Subject: [PATCH 2/2] Runtime CI: Add a tool to display the hwloc binding without relying on MPI or PMIx interfaces Signed-off-by: Joshua Hursey --- .gitignore | 2 + runtime/README.md | 2 +- runtime/bin/pretty-print-hwloc/.gitignore | 72 ++++ runtime/bin/pretty-print-hwloc/Makefile.am | 9 + runtime/bin/pretty-print-hwloc/README.md | 66 ++++ runtime/bin/pretty-print-hwloc/autogen.sh | 3 + runtime/bin/pretty-print-hwloc/configure.ac | 146 ++++++++ .../bin/pretty-print-hwloc/src/Makefile.am | 25 ++ .../pretty-print-hwloc/src/get-pretty-cpu.c | 163 +++++++++ 
.../autogen/hwloc_tools_config_bottom.h | 8 + .../include/autogen/hwloc_tools_config_top.h | 5 + .../pretty-print-hwloc/src/include/utils.h | 43 +++ runtime/bin/pretty-print-hwloc/src/support.c | 333 ++++++++++++++++++ 13 files changed, 876 insertions(+), 1 deletion(-) create mode 100644 runtime/bin/pretty-print-hwloc/.gitignore create mode 100644 runtime/bin/pretty-print-hwloc/Makefile.am create mode 100644 runtime/bin/pretty-print-hwloc/README.md create mode 100755 runtime/bin/pretty-print-hwloc/autogen.sh create mode 100644 runtime/bin/pretty-print-hwloc/configure.ac create mode 100644 runtime/bin/pretty-print-hwloc/src/Makefile.am create mode 100644 runtime/bin/pretty-print-hwloc/src/get-pretty-cpu.c create mode 100644 runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_bottom.h create mode 100644 runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_top.h create mode 100644 runtime/bin/pretty-print-hwloc/src/include/utils.h create mode 100644 runtime/bin/pretty-print-hwloc/src/support.c diff --git a/.gitignore b/.gitignore index 7fe1d18..a07f70b 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,5 @@ comm_split_type/cmsplit_type singleton/hello_c singleton/simple_spawn singleton/simple_spawn_multiple + +.vscode diff --git a/runtime/README.md b/runtime/README.md index 5cf648e..c5c69ec 100644 --- a/runtime/README.md +++ b/runtime/README.md @@ -37,5 +37,5 @@ The CI infrastructure defines the following environment variables to be used in - CI will call this exactly one time (with a timeout in case it hangs). - If the script returns `0` then it is considered successful. Otherwise it is considered failed. 4. Add your directory name to the `.ci-tests` file in this directory in the order that they should be executed. - - Note that adding the directory is not sufficient to have CI run the test, it must be in the file. + - Note that adding the directory is not sufficient to have CI run the test, it must be in the `.ci-tests` file. 
- Comments (starting with `#`) are allowed. diff --git a/runtime/bin/pretty-print-hwloc/.gitignore b/runtime/bin/pretty-print-hwloc/.gitignore new file mode 100644 index 0000000..5c000d7 --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/.gitignore @@ -0,0 +1,72 @@ +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +# Autoconf/Automake leftovers +autom4te.cache/ +compile +depcomp +aclocal.m4 +config.log +config.status +configure +install-sh +missing +.deps +.libs +*.in +Makefile +src/include/autogen/config.h* +src/include/autogen/stamp-h1 + +# Binary leftovers +src/get-pretty-cpu diff --git a/runtime/bin/pretty-print-hwloc/Makefile.am b/runtime/bin/pretty-print-hwloc/Makefile.am new file mode 100644 index 0000000..04e56a6 --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/Makefile.am @@ -0,0 +1,9 @@ +# +# High level Makefile +# +headers = +sources = +nodist_headers = +EXTRA_DIST = + +SUBDIRS = . 
src diff --git a/runtime/bin/pretty-print-hwloc/README.md b/runtime/bin/pretty-print-hwloc/README.md new file mode 100644 index 0000000..6cf65d7 --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/README.md @@ -0,0 +1,66 @@ +# Pretty Print HWLOC Process Binding + +## Building + +```shell +./autogen.sh +./configure --prefix=${YOUR_INSTALL_DIR} --with-hwloc=${HWLOC_INSTALL_PATH} +make +make install +```` + +## Running + +### Default: Print HWLOC bitmap + +```shell +shell$ get-pretty-cpu + 0/ 0 on c660f5n18) Process Bound : 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +``` + +```shell +shell$ hwloc-bind core:2 get-pretty-cpu + 0/ 0 on c660f5n18) Process Bound : 0x00ff0000 +``` + +```shell +shell$ mpirun -np 2 get-pretty-cpu + 0/ 2 on c660f5n18) Process Bound : 0x000000ff + 1/ 2 on c660f5n18) Process Bound : 0x0000ff00 +``` + +### Full descriptive output + +```shell +shell$ get-pretty-cpu -b -f + 0/ 0 on c660f5n18) Process Bound : socket 0[core 0[hwt 0-7]],socket 0[core 1[hwt 0-7]],socket 0[core 2[hwt 0-7]],socket 0[core 3[hwt 0-7]],socket 0[core 4[hwt 0-7]],socket 0[core 5[hwt 0-7]],socket 0[core 6[hwt 0-7]],socket 0[core 7[hwt 0-7]],socket 0[core 8[hwt 0-7]],socket 0[core 9[hwt 0-7]],socket 1[core 10[hwt 0-7]],socket 1[core 11[hwt 0-7]],socket 1[core 12[hwt 0-7]],socket 1[core 13[hwt 0-7]],socket 1[core 14[hwt 0-7]],socket 1[core 15[hwt 0-7]],socket 1[core 16[hwt 0-7]],socket 1[core 17[hwt 0-7]],socket 1[core 18[hwt 0-7]],socket 1[core 19[hwt 0-7]] +``` + +```shell +shell$ hwloc-bind core:2 get-pretty-cpu -b -f + 0/ 0 on c660f5n18) Process Bound : socket 0[core 2[hwt 0-7]] +``` + +```shell +shell$ mpirun -np 2 get-pretty-cpu -b -f + 1/ 2 on c660f5n18) Process Bound : socket 0[core 1[hwt 0-7]] + 0/ 2 on c660f5n18) Process Bound : socket 0[core 0[hwt 0-7]] +``` + +### Full descriptive bracketed output + +```shell +shell$ get-pretty-cpu -b -m + 0/ 0 on c660f5n18) Process Bound : 
[BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB][BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB/BBBBBBBB] +``` + +```shell +shell$ hwloc-bind core:2 get-pretty-cpu -b -m + 0/ 0 on c660f5n18) Process Bound : [......../......../BBBBBBBB/......../......../......../......../......../......../........][......../......../......../......../......../......../......../......../......../........] +``` + +```shell +shell$ mpirun -np 2 get-pretty-cpu -b -m + 1/ 2 on c660f5n18) Process Bound : [......../BBBBBBBB/......../......../......../......../......../......../......../........][......../......../......../......../......../......../......../......../......../........] + 0/ 2 on c660f5n18) Process Bound : [BBBBBBBB/......../......../......../......../......../......../......../......../........][......../......../......../......../......../......../......../......../......../........] +``` diff --git a/runtime/bin/pretty-print-hwloc/autogen.sh b/runtime/bin/pretty-print-hwloc/autogen.sh new file mode 100755 index 0000000..089f597 --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/autogen.sh @@ -0,0 +1,3 @@ +#!/bin/bash -e + +autoreconf -ivf diff --git a/runtime/bin/pretty-print-hwloc/configure.ac b/runtime/bin/pretty-print-hwloc/configure.ac new file mode 100644 index 0000000..e24391a --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/configure.ac @@ -0,0 +1,146 @@ +# +# +# + +###################### +# Project Information +###################### +AC_INIT([HWLOC Tools], + [0.1]) +AC_PREREQ(2.63) + + +###################### +# Utilities +###################### +_show_title() { + cat < + +]) +AH_BOTTOM([ + +#include +#endif /* HWLOC_TOOLS_CONFIG_H */ +]) + +AC_CONFIG_HEADERS([src/include/autogen/config.h]) + + +###################### +# Make automake clean emacs ~ files for "make clean" +###################### +CLEANFILES="*~ .\#*" +AC_SUBST(CLEANFILES) + + +###################### +# C Compiler 
+###################### +_show_title "Setup C Compiler" + +CFLAGS_save="$CFLAGS" +AC_PROG_CC +CFLAGS="$CFLAGS_save" + +AC_SUBST(CFLAGS) +AC_SUBST(CXXFLAGS) +AC_SUBST(CPPFLAGS) +AC_SUBST(LDFLAGS) +AC_SUBST(LIBS) + + +###################### +# HWLOC Install +###################### +_show_title "Setup HWLOC" + +# +# --with-hwloc=DIR +# --with-hwloc-libdir=DIR +# +AC_ARG_WITH([hwloc], + [AC_HELP_STRING([--with-hwloc=DIR], + [Search for hwloc headers and libraries in DIR ])]) + +AC_ARG_WITH([hwloc-libdir], + [AC_HELP_STRING([--with-hwloc-libdir=DIR], + [Search for hwloc libraries in DIR ])]) +# HWLOC is required +AS_IF([test "$with_hwloc" = "no"], + [AC_MSG_WARN([HWLOC is required. --without-hwloc is not supported.]) + AC_MSG_ERROR([Cannot continue])]) +AS_IF([test -z "$with_hwloc" || test "$with_hwloc" == "yes"], + [AC_MSG_WARN([HWLOC is required. Default search functionality not supported.]) + AC_MSG_ERROR([Cannot continue])]) + +AC_MSG_CHECKING([HWLOC Location]) +AC_MSG_RESULT([$with_hwloc]) + +CFLAGS="-I$with_hwloc/include $CFLAGS" + +# Do we really need '-ludev'? 
+#LDFLAGS="-ludev $LDFLAGS" + +AC_MSG_CHECKING([If static HWLOC library is available]) +AS_IF([test -f "$with_hwloc/lib/libhwloc.a" ], + [AC_MSG_RESULT([yes]) + LIBS="$with_hwloc/lib/libhwloc.a $LIBS"], + [LDFLAGS="-L$with_hwloc/lib $LDFLAGS" + LIBS="-lhwloc $LIBS"]) + +AC_MSG_CHECKING([Final CFLAGS]) +AC_MSG_RESULT([$CFLAGS]) + +AC_MSG_CHECKING([Final LDFLAGS]) +AC_MSG_RESULT([$LDFLAGS]) + +AC_MSG_CHECKING([Final LIBS]) +AC_MSG_RESULT([$LIBS]) + +###################### +# Makefile +###################### +AC_CONFIG_FILES([Makefile src/Makefile]) + +###################### +# Done +###################### +_show_title "All Done" +AC_OUTPUT + diff --git a/runtime/bin/pretty-print-hwloc/src/Makefile.am b/runtime/bin/pretty-print-hwloc/src/Makefile.am new file mode 100644 index 0000000..5ba6718 --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/src/Makefile.am @@ -0,0 +1,25 @@ +# +# +# + +headers = +sources = +nodist_headers = +EXTRA_DIST = + +AM_CPPFLAGS = -I./include/ +AM_LDFLAGS = -lm + +# Headers +headers += include/utils.h + +# Source +sources += \ + get-pretty-cpu.c \ + support.c + +bin_PROGRAMS = get-pretty-cpu + +get_pretty_cpu_SOURCES = $(sources) $(headers) +#get_pretty_cpu_CFLAGS = $(CFLAGS_HWLOC) +#get_pretty_cpu_LDADD = $(LIBS_HWLOC) diff --git a/runtime/bin/pretty-print-hwloc/src/get-pretty-cpu.c b/runtime/bin/pretty-print-hwloc/src/get-pretty-cpu.c new file mode 100644 index 0000000..212fc9a --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/src/get-pretty-cpu.c @@ -0,0 +1,163 @@ +#include +#include +#include + +#include + +#include "include/utils.h" + +static hwloc_topology_t topology; +static int global_rank, global_size; +static int local_rank, local_size; +static char hostname[HOST_NAME_MAX] = { '\0' }; + +static void display_message(char * fmt, ...); + +bool is_verbose = false; +bool is_quiet = false; +bool report_smallest = false; +bool report_full = false; +bool report_full_map = false; +bool report_hwloc_bind = true; + +int main(int argc, char 
**argv) +{ + hwloc_topology_t topology; + hwloc_bitmap_t bound_set; + hwloc_obj_t obj; + char type[64]; + int i; + char pretty_str[PRETTY_LEN]; + char *buffer_str = NULL; + char whoami_str[PRETTY_LEN]; + + + /* + * Simple arg parsing + */ + if( argc > 0 ) { + for( i = 1; i < argc; ++i ) { + if( 0 == strcmp(argv[i], "-v") || + 0 == strcmp(argv[i], "--v") || + 0 == strcmp(argv[i], "-verbose") || + 0 == strcmp(argv[i], "--verbose") ) { + is_verbose = true; + } + else if( 0 == strcmp(argv[i], "-q") || + 0 == strcmp(argv[i], "--q") || + 0 == strcmp(argv[i], "-quiet") || + 0 == strcmp(argv[i], "--quiet") ) { + is_quiet = true; + } + else if( 0 == strcmp(argv[i], "-s") || + 0 == strcmp(argv[i], "--s") || + 0 == strcmp(argv[i], "-smallest") || + 0 == strcmp(argv[i], "--smallest") ) { + report_smallest = true; + } + else if( 0 == strcmp(argv[i], "-f") || + 0 == strcmp(argv[i], "--f") || + 0 == strcmp(argv[i], "-full") || + 0 == strcmp(argv[i], "--full") ) { + report_full = true; + } + else if( 0 == strcmp(argv[i], "-m") || + 0 == strcmp(argv[i], "--m") || + 0 == strcmp(argv[i], "-map") || + 0 == strcmp(argv[i], "--map") ) { + report_full_map = true; + } + else if( 0 == strcmp(argv[i], "-b") || + 0 == strcmp(argv[i], "--b") || + 0 == strcmp(argv[i], "-no-bind") || + 0 == strcmp(argv[i], "--no-bind") ) { + report_hwloc_bind = false; + } + } + } + + gethostname(hostname, HOST_NAME_MAX); + + /* Get rank/size information from the launching environment */ + get_rank_size_info(&global_rank, &global_size, + &local_rank, &local_size); + sprintf(whoami_str, "%3d/%3d on %s) ", global_rank, global_size, hostname); + + /* Allocate and initialize topology object. */ + hwloc_topology_init(&topology); + + /* Perform the topology detection. 
*/ + hwloc_topology_load(topology); + + /* retrieve the CPU binding of the current entire process */ + bound_set = hwloc_bitmap_alloc(); + + hwloc_get_cpubind(topology, bound_set, HWLOC_CPUBIND_PROCESS); + + /* print the smallest object covering the current process binding */ + if( report_smallest ) { + obj = hwloc_get_obj_covering_cpuset(topology, bound_set); + if( NULL == obj ) { + display_message("Not bound\n"); + } else { + hwloc_obj_type_snprintf(type, sizeof(type), obj, 0); + display_message("Bound to \"%s\" logical index %u (physical index %u)\n", + type, obj->logical_index, obj->os_index); + } + } + + /* print the full descriptive output */ + if( report_full ) { + opal_hwloc_base_cset2str(pretty_str, PRETTY_LEN, topology, bound_set, true ); + if( is_verbose ) { + printf("%s Process Bound :\n%s\n", whoami_str, pretty_str); + } else { + printf("%s Process Bound : %s\n", whoami_str, pretty_str); + } + } + + /* print the full bracketed map output */ + if( report_full_map ) { + opal_hwloc_base_cset2mapstr(pretty_str, PRETTY_LEN, topology, bound_set); + if( is_verbose ) { + printf("%s Process Bound :\n%s\n", whoami_str, pretty_str); + } else { + printf("%s Process Bound : %s\n", whoami_str, pretty_str); + } + } + + /* print the hwloc binding bitmap */ + if( report_hwloc_bind ) { + hwloc_bitmap_asprintf(&buffer_str, bound_set); + if( is_verbose ) { + printf("%s Process Bound :\n%s\n", whoami_str, buffer_str); + } else { + printf("%s Process Bound : %s\n", whoami_str, buffer_str); + } + free(buffer_str); + } + + /* Destroy topology object. */ + hwloc_topology_destroy(topology); + + return 0; +} + + +static void display_message(char *fmt, ...) 
+{ + va_list args; + + printf("%3d/%3d on %s (%3d/%3d): ", + global_rank, global_size, + hostname, + local_rank, local_size); + va_start(args, fmt); + + vprintf(fmt, args); + if( '\n' != fmt[strlen(fmt)-1] ) { + printf("\n"); + } + + va_end(args); +} diff --git a/runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_bottom.h b/runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_bottom.h new file mode 100644 index 0000000..9bcd03a --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_bottom.h @@ -0,0 +1,8 @@ +/* + * + */ + +#include +#ifdef HAVE_SYS_PARAM_H +#include +#endif diff --git a/runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_top.h b/runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_top.h new file mode 100644 index 0000000..d27c99c --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/src/include/autogen/hwloc_tools_config_top.h @@ -0,0 +1,5 @@ +/* + * + */ + +// Empty diff --git a/runtime/bin/pretty-print-hwloc/src/include/utils.h b/runtime/bin/pretty-print-hwloc/src/include/utils.h new file mode 100644 index 0000000..8d97193 --- /dev/null +++ b/runtime/bin/pretty-print-hwloc/src/include/utils.h @@ -0,0 +1,43 @@ +/* + * + */ +#ifndef _UTILS_H +#define _UTILS_H + +#include +#include + +#ifndef HOST_NAME_MAX +#define HOST_NAME_MAX 64 +#endif + +#define PRETTY_LEN 1024 + +// Configure header +#include "include/autogen/config.h" + +/* + * Access the global/local rank/size from environment variables set by the launcher + */ +int get_rank_size_info(int *global_rank, int *global_size, + int *local_rank, int *local_size); + +/* + * Create a string representation of the binding + * 0/2 on node18) Process Bound : socket 0[core 1[hwt 0-7]],socket 1[core 10[hwt 0-7]] + */ +int opal_hwloc_base_cset2str(char *str, int len, + hwloc_topology_t topo, + hwloc_cpuset_t cpuset, + bool is_full); + +/* + * Create a string representation of the binding using a bracketed 
/*
 * Read an integer from the environment variable `name`.
 * Returns 0 when the variable is not set.
 */
static int env_int_or_zero(const char *name)
{
    const char *value = getenv(name);

    return (NULL == value) ? 0 : atoi(value);
}

/*
 * Access the global/local rank/size from environment variables set by the
 * launcher.
 *
 * The launchers are probed in order (JSM, ORTE/PRRTE, MVAPICH2); the first
 * one whose "size" variable is set supplies all four values. Any individual
 * variable that is missing yields 0 instead of being passed to atoi() as a
 * NULL pointer. When no launcher is detected all four outputs are 0.
 *
 * Always returns 0.
 */
int get_rank_size_info(int *global_rank, int *global_size,
                       int *local_rank, int *local_size)
{
    static const struct launcher_env {
        const char *rank;
        const char *size;
        const char *local_rank;
        const char *local_size;
    } launchers[] = {
        /* JSM */
        { "JSM_NAMESPACE_RANK", "JSM_NAMESPACE_SIZE",
          "JSM_NAMESPACE_LOCAL_RANK", "JSM_NAMESPACE_LOCAL_SIZE" },
        /* ORTE/PRRTE */
        { "OMPI_COMM_WORLD_RANK", "OMPI_COMM_WORLD_SIZE",
          "OMPI_COMM_WORLD_LOCAL_RANK", "OMPI_COMM_WORLD_LOCAL_SIZE" },
        /* MVAPICH2 */
        { "MV2_COMM_WORLD_RANK", "MV2_COMM_WORLD_SIZE",
          "MV2_COMM_WORLD_LOCAL_RANK", "MV2_COMM_WORLD_LOCAL_SIZE" },
    };
    const size_t num_launchers = sizeof(launchers) / sizeof(launchers[0]);
    size_t i;

    *global_rank = 0;
    *global_size = 0;
    *local_rank = 0;
    *local_size = 0;

    for (i = 0; i < num_launchers; ++i) {
        /* The "size" variable is the marker that this launcher is in use */
        if (NULL == getenv(launchers[i].size)) {
            continue;
        }
        *global_rank = env_int_or_zero(launchers[i].rank);
        *global_size = env_int_or_zero(launchers[i].size);
        *local_rank = env_int_or_zero(launchers[i].local_rank);
        *local_size = env_int_or_zero(launchers[i].local_size);
        break;
    }

    return 0;
}
getenv("MV2_COMM_WORLD_LOCAL_RANK"); + *local_rank = atoi(envar); + envar = getenv("MV2_COMM_WORLD_LOCAL_SIZE"); + *local_size = atoi(envar); + } + + return 0; +} + +int opal_hwloc_base_cset2str(char *str, int len, + hwloc_topology_t topo, + hwloc_cpuset_t cpuset, + bool is_full) +{ + bool first; + int num_sockets, num_cores; + int ret, socket_index, core_index; + char tmp[BUFSIZ]; + const int stmp = sizeof(tmp) - 1; + int **map=NULL; + //hwloc_obj_t root; + //opal_hwloc_topo_data_t *sum; + + str[0] = tmp[stmp] = '\0'; + + /* if the cpuset is all zero, then not bound */ + if (hwloc_bitmap_iszero(cpuset)) { + return -1; + } + + if (0 != (ret = build_map(&num_sockets, &num_cores, cpuset, &map, topo))) { + return ret; + } + /* Iterate over the data matrix and build up the string */ + first = true; + for (socket_index = 0; socket_index < num_sockets; ++socket_index) { + for (core_index = 0; core_index < num_cores; ++core_index) { + if (map[socket_index][core_index] > 0) { + if (!first) { + if( is_full ) { + //strncat(str, ",\n", len - strlen(str)); + strncat(str, ",", len - strlen(str)); + } else { + strncat(str, ",", len - strlen(str)); + } + } + first = false; + + if( is_full ) { + snprintf(tmp, stmp, "socket %d[core %2d[hwt %s]]", + socket_index, core_index, + bitmap2rangestr(map[socket_index][core_index])); + } else { + snprintf(tmp, stmp, "%2d", core_index); + } + strncat(str, tmp, len - strlen(str)); + } + } + } + if (NULL != map) { + if (NULL != map[0]) { + free(map[0]); + } + free(map); + } + + return 0; +} + +int opal_hwloc_base_cset2mapstr(char *str, int len, + hwloc_topology_t topo, + hwloc_cpuset_t cpuset) +{ + char tmp[BUFSIZ]; + int core_index, pu_index; + const int stmp = sizeof(tmp) - 1; + hwloc_obj_t socket, core, pu; + //hwloc_obj_t root; + //opal_hwloc_topo_data_t *sum; + + str[0] = tmp[stmp] = '\0'; + + /* if the cpuset is all zero, then not bound */ + if (hwloc_bitmap_iszero(cpuset)) { + return -1; //OPAL_ERR_NOT_BOUND; + } + + /* Iterate over all 
existing sockets */ + for (socket = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0); + NULL != socket; + socket = socket->next_cousin) { + strncat(str, "[", len - strlen(str)); + + /* Iterate over all existing cores in this socket */ + core_index = 0; + for (core = hwloc_get_obj_inside_cpuset_by_type(topo, + socket->cpuset, + HWLOC_OBJ_CORE, core_index); + NULL != core; + core = hwloc_get_obj_inside_cpuset_by_type(topo, + socket->cpuset, + HWLOC_OBJ_CORE, ++core_index)) { + if (core_index > 0) { + strncat(str, "/", len - strlen(str)); + } + + /* Iterate over all existing PUs in this core */ + pu_index = 0; + for (pu = hwloc_get_obj_inside_cpuset_by_type(topo, + core->cpuset, + HWLOC_OBJ_PU, pu_index); + NULL != pu; + pu = hwloc_get_obj_inside_cpuset_by_type(topo, + core->cpuset, + HWLOC_OBJ_PU, ++pu_index)) { + + /* Is this PU in the cpuset? */ + if (hwloc_bitmap_isset(cpuset, pu->os_index)) { + strncat(str, "B", len - strlen(str)); + } else { + strncat(str, ".", len - strlen(str)); + } + } + } + strncat(str, "]", len - strlen(str)); + } + + return 0; +} + +/* + * Make a map of socket/core/hwthread tuples + */ +static int build_map(int *num_sockets_arg, int *num_cores_arg, + hwloc_cpuset_t cpuset, int ***map, hwloc_topology_t topo) +{ + int num_sockets, num_cores; + int socket_index, core_index, pu_index; + hwloc_obj_t socket, core, pu; + int **data; + + /* Find out how many sockets we have */ + num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); + /* some systems (like the iMac) only have one + * socket and so don't report a socket + */ + if (0 == num_sockets) { + num_sockets = 1; + } + /* Lazy: take the total number of cores that we have in the + topology; that'll be more than the max number of cores + under any given socket */ + num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + *num_sockets_arg = num_sockets; + *num_cores_arg = num_cores; + + /* Alloc a 2D array: sockets x cores. 
*/ + data = malloc(num_sockets * sizeof(int *)); + if (NULL == data) { + return -1; //OPAL_ERR_OUT_OF_RESOURCE; + } + data[0] = calloc(num_sockets * num_cores, sizeof(int)); + if (NULL == data[0]) { + free(data); + return -1; //OPAL_ERR_OUT_OF_RESOURCE; + } + for (socket_index = 1; socket_index < num_sockets; ++socket_index) { + data[socket_index] = data[socket_index - 1] + num_cores; + } + + /* Iterate the PUs in this cpuset; fill in the data[][] array with + the socket/core/pu triples */ + for (pu_index = 0, + pu = hwloc_get_obj_inside_cpuset_by_type(topo, + cpuset, HWLOC_OBJ_PU, + pu_index); + NULL != pu; + pu = hwloc_get_obj_inside_cpuset_by_type(topo, + cpuset, HWLOC_OBJ_PU, + ++pu_index)) { + /* Go upward and find the core this PU belongs to */ + core = pu; + while (NULL != core && core->type != HWLOC_OBJ_CORE) { + core = core->parent; + } + core_index = 0; + if (NULL != core) { + core_index = core->logical_index; + } + + /* Go upward and find the socket this PU belongs to */ + socket = pu; + while (NULL != socket && socket->type != HWLOC_OBJ_SOCKET) { + socket = socket->parent; + } + socket_index = 0; + if (NULL != socket) { + socket_index = socket->logical_index; + } + + /* Save this socket/core/pu combo. LAZY: Assuming that we + won't have more PU's per core than (sizeof(int)*8). */ + data[socket_index][core_index] |= (1 << pu->sibling_rank); + } + + *map = data; + return 0; +} + +/* + * Turn an int bitmap to a "a-b,c" range kind of string + */ +static char *bitmap2rangestr(int bitmap) +{ + size_t i; + int range_start, range_end; + bool first, isset; + char tmp[BUFSIZ]; + const int stmp = sizeof(tmp) - 1; + static char ret[BUFSIZ]; + + memset(ret, 0, sizeof(ret)); + + first = true; + range_start = -999; + for (i = 0; i < sizeof(int) * 8; ++i) { + isset = (bitmap & (1 << i)); + + /* Do we have a running range? 
/*
 * Turn an int bitmap to a "a-b,c" range kind of string.
 *
 * Bit i of `bitmap` set means index i is present; consecutive indices are
 * collapsed into "first-last" and runs are separated by ",". A zero bitmap
 * gives the empty string.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call.
 */
static char *bitmap2rangestr(int bitmap)
{
    static char ret[BUFSIZ];
    /* Work on the unsigned bit pattern: "1 << 31" on a signed int is
     * undefined, "1u << 31" is not. */
    const unsigned int bits = (unsigned int) bitmap;
    const unsigned int nbits = (unsigned int) (sizeof(bits) * 8);
    size_t used = 0;
    unsigned int i = 0;

    ret[0] = '\0';

    while (i < nbits) {
        unsigned int range_start, range_end;
        const char *sep;
        int n;

        if (0 == (bits & (1u << i))) {
            ++i;
            continue;
        }

        /* Found the start of a run; extend it while the next bit is set */
        range_start = i;
        while (i + 1 < nbits && 0 != (bits & (1u << (i + 1)))) {
            ++i;
        }
        range_end = i;
        ++i;

        sep = (used > 0) ? "," : "";
        if (range_start == range_end) {
            n = snprintf(ret + used, sizeof(ret) - used, "%s%u",
                         sep, range_start);
        } else {
            n = snprintf(ret + used, sizeof(ret) - used, "%s%u-%u",
                         sep, range_start, range_end);
        }
        if (n < 0 || (size_t) n >= sizeof(ret) - used) {
            /* Should not happen for a 32-bit mask; keep what fits */
            ret[used] = '\0';
            break;
        }
        used += (size_t) n;
    }

    return ret;
}