Skip to content

Commit

Permalink
Add basic instrumentation support for GPU tracing libraries
Browse files Browse the repository at this point in the history
Add a simple instrumentation API built around the wallcycle regions
with implementations using NVIDIA NVTX, AMD ROCTX and Intel ITT libraries.

Tracing support can be enabled at build-time and allows the wallcycle
regions to show up in tracing tools which greatly aids performance analysis.

Implements #4446

Closes #4446
  • Loading branch information
pszi1ard authored and al42and committed Nov 6, 2023
1 parent d663540 commit 69085a9
Show file tree
Hide file tree
Showing 7 changed files with 480 additions and 126 deletions.
11 changes: 11 additions & 0 deletions docs/release-notes/2024/major/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,14 @@ It is set to 2 by default for increased stability.

If the TPR was generated with an earlier |Gromacs| version,
the old default value of 3 will be used.


Added support for instrumentation based on wallcycle regions using NVTX/ROCTX/ITT
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Basic support has been added for GPU tracing libraries, so that wallcycle main regions and
sub-regions show up in tracing timelines, which can help with performance analysis.
Tracing instrumentation is enabled at build time by setting one of the following CMake variables
(they are mutually exclusive; at most one may be enabled):
``GMX_USE_NVTX``, ``GMX_USE_ROCTX``, ``GMX_USE_ITT``.

:issue:`4446`
9 changes: 9 additions & 0 deletions src/config.h.cmakein
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,15 @@
/* Add support for tracing using Extrae */
#cmakedefine01 HAVE_EXTRAE

/* Enable NVIDIA NVTX instrumentation */
#cmakedefine01 GMX_USE_NVTX

/* Enable AMD ROCTX instrumentation */
#cmakedefine01 GMX_USE_ROCTX

/* Enable Intel ITT instrumentation */
#cmakedefine01 GMX_USE_ITT

/* Use MPI (with mpicc) for parallelization */
#cmakedefine01 GMX_LIB_MPI

Expand Down
55 changes: 55 additions & 0 deletions src/gromacs/timing/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,61 @@ target_link_libraries(timing INTERFACE
legacy_api
)

# Tracing support for NVTX / ROCTX / ITT. Header/library detection is deliberately basic:
# it is sufficient for most use-cases, and this is a developer feature anyway.
# NVIDIA NVTX: locate nvToolsExt.h and the nvToolsExt shared library, then attach both
# to the "timing" interface target so every consumer of "timing" gets them.
if(GMX_USE_NVTX)
# Header lookup: hinted by $CUDA_HOME and CUDA_TOOLKIT_ROOT_DIR, trying the
# "include" and "include/nvtx3" sub-directories. REQUIRED makes a miss fatal.
find_path (NVTX_INCLUDE_DIR
NAMES nvToolsExt.h
HINTS ENV CUDA_HOME "${CUDA_TOOLKIT_ROOT_DIR}"
PATH_SUFFIXES include include/nvtx3
REQUIRED
)
# Library lookup: same hints, trying "lib64" before "lib". Only the shared
# object name is listed, so a static-only installation is not picked up.
find_library (NVTX_LIBRARY
NAMES libnvToolsExt.so
HINTS ENV CUDA_HOME "${CUDA_TOOLKIT_ROOT_DIR}"
PATH_SUFFIXES lib64 lib
REQUIRED
)
target_include_directories(timing INTERFACE "${NVTX_INCLUDE_DIR}")
target_link_libraries(timing INTERFACE "${NVTX_LIBRARY}")
# As of CUDA 11.8, there are a lot of old-style casts in nvToolsExt.h
gmx_target_interface_warning_suppression(timing "-Wno-old-style-cast" HAS_WARNING_NO_OLD_STYLE_CAST)
endif()

# AMD ROCTX: locate roctracer/roctx.h and the roctx64 shared library, then attach both
# to the "timing" interface target so every consumer of "timing" gets them.
if(GMX_USE_ROCTX)
# Header lookup: hinted by $ROCM_PATH, $ROCM_HOME and HIPSYCL_SYCLCC_ROCM_PATH (in that
# order). The result is the directory that contains the "roctracer" sub-directory,
# matching the '#include "roctracer/roctx.h"' used by the instrumentation header.
find_path (ROCTX_INCLUDE_DIR
NAMES roctracer/roctx.h
HINTS ENV ROCM_PATH ENV ROCM_HOME "${HIPSYCL_SYCLCC_ROCM_PATH}"
PATH_SUFFIXES include
REQUIRED
)
# Library lookup: same hints; the roctracer-specific lib directories are tried
# before the generic "lib64" / "lib". REQUIRED makes a miss fatal.
find_library (ROCTX_LIBRARY
NAMES libroctx64.so
HINTS ENV ROCM_PATH ENV ROCM_HOME "${HIPSYCL_SYCLCC_ROCM_PATH}"
PATH_SUFFIXES roctracer/lib64 roctracer/lib lib64 lib
REQUIRED
)
target_include_directories(timing INTERFACE "${ROCTX_INCLUDE_DIR}")
target_link_libraries(timing INTERFACE "${ROCTX_LIBRARY}")
endif()

# Intel ITT: locate ittnotify.h and the static ittnotify library (hinted by the
# VTUNE_PROFILER_DIR environment variable), then attach both to the "timing"
# interface target so every consumer of "timing" gets them.
if(GMX_USE_ITT)
    find_path (ITTNOTIFY_INCLUDE_DIR
        NAMES ittnotify.h
        HINTS ENV VTUNE_PROFILER_DIR
        PATH_SUFFIXES include
        REQUIRED
        )
    find_library (ITTNOTIFY_LIBRARY
        NAMES libittnotify.a # We need the static library
        HINTS ENV VTUNE_PROFILER_DIR
        PATH_SUFFIXES lib64 lib
        REQUIRED
        )
    # SYSTEM has to precede the scope keyword. When it trails the directory list, CMake
    # parses it as one more include directory (literally named "SYSTEM") and the ITT
    # headers are not treated as system headers.
    target_include_directories(timing SYSTEM INTERFACE "${ITTNOTIFY_INCLUDE_DIR}")
    target_link_libraries(timing INTERFACE "${ITTNOTIFY_LIBRARY}")
endif()

# TODO: when timing is an OBJECT target
#target_link_libraries(timing PUBLIC legacy_api)
#target_link_libraries(timing PRIVATE common)
Expand Down
225 changes: 225 additions & 0 deletions src/gromacs/timing/include/gromacs/timing/instrumentation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/*
* This file is part of the GROMACS molecular simulation package.
*
* Copyright 2023- The GROMACS Authors
* and the project initiators Erik Lindahl, Berk Hess and David van der Spoel.
* Consult the AUTHORS/COPYING files and https://www.gromacs.org for details.
*
* GROMACS is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* GROMACS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with GROMACS; if not, see
* https://www.gnu.org/licenses, or write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* If you want to redistribute modifications to GROMACS, please
* consider that scientific software is very special. Version
* control is crucial - bugs must be traceable. We will be happy to
* consider code for inclusion in the official distribution, but
* derived work must not be called official GROMACS. Details are found
* in the README & COPYING files - if they are missing, get the
* official version at https://www.gromacs.org.
*
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out https://www.gromacs.org.
*/

/*! \internal \file
*
* \brief
* Define basic tracing API for manual instrumentation.
*
* This header implements a simple set of tracing range start/stop functions
* which can be used for manual instrumentation of application code.
* Since current use is only through the wallcycle module, we define two
* sets of start/stop functions corresponding to the main and sub-counters
* in the wallcycle module.
*
* The current implementation supports the following tracing APIs:
* - NVIDIA NVTX
* - AMD ROCTX
* - Intel ITT
*
* \author Szilárd Páll <[email protected]>
* \author Andrey Alekseenko <[email protected]>
*
*/

#include "gromacs/utility/basedefinitions.h"


//
// Forward declarations of the tracing functions with the inlineable definitions for each tracing API below.
//

/*! \brief Start a main tracing region.
 *
 * How the arguments are used depends on the tracing backend selected at build time:
 * - NVTX: \p rangeName labels the range and \p rangeId selects its color
 *   (\p rangeId modulo the number of main-range colors).
 * - ROCTX: \p rangeName labels the range; \p rangeId is ignored.
 * - ITT: \p rangeId indexes \c g_ittCounterHandles to select the task name;
 *   \p rangeName is ignored.
 * - No backend enabled: the call is a no-op.
 *
 * \param[in] rangeName String containing the name of the traced range.
 * \param[in] rangeId   Numeric ID of the range.
 */
static void traceRangeStart(const char* rangeName, int rangeId);


/*! \brief Start a tracing sub-region.
 *
 * How the arguments are used depends on the tracing backend selected at build time:
 * - NVTX: \p rangeName labels the range and \p rangeId selects its color
 *   (\p rangeId modulo the number of sub-range colors).
 * - ROCTX: \p rangeName labels the range; \p rangeId is ignored.
 * - ITT: \p rangeId indexes \c g_ittSubCounterHandles to select the task name;
 *   \p rangeName is ignored.
 * - No backend enabled: the call is a no-op.
 *
 * \param[in] rangeName String containing the name of the traced range.
 * \param[in] rangeId   Numeric ID of the range.
 */
static void traceSubRangeStart(const char* rangeName, int rangeId);

/*! \brief End a main tracing region.
 *
 * Note that this should always be paired with a traceRangeStart().
 *
 * The function takes no range identifier. Each backend issues the same call here as in
 * traceSubRangeEnd() (nvtxRangePop(), roctxRangePop() or __itt_task_end() on
 * \c g_ittDomain), so main and sub-regions presumably have to be closed in the reverse
 * order of opening.
 */
static void traceRangeEnd();

/*! \brief End a tracing sub-region.
 *
 * Note that this should always be paired with a traceSubRangeStart().
 *
 * The function takes no range identifier. Each backend issues the same call here as in
 * traceRangeEnd() (nvtxRangePop(), roctxRangePop() or __itt_task_end() on
 * \c g_ittDomain), so main and sub-regions presumably have to be closed in the reverse
 * order of opening.
 */
static void traceSubRangeEnd();

#if (GMX_USE_NVTX + GMX_USE_ROCTX + GMX_USE_ITT) > 1
# error "Cannot have multiple instrumentation flavors enabled at the same time"
#endif


#if GMX_USE_NVTX

# include "nvToolsExt.h"

//! ARGB palette cycled through for main ranges (alpha 0xff)
static constexpr uint32_t c_rangeColors[] = {
    0xff00ff00, // green
    0xff0000ff, // blue
    0xffffff00, // yellow
    0xffff00ff, // magenta
    0xff00ffff, // cyan
    0xffff0000, // red
    0xffffffff  // white
};
//! Number of entries in the main-range palette
static constexpr int c_numRangeColors = sizeof(c_rangeColors) / sizeof(c_rangeColors[0]);

//! ARGB palette cycled through for sub-ranges (same hues as the main ranges, alpha 0x99)
static constexpr uint32_t c_subRangeColors[] = {
    0x9900ff00, // green
    0x990000ff, // blue
    0x99ffff00, // yellow
    0x99ff00ff, // magenta
    0x9900ffff, // cyan
    0x99ff0000, // red
    0x99ffffff  // white
};
//! Number of entries in the sub-range palette
static constexpr int c_numSubRangeColors = sizeof(c_subRangeColors) / sizeof(c_subRangeColors[0]);

// NVTX flavor: push a main range labelled rangeName, colored by rangeId.
//
// rangeName is handed to NVTX as an ASCII message; rangeId only selects an entry of
// c_rangeColors, wrapping around when it exceeds the palette size.
static void traceRangeStart(const char* rangeName, int rangeId)
{
    // The remainder takes the sign of rangeId, so fold a negative result back into
    // [0, c_numRangeColors) instead of reading in front of the palette.
    int colorId = rangeId % c_numRangeColors;
    if (colorId < 0)
    {
        colorId += c_numRangeColors;
    }

    nvtxEventAttributes_t eventAttrib = { 0 };
    // Version and size identify the attribute struct layout to NVTX
    eventAttrib.version = NVTX_VERSION;
    eventAttrib.size    = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    // Color
    eventAttrib.colorType = NVTX_COLOR_ARGB;
    eventAttrib.color     = c_rangeColors[colorId];
    // Label
    eventAttrib.messageType   = NVTX_MESSAGE_TYPE_ASCII;
    eventAttrib.message.ascii = rangeName;

    nvtxRangePushEx(&eventAttrib);
}

// NVTX flavor: push a sub-range labelled rangeName, colored by rangeId.
//
// rangeName is handed to NVTX as an ASCII message; rangeId only selects an entry of
// c_subRangeColors, wrapping around when it exceeds the palette size.
static void traceSubRangeStart(const char* rangeName, int rangeId)
{
    // The remainder takes the sign of rangeId, so fold a negative result back into
    // [0, c_numSubRangeColors) instead of reading in front of the palette.
    int colorId = rangeId % c_numSubRangeColors;
    if (colorId < 0)
    {
        colorId += c_numSubRangeColors;
    }

    nvtxEventAttributes_t eventAttrib = { 0 };
    // Version and size identify the attribute struct layout to NVTX
    eventAttrib.version = NVTX_VERSION;
    eventAttrib.size    = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    // Color
    eventAttrib.colorType = NVTX_COLOR_ARGB;
    eventAttrib.color     = c_subRangeColors[colorId];
    // Label
    eventAttrib.messageType   = NVTX_MESSAGE_TYPE_ASCII;
    eventAttrib.message.ascii = rangeName;

    nvtxRangePushEx(&eventAttrib);
}

// NVTX flavor: close a main range by popping the NVTX range stack.
static void traceRangeEnd()
{
    // The value returned by nvtxRangePop() is intentionally ignored
    (void)nvtxRangePop();
}

// NVTX flavor: close a sub-range by popping the NVTX range stack.
static void traceSubRangeEnd()
{
    // The value returned by nvtxRangePop() is intentionally ignored
    (void)nvtxRangePop();
}

#elif GMX_USE_ROCTX

# include "roctracer/roctx.h"

// ROCTX flavor: open a main range labelled rangeName; the numeric ID is not used.
static void traceRangeStart(const char* rangeName, int /*rangeId*/)
{
    // The value returned by roctxRangePush() is intentionally ignored
    (void)roctxRangePush(rangeName);
}

// ROCTX flavor: open a sub-range labelled rangeName; the numeric ID is not used.
static void traceSubRangeStart(const char* rangeName, int /*rangeId*/)
{
    // The value returned by roctxRangePush() is intentionally ignored
    (void)roctxRangePush(rangeName);
}

// ROCTX flavor: close a main range by popping the ROCTX range stack.
static void traceRangeEnd()
{
    // The value returned by roctxRangePop() is intentionally ignored
    (void)roctxRangePop();
}

// ROCTX flavor: close a sub-range by popping the ROCTX range stack.
static void traceSubRangeEnd()
{
    // The value returned by roctxRangePop() is intentionally ignored
    (void)roctxRangePop();
}

#elif GMX_USE_ITT

# ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wold-style-cast"
# pragma clang diagnostic ignored "-Wnewline-eof"
# endif
# include <ittnotify.h>
# ifdef __clang__
# pragma clang diagnostic pop
# endif

// Defined in wallcycle.cpp, initialized in wallcycle_init
// ITT domain passed to every __itt_task_begin() / __itt_task_end() call in this header.
extern const __itt_domain* g_ittDomain;
// Task-name handles for main ranges, indexed by the rangeId given to traceRangeStart().
// Declared without a bound, so no range check on rangeId is possible in this header.
extern __itt_string_handle* g_ittCounterHandles[];
// Task-name handles for sub-ranges, indexed by the rangeId given to traceSubRangeStart().
// Declared without a bound, so no range check on rangeId is possible in this header.
extern __itt_string_handle* g_ittSubCounterHandles[];

static void traceRangeStart(const char* /*rangeName*/, int rangeId)
{
__itt_task_begin(g_ittDomain, __itt_null, __itt_null, g_ittCounterHandles[rangeId]);
}

static void traceSubRangeStart(const char* /*rangeName*/, int rangeId)
{
__itt_task_begin(g_ittDomain, __itt_null, __itt_null, g_ittSubCounterHandles[rangeId]);
}

static void traceRangeEnd()
{
__itt_task_end(g_ittDomain);
}

static void traceSubRangeEnd()
{
__itt_task_end(g_ittDomain);
}

#else

// No tracing backend enabled: starting a main range does nothing.
gmx_unused static void traceRangeStart(const char* rangeName, int rangeId)
{
    // Arguments are deliberately unused
    (void)rangeName;
    (void)rangeId;
}
// No tracing backend enabled: starting a sub-range does nothing.
gmx_unused static void traceSubRangeStart(const char* rangeName, int rangeId)
{
    // Arguments are deliberately unused
    (void)rangeName;
    (void)rangeId;
}

// No tracing backend enabled: ending a main range does nothing.
gmx_unused static void traceRangeEnd()
{
}
// No tracing backend enabled: ending a sub-range does nothing.
gmx_unused static void traceSubRangeEnd()
{
}


#endif
Loading

0 comments on commit 69085a9

Please sign in to comment.