diff --git a/CMakeLists.txt b/CMakeLists.txt index 69f2fb2f1..0178ed0b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ endif() include(cmake/configure_variorum.cmake) set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER}) -set(KOKKOSTOOLS_HAS_NVPROF ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvprof should be available +set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available if(DEFINED ENV{VTUNE_HOME}) set(VTune_ROOT $ENV{VTUNE_HOME}) @@ -180,7 +180,7 @@ endif() # GPU profilers if(Kokkos_ENABLE_CUDA) add_subdirectory(profiling/nvprof-connector) - add_subdirectory(profiling/nvprof-focused-connector) + add_subdirectory(profiling/nvtx-focused-connector) endif() if(Kokkos_ENABLE_HIP) add_subdirectory(profiling/roctx-connector) diff --git a/build-all.sh b/build-all.sh index 5cd77240a..6c58f3b64 100644 --- a/build-all.sh +++ b/build-all.sh @@ -10,7 +10,7 @@ make -f $ROOT_DIR/profiling/memory-hwm/Makefile make -f $ROOT_DIR/profiling/memory-hwm-mpi/Makefile make -f $ROOT_DIR/profiling/memory-usage/Makefile make -f $ROOT_DIR/profiling/nvprof-connector/Makefile -make -f $ROOT_DIR/profiling/nvprof-focused-connector/Makefile +make -f $ROOT_DIR/profiling/nvtx-focused-connector/Makefile make -f $ROOT_DIR/profiling/papi-connector/Makefile make -f $ROOT_DIR/profiling/simple-kernel-timer-json/Makefile make -f $ROOT_DIR/profiling/simple-kernel-timer/Makefile diff --git a/common/kernel-filter/CMakeLists.txt b/common/kernel-filter/CMakeLists.txt index ae5cad488..efe6d7c6b 100644 --- a/common/kernel-filter/CMakeLists.txt +++ b/common/kernel-filter/CMakeLists.txt @@ -1 +1 @@ -add_library(kp_kernel_filter ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_filter.cpp) \ No newline at end of file +kp_add_library(kp_kernel_filter ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_filter.cpp) diff --git a/common/kernel-filter/kp_kernel_filter.cpp b/common/kernel-filter/kp_kernel_filter.cpp index 30efeb36a..341682348 100644 --- a/common/kernel-filter/kp_kernel_filter.cpp +++ b/common/kernel-filter/kp_kernel_filter.cpp @@ -101,22 +101,17 @@ extern "C" void kokkosp_init_library(const int loadSeq, printf("============================================================\n"); printf("KokkosP: Filter File: %s\n", kernelFilterPath); printf("============================================================\n"); - FILE* kernelFilterFile = fopen(kernelFilterPath, "rt"); - if (NULL == kernelFilterFile) { fprintf(stderr, "Unable to open kernel filter: %s\n", kernelFilterPath); exit(-1); } else { char* lineBuffer = (char*)malloc(sizeof(char) * 65536); - while (kokkospReadLine(kernelFilterFile, lineBuffer)) { printf("KokkosP: Filter [%s]\n", lineBuffer); - std::regex nextRegEx(lineBuffer, std::regex::optimize); kernelNames.push_back(nextRegEx); } - free(lineBuffer); } @@ -126,63 +121,73 @@ extern "C" void kokkosp_init_library(const int loadSeq, (filterKernels ? "enabled" : "disabled")); if (filterKernels) { - char* profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); - char* envBuffer = - (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); - strcpy(envBuffer, profileLibrary); - - char* nextLibrary = strtok(envBuffer, ";"); + char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); + // check deprecated environment variable. + if (NULL == profileLibrary) { + printf( + "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a deprecated " + "variable. Please use KOKKOS_TOOLS_LIBS.\n"); + profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); + + if (NULL == profileLibrary) { + printf("KokkosP: No library to call in %s\n", profileLibrary); + exit(-1); + } - for (int i = 0; i < loadSeq; i++) { - nextLibrary = strtok(NULL, ";"); - } + char* envBuffer = + (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); + strcpy(envBuffer, profileLibrary); - nextLibrary = strtok(NULL, ";"); + char* nextLibrary = strtok(envBuffer, ";"); - if (NULL == nextLibrary) { - printf("KokkosP: No child library to call in %s\n", profileLibrary); - } else { - printf("KokkosP: Next library to call: %s\n", nextLibrary); - printf("KokkosP: Loading child library ..\n"); + for (int i = 0; i < loadSeq; i++) { + nextLibrary = strtok(NULL, ";"); + } - void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL); + nextLibrary = strtok(NULL, ";"); - if (NULL == childLibrary) { - fprintf(stderr, "KokkosP: Error: Unable to load: %s (Error=%s)\n", - nextLibrary, dlerror()); + if (NULL == nextLibrary) { + printf("KokkosP: No child library to call in %s\n", profileLibrary); } else { - beginForCallee = - (beginFunction)dlsym(childLibrary, "kokkosp_begin_parallel_for"); - beginScanCallee = - (beginFunction)dlsym(childLibrary, "kokkosp_begin_parallel_scan"); - beginReduceCallee = (beginFunction)dlsym( - childLibrary, "kokkosp_begin_parallel_reduce"); - - endScanCallee = - (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_scan"); - endForCallee = - (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_for"); - endReduceCallee = - (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_reduce"); - - initProfileLibrary = - (initFunction)dlsym(childLibrary, "kokkosp_init_library"); - finalizeProfileLibrary = - (finalizeFunction)dlsym(childLibrary, "kokkosp_finalize_library"); - - if (NULL != initProfileLibrary) { - (*initProfileLibrary)(loadSeq + 1, interfaceVer, devInfoCount, - deviceInfo); + printf("KokkosP: Next library to call: %s\n", nextLibrary); + printf("KokkosP: Loading child library ..\n"); + + void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL); + + if (NULL == childLibrary) { + fprintf(stderr, "KokkosP: Error: Unable to load: %s (Error=%s)\n", + nextLibrary, dlerror()); + } else { + beginForCallee = (beginFunction)dlsym(childLibrary, + "kokkosp_begin_parallel_for"); + beginScanCallee = (beginFunction)dlsym( + childLibrary, "kokkosp_begin_parallel_scan"); + beginReduceCallee = (beginFunction)dlsym( + childLibrary, "kokkosp_begin_parallel_reduce"); + endScanCallee = + (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_scan"); + endForCallee = + (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_for"); + endReduceCallee = + (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_reduce"); + initProfileLibrary = + (initFunction)dlsym(childLibrary, "kokkosp_init_library"); + finalizeProfileLibrary = (finalizeFunction)dlsym( + childLibrary, "kokkosp_finalize_library"); + + if (NULL != initProfileLibrary) { + (*initProfileLibrary)(loadSeq + 1, interfaceVer, devInfoCount, + deviceInfo); + } } + free(envBuffer); } } - - free(envBuffer); } - printf("============================================================\n"); } -} + +} // end kokkosp_init_library extern "C" void kokkosp_finalize_library() { if (NULL != finalizeProfileLibrary) { diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 773753f8b..86b0f6a63 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -133,14 +133,11 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } free(envBuffer); - - uniqID = 1; - + uniqID = 1; const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); if (NULL != tool_sample) { kernelSampleSkip = atoi(tool_sample) + 1; } - if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); } @@ -152,14 +149,11 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = uniqID++; - if (((*kID) % kernelSampleSkip) == 0) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); } - if (NULL != beginForCallee) { (*beginForCallee)(name, devID, kID); } @@ -181,8 +175,6 @@ void kokkosp_end_parallel_for(const uint64_t kID) { void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = uniqID++; - if (((*kID) % kernelSampleSkip) == 0) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", diff --git a/profiling/all/CMakeLists.txt b/profiling/all/CMakeLists.txt index ce8b13e27..786f2c2a5 100644 --- a/profiling/all/CMakeLists.txt +++ b/profiling/all/CMakeLists.txt @@ -19,4 +19,4 @@ endif() file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS kp_all.hpp "${COMMON_HEADERS_PATH}/*.hpp") install(FILES ${HEADER_FILES} DESTINATION ${EXPORT_INCLUDE_DIR}) -install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME}) \ No newline at end of file +install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME}) diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index 6199cefcb..f839245c9 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -50,7 +50,7 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector) #endif #ifdef KOKKOSTOOLS_HAS_NVPROF KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfConnector) -KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfFocusedConnector) +KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector) #endif #ifdef KOKKOSTOOLS_HAS_CALIPER namespace cali { @@ -91,9 +91,8 @@ EventSet get_event_set(const char* profiler, const char* config_str) { handlers["caliper"] = cali::get_kokkos_event_set(config_str); #endif #ifdef KOKKOSTOOLS_HAS_NVPROF - handlers["nvprof-connector"] = NVProfConnector::get_event_set(); - handlers["nvprof-focused-connector"] = - NVProfFocusedConnector::get_event_set(); + handlers["nvprof-connector"] = NVProfConnector::get_event_set(); + handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set(); #endif auto e = handlers.find(profiler); if (e != handlers.end()) return e->second; diff --git a/profiling/nvprof-focused-connector/CMakeLists.txt b/profiling/nvprof-focused-connector/CMakeLists.txt deleted file mode 100644 index 072198bf5..000000000 --- a/profiling/nvprof-focused-connector/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -find_package(CUDAToolkit REQUIRED) -kp_add_library(kp_nvprof_focused_connector kp_nvprof_focused_connector.cpp) - -target_link_libraries(kp_nvprof_focused_connector CUDA::nvToolsExt) \ No newline at end of file diff --git a/profiling/nvprof-focused-connector/Makefile b/profiling/nvprof-focused-connector/Makefile deleted file mode 100644 index 06628279d..000000000 --- a/profiling/nvprof-focused-connector/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -CXX=g++ -CXXFLAGS=-O3 -std=c++11 -g -I$(CUDA_ROOT)/include -I./ -LDFLAGS=-L$(CUDA_ROOT)/lib64 -LIBS=-lnvToolsExt -lcudart -SHARED_CXXFLAGS=-shared -fPIC - -all: kp_nvprof_focused_connector.so - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all - -kp_nvprof_focused_connector.so: ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp - $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ - -o $@ ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp $(LIBS) - -clean: - rm *.so diff --git a/profiling/nvtx-focused-connector/CMakeLists.txt b/profiling/nvtx-focused-connector/CMakeLists.txt new file mode 100644 index 000000000..e75e93469 --- /dev/null +++ b/profiling/nvtx-focused-connector/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(CUDAToolkit REQUIRED) +kp_add_library(kp_nvtx_focused_connector kp_nvtx_focused_connector.cpp) + +target_link_libraries(kp_nvtx_focused_connector CUDA::nvToolsExt) diff --git a/profiling/nvtx-focused-connector/Makefile b/profiling/nvtx-focused-connector/Makefile new file mode 100644 index 000000000..8f2b36946 --- /dev/null +++ b/profiling/nvtx-focused-connector/Makefile @@ -0,0 +1,18 @@ +CXX=g++ +CXXFLAGS=-O3 -std=c++11 -g -I$(CUDA_ROOT)/include -I./ +LDFLAGS=-L$(CUDA_ROOT)/lib64 +LIBS= +SHARED_CXXFLAGS=-shared -fPIC + +all: kp_nvtx_focused_connector.so + +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) + +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}../../common/makefile-only -I${MAKEFILE_PATH}../all + +kp_nvtx_focused_connector.so: ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp + $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ + -o $@ ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(LIBS) + +clean: + rm *.so diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp similarity index 68% rename from profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp rename to profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index 9d971db76..4771e45c8 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -23,15 +23,26 @@ #include #include -#include "kp_nvprof_focused_connector_domain.h" +#include "kp_nvtx_focused_connector_domain.h" #include "kp_core.hpp" +static bool tool_globfences; namespace KokkosTools { -namespace NVProfFocusedConnector { +namespace NVTXFocusedConnector { -static KernelNVProfFocusedConnectorInfo* currentKernel; -static std::unordered_map +void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings* settings) { + settings->requires_global_fencing = true; + if (tool_globfences) { + settings->requires_global_fencing = true; + } else { + settings->requires_global_fencing = false; + } +} // end request tool settings + +static KernelNVTXFocusedConnectorInfo* currentKernel; +static std::unordered_map domain_map; static uint64_t nextKernelID; @@ -41,52 +52,61 @@ void kokkosp_init_library( struct Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { printf("-----------------------------------------------------------\n"); printf( - "KokkosP: NVProf Analyzer Focused Connector (sequence is %d, version: " + "KokkosP: NVTX Analyzer Focused Connector (sequence is %d, version: " "%llu)\n", loadSeq, (unsigned long long)(interfaceVer)); printf("-----------------------------------------------------------\n"); - + const char* tool_global_fences = getenv("KOKKOS_TOOLS_GLOBALFENCES"); + if (NULL != tool_global_fences) { + tool_globfences = + (atoi(tool_global_fences) != 0); // if user sets to 0, no global fences + } else { + tool_globfences = + true; // default to true to be conservative for capturing state by tool + } + nvtxNameOsThread(pthread_self(), "Application Main Thread"); + nvtxMarkA("Kokkos::Initialization Complete"); nextKernelID = 0; -} +} // end kokkosp_init_library -KernelNVProfFocusedConnectorInfo* getFocusedConnectorInfo( +KernelNVTXFocusedConnectorInfo* getFocusedConnectorInfo( const char* name, KernelExecutionType kType) { std::string nameStr(name); auto kDomain = domain_map.find(nameStr); currentKernel = NULL; if (kDomain == domain_map.end()) { - currentKernel = new KernelNVProfFocusedConnectorInfo(name, kType); - domain_map.insert(std::pair( + currentKernel = new KernelNVTXFocusedConnectorInfo(name, kType); + domain_map.insert(std::pair( nameStr, currentKernel)); } else { currentKernel = kDomain->second; } return currentKernel; -} +} // end getFocusedConnectorInfo void focusedConnectorExecuteStart() { cudaProfilerStart(); currentKernel->startRange(); -} +} // end focusedConnectorExecuteStart void focusedConnectorExecuteEnd() { currentKernel->endRange(); cudaProfilerStop(); currentKernel = NULL; -} +} // end focusedConnectorExecuteEnd void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); - printf("KokkosP: Finalization of NVProf Connector. Complete.\n"); + printf( + "KokkosP: Finalization of NVTX Analyzer Focused Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); -} +} // end kokkosp_finalize_library void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; - + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } @@ -97,8 +117,7 @@ void kokkosp_end_parallel_for(const uint64_t /*kID*/) { void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; - + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } @@ -109,8 +128,7 @@ void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; - + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); } @@ -123,6 +141,7 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { Kokkos::Tools::Experimental::EventSet my_event_set; memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.request_tool_settings = kokkosp_request_tool_settings; my_event_set.init = kokkosp_init_library; my_event_set.finalize = kokkosp_finalize_library; my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; @@ -132,15 +151,14 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; return my_event_set; -} +} // end get_event_set -} // namespace NVProfFocusedConnector +} // namespace NVTXFocusedConnector } // namespace KokkosTools extern "C" { - -namespace impl = KokkosTools::NVProfFocusedConnector; - +namespace impl = KokkosTools::NVTXFocusedConnector; +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) @@ -149,5 +167,4 @@ EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) - } // extern "C" diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h similarity index 84% rename from profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h rename to profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h index e668a9a3c..d38f60538 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h @@ -14,8 +14,8 @@ // //@HEADER -#ifndef _H_KOKKOSP_KERNEL_NVPROF_CONNECTOR_INFO -#define _H_KOKKOSP_KERNEL_NVPROF_CONNECTOR_INFO +#ifndef KOKKOSP_KERNEL_NVTX_CONNECTOR_H +#define KOKKOSP_KERNEL_NVTX_CONNECTOR_H #include #include @@ -24,7 +24,7 @@ #include "nvToolsExt.h" namespace KokkosTools { -namespace NVProfFocusedConnector { +namespace NVTXFocusedConnector { enum KernelExecutionType { PARALLEL_FOR = 0, @@ -32,10 +32,10 @@ enum KernelExecutionType { PARALLEL_SCAN = 2 }; -class KernelNVProfFocusedConnectorInfo { +class KernelNVTXFocusedConnectorInfo { public: - KernelNVProfFocusedConnectorInfo(std::string kName, - KernelExecutionType kernelType) { + KernelNVTXFocusedConnectorInfo(std::string kName, + KernelExecutionType kernelType) { domainNameHandle = kName; char* domainName = (char*)malloc(sizeof(char*) * (32 + kName.size())); @@ -71,7 +71,7 @@ class KernelNVProfFocusedConnectorInfo { std::string getDomainNameHandle() { return domainNameHandle; } - ~KernelNVProfFocusedConnectorInfo() { nvtxDomainDestroy(domain); } + ~KernelNVTXFocusedConnectorInfo() { nvtxDomainDestroy(domain); } private: std::string domainNameHandle; @@ -81,4 +81,4 @@ class KernelNVProfFocusedConnectorInfo { #endif } -} // KokkosTools::NVProfFocusedConnector +} // KokkosTools::NVTXFocusedConnector