Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev mem api #57

Draft
wants to merge 6 commits into
base: release-v3
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
648 changes: 648 additions & 0 deletions c/include/nvtx3/nvToolsExtMem.h

Large diffs are not rendered by default.

152 changes: 152 additions & 0 deletions c/include/nvtx3/nvToolsExtMemCudaRt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTOOLSEXTV3_MEM_CUDART_V1
#define NVTOOLSEXTV3_MEM_CUDART_V1

#include "nvToolsExtMem.h"

#include "cuda.h"
#include "cuda_runtime.h"

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */


/** \brief The memory is from a CUDA runtime array.
 *
 * Relevant functions: cudaMallocArray, cudaMalloc3DArray.
 * Also applies to a cudaArray_t obtained from other types, such as
 * cudaMipmappedArray_t.
 *
 * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported.
 *
 * nvtxMemHeapRegister receives a heapDesc of type cudaArray_t, because the
 * description can be retrieved by tools through cudaArrayGetInfo().
 * nvtxMemRegionRegisterEx receives a regionDesc of type
 * nvtxMemCudaArrayRangeDesc_t.
 */
#define NVTX_MEM_TYPE_CUDA_ARRAY 0x11

/** \brief Structure describing memory in a CUDA runtime array object
 * (cudaArray_t).
 *
 * This is the regionDesc type used with NVTX_MEM_TYPE_CUDA_ARRAY.
 * The field order and sizes are part of the binary interface; do not reorder.
 */
typedef struct nvtxMemCudaArrayRangeDesc_v1
{
uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
uint16_t structSize; /* Size of the structure. */
uint32_t reserved0; /* Reserved. NOTE(review): presumably must be set to 0 -- confirm. */
cudaArray_t src; /* The CUDA runtime array that the described memory belongs to. */
size_t offset[3]; /* NOTE(review): presumably the start of the range in each of up to three dimensions; units are not stated here -- confirm. */
size_t extent[3]; /* NOTE(review): presumably the size of the range in each of up to three dimensions; units are not stated here -- confirm. */
} nvtxMemCudaArrayRangeDesc_v1;
typedef nvtxMemCudaArrayRangeDesc_v1 nvtxMemCudaArrayRangeDesc_t;


/** \brief The memory is from a CUDA driver API array (CUarray).
 *
 * Relevant functions: cuArrayCreate, cuArray3DCreate.
 * Also applies to a CUarray obtained from other types, such as
 * CUmipmappedArray.
 *
 * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported.
 *
 * nvtxMemHeapRegister receives a heapDesc of type cudaArray_t because the
 * description can be retrieved by tools through cudaArrayGetInfo().
 * NOTE(review): the sentence above names the CUDA runtime type and query
 * function and looks copy-pasted from NVTX_MEM_TYPE_CUDA_ARRAY; for this
 * driver-API memory type it presumably should read CUarray and
 * cuArrayGetDescriptor()/cuArray3DGetDescriptor() -- confirm.
 *
 * nvtxMemRegionRegisterEx receives a regionDesc of type
 * nvtxMemCuArrayRangeDesc_t.
 */
#define NVTX_MEM_TYPE_CU_ARRAY 0x12

/** \brief Structure describing memory in a CUDA driver API array object
 * (CUarray).
 *
 * This is the regionDesc type used with NVTX_MEM_TYPE_CU_ARRAY.
 * The field order and sizes are part of the binary interface; do not reorder.
 */
typedef struct nvtxMemCuArrayRangeDesc_v1
{
uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */
uint16_t structSize; /* Size of the structure. */
uint32_t reserved0; /* Reserved. NOTE(review): presumably must be set to 0 -- confirm. */
CUarray src; /* The CUDA driver API array that the described memory belongs to. */
size_t offset[3]; /* NOTE(review): presumably the start of the range in each of up to three dimensions; units are not stated here -- confirm. */
size_t extent[3]; /* NOTE(review): presumably the size of the range in each of up to three dimensions; units are not stated here -- confirm. */
} nvtxMemCuArrayRangeDesc_v1;
typedef nvtxMemCuArrayRangeDesc_v1 nvtxMemCuArrayRangeDesc_t;

/* Reserving 0x2-0xF for more common types */

/** \brief Sentinel accepted as the devicePeer argument of
 * nvtxMemCudaSetPeerAccess in place of a device number.
 *
 * The value is parenthesized so the macro always expands as a single
 * expression, regardless of the surrounding operators.
 */
#define NVTX_MEM_CUDA_PEER_ALL_DEVICES (-1)

/** \brief Get the permissions object that represents the CUDA runtime device
 * or CUDA driver context.
 *
 * This object allows developers to adjust the permissions applied to work executed
 * on the GPU. It may be inherited or overridden by a permissions object bound
 * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags.
 *
 * Ex. change the peer-to-peer access permissions between devices in their entirety,
 * or punch through special holes.
 *
 * By default, all memory that would naturally be accessible to a CUDA kernel is
 * accessible, until modified otherwise by nvtxMemCudaSetPeerAccess or by changing regions.
 *
 * This object should also represent the CUDA driver API level context.
 *
 * NOTE(review): this description is the same text as the one on
 * nvtxMemCudaGetDeviceWidePermissions. Presumably this variant, which takes no
 * device argument, covers every device in the process -- confirm and reword.
 *
 * \param domain The NVTX domain handle.
 * \return A permissions handle.
 */
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetProcessWidePermissions(
nvtxDomainHandle_t domain);

/** \brief Get the permissions object that represents the CUDA runtime device
 * or CUDA driver context.
 *
 * This object allows developers to adjust the permissions applied to work executed
 * on the GPU. It may be inherited or overridden by a permissions object bound
 * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags.
 *
 * Ex. change the peer-to-peer access permissions between devices in their entirety,
 * or punch through special holes.
 *
 * By default, all memory that would naturally be accessible to a CUDA kernel is
 * accessible, until modified otherwise by nvtxMemCudaSetPeerAccess or by changing regions.
 *
 * This object should also represent the CUDA driver API level context.
 *
 * \param domain The NVTX domain handle.
 * \param device The device whose permissions object is requested.
 *               NOTE(review): presumably a CUDA device number such as the
 *               one returned by cudaGetDevice() -- confirm.
 * \return A permissions handle.
 */
NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetDeviceWidePermissions(
nvtxDomainHandle_t domain,
int device);


/** \brief Change the default behavior for all memory mapped in from a particular device.
 *
 * While typically all memory defaults to readable and writable, users may desire to limit
 * access to reduced default permissions, such as read-only, on a per-device basis.
 *
 * Regions can be used to further override smaller windows of memory.
 *
 * devicePeer can be NVTX_MEM_CUDA_PEER_ALL_DEVICES.
 *
 * \param domain      The NVTX domain handle.
 * \param permissions The permissions handle to adjust.
 *                    NOTE(review): presumably obtained from
 *                    nvtxMemCudaGetProcessWidePermissions or
 *                    nvtxMemCudaGetDeviceWidePermissions -- confirm.
 * \param devicePeer  Device number, or NVTX_MEM_CUDA_PEER_ALL_DEVICES.
 * \param flags       NVTX_MEM_PERMISSIONS_REGION_FLAGS_* values.
 */
NVTX_DECLSPEC void NVTX_API nvtxMemCudaSetPeerAccess(
nvtxDomainHandle_t domain,
nvtxMemPermissionsHandle_t permissions,
int devicePeer, /* device number such as from cudaGetDevice() or NVTX_MEM_CUDA_PEER_ALL_DEVICES */
uint32_t flags ); /* NVTX_MEM_PERMISSIONS_REGION_FLAGS_* */


/** @} */ /*END defgroup*/

#ifdef __GNUC__
#pragma GCC visibility push(internal)
#endif

/* Pull in the implementation unless the user opted out with NVTX_NO_IMPL.
 * The guard macro is defined only around the #include below. */
#ifndef NVTX_NO_IMPL
#define NVTX_EXT_IMPL_MEM_CUDART_GUARD /* Ensure the implementation header cannot be included directly */
#include "nvtxExtDetail/nvtxExtImplMemCudaRt1.h"
#undef NVTX_EXT_IMPL_MEM_CUDART_GUARD
#endif /*NVTX_NO_IMPL*/

#ifdef __GNUC__
#pragma GCC visibility pop
#endif


#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* NVTOOLSEXTV3_MEM_CUDART_V1 */
139 changes: 139 additions & 0 deletions c/include/nvtx3/nvtxExtDetail/nvtxExtImpl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#ifndef NVTX_EXT_IMPL_GUARD
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
#endif

#ifndef NVTX_EXT_IMPL_H
#define NVTX_EXT_IMPL_H
/* ---- Include required platform headers ---- */

#if defined(_WIN32)

#include <Windows.h>

#else
#include <unistd.h>

#if defined(__ANDROID__)
#include <android/api-level.h>
#endif

#if defined(__linux__) || defined(__CYGWIN__)
#include <sched.h>
#endif

#include <limits.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>

#include <string.h>
#include <sys/types.h>
#include <pthread.h>
#include <stdlib.h>
#include <wchar.h>

#endif

/* ---- Define macros used in this file ---- */

/* Diagnostic logging helpers.
 *
 * NVTX_ERR(...) and NVTX_INFO(...) take printf-style arguments. They expand
 * to nothing unless NVTX_DEBUG_PRINT is defined; with it defined they log
 * through __android_log_print on Android and through fprintf(stderr, ...)
 * elsewhere. In the fprintf variants the first argument must be a string
 * literal, because it is concatenated with the message prefix.
 *
 * None of the definitions ends with a semicolon: the caller supplies it, so
 * every variant behaves as a single statement and is safe inside an unbraced
 * if/else.
 */
#ifdef NVTX_DEBUG_PRINT
#ifdef __ANDROID__
#include <android/log.h>
#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__)
#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__)
#else
#include <stdio.h>
#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__)
#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__)
#endif
#else /* !defined(NVTX_DEBUG_PRINT) */
#define NVTX_ERR(...)
#define NVTX_INFO(...)
#endif

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

// #ifdef __GNUC__
// #pragma GCC visibility push(hidden)
// #endif

/* Extension state values.
 * NOTE(review): presumably these track the initialization life cycle of an
 * extension (not yet attempted / disabled / in progress / loaded); the code
 * that reads and writes them is not in this header -- confirm. */
#define NVTX_EXTENSION_FRESH 0
#define NVTX_EXTENSION_DISABLED 1
#define NVTX_EXTENSION_STARTING 2
#define NVTX_EXTENSION_LOADED 3

/* Function-pointer type: takes an export function id and returns an intptr_t.
 * NOTE(review): presumably the returned value is the address of the requested
 * export function -- confirm. */
typedef intptr_t (NVTX_API * NvtxExtGetExportFunction_t)(uint32_t exportFunctionId);

/* Describes one segment of slots belonging to an extension module.
 * The field order and sizes are part of the binary interface; do not reorder. */
typedef struct nvtxExtModuleSegment_t
{
size_t segmentId; /* Identifier of this segment. */
size_t slotCount; /* NOTE(review): presumably the number of entries that slots points to -- confirm. */
intptr_t* slots; /* Pointer to the segment's slot storage. */

} nvtxExtModuleSegment_t;

/* Describes an extension module; a pointer to this structure is the argument
 * of NvtxExtInitializeInjectionFunc_t.
 * The field order and sizes are part of the binary interface; do not reorder. */
typedef struct nvtxExtModuleInfo_t
{
uint16_t nvtxVer; /* NOTE(review): presumably the NVTX version of the client headers -- confirm. */
uint16_t structSize; /* NOTE(review): presumably sizeof(nvtxExtModuleInfo_t), for versioning -- confirm. */
uint16_t moduleId; /* Identifier of the extension module. */
uint16_t compatId; /* NOTE(review): presumably the extension's compatibility id (cf. NVTX_EXT_COMPATID_*) -- confirm. */
size_t segmentsCount; /* NOTE(review): presumably the number of entries that segments points to -- confirm. */
nvtxExtModuleSegment_t* segments; /* Pointer to the module's segment descriptors. */
NvtxExtGetExportFunction_t getExportFunction; /* Callback taking an export function id. */
} nvtxExtModuleInfo_t;

/* Function-pointer type for an injection initialization entry point: it
 * receives a pointer to the module info and returns an int.
 * NOTE(review): the meaning of the return value is not visible in this
 * header -- confirm against the caller. */
typedef int (NVTX_API * NvtxExtInitializeInjectionFunc_t)(nvtxExtModuleInfo_t* moduleInfo);

/* nvtxExtGlobals1_t is the global storage of slots for function pointers and function tables.
 * Slot ranges are pre-assigned to extensions.
 * Other, potentially larger, globals will be created once there is insufficient room for a new extension.
 */
#define NVTX3EXT_GLOBALS1_SLOT_GROUP_ID 1 /* incremented with each new ext global we introduce */
#define NVTX3EXT_GLOBALS1_SLOT_COUNT 256
typedef struct nvtxExtGlobals1_t
{
    NvtxExtInitializeInjectionFunc_t injectionFnPtr; /* Injection initialization function pointer; starts out null. */
    size_t slotGroupId;                              /* Initialized to NVTX3EXT_GLOBALS1_SLOT_GROUP_ID. */
    size_t slotCount;                                /* Initialized to NVTX3EXT_GLOBALS1_SLOT_COUNT, the size of slots. */
    intptr_t slots[NVTX3EXT_GLOBALS1_SLOT_COUNT];    /* Slot storage; starts out zeroed. */

} nvtxExtGlobals1_t;

/* The single instance of the slot storage. The initializer uses the same
 * constants as the structure definition so the recorded group id and slot
 * count cannot drift from the actual array size. */
NVTX_LINKONCE_DEFINE_GLOBAL nvtxExtGlobals1_t NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1) =
{
    (NvtxExtInitializeInjectionFunc_t)0,
    NVTX3EXT_GLOBALS1_SLOT_GROUP_ID,
    NVTX3EXT_GLOBALS1_SLOT_COUNT,
    {0}
};



#define NVTX_EXT_INIT_GUARD
#include "nvtxExtInit.h"
#undef NVTX_EXT_INIT_GUARD

// #ifdef __GNUC__
// #pragma GCC visibility pop
// #endif

#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

#endif /* NVTX_EXT_IMPL_H */
Loading