From ea51b7a5a3a789df0c16ece71ab36a14cddf1044 Mon Sep 17 00:00:00 2001 From: Evan Ramos Date: Tue, 2 Apr 2024 13:34:12 -0500 Subject: [PATCH] [NOMERGE] Add NVTX headers --- crate-tmp/src/nvtxw-sys/LICENSE.txt | 218 +++ crate-tmp/src/nvtxw-sys/nvtx3/nvToolsExt.h | 1474 +++++++++++++++++ .../src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImpl.h | 438 +++++ .../nvtxw-sys/nvtx3/nvtxDetail/nvtxImplCore.h | 307 ++++ .../src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInit.h | 312 ++++ .../nvtx3/nvtxDetail/nvtxInitDecls.h | 81 + .../nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDefs.h | 573 +++++++ .../nvtxw-sys/nvtx3/nvtxDetail/nvtxLinkOnce.h | 83 + .../nvtxw-sys/nvtx3/nvtxDetail/nvtxTypes.h | 304 ++++ .../nvtxext/nvtx3/nvToolsExtPayload.h | 1285 ++++++++++++++ .../nvtx3/nvtxExtDetail/nvtxExtHelperMacros.h | 31 + .../nvtxext/nvtx3/nvtxExtDetail/nvtxExtImpl.h | 102 ++ .../nvtxExtDetail/nvtxExtImplPayload_v1.h | 208 +++ .../nvtxext/nvtx3/nvtxExtDetail/nvtxExtInit.h | 378 +++++ .../nvtxExtDetail/nvtxExtPayloadTypeInfo.h | 151 ++ .../nvtx3/nvtxExtDetail/nvtxExtTypes.h | 44 + .../src/nvtxw-sys/tools/nvtxw/c/nvtxw3.c | 874 ++++++++++ .../src/nvtxw-sys/tools/nvtxw/c/nvtxw3.h | 549 ++++++ 18 files changed, 7412 insertions(+) create mode 100644 crate-tmp/src/nvtxw-sys/LICENSE.txt create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvToolsExt.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImpl.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImplCore.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInit.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDecls.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDefs.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxLinkOnce.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxTypes.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvToolsExtPayload.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtHelperMacros.h 
create mode 100644 crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImpl.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImplPayload_v1.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtInit.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtPayloadTypeInfo.h create mode 100644 crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtTypes.h create mode 100644 crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.c create mode 100644 crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.h diff --git a/crate-tmp/src/nvtxw-sys/LICENSE.txt b/crate-tmp/src/nvtxw-sys/LICENSE.txt new file mode 100644 index 0000000..537293d --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/LICENSE.txt @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. diff --git a/crate-tmp/src/nvtxw-sys/nvtx3/nvToolsExt.h b/crate-tmp/src/nvtxw-sys/nvtx3/nvToolsExt.h new file mode 100644 index 0000000..9e77459 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvToolsExt.h @@ -0,0 +1,1474 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. 
+* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +/** \file nvToolsExt.h + */ + +/* ========================================================================= */ +/** \mainpage + * \tableofcontents + * \section INTRODUCTION Introduction + * + * The NVIDIA Tools Extension library is a set of functions that a + * developer can use to provide additional information to tools. + * The additional information is used by the tool to improve + * analysis and visualization of data. + * + * The library introduces close to zero overhead if no tool is + * attached to the application. The overhead when a tool is + * attached is specific to the tool. + * + * \section INITIALIZATION_SECTION Initialization + * + * Typically the tool's library that plugs into NVTX is indirectly + * loaded via environmental properties that are platform specific. + * For some platforms or special cases, the user may be required + * to explicitly initialize instead. This can also + * be helpful to control when the API loads a tool's library instead + * of what would typically be the first function call to emit info. + * For these rare cases, see \ref INITIALIZATION for additional information. + * + * \section MARKERS_AND_RANGES Markers and Ranges + * + * Markers and ranges are used to describe events at a specific time (markers) + * or over a time span (ranges) during the execution of the application + * respectively. + * + * \subsection MARKERS Markers + * + * Markers denote specific moments in time. + * + * + * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on + * how to specify the domain. + * + * \subsection THREAD_RANGES Thread Ranges + * + * Thread ranges denote nested time ranges. Nesting is maintained per thread + * per domain and does not require any additional correlation mechanism. The + * duration of a thread range is defined by the corresponding pair of + * nvtxRangePush* to nvtxRangePop API calls. 
+ * + * See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on + * how to specify the domain. + * + * \subsection PROCESS_RANGES Process Ranges + * + * Process ranges denote a time span that can expose arbitrary concurrency, as + * opposed to thread ranges that only support nesting. In addition the range + * start event can happen on a different thread than the end marker. For the + * correlation of a start/end pair an unique correlation ID is used that is + * returned from the start API call and needs to be passed into the end API + * call. + * + * \subsection EVENT_ATTRIBUTES Event Attributes + * + * \ref MARKERS_AND_RANGES can be annotated with various attributes to provide + * additional information for an event or to guide the tool's visualization of + * the data. Each of the attributes is optional and if left unused the + * attributes fall back to a default value. The attributes include: + * - color + * - category + * + * To specify any attribute other than the text message, the \ref + * EVENT_ATTRIBUTE_STRUCTURE "Event Attribute Structure" must be used. + * + * \section DOMAINS Domains + * + * Domains enable developers to scope annotations. By default all events and + * annotations are in the default domain. Additional domains can be registered. + * This allows developers to scope markers, ranges, and resources names to + * avoid conflicts. + * + * The function ::nvtxDomainCreateA or ::nvtxDomainCreateW is used to create + * a named domain. + * + * Each domain maintains its own + * - categories + * - thread range stacks + * - registered strings + * + * The function ::nvtxDomainDestroy marks the end of the domain. Destroying + * a domain unregisters and destroys all objects associated with it such as + * registered strings, resource objects, named categories, and started ranges. 
+ * + * \section RESOURCE_NAMING Resource Naming + * + * This section covers calls that allow annotating objects with user-provided + * names in order to allow for a better analysis of complex trace data. All of + * the functions take the handle or the ID of the object to name and the name. + * The functions can be called multiple times during the execution of an + * application, however, in that case it is implementation dependent which + * name will be reported by the tool. + * + * \subsection CATEGORY_NAMING Category Naming + * + * Some functions in this library support associating an integer category + * to enable filtering and sorting. The category naming functions allow + * the application to associate a user friendly name with the integer + * category. Support for domains has been added in NVTX_VERSION_2 to + * avoid collisions when domains are developed independently. + * + * \subsection RESOURCE_OBJECTS Resource Objects + * + * Resource objects are a generic mechanism for attaching data to an application + * resource. The identifier field makes the association to a pointer or handle, + * while the type field helps provide deeper understanding of the identifier as + * well as enabling differentiation in cases where handles generated by different + * APIs may collide. The resource object may also have an associated message to + * associate with the application resource, enabling further annotation of this + * object and how it is used. + * + * The resource object was introduced in NVTX_VERSION_2 to supersede existing naming + * functions and allow the application resource identified by those functions to be + * associated to a domain. The other naming functions are still supported for backward + * compatibility but will be associated only to the default domain. + * + * \subsection RESOURCE_NAMING_OS Resource Naming + * + * Some operating system resource creation APIs do not support providing a user friendly + * name, such as some OS thread creation APIs. 
This API support resource naming though + * both through resource objects and functions following the pattern + * nvtxName[RESOURCE_TYPE][A|W](identifier, name). Resource objects introduced in NVTX_VERSION 2 + * supersede the other functions with a a more general method of assigning names to OS resources, + * along with associating them to domains too. The older nvtxName* functions are only associated + * with the default domain. + * \section EXTENSIONS Optional Extensions + * Optional extensions will either appear within the existing sections the extend or appear + * in the "Related Pages" when they introduce new concepts. + */ + + /** + * Tools Extension API version + */ +#if defined(NVTX_VERSION) && NVTX_VERSION < 3 +#error "Trying to #include NVTX version 3 in a source file where an older NVTX version has already been included. If you are not directly using NVTX (the NVIDIA Tools Extension library), you are getting this error because libraries you are using have included different versions of NVTX. Suggested solutions are: (1) reorder #includes so the newest NVTX version is included first, (2) avoid using the conflicting libraries in the same .c/.cpp file, or (3) update the library using the older NVTX version to use the newer version instead." +#endif + +/* Header guard */ +#if !defined(NVTX_VERSION) +#define NVTX_VERSION 3 + +#if defined(_MSC_VER) +#define NVTX_API __stdcall +#define NVTX_INLINE_STATIC __inline static +#else /*defined(__GNUC__)*/ +#define NVTX_API +#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +#define NVTX_INLINE_STATIC inline static +#else +#define NVTX_INLINE_STATIC static +#endif +#endif /* Platform */ + +#if defined(NVTX_NO_IMPL) +/* When omitting implementation, avoid declaring functions inline */ +/* without definitions, since this causes compiler warnings. */ +#define NVTX_DECLSPEC +#elif defined(NVTX_EXPORT_API) +/* Allow overriding definition of NVTX_DECLSPEC when exporting API. 
*/ +/* Default is empty, meaning non-inline with external linkage. */ +#if !defined(NVTX_DECLSPEC) +#define NVTX_DECLSPEC +#endif +#else +/* Normal NVTX usage defines the NVTX API inline with static */ +/* (internal) linkage. */ +#define NVTX_DECLSPEC NVTX_INLINE_STATIC +#endif + +#include "nvtxDetail/nvtxLinkOnce.h" + +#define NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION) NAME##_v##VERSION +#define NVTX_VERSIONED_IDENTIFIER_L2(NAME, VERSION) NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION) +#define NVTX_VERSIONED_IDENTIFIER(NAME) NVTX_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION) + +/** + * The nvToolsExt library depends on stdint.h. If the build tool chain in use + * does not include stdint.h then define NVTX_STDINT_TYPES_ALREADY_DEFINED + * and define the following types: + * + * #define NVTX_STDINT_TYPES_ALREADY_DEFINED if you are using your own header file. + */ +#ifndef NVTX_STDINT_TYPES_ALREADY_DEFINED +#include +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** +* Result Codes +*/ + +#define NVTX_SUCCESS 0 +#define NVTX_FAIL 1 +#define NVTX_ERR_INIT_LOAD_PROPERTY 2 +#define NVTX_ERR_INIT_ACCESS_LIBRARY 3 +#define NVTX_ERR_INIT_LOAD_LIBRARY 4 +#define NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT 5 +#define NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT 6 +#define NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE 7 + +/** + * Size of the nvtxEventAttributes_t structure. + */ +#define NVTX_EVENT_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxEventAttributes_t) ) ) + +#define NVTX_NO_PUSH_POP_TRACKING ((int)-2) + +typedef uint64_t nvtxRangeId_t; + +/* Forward declaration of opaque domain registration structure */ +struct nvtxDomainRegistration_st; +typedef struct nvtxDomainRegistration_st nvtxDomainRegistration; + +/* \brief Domain Handle Structure. +* \anchor DOMAIN_HANDLE_STRUCTURE +* +* This structure is opaque to the user and is used as a handle to reference +* a domain. This type is returned from tools when using the NVTX API to +* create a domain. 
+* +*/ +typedef nvtxDomainRegistration* nvtxDomainHandle_t; + +/* Forward declaration of opaque string registration structure */ +struct nvtxStringRegistration_st; +typedef struct nvtxStringRegistration_st nvtxStringRegistration; + +/* \brief Registered String Handle Structure. +* \anchor REGISTERED_STRING_HANDLE_STRUCTURE +* +* This structure is opaque to the user and is used as a handle to reference +* a registered string. This type is returned from tools when using the NVTX +* API to create a registered string. +* +*/ +typedef nvtxStringRegistration* nvtxStringHandle_t; + +/* ========================================================================= */ +/** \defgroup GENERAL General + * @{ + */ + +/** --------------------------------------------------------------------------- + * Color Types + * ------------------------------------------------------------------------- */ +typedef enum nvtxColorType_t +{ + NVTX_COLOR_UNKNOWN = 0, /**< Color attribute is unused. */ + NVTX_COLOR_ARGB = 1 /**< An ARGB color is provided. */ +} nvtxColorType_t; + +/** --------------------------------------------------------------------------- + * Message Types + * ------------------------------------------------------------------------- */ +typedef enum nvtxMessageType_t +{ + NVTX_MESSAGE_UNKNOWN = 0, /**< Message payload is unused. */ + NVTX_MESSAGE_TYPE_ASCII = 1, /**< A character sequence is used as payload. */ + NVTX_MESSAGE_TYPE_UNICODE = 2, /**< A wide character sequence is used as payload. */ + /* NVTX_VERSION_2 */ + NVTX_MESSAGE_TYPE_REGISTERED = 3, /**< A unique string handle that was registered + with \ref nvtxDomainRegisterStringA() or + \ref nvtxDomainRegisterStringW(). 
*/ +} nvtxMessageType_t; + +typedef union nvtxMessageValue_t +{ + const char* ascii; + const wchar_t* unicode; + /* NVTX_VERSION_2 */ + nvtxStringHandle_t registered; +} nvtxMessageValue_t; + + +/** @} */ /*END defgroup*/ +/* ------------------------------------------------------------------------- */ +/** \brief Force initialization (optional) +* +* Force NVTX library to initialize. The first call to any NVTX API function +* will automatically initialize the entire API. This can make the first call +* much slower than subsequent calls. In applications where the first call to +* NVTX may be in a performance-critical section, calling nvtxInitialize before +* any performance-critical sections will ensure NVTX initialization occurs at +* an acceptable time. Since nvtxInitialize takes only a reserved parameter and has no +* expected behavior besides initialization, it is convenient to add a call to +* nvtxInitialize in NVTX-instrumented applications that need to force earlier +* initialization without changing any other code. For example, if an app's +* first NVTX call is nvtxDomainCreate, and it is difficult to move that call +* earlier because the domain handle must be stored in an object only created +* at that point, adding a call to nvtxInitialize at the top of main() will +* ensure the later call to nvtxDomainCreate is as fast as possible. +* +* \version \NVTX_VERSION_3 +* +* \param reserved - must be zero or NULL. +* +* @{ */ +NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved); +/** @} */ + + +/** @} */ /*END defgroup*/ + +/* ========================================================================= */ +/** \defgroup EVENT_ATTRIBUTES Event Attributes +* @{ +*/ + +/** --------------------------------------------------------------------------- +* Payload Types +* ------------------------------------------------------------------------- */ +typedef enum nvtxPayloadType_t +{ + NVTX_PAYLOAD_UNKNOWN = 0, /**< Payload is unused. 
*/ + NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 = 1, /**< A 64 bit unsigned integer value is used as payload. */ + NVTX_PAYLOAD_TYPE_INT64 = 2, /**< A 64 bit signed integer value is used as payload. */ + NVTX_PAYLOAD_TYPE_DOUBLE = 3, /**< A 64 bit floating point value is used as payload. */ + /* NVTX_VERSION_2 */ + NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 = 4, /**< A 32 bit unsigned integer value is used as payload. */ + NVTX_PAYLOAD_TYPE_INT32 = 5, /**< A 32 bit signed integer value is used as payload. */ + NVTX_PAYLOAD_TYPE_FLOAT = 6 /**< A 32 bit floating point value is used as payload. */ +} nvtxPayloadType_t; + +/** \brief Event Attribute Structure. + * \anchor EVENT_ATTRIBUTE_STRUCTURE + * + * This structure is used to describe the attributes of an event. The layout of + * the structure is defined by a specific version of the tools extension + * library and can change between different versions of the Tools Extension + * library. + * + * \par Initializing the Attributes + * + * The caller should always perform the following three tasks when using + * attributes: + *
    <ul> + *
  <li>Zero the structure</li> + *
  <li>Set the version field</li> + *
  <li>Set the size field</li> + * </ul>
+ * + * Zeroing the structure sets all the event attribute types and values + * to their default values. + * + * The version and size fields are used by the Tools Extension + * implementation to handle multiple versions of the attributes structure. + * + * It is recommended that the caller use one of the following two methods + * to initialize the event attributes structure: + * + * \par Method 1: Initializing nvtxEventAttributes for future compatibility + * \code + * nvtxEventAttributes_t eventAttrib = {0}; + * eventAttrib.version = NVTX_VERSION; + * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + * \endcode + * + * \par Method 2: Initializing nvtxEventAttributes for a specific version + * \code + * nvtxEventAttributes_t eventAttrib = {0}; + * eventAttrib.version = 1; + * eventAttrib.size = (uint16_t)(sizeof(nvtxEventAttributes_v1)); + * \endcode + * + * If the caller uses Method 1 it is critical that the entire binary + * layout of the structure be configured to 0 so that all fields + * are initialized to the default value. + * + * The caller should either use both NVTX_VERSION and + * NVTX_EVENT_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values + * and a versioned type (Method 2). Using a mix of the two methods + * will likely cause either source level incompatibility or binary + * incompatibility in the future. 
+ * + * \par Settings Attribute Types and Values + * + * + * \par Example: + * \code + * // Initialize + * nvtxEventAttributes_t eventAttrib = {0}; + * eventAttrib.version = NVTX_VERSION; + * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + * + * // Configure the Attributes + * eventAttrib.colorType = NVTX_COLOR_ARGB; + * eventAttrib.color = 0xFF880000; + * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + * eventAttrib.message.ascii = "Example"; + * \endcode + * + * In the example the caller does not have to set the value of + * \ref ::nvtxEventAttributes_v2::category or + * \ref ::nvtxEventAttributes_v2::payload as these fields were set to + * the default value by {0}. + * \sa + * ::nvtxDomainMarkEx + * ::nvtxDomainRangeStartEx + * ::nvtxDomainRangePushEx + */ +typedef struct nvtxEventAttributes_v2 +{ + /** + * \brief Version flag of the structure. + * + * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs + * supported in this header file. This can optionally be overridden to + * another version of the tools extension library. + */ + uint16_t version; + + /** + * \brief Size of the structure. + * + * Needs to be set to the size in bytes of the event attribute + * structure used to specify the event. + */ + uint16_t size; + + /** + * \brief ID of the category the event is assigned to. + * + * A category is a user-controlled ID that can be used to group + * events. The tool may use category IDs to improve filtering or + * enable grouping of events in the same category. The functions + * \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used + * to name a category. + * + * Default Value is 0 + */ + uint32_t category; + + /** \brief Color type specified in this attribute structure. + * + * Defines the color format of the attribute structure's \ref COLOR_FIELD + * "color" field. + * + * Default Value is NVTX_COLOR_UNKNOWN + */ + int32_t colorType; /* nvtxColorType_t */ + + /** \brief Color assigned to this event. 
\anchor COLOR_FIELD + * + * The color that the tool should use to visualize the event. + */ + uint32_t color; + + /** + * \brief Payload type specified in this attribute structure. + * + * Defines the payload format of the attribute structure's \ref PAYLOAD_FIELD + * "payload" field. + * + * Default Value is NVTX_PAYLOAD_UNKNOWN + */ + int32_t payloadType; /* nvtxPayloadType_t */ + + int32_t reserved0; /* Reserved; left at 0 when the structure is zero-initialized as recommended. */ + + /** + * \brief Payload assigned to this event. \anchor PAYLOAD_FIELD + * + * A numerical value that can be used to annotate an event. The tool could + * use the payload data to reconstruct graphs and diagrams. + */ + union payload_t + { + uint64_t ullValue; /* 64-bit unsigned; cf. NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 */ + int64_t llValue; /* 64-bit signed; cf. NVTX_PAYLOAD_TYPE_INT64 */ + double dValue; /* 64-bit floating point; cf. NVTX_PAYLOAD_TYPE_DOUBLE */ + /* NVTX_VERSION_2 */ + uint32_t uiValue; /* 32-bit unsigned; cf. NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 */ + int32_t iValue; /* 32-bit signed; cf. NVTX_PAYLOAD_TYPE_INT32 */ + float fValue; /* 32-bit floating point; cf. NVTX_PAYLOAD_TYPE_FLOAT */ + } payload; + + /** \brief Message type specified in this attribute structure. + * + * Defines the message format of the attribute structure's \ref MESSAGE_FIELD + * "message" field. + * + * Default Value is NVTX_MESSAGE_UNKNOWN + */ + int32_t messageType; /* nvtxMessageType_t */ + + /** \brief Message assigned to this attribute structure. \anchor MESSAGE_FIELD + * + * The text message that is attached to an event. 
+ */ + nvtxMessageValue_t message; + +} nvtxEventAttributes_v2; + +typedef struct nvtxEventAttributes_v2 nvtxEventAttributes_t; + +/** @} */ /*END defgroup*/ +/* ========================================================================= */ +/** \defgroup MARKERS_AND_RANGES Markers and Ranges + * + * See \ref MARKERS_AND_RANGES for more details + * + * @{ + */ + +/** \name Marker */ + +/* ------------------------------------------------------------------------- */ +/** \brief Marks an instantaneous event in the application. +* +* A marker can contain a text message or specify additional information +* using the event attributes structure. These attributes include a text +* message, color, category, and a payload. 
Each of the attributes is optional +* and can only be sent out using the \ref nvtxDomainMarkEx function. +* +* nvtxDomainMarkEx(NULL, event) is equivalent to calling +* nvtxMarkEx(event). +* +* \param domain - The domain of scoping the category. +* \param eventAttrib - The event attribute structure defining the marker's +* attribute types and attribute values. +* +* \sa +* ::nvtxMarkEx +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Marks an instantaneous event in the application. + * + * A marker can contain a text message or specify additional information + * using the event attributes structure. These attributes include a text + * message, color, category, and a payload. Each of the attributes is optional + * and can only be sent out using the \ref nvtxMarkEx function. + * If \ref nvtxMarkA or \ref nvtxMarkW are used to specify the marker + * or if an attribute is unspecified then a default value will be used. + * + * \param eventAttrib - The event attribute structure defining the marker's + * attribute types and attribute values. + * + * \par Example: + * \code + * // zero the structure + * nvtxEventAttributes_t eventAttrib = {0}; + * // set the version and the size information + * eventAttrib.version = NVTX_VERSION; + * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + * // configure the attributes. 0 is the default for all attributes. 
+ * eventAttrib.colorType = NVTX_COLOR_ARGB; + * eventAttrib.color = 0xFF880000; + * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + * eventAttrib.message.ascii = "Example nvtxMarkEx"; + * nvtxMarkEx(&eventAttrib); + * \endcode + * + * \sa + * ::nvtxDomainMarkEx + * + * \version \NVTX_VERSION_1 + * @{ */ +NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Marks an instantaneous event in the application. + * + * A marker created using \ref nvtxMarkA or \ref nvtxMarkW contains only a + * text message. + * + * \param message - The message associated to this marker event. + * + * \par Example: + * \code + * nvtxMarkA("Example nvtxMarkA"); + * nvtxMarkW(L"Example nvtxMarkW"); + * \endcode + * + * \sa + * ::nvtxDomainMarkEx + * ::nvtxMarkEx + * + * \version \NVTX_VERSION_0 + * @{ */ +NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message); +NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message); +/** @} */ + + +/** \name Process Ranges */ + +/* ------------------------------------------------------------------------- */ +/** \brief Starts a process range in a domain. +* +* \param domain - The domain of scoping the category. +* \param eventAttrib - The event attribute structure defining the range's +* attribute types and attribute values. +* +* \return The unique ID used to correlate a pair of Start and End events. +* +* \remarks Ranges defined by Start/End can overlap. +* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain"); +* nvtxEventAttributes_t eventAttrib = {0}; +* eventAttrib.version = NVTX_VERSION; +* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; +* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; +* eventAttrib.message.ascii = "my range"; +* nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib); +* // ... 
+* nvtxDomainRangeEnd(domain, rangeId); +* \endcode +* +* \sa +* ::nvtxDomainRangeEnd +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Starts a process range. + * + * \param eventAttrib - The event attribute structure defining the range's + * attribute types and attribute values. + * + * \return The unique ID used to correlate a pair of Start and End events. + * + * \remarks Ranges defined by Start/End can overlap. + * + * \par Example: + * \code + * nvtxEventAttributes_t eventAttrib = {0}; + * eventAttrib.version = NVTX_VERSION; + * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + * eventAttrib.category = 3; + * eventAttrib.colorType = NVTX_COLOR_ARGB; + * eventAttrib.color = 0xFF0088FF; + * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + * eventAttrib.message.ascii = "Example Range"; + * nvtxRangeId_t rangeId = nvtxRangeStartEx(&eventAttrib); + * // ... + * nvtxRangeEnd(rangeId); + * \endcode + * + * \sa + * ::nvtxRangeEnd + * ::nvtxDomainRangeStartEx + * + * \version \NVTX_VERSION_1 + * @{ */ +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Starts a process range. + * + * \param message - The event message associated to this range event. + * + * \return The unique ID used to correlate a pair of Start and End events. + * + * \remarks Ranges defined by Start/End can overlap. 
+ * + * \par Example: + * \code + * nvtxRangeId_t r1 = nvtxRangeStartA("Range 1"); + * nvtxRangeId_t r2 = nvtxRangeStartW(L"Range 2"); + * nvtxRangeEnd(r1); + * nvtxRangeEnd(r2); + * \endcode + * + * \sa + * ::nvtxRangeEnd + * ::nvtxRangeStartEx + * ::nvtxDomainRangeStartEx + * + * \version \NVTX_VERSION_0 + * @{ */ +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message); +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Ends a process range. +* +* \param domain - The domain +* \param id - The correlation ID returned from a nvtxRangeStart call. +* +* \remarks This function is offered for completeness but is an alias for ::nvtxRangeEnd. +* It does not need a domain param since that is associated with the range ID at ::nvtxDomainRangeStartEx +* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain"); +* nvtxEventAttributes_t eventAttrib = {0}; +* eventAttrib.version = NVTX_VERSION; +* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; +* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; +* eventAttrib.message.ascii = "my range"; +* nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib); +* // ... +* nvtxDomainRangeEnd(domain, rangeId); +* \endcode +* +* \sa +* ::nvtxDomainRangeStartEx +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Ends a process range. + * + * \param id - The correlation ID returned from an nvtxRangeStart call. 
+ * + * \sa + * ::nvtxDomainRangeStartEx + * ::nvtxRangeStartEx + * ::nvtxRangeStartA + * ::nvtxRangeStartW + * + * \version \NVTX_VERSION_0 + * @{ */ +NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id); +/** @} */ + +/** \name Thread Ranges */ + +/* ------------------------------------------------------------------------- */ +/** \brief Starts a nested thread range. +* +* \param domain - The domain of scoping. +* \param eventAttrib - The event attribute structure defining the range's +* attribute types and attribute values. +* +* \return The 0 based level of range being started. This value is scoped to the domain. +* If an error occurs, a negative value is returned. +* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain"); +* nvtxEventAttributes_t eventAttrib = {0}; +* eventAttrib.version = NVTX_VERSION; +* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; +* eventAttrib.colorType = NVTX_COLOR_ARGB; +* eventAttrib.color = 0xFFFF0000; +* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; +* eventAttrib.message.ascii = "Level 0"; +* nvtxDomainRangePushEx(domain, &eventAttrib); +* +* // Re-use eventAttrib +* eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE; +* eventAttrib.message.unicode = L"Level 1"; +* nvtxDomainRangePushEx(domain, &eventAttrib); +* +* nvtxDomainRangePop(domain); //level 1 +* nvtxDomainRangePop(domain); //level 0 +* \endcode +* +* \sa +* ::nvtxDomainRangePop +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Starts a nested thread range. + * + * \param eventAttrib - The event attribute structure defining the range's + * attribute types and attribute values. + * + * \return The 0 based level of range being started. This level is per domain. 
+ * If an error occurs a negative value is returned. + * + * \par Example: + * \code + * nvtxEventAttributes_t eventAttrib = {0}; + * eventAttrib.version = NVTX_VERSION; + * eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + * eventAttrib.colorType = NVTX_COLOR_ARGB; + * eventAttrib.color = 0xFFFF0000; + * eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + * eventAttrib.message.ascii = "Level 0"; + * nvtxRangePushEx(&eventAttrib); + * + * // Re-use eventAttrib + * eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE; + * eventAttrib.message.unicode = L"Level 1"; + * nvtxRangePushEx(&eventAttrib); + * + * nvtxRangePop(); + * nvtxRangePop(); + * \endcode + * + * \sa + * ::nvtxDomainRangePushEx + * ::nvtxRangePop + * + * \version \NVTX_VERSION_1 + * @{ */ +NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Starts a nested thread range. + * + * \param message - The event message associated to this range event. + * + * \return The 0 based level of range being started. If an error occurs a + * negative value is returned. + * + * \par Example: + * \code + * nvtxRangePushA("Level 0"); + * nvtxRangePushW(L"Level 1"); + * nvtxRangePop(); + * nvtxRangePop(); + * \endcode + * + * \sa + * ::nvtxDomainRangePushEx + * ::nvtxRangePop + * + * \version \NVTX_VERSION_0 + * @{ */ +NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message); +NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message); +/** @} */ + + +/* ------------------------------------------------------------------------- */ +/** \brief Ends a nested thread range. +* +* \return The level of the range being ended. If an error occurs a negative +* value is returned on the current thread. 
+* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreate("example library"); +* nvtxDomainRangePushA(domain, "Level 0"); +* nvtxDomainRangePushW(domain, L"Level 1"); +* nvtxDomainRangePop(domain); +* nvtxDomainRangePop(domain); +* \endcode +* +* \sa +* ::nvtxRangePushEx +* ::nvtxRangePushA +* ::nvtxRangePushW +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Ends a nested thread range. + * + * \return The level of the range being ended. If an error occurs a negative + * value is returned on the current thread. + * + * \par Example: + * \code + * nvtxRangePushA("Level 0"); + * nvtxRangePushW(L"Level 1"); + * nvtxRangePop(); + * nvtxRangePop(); + * \endcode + * + * \sa + * ::nvtxRangePushEx + * ::nvtxRangePushA + * ::nvtxRangePushW + * + * \version \NVTX_VERSION_0 + * @{ */ +NVTX_DECLSPEC int NVTX_API nvtxRangePop(void); +/** @} */ + + +/** @} */ /*END defgroup*/ +/* ========================================================================= */ +/** \defgroup RESOURCE_NAMING Resource Naming + * + * See \ref RESOURCE_NAMING for more details + * + * @{ + */ + + +/* ------------------------------------------------------------------------- */ +/** \name Functions for Generic Resource Naming*/ +/* ------------------------------------------------------------------------- */ + +/* ------------------------------------------------------------------------- */ +/** \cond SHOW_HIDDEN +* \brief Resource typing helpers. 
+* +* Classes are used to make it easy to create a series of resource types +* per API without collisions +*/ +#define NVTX_RESOURCE_MAKE_TYPE(CLASS, INDEX) ((((uint32_t)(NVTX_RESOURCE_CLASS_ ## CLASS))<<16)|((uint32_t)(INDEX))) +#define NVTX_RESOURCE_CLASS_GENERIC 1 +/** \endcond */ + +/* ------------------------------------------------------------------------- */ +/** \brief Generic resource type for when a resource class is not available. +* +* \sa +* ::nvtxDomainResourceCreate +* +* \version \NVTX_VERSION_2 +*/ +typedef enum nvtxResourceGenericType_t +{ + NVTX_RESOURCE_TYPE_UNKNOWN = 0, + NVTX_RESOURCE_TYPE_GENERIC_POINTER = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 1), /**< Generic pointer assumed to have no collisions with other pointers. */ + NVTX_RESOURCE_TYPE_GENERIC_HANDLE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 2), /**< Generic handle assumed to have no collisions with other handles. */ + NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 3), /**< OS native thread identifier. */ + NVTX_RESOURCE_TYPE_GENERIC_THREAD_POSIX = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 4) /**< POSIX pthread identifier. */ +} nvtxResourceGenericType_t; + + + +/** \brief Resource Attribute Structure. +* \anchor RESOURCE_ATTRIBUTE_STRUCTURE +* +* This structure is used to describe the attributes of a resource. The layout of +* the structure is defined by a specific version of the tools extension +* library and can change between different versions of the Tools Extension +* library. +* +* \par Initializing the Attributes +* +* The caller should always perform the following three tasks when using +* attributes: +*
    <ul> +*
  <li>Zero the structure +*
  <li>Set the version field +*
  <li>Set the size field +* </ul>
+* +* Zeroing the structure sets all the resource attributes types and values +* to the default value. +* +* The version and size fields are used by the Tools Extension +* implementation to handle multiple versions of the attributes structure. +* +* It is recommended that the caller use one of the following two methods +* to initialize the resource attributes structure: +* +* \par Method 1: Initializing nvtxResourceAttributes for future compatibility +* \code +* nvtxResourceAttributes_t attribs = {0}; +* attribs.version = NVTX_VERSION; +* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE; +* \endcode +* +* \par Method 2: Initializing nvtxResourceAttributes for a specific version +* \code +* nvtxResourceAttributes_v0 attribs = {0}; +* attribs.version = 2; +* attribs.size = (uint16_t)(sizeof(nvtxResourceAttributes_v0)); +* \endcode +* +* If the caller uses Method 1 it is critical that the entire binary +* layout of the structure be configured to 0 so that all fields +* are initialized to the default value. +* +* The caller should either use both NVTX_VERSION and +* NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values +* and a versioned type (Method 2). Using a mix of the two methods +* will likely cause either source level incompatibility or binary +* incompatibility in the future. 
+* +* \par Settings Attribute Types and Values +* +* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain"); +* +* // Initialize +* nvtxResourceAttributes_t attribs = {0}; +* attribs.version = NVTX_VERSION; +* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE; +* +* // Configure the Attributes +* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER; +* attribs.identifier.pValue = (const void*)pMutex; +* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII; +* attribs.message.ascii = "Single thread access to database."; +* +* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs); +* \endcode +* +* \sa +* ::nvtxDomainResourceCreate +*/ +typedef struct nvtxResourceAttributes_v0 +{ + /** + * \brief Version flag of the structure. + * + * Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs + * supported in this header file. This can optionally be overridden to + * another version of the tools extension library. + */ + uint16_t version; + + /** + * \brief Size of the structure. + * + * Needs to be set to the size in bytes of this attribute + * structure. + */ + uint16_t size; + + /** + * \brief Identifier type specifies how to interpret the identifier field + * + * Defines the identifier format of the attribute structure's \ref RESOURCE_IDENTIFIER_FIELD + * "identifier" field. + * + * Default Value is NVTX_RESOURCE_TYPE_UNKNOWN + */ + int32_t identifierType; /* values from enums following the pattern nvtxResource[name]Type_t */ + + /** + * \brief Identifier for the resource. + * \anchor RESOURCE_IDENTIFIER_FIELD + * + * An identifier may be a pointer or a handle to an OS or middleware API object. + * The resource type will assist in avoiding collisions where handles values may collide. + */ + union identifier_t + { + const void* pValue; + uint64_t ullValue; + } identifier; + + /** \brief Message type specified in this attribute structure. 
+ * + * Defines the message format of the attribute structure's \ref RESOURCE_MESSAGE_FIELD + * "message" field. + * + * Default Value is NVTX_MESSAGE_UNKNOWN + */ + int32_t messageType; /* nvtxMessageType_t */ + + /** \brief Message assigned to this attribute structure. \anchor RESOURCE_MESSAGE_FIELD + * + * The text message that is attached to a resource. + */ + nvtxMessageValue_t message; + +} nvtxResourceAttributes_v0; + +typedef struct nvtxResourceAttributes_v0 nvtxResourceAttributes_t; + +/* \cond SHOW_HIDDEN +* \version \NVTX_VERSION_2 +*/ +#define NVTX_RESOURCE_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxResourceAttributes_v0) ) ) +typedef struct nvtxResourceHandle* nvtxResourceHandle_t; +/** \endcond */ + + + +/* ------------------------------------------------------------------------- */ +/** \brief Create a resource object to track and associate data with OS and middleware objects +* +* Allows users to associate an API handle or pointer with a user-provided name. +* +* +* \param domain - Domain to own the resource object +* \param attribs - Attributes to be associated with the resource +* +* \return A handle that represents the newly created resource object. 
+* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain"); +* nvtxResourceAttributes_t attribs = {0}; +* attribs.version = NVTX_VERSION; +* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE; +* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER; +* attribs.identifier.pValue = (const void*)pMutex; +* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII; +* attribs.message.ascii = "Single thread access to database."; +* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs); +* \endcode +* +* \sa +* ::nvtxResourceAttributes_t +* ::nvtxDomainResourceDestroy +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Destroy a resource object to track and associate data with OS and middleware objects +* +* Allows users to associate an API handle or pointer with a user-provided name. +* +* \param resource - Handle to the resource in which to operate. 
+* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain"); +* nvtxResourceAttributes_t attribs = {0}; +* attribs.version = NVTX_VERSION; +* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE; +* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER; +* attribs.identifier.pValue = (const void*)pMutex; +* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII; +* attribs.message.ascii = "Single thread access to database."; +* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs); +* nvtxDomainResourceDestroy(handle); +* \endcode +* +* \sa +* ::nvtxDomainResourceCreate +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource); +/** @} */ + + +/** \name Functions for NVTX Category Naming*/ + +/* ------------------------------------------------------------------------- */ +/** +* \brief Annotate an NVTX category used within a domain. +* +* Categories are used to group sets of events. Each category is identified +* through a unique ID and that ID is passed into any of the marker/range +* events to assign that event to a specific category. The nvtxDomainNameCategory +* function calls allow the user to assign a name to a category ID that is +* specific to the domain. +* +* nvtxDomainNameCategory(NULL, category, name) is equivalent to calling +* nvtxNameCategory(category, name). +* +* \param domain - The domain of scoping the category. +* \param category - The category ID to name. +* \param name - The name of the category. +* +* \remarks The category names are tracked per domain. 
+* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("example"); +* nvtxDomainNameCategoryA(domain, 1, "Memory Allocation"); +* nvtxDomainNameCategoryW(domain, 2, L"Memory Transfer"); +* \endcode +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name); +NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name); +/** @} */ + +/** \brief Annotate an NVTX category. + * + * Categories are used to group sets of events. Each category is identified + * through a unique ID and that ID is passed into any of the marker/range + * events to assign that event to a specific category. The nvtxNameCategory + * function calls allow the user to assign a name to a category ID. + * + * \param category - The category ID to name. + * \param name - The name of the category. + * + * \remarks The category names are tracked per process. + * + * \par Example: + * \code + * nvtxNameCategory(1, "Memory Allocation"); + * nvtxNameCategory(2, "Memory Transfer"); + * nvtxNameCategory(3, "Memory Object Lifetime"); + * \endcode + * + * \version \NVTX_VERSION_1 + * @{ */ +NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name); +NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name); +/** @} */ + +/** \name Functions for OS Threads Naming*/ + +/* ------------------------------------------------------------------------- */ +/** \brief Annotate an OS thread. + * + * Allows the user to name an active thread of the current process. If an + * invalid thread ID is provided or a thread ID from a different process is + * used the behavior of the tool is implementation dependent. + * + * Tools expect thread ID to be a number that uniquely identifies the thread + * at the time of the call. Note that a thread's ID can be reused after + * it is destroyed. 
Tools may choose how to handle aliasing of thread IDs. + * + * POSIX pthread_t type returned by pthread_self() may not comply with these + * expectations. Please use OS-specific thread ID instead of pthread_t. + * + * The thread name is associated to the default domain. To support domains + * use resource objects via ::nvtxDomainResourceCreate. + * + * \param threadId - The ID of the thread to name. + * \param name - The name of the thread. + * + * \par Examples: + * MS Windows: + * \code + * #include <windows.h> + * nvtxNameOsThread(GetCurrentThreadId(), "Current thread"); + * nvtxNameOsThread(GetThreadId(SomeThreadHandle), "Other thread"); + * \endcode + * + * Android: + * \code + * #include <unistd.h> + * nvtxNameOsThreadA(gettid(), "Current thread"); + * nvtxNameOsThreadA(getpid(), "Main thread"); + * \endcode + * + * Linux: + * \code + * #include <sys/syscall.h> + * nvtxNameOsThreadA(syscall(SYS_gettid), "Current thread"); + * \endcode + * \code + * #include <unistd.h> + * nvtxNameOsThreadA(getpid(), "Main thread"); + * \endcode + * + * OS X: + * \code + * #include <sys/syscall.h> + * nvtxNameOsThreadA(syscall(SYS_thread_selfid), "Current thread"); + * \endcode + * \code + * #include <pthread.h> + * __uint64_t id; + * pthread_threadid_np(pthread_self(), &id); + * nvtxNameOsThreadA(id, "Current thread"); + * pthread_threadid_np(somePThreadId, &id); + * nvtxNameOsThreadA(id, "Other thread"); + * \endcode + * + * \version \NVTX_VERSION_1 + * @{ */ +NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name); +NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name); +/** @} */ + + +/** @} */ /*END defgroup*/ +/* ========================================================================= */ +/** \defgroup STRING_REGISTRATION String Registration +* +* Registered strings are intended to increase performance by lowering instrumentation +* overhead. 
A string may be registered once and the handle may be passed in place of +* a string where the APIs allow. 
+* +* See \ref STRING_REGISTRATION for more details +* +* @{ +*/ + +/* ------------------------------------------------------------------------- */ +/** \brief Register a string. + +* Registers an immutable string with NVTX. Once registered the pointer used +* to register the domain name can be used in nvtxEventAttributes_t +* \ref MESSAGE_FIELD. This allows the NVTX implementation to skip copying the +* contents of the message on each event invocation. +* +* String registration is an optimization. It is recommended to use string +* registration if the string will be passed to an event many times. +* +* Strings are not unregistered, except by unregistering the entire domain. +* +* \param domain - Domain handle. If NULL then the global domain is used. +* \param string - A unique pointer to a sequence of characters. +* +* \return A handle representing the registered string. +* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example"); +* nvtxStringHandle_t message = nvtxDomainRegisterStringA(domain, "registered string"); +* nvtxEventAttributes_t eventAttrib = {0}; +* eventAttrib.version = NVTX_VERSION; +* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; +* eventAttrib.messageType = NVTX_MESSAGE_TYPE_REGISTERED; +* eventAttrib.message.registered = message; +* \endcode +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string); +NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string); +/** @} */ + +/** @} */ /*END defgroup*/ +/* ========================================================================= */ +/** \defgroup DOMAINS Domains +* +* Domains are used to group events to a developer defined scope. 
Middleware +* vendors may also scope their own events to avoid collisions with +* the application developer's events, so that the application developer may +* inspect both parts and easily differentiate or filter them. By default +* all events are scoped to a global domain where NULL is provided or when +* using APIs provided by versions of NVTX below v2. +* +* Domains are intended to be typically long lived objects with the intention +* of logically separating events of large modules from each other such as +* middleware libraries from each other and the main application. +* +* See \ref DOMAINS for more details +* +* @{ +*/ + +/* ------------------------------------------------------------------------- */ +/** \brief Register a NVTX domain. +* +* Domains are used to scope annotations. All NVTX_VERSION_0 and NVTX_VERSION_1 +* annotations are scoped to the global domain. The function nvtxDomainCreate +* creates a new named domain. +* +* Each domain maintains its own nvtxRangePush and nvtxRangePop stack. +* +* \param name - A unique string representing the domain. +* +* \return A handle representing the domain. 
+* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example"); +* +* nvtxMarkA("nvtxMarkA to global domain"); +* +* nvtxEventAttributes_t eventAttrib1 = {0}; +* eventAttrib1.version = NVTX_VERSION; +* eventAttrib1.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; +* eventAttrib1.message.ascii = "nvtxDomainMarkEx to global domain"; +* nvtxDomainMarkEx(NULL, &eventAttrib1); +* +* nvtxEventAttributes_t eventAttrib2 = {0}; +* eventAttrib2.version = NVTX_VERSION; +* eventAttrib2.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; +* eventAttrib2.message.ascii = "nvtxDomainMarkEx to com.nvidia.nvtx.example"; +* nvtxDomainMarkEx(domain, &eventAttrib2); +* nvtxDomainDestroy(domain); +* \endcode +* +* \sa +* ::nvtxDomainDestroy +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* name); +NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* name); +/** @} */ + +/* ------------------------------------------------------------------------- */ +/** \brief Unregister a NVTX domain. +* +* Unregisters the domain handle and frees all domain specific resources. 
+* +* \param domain - the domain handle +* +* \par Example: +* \code +* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example"); +* nvtxDomainDestroy(domain); +* \endcode +* +* \sa +* ::nvtxDomainCreateA +* ::nvtxDomainCreateW +* +* \version \NVTX_VERSION_2 +* @{ */ +NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain); +/** @} */ + + +/** @} */ /*END defgroup*/ +/* ========================================================================= */ +/** \cond SHOW_HIDDEN */ + +#ifdef UNICODE + #define nvtxMark nvtxMarkW + #define nvtxRangeStart nvtxRangeStartW + #define nvtxRangePush nvtxRangePushW + #define nvtxNameCategory nvtxNameCategoryW + #define nvtxNameOsThread nvtxNameOsThreadW + /* NVTX_VERSION_2 */ + #define nvtxDomainCreate nvtxDomainCreateW + #define nvtxDomainRegisterString nvtxDomainRegisterStringW + #define nvtxDomainNameCategory nvtxDomainNameCategoryW +#else + #define nvtxMark nvtxMarkA + #define nvtxRangeStart nvtxRangeStartA + #define nvtxRangePush nvtxRangePushA + #define nvtxNameCategory nvtxNameCategoryA + #define nvtxNameOsThread nvtxNameOsThreadA + /* NVTX_VERSION_2 */ + #define nvtxDomainCreate nvtxDomainCreateA + #define nvtxDomainRegisterString nvtxDomainRegisterStringA + #define nvtxDomainNameCategory nvtxDomainNameCategoryA +#endif + +/** \endcond */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#define NVTX_IMPL_GUARD /* Ensure other headers cannot included directly */ + +#include "nvtxDetail/nvtxTypes.h" + +#ifndef NVTX_NO_IMPL +#include "nvtxDetail/nvtxImpl.h" +#endif /*NVTX_NO_IMPL*/ + +#undef NVTX_IMPL_GUARD + +#endif /* !defined(NVTX_VERSION) */ diff --git a/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImpl.h b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImpl.h new file mode 100644 index 0000000..8f3d69d --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImpl.h @@ -0,0 +1,438 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. All rights reserved. 
+* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_IMPL_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined). +#endif + +/* ---- Include required platform headers ---- */ + +#if defined(_WIN32) + +#include <windows.h> /* HMODULE, LoadLibraryW, GetProcAddress, Interlocked*, MAX_PATH */ + +#else +#include <unistd.h> + +#if defined(__ANDROID__) +#include <android/api-level.h> +#endif + +#if defined(__linux__) || defined(__CYGWIN__) +#include <sched.h> /* sched_yield */ +#endif + +#include <limits.h> /* PATH_MAX */ +#include <dlfcn.h> /* dlopen, dlsym, dlclose */ +#include <fcntl.h> +#include <stdlib.h> /* getenv */ +#include <stdio.h> +#include <sys/types.h> +#include <unistd.h> +#include <errno.h> + +#include <string.h> +#include <sys/types.h> +#include <pthread.h> +#include <stdlib.h> +#include <wchar.h> + +#endif + +/* ---- Define macros used in this file ---- */ + +#define NVTX_INIT_STATE_FRESH 0 +#define NVTX_INIT_STATE_STARTED 1 +#define NVTX_INIT_STATE_COMPLETE 2 + +#ifdef NVTX_DEBUG_PRINT +#ifdef __ANDROID__ +#include <android/log.h> +#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__); +#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__); +#else +#include <stdio.h> +#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__) +#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__) +#endif +#else /* !defined(NVTX_DEBUG_PRINT) */ +#define NVTX_ERR(...) +#define NVTX_INFO(...) 
+#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#ifdef __GNUC__ +#pragma GCC visibility push(hidden) +#endif + +/* ---- Forward declare all functions referenced in globals ---- */ + +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void); +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)( + NvtxCallbackModule module, + NvtxFunctionTable* out_table, + unsigned int* out_size); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)( + uint32_t version); +NVTX_LINKONCE_FWDDECL_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)( + uint32_t exportTableId); + +#include "nvtxInitDecls.h" + +/* ---- Define all globals ---- */ + +typedef struct nvtxGlobals_t +{ + volatile unsigned int initState; + NvtxExportTableCallbacks etblCallbacks; + NvtxExportTableVersionInfo etblVersionInfo; + + /* Implementation function pointers */ + nvtxMarkEx_impl_fntype nvtxMarkEx_impl_fnptr; + nvtxMarkA_impl_fntype nvtxMarkA_impl_fnptr; + nvtxMarkW_impl_fntype nvtxMarkW_impl_fnptr; + nvtxRangeStartEx_impl_fntype nvtxRangeStartEx_impl_fnptr; + nvtxRangeStartA_impl_fntype nvtxRangeStartA_impl_fnptr; + nvtxRangeStartW_impl_fntype nvtxRangeStartW_impl_fnptr; + nvtxRangeEnd_impl_fntype nvtxRangeEnd_impl_fnptr; + nvtxRangePushEx_impl_fntype nvtxRangePushEx_impl_fnptr; + nvtxRangePushA_impl_fntype nvtxRangePushA_impl_fnptr; + nvtxRangePushW_impl_fntype nvtxRangePushW_impl_fnptr; + nvtxRangePop_impl_fntype nvtxRangePop_impl_fnptr; + nvtxNameCategoryA_impl_fntype nvtxNameCategoryA_impl_fnptr; + nvtxNameCategoryW_impl_fntype nvtxNameCategoryW_impl_fnptr; + nvtxNameOsThreadA_impl_fntype nvtxNameOsThreadA_impl_fnptr; + nvtxNameOsThreadW_impl_fntype nvtxNameOsThreadW_impl_fnptr; + + nvtxNameCuDeviceA_fakeimpl_fntype nvtxNameCuDeviceA_impl_fnptr; + nvtxNameCuDeviceW_fakeimpl_fntype nvtxNameCuDeviceW_impl_fnptr; + 
nvtxNameCuContextA_fakeimpl_fntype nvtxNameCuContextA_impl_fnptr; + nvtxNameCuContextW_fakeimpl_fntype nvtxNameCuContextW_impl_fnptr; + nvtxNameCuStreamA_fakeimpl_fntype nvtxNameCuStreamA_impl_fnptr; + nvtxNameCuStreamW_fakeimpl_fntype nvtxNameCuStreamW_impl_fnptr; + nvtxNameCuEventA_fakeimpl_fntype nvtxNameCuEventA_impl_fnptr; + nvtxNameCuEventW_fakeimpl_fntype nvtxNameCuEventW_impl_fnptr; + + nvtxNameClDeviceA_fakeimpl_fntype nvtxNameClDeviceA_impl_fnptr; + nvtxNameClDeviceW_fakeimpl_fntype nvtxNameClDeviceW_impl_fnptr; + nvtxNameClContextA_fakeimpl_fntype nvtxNameClContextA_impl_fnptr; + nvtxNameClContextW_fakeimpl_fntype nvtxNameClContextW_impl_fnptr; + nvtxNameClCommandQueueA_fakeimpl_fntype nvtxNameClCommandQueueA_impl_fnptr; + nvtxNameClCommandQueueW_fakeimpl_fntype nvtxNameClCommandQueueW_impl_fnptr; + nvtxNameClMemObjectA_fakeimpl_fntype nvtxNameClMemObjectA_impl_fnptr; + nvtxNameClMemObjectW_fakeimpl_fntype nvtxNameClMemObjectW_impl_fnptr; + nvtxNameClSamplerA_fakeimpl_fntype nvtxNameClSamplerA_impl_fnptr; + nvtxNameClSamplerW_fakeimpl_fntype nvtxNameClSamplerW_impl_fnptr; + nvtxNameClProgramA_fakeimpl_fntype nvtxNameClProgramA_impl_fnptr; + nvtxNameClProgramW_fakeimpl_fntype nvtxNameClProgramW_impl_fnptr; + nvtxNameClEventA_fakeimpl_fntype nvtxNameClEventA_impl_fnptr; + nvtxNameClEventW_fakeimpl_fntype nvtxNameClEventW_impl_fnptr; + + nvtxNameCudaDeviceA_impl_fntype nvtxNameCudaDeviceA_impl_fnptr; + nvtxNameCudaDeviceW_impl_fntype nvtxNameCudaDeviceW_impl_fnptr; + nvtxNameCudaStreamA_fakeimpl_fntype nvtxNameCudaStreamA_impl_fnptr; + nvtxNameCudaStreamW_fakeimpl_fntype nvtxNameCudaStreamW_impl_fnptr; + nvtxNameCudaEventA_fakeimpl_fntype nvtxNameCudaEventA_impl_fnptr; + nvtxNameCudaEventW_fakeimpl_fntype nvtxNameCudaEventW_impl_fnptr; + + nvtxDomainMarkEx_impl_fntype nvtxDomainMarkEx_impl_fnptr; + nvtxDomainRangeStartEx_impl_fntype nvtxDomainRangeStartEx_impl_fnptr; + nvtxDomainRangeEnd_impl_fntype nvtxDomainRangeEnd_impl_fnptr; + 
nvtxDomainRangePushEx_impl_fntype nvtxDomainRangePushEx_impl_fnptr; + nvtxDomainRangePop_impl_fntype nvtxDomainRangePop_impl_fnptr; + nvtxDomainResourceCreate_impl_fntype nvtxDomainResourceCreate_impl_fnptr; + nvtxDomainResourceDestroy_impl_fntype nvtxDomainResourceDestroy_impl_fnptr; + nvtxDomainNameCategoryA_impl_fntype nvtxDomainNameCategoryA_impl_fnptr; + nvtxDomainNameCategoryW_impl_fntype nvtxDomainNameCategoryW_impl_fnptr; + nvtxDomainRegisterStringA_impl_fntype nvtxDomainRegisterStringA_impl_fnptr; + nvtxDomainRegisterStringW_impl_fntype nvtxDomainRegisterStringW_impl_fnptr; + nvtxDomainCreateA_impl_fntype nvtxDomainCreateA_impl_fnptr; + nvtxDomainCreateW_impl_fntype nvtxDomainCreateW_impl_fnptr; + nvtxDomainDestroy_impl_fntype nvtxDomainDestroy_impl_fnptr; + nvtxInitialize_impl_fntype nvtxInitialize_impl_fnptr; + + nvtxDomainSyncUserCreate_impl_fntype nvtxDomainSyncUserCreate_impl_fnptr; + nvtxDomainSyncUserDestroy_impl_fntype nvtxDomainSyncUserDestroy_impl_fnptr; + nvtxDomainSyncUserAcquireStart_impl_fntype nvtxDomainSyncUserAcquireStart_impl_fnptr; + nvtxDomainSyncUserAcquireFailed_impl_fntype nvtxDomainSyncUserAcquireFailed_impl_fnptr; + nvtxDomainSyncUserAcquireSuccess_impl_fntype nvtxDomainSyncUserAcquireSuccess_impl_fnptr; + nvtxDomainSyncUserReleasing_impl_fntype nvtxDomainSyncUserReleasing_impl_fnptr; + + /* Tables of function pointers -- Extra null added to the end to ensure + * a crash instead of silent corruption if a tool reads off the end. 
*/ + NvtxFunctionPointer* functionTable_CORE [NVTX_CBID_CORE_SIZE + 1]; + NvtxFunctionPointer* functionTable_CUDA [NVTX_CBID_CUDA_SIZE + 1]; + NvtxFunctionPointer* functionTable_OPENCL[NVTX_CBID_OPENCL_SIZE + 1]; + NvtxFunctionPointer* functionTable_CUDART[NVTX_CBID_CUDART_SIZE + 1]; + NvtxFunctionPointer* functionTable_CORE2 [NVTX_CBID_CORE2_SIZE + 1]; + NvtxFunctionPointer* functionTable_SYNC [NVTX_CBID_SYNC_SIZE + 1]; +} nvtxGlobals_t; + +NVTX_LINKONCE_DEFINE_GLOBAL nvtxGlobals_t NVTX_VERSIONED_IDENTIFIER(nvtxGlobals) = +{ + NVTX_INIT_STATE_FRESH, + + { + sizeof(NvtxExportTableCallbacks), + NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable) + }, + { + sizeof(NvtxExportTableVersionInfo), + NVTX_VERSION, + 0, + NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion) + }, + + /* Implementation function pointers */ + NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init), + + NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init), + 
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init), + + NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init), + + NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init), + + NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init), + 
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init), + + NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init), + NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init), + + /* Tables of function pointers */ + { + 0, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr, + 
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr, + 0 + }, + { + 0, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr, + 0 + }, + { + 0, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr, + 
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr, + 0 + }, + { + 0, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr, + 0 + }, + { + 0, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr, + 
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr, + 0 + }, + { + 0, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr, + (NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr, + 0 + } +}; + +/* ---- Define static inline implementations of core API functions ---- */ + +#include "nvtxImplCore.h" + +/* ---- Define implementations of export table functions ---- */ + +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)( + NvtxCallbackModule module, + NvtxFunctionTable* out_table, + unsigned int* out_size) +{ + unsigned int bytes = 0; + NvtxFunctionTable table = (NvtxFunctionTable)0; + + switch (module) + { + case NVTX_CB_MODULE_CORE: + table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE; + bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE); + break; + case NVTX_CB_MODULE_CUDA: + table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA; + bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA); + break; + 
case NVTX_CB_MODULE_OPENCL: + table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL; + bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL); + break; + case NVTX_CB_MODULE_CUDART: + table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART; + bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART); + break; + case NVTX_CB_MODULE_CORE2: + table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2; + bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2); + break; + case NVTX_CB_MODULE_SYNC: + table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC; + bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC); + break; + default: return 0; /* unrecognized module: return before out_size/out_table are written */ + } + + if (out_size) /* both outputs are optional; a null pointer is simply skipped */ + *out_size = (bytes / (unsigned int)sizeof(NvtxFunctionPointer*)) - 1; /* slot count, not counting the extra trailing null entry each table is declared with */ + + if (out_table) + *out_table = table; + + return 1; /* module was recognized */ +} + +NVTX_LINKONCE_DEFINE_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(uint32_t exportTableId) +{ + switch (exportTableId) /* maps an export-table id to the matching sub-struct of this instance's globals */ + { + case NVTX_ETID_CALLBACKS: return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblCallbacks; + case NVTX_ETID_VERSIONINFO: return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblVersionInfo; + default: return 0; /* any other id yields a null pointer */ + } +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(uint32_t version) +{ + /* Reserved for custom implementations to resolve problems with tools; this default ignores the value */ + (void)version; +} + +/* ---- Define implementations of init versions of all API functions ---- */ + +#include "nvtxInitDefs.h" + +/* ---- Define implementations of initialization functions ---- */ + +#include "nvtxInit.h" + +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ diff --git a/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImplCore.h 
b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImplCore.h new file mode 100644 index 0000000..3c2353f --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxImplCore.h @@ -0,0 +1,307 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib) +{ +#ifndef NVTX_DISABLE + nvtxMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr; + if(local!=0) + (*local)(eventAttrib); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message) +{ +#ifndef NVTX_DISABLE + nvtxMarkA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr; + if(local!=0) + (*local)(message); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message) +{ +#ifndef NVTX_DISABLE + nvtxMarkW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr; + if(local!=0) + (*local)(message); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib) +{ +#ifndef NVTX_DISABLE + nvtxRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr; + if(local!=0) + return (*local)(eventAttrib); + else +#endif /*NVTX_DISABLE*/ + return (nvtxRangeId_t)0; +} + +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message) +{ +#ifndef NVTX_DISABLE + nvtxRangeStartA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr; + if(local!=0) + return (*local)(message); + else +#endif /*NVTX_DISABLE*/ + return (nvtxRangeId_t)0; +} + +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message) +{ +#ifndef NVTX_DISABLE + nvtxRangeStartW_impl_fntype local = 
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr; + if(local!=0) + return (*local)(message); + else +#endif /*NVTX_DISABLE*/ + return (nvtxRangeId_t)0; +} + +NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id) +{ +#ifndef NVTX_DISABLE + nvtxRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr; + if(local!=0) + (*local)(id); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib) +{ +#ifndef NVTX_DISABLE + nvtxRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr; + if(local!=0) + return (*local)(eventAttrib); + else +#endif /*NVTX_DISABLE*/ + return (int)NVTX_NO_PUSH_POP_TRACKING; +} + +NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message) +{ +#ifndef NVTX_DISABLE + nvtxRangePushA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr; + if(local!=0) + return (*local)(message); + else +#endif /*NVTX_DISABLE*/ + return (int)NVTX_NO_PUSH_POP_TRACKING; +} + +NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message) +{ +#ifndef NVTX_DISABLE + nvtxRangePushW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr; + if(local!=0) + return (*local)(message); + else +#endif /*NVTX_DISABLE*/ + return (int)NVTX_NO_PUSH_POP_TRACKING; +} + +NVTX_DECLSPEC int NVTX_API nvtxRangePop(void) +{ +#ifndef NVTX_DISABLE + nvtxRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr; + if(local!=0) + return (*local)(); + else +#endif /*NVTX_DISABLE*/ + return (int)NVTX_NO_PUSH_POP_TRACKING; +} + +NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name) +{ +#ifndef NVTX_DISABLE + nvtxNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr; + if(local!=0) + (*local)(category, name); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API 
nvtxNameCategoryW(uint32_t category, const wchar_t* name) +{ +#ifndef NVTX_DISABLE + nvtxNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr; + if(local!=0) + (*local)(category, name); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name) +{ +#ifndef NVTX_DISABLE + nvtxNameOsThreadA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr; + if(local!=0) + (*local)(threadId, name); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name) +{ +#ifndef NVTX_DISABLE + nvtxNameOsThreadW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr; + if(local!=0) + (*local)(threadId, name); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) +{ +#ifndef NVTX_DISABLE + nvtxDomainMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr; + if(local!=0) + (*local)(domain, eventAttrib); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) +{ +#ifndef NVTX_DISABLE + nvtxDomainRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr; + if(local!=0) + return (*local)(domain, eventAttrib); + else +#endif /*NVTX_DISABLE*/ + return (nvtxRangeId_t)0; +} + +NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id) +{ +#ifndef NVTX_DISABLE + nvtxDomainRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr; + if(local!=0) + (*local)(domain, id); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib) +{ +#ifndef 
NVTX_DISABLE + nvtxDomainRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr; + if(local!=0) + return (*local)(domain, eventAttrib); + else +#endif /*NVTX_DISABLE*/ + return (int)NVTX_NO_PUSH_POP_TRACKING; +} + +NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain) +{ +#ifndef NVTX_DISABLE + nvtxDomainRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr; + if(local!=0) + return (*local)(domain); + else +#endif /*NVTX_DISABLE*/ + return (int)NVTX_NO_PUSH_POP_TRACKING; +} + +NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs) +{ +#ifndef NVTX_DISABLE + nvtxDomainResourceCreate_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr; + if(local!=0) + return (*local)(domain, attribs); + else +#endif /*NVTX_DISABLE*/ + return (nvtxResourceHandle_t)0; +} + +NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource) +{ +#ifndef NVTX_DISABLE + nvtxDomainResourceDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr; + if(local!=0) + (*local)(resource); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name) +{ +#ifndef NVTX_DISABLE + nvtxDomainNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr; + if(local!=0) + (*local)(domain, category, name); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name) +{ +#ifndef NVTX_DISABLE + nvtxDomainNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr; + if(local!=0) + (*local)(domain, category, name); +#endif /*NVTX_DISABLE*/ +} + 
+NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string) +{ +#ifndef NVTX_DISABLE + nvtxDomainRegisterStringA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr; + if(local!=0) + return (*local)(domain, string); + else +#endif /*NVTX_DISABLE*/ + return (nvtxStringHandle_t)0; +} + +NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string) +{ +#ifndef NVTX_DISABLE + nvtxDomainRegisterStringW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr; + if(local!=0) + return (*local)(domain, string); + else +#endif /*NVTX_DISABLE*/ + return (nvtxStringHandle_t)0; +} + +NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* message) +{ +#ifndef NVTX_DISABLE + nvtxDomainCreateA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr; + if(local!=0) + return (*local)(message); + else +#endif /*NVTX_DISABLE*/ + return (nvtxDomainHandle_t)0; +} + +NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* message) +{ +#ifndef NVTX_DISABLE + nvtxDomainCreateW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr; + if(local!=0) + return (*local)(message); + else +#endif /*NVTX_DISABLE*/ + return (nvtxDomainHandle_t)0; +} + +NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain) +{ +#ifndef NVTX_DISABLE + nvtxDomainDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr; + if(local!=0) + (*local)(domain); +#endif /*NVTX_DISABLE*/ +} + +NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved) +{ +#ifndef NVTX_DISABLE + nvtxInitialize_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr; + if(local!=0) + (*local)(reserved); +#endif /*NVTX_DISABLE*/ +} diff --git 
a/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInit.h b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInit.h new file mode 100644 index 0000000..91fcc29 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInit.h @@ -0,0 +1,312 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_IMPL_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined). +#endif + +/* ---- Platform-independent helper definitions and functions ---- */ + +/* Prefer macros over inline functions to reduce symbol resolution at link time */ + +#if defined(_WIN32) +#define NVTX_PATHCHAR wchar_t +#define NVTX_STR(x) L##x +#define NVTX_GETENV _wgetenv +#define NVTX_BUFSIZE MAX_PATH +#define NVTX_DLLHANDLE HMODULE +#define NVTX_DLLOPEN(x) LoadLibraryW(x) +#define NVTX_DLLFUNC GetProcAddress +#define NVTX_DLLCLOSE FreeLibrary +#define NVTX_YIELD() SwitchToThread() +#define NVTX_MEMBAR() MemoryBarrier() +#define NVTX_ATOMIC_WRITE_32(address, value) InterlockedExchange((volatile LONG*)address, value) +#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) old = InterlockedCompareExchange((volatile LONG*)address, exchange, comparand) +#elif defined(__GNUC__) +#define NVTX_PATHCHAR char +#define NVTX_STR(x) x +#define NVTX_GETENV getenv +#define NVTX_BUFSIZE PATH_MAX +#define NVTX_DLLHANDLE void* +#define NVTX_DLLOPEN(x) dlopen(x, RTLD_LAZY) +#define NVTX_DLLFUNC dlsym +#define NVTX_DLLCLOSE dlclose +#define NVTX_YIELD() sched_yield() +#define NVTX_MEMBAR() __sync_synchronize() +/* Ensure full memory barrier for atomics, to match Windows functions */ +#define NVTX_ATOMIC_WRITE_32(address, value) __sync_synchronize(); __sync_lock_test_and_set(address, value) +#define NVTX_ATOMIC_CAS_32(old, address, exchange, 
comparand) __sync_synchronize(); old = __sync_val_compare_and_swap(address, exchange, comparand) +#else +#error The library does not support your configuration! +#endif + +/* Define this to 1 for platforms that where pre-injected libraries can be discovered. */ +#if defined(_WIN32) +/* TODO */ +#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0 +#else +#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0 +#endif + +/* Define this to 1 for platforms that support environment variables */ +/* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */ +/* Try: #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */ +#define NVTX_SUPPORT_ENV_VARS 1 + +/* Define this to 1 for platforms that support dynamic/shared libraries */ +#define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1 + +/* Injection libraries implementing InitializeInjectionNvtx2 may be statically linked, +* and this will override any dynamic injection. Useful for platforms where dynamic +* injection is not available. Since weak symbols not explicitly marked extern are +* guaranteed to be initialized to zero if no definitions are found by the linker, the +* dynamic injection process proceeds normally if pfnInitializeInjectionNvtx2 is 0. */ +#if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__) +#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1 +/* To statically inject an NVTX library, define InitializeInjectionNvtx2_fnptr as a normal +* symbol (not weak) pointing to the implementation of InitializeInjectionNvtx2 (which +* does not need to be named "InitializeInjectionNvtx2" as is necessary in a dynamic +* injection library. */ +__attribute__((weak)) NvtxInitializeInjectionNvtxFunc_t InitializeInjectionNvtx2_fnptr; +#else +#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0 +#endif + +/* This function tries to find or load an NVTX injection library and get the +* address of its InitializeInjection2 function. 
If such a function pointer +* is found, it is called, and passed the address of this NVTX instance's +* nvtxGetExportTable function, so the injection can attach to this instance. +* If the initialization fails for any reason, any dynamic library loaded will +* be freed, and all NVTX implementation functions will be set to no-ops. If +* initialization succeeds, NVTX functions not attached to the tool will be set +* to no-ops. This is implemented as one function instead of several small +* functions to minimize the number of weak symbols the linker must resolve. +* Order of search is: +* - Pre-injected library exporting InitializeInjectionNvtx2 +* - Loadable library exporting InitializeInjectionNvtx2 +* - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64) +* - On Android, libNvtxInjection??.so within the package (?? is 32 or 64) +* - Statically-linked injection library defining InitializeInjectionNvtx2_fnptr +*/ +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void); +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void) +{ + const char* const initFuncName = "InitializeInjectionNvtx2"; + NvtxInitializeInjectionNvtxFunc_t init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)0; + NVTX_DLLHANDLE injectionLibraryHandle = (NVTX_DLLHANDLE)0; + int entryPointStatus = 0; + +#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY + /* Use POSIX global symbol chain to query for init function from any module */ + init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(0, initFuncName); +#endif + +#if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY + /* Try discovering dynamic injection library to load */ + if (!init_fnptr) + { +#if NVTX_SUPPORT_ENV_VARS + /* If env var NVTX_INJECTION64_PATH is set, it should contain the path + * to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */ + const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4) + ? 
NVTX_STR("NVTX_INJECTION32_PATH") + : NVTX_STR("NVTX_INJECTION64_PATH"); +#endif /* NVTX_SUPPORT_ENV_VARS */ + NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE]; + const NVTX_PATHCHAR* injectionLibraryPath = (const NVTX_PATHCHAR*)0; + + /* Refer to this variable explicitly in case all references to it are #if'ed out */ + (void)injectionLibraryPathBuf; + +#if NVTX_SUPPORT_ENV_VARS + /* Disable the warning for getenv & _wgetenv -- this usage is safe because + * these functions are not called again before using the returned value. */ +#if defined(_MSC_VER) +#pragma warning( push ) +#pragma warning( disable : 4996 ) +#endif + injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName); +#if defined(_MSC_VER) +#pragma warning( pop ) +#endif +#endif + +#if defined(__ANDROID__) + if (!injectionLibraryPath) + { + const char *bits = (sizeof(void*) == 4) ? "32" : "64"; + char cmdlineBuf[32]; + char pkgName[PATH_MAX]; + int count; + int pid; + FILE *fp; + size_t bytesRead; + size_t pos; + + pid = (int)getpid(); + count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid); + if (count <= 0 || count >= (int)sizeof(cmdlineBuf)) + { + NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + fp = fopen(cmdlineBuf, "r"); + if (!fp) + { + NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp); + fclose(fp); + if (bytesRead == 0) + { + NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + pkgName[bytesRead] = 0; + + /* String can contain colon as a process separator. In this case the package name is before the colon. 
*/ + pos = 0; + while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0') + { + ++pos; + } + pkgName[pos] = 0; + + count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits); + if (count <= 0 || count >= NVTX_BUFSIZE) + { + NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + /* On Android, verify path is accessible due to aggressive file access restrictions. */ + /* For dlopen, if the filename contains a leading slash, then it is interpreted as a */ + /* relative or absolute pathname; otherwise it will follow the rules in ld.so. */ + if (injectionLibraryPathBuf[0] == '/') + { +#if (__ANDROID_API__ < 21) + int access_err = access(injectionLibraryPathBuf, F_OK | R_OK); +#else + int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0); +#endif + if (access_err != 0) + { + NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + } + injectionLibraryPath = injectionLibraryPathBuf; + } +#endif + + /* At this point, injectionLibraryPath is specified if a dynamic + * injection library was specified by a tool. 
*/ + if (injectionLibraryPath) + { + /* Load the injection library */ + injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath); + if (!injectionLibraryHandle) + { + NVTX_ERR("Failed to load injection library\n"); + return NVTX_ERR_INIT_LOAD_LIBRARY; + } + else + { + /* Attempt to get the injection library's entry-point */ + init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(injectionLibraryHandle, initFuncName); + if (!init_fnptr) + { + NVTX_DLLCLOSE(injectionLibraryHandle); + NVTX_ERR("Failed to get address of function InitializeInjectionNvtx2 from injection library\n"); + return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT; + } + } + } + } +#endif + +#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY + if (!init_fnptr) + { + /* Check weakly-defined function pointer. A statically-linked injection can define this as + * a normal symbol and it will take precedence over a dynamic injection. */ + if (InitializeInjectionNvtx2_fnptr) + { + init_fnptr = InitializeInjectionNvtx2_fnptr; + } + } +#endif + + /* At this point, if init_fnptr is not set, then no tool has specified + * an NVTX injection library -- return non-success result so all NVTX + * API functions will be set to no-ops. */ + if (!init_fnptr) + { + return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE; + } + + /* Invoke injection library's initialization function. If it returns + * 0 (failure) and a dynamic injection was loaded, unload it. 
*/ + entryPointStatus = init_fnptr(NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)); + if (entryPointStatus == 0) + { + NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n"); + if (injectionLibraryHandle) + { + NVTX_DLLCLOSE(injectionLibraryHandle); + } + return NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT; + } + + return NVTX_SUCCESS; +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void) +{ + unsigned int old; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState == NVTX_INIT_STATE_COMPLETE) + { + return; + } + + NVTX_ATOMIC_CAS_32( + old, + &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState, + NVTX_INIT_STATE_STARTED, + NVTX_INIT_STATE_FRESH); + if (old == NVTX_INIT_STATE_FRESH) + { + int result; + int forceAllToNoops; + + /* Load & initialize injection library -- it will assign the function pointers */ + result = NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(); + + /* Set all pointers not assigned by the injection to null */ + forceAllToNoops = result != NVTX_SUCCESS; /* Set all to null if injection init failed */ + NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(forceAllToNoops); + + /* Signal that initialization has finished, so now the assigned function pointers will be used */ + NVTX_ATOMIC_WRITE_32( + &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState, + NVTX_INIT_STATE_COMPLETE); + } + else /* Spin-wait until initialization has finished */ + { + NVTX_MEMBAR(); + while (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState != NVTX_INIT_STATE_COMPLETE) + { + NVTX_YIELD(); + NVTX_MEMBAR(); + } + } +} diff --git a/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDecls.h b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDecls.h new file mode 100644 index 0000000..2c1ed32 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDecls.h @@ -0,0 +1,81 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. All rights reserved. 
+* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_IMPL_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined). +#endif + +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id); +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib); +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message); +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message); +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* 
name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name); + +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name); + +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context 
context, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name); + +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API 
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name); + +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id); +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API 
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message); +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved); + +NVTX_LINKONCE_FWDDECL_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle); +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle); diff --git a/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDefs.h b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDefs.h new file mode 100644 index 0000000..3ebe74c --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxInitDefs.h @@ -0,0 +1,573 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. 
All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_IMPL_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined). +#endif + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxMarkEx(eventAttrib); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxMarkA(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxMarkW(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxRangeStartEx(eventAttrib); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxRangeStartA(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxRangeStartW(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxRangeEnd(id); +} + +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return 
nvtxRangePushEx(eventAttrib); +} + +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxRangePushA(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxRangePushW(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxRangePop(); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxNameCategoryA(category, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxNameCategoryW(category, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxNameOsThreadA(threadId, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxNameOsThreadW(threadId, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxDomainMarkEx(domain, eventAttrib); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){ + 
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainRangeStartEx(domain, eventAttrib); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxDomainRangeEnd(domain, id); +} + +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainRangePushEx(domain, eventAttrib); +} + +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainRangePop(domain); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainResourceCreate(domain, attribs); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxDomainResourceDestroy(resource); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxDomainNameCategoryA(domain, category, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxDomainNameCategoryW(domain, category, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API 
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainRegisterStringA(domain, string); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainRegisterStringW(domain, string); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainCreateA(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + return nvtxDomainCreateW(message); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxDomainDestroy(domain); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved){ + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + nvtxInitialize(reserved); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name){ + nvtxNameCuDeviceA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr; + if (local) + local(device, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name){ + nvtxNameCuDeviceW_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = 
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr; + if (local) + local(device, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name){ + nvtxNameCuContextA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr; + if (local) + local(context, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name){ + nvtxNameCuContextW_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr; + if (local) + local(context, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name){ + nvtxNameCuStreamA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr; + if (local) + local(stream, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name){ + nvtxNameCuStreamW_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr; + if (local) + local(stream, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name){ + nvtxNameCuEventA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr; + if (local) + local(event, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const 
wchar_t* name){ + nvtxNameCuEventW_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr; + if (local) + local(event, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name){ + nvtxNameCudaDeviceA_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr; + if (local) + local(device, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name){ + nvtxNameCudaDeviceW_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr; + if (local) + local(device, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name){ + nvtxNameCudaStreamA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr; + if (local) + local(stream, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name){ + nvtxNameCudaStreamW_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr; + if (local) + local(stream, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name){ + nvtxNameCudaEventA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr; + if (local) + local(event, name); +} + 
+/* Initialization-time entry points: wide-char CUDA runtime event naming and
+ * the OpenCL device/context/command-queue/mem-object/sampler naming calls.
+ *
+ * Every function below has the same three steps:
+ *   1. run nvtxInitOnce();
+ *   2. read this function's own slot in nvtxGlobals -- the read is kept
+ *      strictly after step 1 (NOTE(review): presumably nvtxInitOnce()
+ *      rewrites the slot; its body is not in this part of the file);
+ *   3. forward the arguments through the slot, or do nothing when it is NULL.
+ *
+ * The nvtx_* handle parameters are NVTX's dependency-free typedefs, so no
+ * CUDA or OpenCL header is needed here. */
+
+/* nvtxNameCudaEventW: initialize, then forward (event, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name)
+{
+    nvtxNameCudaEventW_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
+    if (!handler)
+        return;
+    handler(event, name);
+}
+
+/* nvtxNameClDeviceA: initialize, then forward (device, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name)
+{
+    nvtxNameClDeviceA_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
+    if (!handler)
+        return;
+    handler(device, name);
+}
+
+/* nvtxNameClDeviceW: initialize, then forward (device, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name)
+{
+    nvtxNameClDeviceW_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
+    if (!handler)
+        return;
+    handler(device, name);
+}
+
+/* nvtxNameClContextA: initialize, then forward (context, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name)
+{
+    nvtxNameClContextA_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
+    if (!handler)
+        return;
+    handler(context, name);
+}
+
+/* nvtxNameClContextW: initialize, then forward (context, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name)
+{
+    nvtxNameClContextW_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
+    if (!handler)
+        return;
+    handler(context, name);
+}
+
+/* nvtxNameClCommandQueueA: initialize, then forward (command_queue, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name)
+{
+    nvtxNameClCommandQueueA_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
+    if (!handler)
+        return;
+    handler(command_queue, name);
+}
+
+/* nvtxNameClCommandQueueW: initialize, then forward (command_queue, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name)
+{
+    nvtxNameClCommandQueueW_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
+    if (!handler)
+        return;
+    handler(command_queue, name);
+}
+
+/* nvtxNameClMemObjectA: initialize, then forward (memobj, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name)
+{
+    nvtxNameClMemObjectA_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
+    if (!handler)
+        return;
+    handler(memobj, name);
+}
+
+/* nvtxNameClMemObjectW: initialize, then forward (memobj, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name)
+{
+    nvtxNameClMemObjectW_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
+    if (!handler)
+        return;
+    handler(memobj, name);
+}
+
+/* nvtxNameClSamplerA: initialize, then forward (sampler, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name)
+{
+    nvtxNameClSamplerA_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
+    if (!handler)
+        return;
+    handler(sampler, name);
+}
+
+/* nvtxNameClSamplerW: initialize, then forward (sampler, name) if a handler is set. */
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name)
+{
+    nvtxNameClSamplerW_fakeimpl_fntype handler;
+
+    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
+    handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
+    if (!handler)
+        return;
+    handler(sampler, name);
+}
+
+NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name){ + nvtxNameClProgramA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr; + if (local) + local(program, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name){ + nvtxNameClProgramW_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr; + if (local) + local(program, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name){ + nvtxNameClEventA_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr; + if (local) + local(evnt, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name){ + nvtxNameClEventW_fakeimpl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr; + if (local) + local(evnt, name); +} + +NVTX_LINKONCE_DEFINE_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs){ + nvtxDomainSyncUserCreate_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr; + if (local) { + return local(domain, attribs); + } + return (nvtxSyncUser_t)0; +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle){ + 
nvtxDomainSyncUserDestroy_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr; + if (local) + local(handle); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle){ + nvtxDomainSyncUserAcquireStart_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr; + if (local) + local(handle); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle){ + nvtxDomainSyncUserAcquireFailed_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr; + if (local) + local(handle); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle){ + nvtxDomainSyncUserAcquireSuccess_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr; + if (local) + local(handle); +} + +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle){ + nvtxDomainSyncUserReleasing_impl_fntype local; + NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(); + local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr; + if (local) + local(handle); +} + +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops); +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops) +{ + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr == 
NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr == 
NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr = NULL; + + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr = NULL; + if 
(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr = NULL; + + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init) || forceAllToNoops) + 
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr = NULL; + if 
(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr = NULL; + + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr = NULL; + + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init) || 
forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init) || forceAllToNoops) + 
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr = NULL; + + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr 
= NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr = NULL; + if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init) || forceAllToNoops) + NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr = NULL; +} diff --git a/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxLinkOnce.h b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxLinkOnce.h new file mode 100644 index 0000000..2272303 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxLinkOnce.h @@ -0,0 +1,83 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef __NVTX_LINKONCE_H__ +#define __NVTX_LINKONCE_H__ + +/* This header defines macros to permit making definitions of global variables + * and functions in C/C++ header files which may be included multiple times in + * a translation unit or linkage unit. 
It allows authoring header-only libraries + * which can be used by multiple other header-only libraries (either as the same + * copy or multiple copies), and does not require any build changes, such as + * adding another .c file, linking a static library, or deploying a dynamic + * library. Globals defined with these macros have the property that they have + * the same address, pointing to a single instance, for the entire linkage unit. + * It is expected but not guaranteed that each linkage unit will have a separate + * instance. + * + * In some situations it is desirable to declare a variable without initializing + * it, refer to it in code or other variables' initializers, and then initialize + * it later. Similarly, functions can be prototyped, have their address taken, + * and then have their body defined later. In such cases, use the FWDDECL macros + * when forward-declaring LINKONCE global variables without initializers and + * function prototypes, and then use the DEFINE macros when later defining them. + * Although in many cases the FWDDECL macro is equivalent to the DEFINE macro, + * following this pattern makes code maximally portable. 
+ */ + +#if defined(__MINGW32__) /* MinGW */ + #define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0."))) + #if defined(__cplusplus) + #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany) + #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline NVTX_LINKONCE_WEAK + #else + #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany) + #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK + #endif +#elif defined(_MSC_VER) /* MSVC */ + #if defined(__cplusplus) + #define NVTX_LINKONCE_DEFINE_GLOBAL extern "C" __declspec(selectany) + #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline + #else + #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany) + #define NVTX_LINKONCE_DEFINE_FUNCTION __inline + #endif +#elif defined(__CYGWIN__) && defined(__clang__) /* Clang on Cygwin */ + #define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0."))) + #if defined(__cplusplus) + #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK + #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_WEAK + #else + #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK + #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK + #endif +#elif defined(__CYGWIN__) /* Assume GCC or compatible */ + #define NVTX_LINKONCE_WEAK __attribute__((weak)) + #if defined(__cplusplus) + #define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany) + #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline + #else + #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK + #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK + #endif +#else /* All others: Assume GCC, clang, or compatible */ + #define NVTX_LINKONCE_WEAK __attribute__((weak)) + #define NVTX_LINKONCE_HIDDEN __attribute__((visibility("hidden"))) + #if defined(__cplusplus) + #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK + #define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_HIDDEN inline + #else + #define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN 
NVTX_LINKONCE_WEAK + #define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK + #endif +#endif + +#define NVTX_LINKONCE_FWDDECL_GLOBAL NVTX_LINKONCE_DEFINE_GLOBAL extern +#define NVTX_LINKONCE_FWDDECL_FUNCTION NVTX_LINKONCE_DEFINE_FUNCTION + +#endif /* __NVTX_LINKONCE_H__ */ diff --git a/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxTypes.h b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxTypes.h new file mode 100644 index 0000000..2698812 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtx3/nvtxDetail/nvtxTypes.h @@ -0,0 +1,304 @@ +/* +* Copyright 2009-2022 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +/* This header defines types which are used by the internal implementation +* of NVTX and callback subscribers. API clients do not use these types, +* so they are defined here instead of in nvToolsExt.h to clarify they are +* not part of the NVTX client API. */ + +#ifndef NVTX_IMPL_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt.h. +#endif + +/* ------ Dependency-free types binary-compatible with real types ------- */ + +/* In order to avoid having the NVTX core API headers depend on non-NVTX +* headers like cuda.h, NVTX defines binary-compatible types to use for +* safely making the initialization versions of all NVTX functions without +* needing to have definitions for the real types. 
*/ + +typedef int nvtx_CUdevice; +typedef void* nvtx_CUcontext; +typedef void* nvtx_CUstream; +typedef void* nvtx_CUevent; + +typedef void* nvtx_cudaStream_t; +typedef void* nvtx_cudaEvent_t; + +typedef void* nvtx_cl_platform_id; +typedef void* nvtx_cl_device_id; +typedef void* nvtx_cl_context; +typedef void* nvtx_cl_command_queue; +typedef void* nvtx_cl_mem; +typedef void* nvtx_cl_program; +typedef void* nvtx_cl_kernel; +typedef void* nvtx_cl_event; +typedef void* nvtx_cl_sampler; + +typedef struct nvtxSyncUser* nvtxSyncUser_t; +struct nvtxSyncUserAttributes_v0; +typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t; + +/* --------- Types for function pointers (with fake API types) ---------- */ + +typedef void (NVTX_API * nvtxMarkEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib); +typedef void (NVTX_API * nvtxMarkA_impl_fntype)(const char* message); +typedef void (NVTX_API * nvtxMarkW_impl_fntype)(const wchar_t* message); +typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib); +typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartA_impl_fntype)(const char* message); +typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartW_impl_fntype)(const wchar_t* message); +typedef void (NVTX_API * nvtxRangeEnd_impl_fntype)(nvtxRangeId_t id); +typedef int (NVTX_API * nvtxRangePushEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib); +typedef int (NVTX_API * nvtxRangePushA_impl_fntype)(const char* message); +typedef int (NVTX_API * nvtxRangePushW_impl_fntype)(const wchar_t* message); +typedef int (NVTX_API * nvtxRangePop_impl_fntype)(void); +typedef void (NVTX_API * nvtxNameCategoryA_impl_fntype)(uint32_t category, const char* name); +typedef void (NVTX_API * nvtxNameCategoryW_impl_fntype)(uint32_t category, const wchar_t* name); +typedef void (NVTX_API * nvtxNameOsThreadA_impl_fntype)(uint32_t threadId, const char* name); +typedef void (NVTX_API * nvtxNameOsThreadW_impl_fntype)(uint32_t threadId, const wchar_t* 
name); + +/* Real impl types are defined in nvtxImplCuda_v3.h, where CUDA headers are included */ +typedef void (NVTX_API * nvtxNameCuDeviceA_fakeimpl_fntype)(nvtx_CUdevice device, const char* name); +typedef void (NVTX_API * nvtxNameCuDeviceW_fakeimpl_fntype)(nvtx_CUdevice device, const wchar_t* name); +typedef void (NVTX_API * nvtxNameCuContextA_fakeimpl_fntype)(nvtx_CUcontext context, const char* name); +typedef void (NVTX_API * nvtxNameCuContextW_fakeimpl_fntype)(nvtx_CUcontext context, const wchar_t* name); +typedef void (NVTX_API * nvtxNameCuStreamA_fakeimpl_fntype)(nvtx_CUstream stream, const char* name); +typedef void (NVTX_API * nvtxNameCuStreamW_fakeimpl_fntype)(nvtx_CUstream stream, const wchar_t* name); +typedef void (NVTX_API * nvtxNameCuEventA_fakeimpl_fntype)(nvtx_CUevent event, const char* name); +typedef void (NVTX_API * nvtxNameCuEventW_fakeimpl_fntype)(nvtx_CUevent event, const wchar_t* name); + +/* Real impl types are defined in nvtxImplOpenCL_v3.h, where OPENCL headers are included */ +typedef void (NVTX_API * nvtxNameClDeviceA_fakeimpl_fntype)(nvtx_cl_device_id device, const char* name); +typedef void (NVTX_API * nvtxNameClDeviceW_fakeimpl_fntype)(nvtx_cl_device_id device, const wchar_t* name); +typedef void (NVTX_API * nvtxNameClContextA_fakeimpl_fntype)(nvtx_cl_context context, const char* name); +typedef void (NVTX_API * nvtxNameClContextW_fakeimpl_fntype)(nvtx_cl_context context, const wchar_t* name); +typedef void (NVTX_API * nvtxNameClCommandQueueA_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const char* name); +typedef void (NVTX_API * nvtxNameClCommandQueueW_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const wchar_t* name); +typedef void (NVTX_API * nvtxNameClMemObjectA_fakeimpl_fntype)(nvtx_cl_mem memobj, const char* name); +typedef void (NVTX_API * nvtxNameClMemObjectW_fakeimpl_fntype)(nvtx_cl_mem memobj, const wchar_t* name); +typedef void (NVTX_API * nvtxNameClSamplerA_fakeimpl_fntype)(nvtx_cl_sampler sampler, 
const char* name); +typedef void (NVTX_API * nvtxNameClSamplerW_fakeimpl_fntype)(nvtx_cl_sampler sampler, const wchar_t* name); +typedef void (NVTX_API * nvtxNameClProgramA_fakeimpl_fntype)(nvtx_cl_program program, const char* name); +typedef void (NVTX_API * nvtxNameClProgramW_fakeimpl_fntype)(nvtx_cl_program program, const wchar_t* name); +typedef void (NVTX_API * nvtxNameClEventA_fakeimpl_fntype)(nvtx_cl_event evnt, const char* name); +typedef void (NVTX_API * nvtxNameClEventW_fakeimpl_fntype)(nvtx_cl_event evnt, const wchar_t* name); + +/* Real impl types are defined in nvtxImplCudaRt_v3.h, where CUDART headers are included */ +typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name); +typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name); +typedef void (NVTX_API * nvtxNameCudaStreamA_fakeimpl_fntype)(nvtx_cudaStream_t stream, const char* name); +typedef void (NVTX_API * nvtxNameCudaStreamW_fakeimpl_fntype)(nvtx_cudaStream_t stream, const wchar_t* name); +typedef void (NVTX_API * nvtxNameCudaEventA_fakeimpl_fntype)(nvtx_cudaEvent_t event, const char* name); +typedef void (NVTX_API * nvtxNameCudaEventW_fakeimpl_fntype)(nvtx_cudaEvent_t event, const wchar_t* name); + +typedef void (NVTX_API * nvtxDomainMarkEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +typedef nvtxRangeId_t (NVTX_API * nvtxDomainRangeStartEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +typedef void (NVTX_API * nvtxDomainRangeEnd_impl_fntype)(nvtxDomainHandle_t domain, nvtxRangeId_t id); +typedef int (NVTX_API * nvtxDomainRangePushEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib); +typedef int (NVTX_API * nvtxDomainRangePop_impl_fntype)(nvtxDomainHandle_t domain); +typedef nvtxResourceHandle_t (NVTX_API * nvtxDomainResourceCreate_impl_fntype)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs); +typedef void 
(NVTX_API * nvtxDomainResourceDestroy_impl_fntype)(nvtxResourceHandle_t resource); +typedef void (NVTX_API * nvtxDomainNameCategoryA_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const char* name); +typedef void (NVTX_API * nvtxDomainNameCategoryW_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name); +typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringA_impl_fntype)(nvtxDomainHandle_t domain, const char* string); +typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringW_impl_fntype)(nvtxDomainHandle_t domain, const wchar_t* string); +typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateA_impl_fntype)(const char* message); +typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateW_impl_fntype)(const wchar_t* message); +typedef void (NVTX_API * nvtxDomainDestroy_impl_fntype)(nvtxDomainHandle_t domain); +typedef void (NVTX_API * nvtxInitialize_impl_fntype)(const void* reserved); + +typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs); +typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle); +typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle); + +/* ---------------- Types for callback subscription --------------------- */ + +typedef const void *(NVTX_API * NvtxGetExportTableFunc_t)(uint32_t exportTableId); +typedef int (NVTX_API * NvtxInitializeInjectionNvtxFunc_t)(NvtxGetExportTableFunc_t exportTable); + +typedef enum NvtxCallbackModule +{ + NVTX_CB_MODULE_INVALID = 0, + NVTX_CB_MODULE_CORE = 1, + NVTX_CB_MODULE_CUDA = 2, + NVTX_CB_MODULE_OPENCL = 3, + NVTX_CB_MODULE_CUDART 
= 4, + NVTX_CB_MODULE_CORE2 = 5, + NVTX_CB_MODULE_SYNC = 6, + /* --- New constants must only be added directly above this line --- */ + NVTX_CB_MODULE_SIZE, + NVTX_CB_MODULE_FORCE_INT = 0x7fffffff +} NvtxCallbackModule; + +typedef enum NvtxCallbackIdCore +{ + NVTX_CBID_CORE_INVALID = 0, + NVTX_CBID_CORE_MarkEx = 1, + NVTX_CBID_CORE_MarkA = 2, + NVTX_CBID_CORE_MarkW = 3, + NVTX_CBID_CORE_RangeStartEx = 4, + NVTX_CBID_CORE_RangeStartA = 5, + NVTX_CBID_CORE_RangeStartW = 6, + NVTX_CBID_CORE_RangeEnd = 7, + NVTX_CBID_CORE_RangePushEx = 8, + NVTX_CBID_CORE_RangePushA = 9, + NVTX_CBID_CORE_RangePushW = 10, + NVTX_CBID_CORE_RangePop = 11, + NVTX_CBID_CORE_NameCategoryA = 12, + NVTX_CBID_CORE_NameCategoryW = 13, + NVTX_CBID_CORE_NameOsThreadA = 14, + NVTX_CBID_CORE_NameOsThreadW = 15, + /* --- New constants must only be added directly above this line --- */ + NVTX_CBID_CORE_SIZE, + NVTX_CBID_CORE_FORCE_INT = 0x7fffffff +} NvtxCallbackIdCore; + +typedef enum NvtxCallbackIdCore2 +{ + NVTX_CBID_CORE2_INVALID = 0, + NVTX_CBID_CORE2_DomainMarkEx = 1, + NVTX_CBID_CORE2_DomainRangeStartEx = 2, + NVTX_CBID_CORE2_DomainRangeEnd = 3, + NVTX_CBID_CORE2_DomainRangePushEx = 4, + NVTX_CBID_CORE2_DomainRangePop = 5, + NVTX_CBID_CORE2_DomainResourceCreate = 6, + NVTX_CBID_CORE2_DomainResourceDestroy = 7, + NVTX_CBID_CORE2_DomainNameCategoryA = 8, + NVTX_CBID_CORE2_DomainNameCategoryW = 9, + NVTX_CBID_CORE2_DomainRegisterStringA = 10, + NVTX_CBID_CORE2_DomainRegisterStringW = 11, + NVTX_CBID_CORE2_DomainCreateA = 12, + NVTX_CBID_CORE2_DomainCreateW = 13, + NVTX_CBID_CORE2_DomainDestroy = 14, + NVTX_CBID_CORE2_Initialize = 15, + /* --- New constants must only be added directly above this line --- */ + NVTX_CBID_CORE2_SIZE, + NVTX_CBID_CORE2_FORCE_INT = 0x7fffffff +} NvtxCallbackIdCore2; + +typedef enum NvtxCallbackIdCuda +{ + NVTX_CBID_CUDA_INVALID = 0, + NVTX_CBID_CUDA_NameCuDeviceA = 1, + NVTX_CBID_CUDA_NameCuDeviceW = 2, + NVTX_CBID_CUDA_NameCuContextA = 3, + 
NVTX_CBID_CUDA_NameCuContextW = 4, + NVTX_CBID_CUDA_NameCuStreamA = 5, + NVTX_CBID_CUDA_NameCuStreamW = 6, + NVTX_CBID_CUDA_NameCuEventA = 7, + NVTX_CBID_CUDA_NameCuEventW = 8, + /* --- New constants must only be added directly above this line --- */ + NVTX_CBID_CUDA_SIZE, + NVTX_CBID_CUDA_FORCE_INT = 0x7fffffff +} NvtxCallbackIdCuda; + +typedef enum NvtxCallbackIdCudaRt +{ + NVTX_CBID_CUDART_INVALID = 0, + NVTX_CBID_CUDART_NameCudaDeviceA = 1, + NVTX_CBID_CUDART_NameCudaDeviceW = 2, + NVTX_CBID_CUDART_NameCudaStreamA = 3, + NVTX_CBID_CUDART_NameCudaStreamW = 4, + NVTX_CBID_CUDART_NameCudaEventA = 5, + NVTX_CBID_CUDART_NameCudaEventW = 6, + /* --- New constants must only be added directly above this line --- */ + NVTX_CBID_CUDART_SIZE, + NVTX_CBID_CUDART_FORCE_INT = 0x7fffffff +} NvtxCallbackIdCudaRt; + +typedef enum NvtxCallbackIdOpenCL +{ + NVTX_CBID_OPENCL_INVALID = 0, + NVTX_CBID_OPENCL_NameClDeviceA = 1, + NVTX_CBID_OPENCL_NameClDeviceW = 2, + NVTX_CBID_OPENCL_NameClContextA = 3, + NVTX_CBID_OPENCL_NameClContextW = 4, + NVTX_CBID_OPENCL_NameClCommandQueueA = 5, + NVTX_CBID_OPENCL_NameClCommandQueueW = 6, + NVTX_CBID_OPENCL_NameClMemObjectA = 7, + NVTX_CBID_OPENCL_NameClMemObjectW = 8, + NVTX_CBID_OPENCL_NameClSamplerA = 9, + NVTX_CBID_OPENCL_NameClSamplerW = 10, + NVTX_CBID_OPENCL_NameClProgramA = 11, + NVTX_CBID_OPENCL_NameClProgramW = 12, + NVTX_CBID_OPENCL_NameClEventA = 13, + NVTX_CBID_OPENCL_NameClEventW = 14, + /* --- New constants must only be added directly above this line --- */ + NVTX_CBID_OPENCL_SIZE, + NVTX_CBID_OPENCL_FORCE_INT = 0x7fffffff +} NvtxCallbackIdOpenCL; + +typedef enum NvtxCallbackIdSync +{ + NVTX_CBID_SYNC_INVALID = 0, + NVTX_CBID_SYNC_DomainSyncUserCreate = 1, + NVTX_CBID_SYNC_DomainSyncUserDestroy = 2, + NVTX_CBID_SYNC_DomainSyncUserAcquireStart = 3, + NVTX_CBID_SYNC_DomainSyncUserAcquireFailed = 4, + NVTX_CBID_SYNC_DomainSyncUserAcquireSuccess = 5, + NVTX_CBID_SYNC_DomainSyncUserReleasing = 6, + /* --- New constants must only be 
added directly above this line --- */ + NVTX_CBID_SYNC_SIZE, + NVTX_CBID_SYNC_FORCE_INT = 0x7fffffff +} NvtxCallbackIdSync; + +/* IDs for NVTX Export Tables */ +typedef enum NvtxExportTableID +{ + NVTX_ETID_INVALID = 0, + NVTX_ETID_CALLBACKS = 1, + NVTX_ETID_RESERVED0 = 2, + NVTX_ETID_VERSIONINFO = 3, + /* --- New constants must only be added directly above this line --- */ + NVTX_ETID_SIZE, + NVTX_ETID_FORCE_INT = 0x7fffffff +} NvtxExportTableID; + +typedef void (* NvtxFunctionPointer)(void); /* generic uncallable function pointer, must be cast to the appropriate function type */ +typedef NvtxFunctionPointer** NvtxFunctionTable; /* double pointer because array(1) of pointers(2) to function pointers */ + +typedef struct NvtxExportTableCallbacks +{ + size_t struct_size; + + /* returns an array of pointers to function pointers */ + int (NVTX_API *GetModuleFunctionTable)( + NvtxCallbackModule module, + NvtxFunctionTable* out_table, + unsigned int* out_size); +} NvtxExportTableCallbacks; + +typedef struct NvtxExportTableVersionInfo +{ + /* sizeof(NvtxExportTableVersionInfo) */ + size_t struct_size; + + /* The API version comes from the NVTX library linked to the app. The + * injection library can use this info to make some assumptions */ + uint32_t version; + + /* Reserved for alignment, do not use */ + uint32_t reserved0; + + /* This must be set by tools when attaching to provide applications + * the ability to, in emergency situations, detect problematic tools + * versions and modify the NVTX source to prevent attaching anything + * that causes trouble in the app. Currently, this value is ignored.
*/ + void (NVTX_API *SetInjectionNvtxVersion)( + uint32_t version); +} NvtxExportTableVersionInfo; + + + + + + + diff --git a/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvToolsExtPayload.h b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvToolsExtPayload.h new file mode 100644 index 0000000..44e60e3 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvToolsExtPayload.h @@ -0,0 +1,1285 @@ +/* +* Copyright 2021-2024 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include "nvtx3/nvToolsExt.h" + +/* Optionally include helper macros. */ +/* #include "nvToolsExtPayloadHelper.h" */ + +/** + * If needed, semantic extension headers can be included after this header. + */ + +/** + * \brief The compatibility ID is used for versioning of this extension. + */ +#ifndef NVTX_EXT_PAYLOAD_COMPATID +#define NVTX_EXT_PAYLOAD_COMPATID 0x0103 +#endif + +/** + * \brief The module ID identifies the payload extension. It has to be unique + * among the extension modules. + */ +#ifndef NVTX_EXT_PAYLOAD_MODULEID +#define NVTX_EXT_PAYLOAD_MODULEID 2 +#endif + +/** + * \brief Additional value for the enum @ref nvtxPayloadType_t + */ +#ifndef NVTX_PAYLOAD_TYPE_EXT +#define NVTX_PAYLOAD_TYPE_EXT ((int32_t)0xDFBD0009) +#endif + +/** --------------------------------------------------------------------------- + * Payload schema entry flags. Used for @ref nvtxPayloadSchemaEntry_t::flags. + * ------------------------------------------------------------------------- */ +#ifndef NVTX_PAYLOAD_ENTRY_FLAGS_V1 +#define NVTX_PAYLOAD_ENTRY_FLAGS_V1 + +#define NVTX_PAYLOAD_ENTRY_FLAG_UNUSED 0 + +/** + * Absolute pointer into a payload (entry) of the same event. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_POINTER (1 << 1) + +/** + * Offset from base address of the payload. 
+ */ +#define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_BASE (1 << 2) + +/** + * Offset from the end of this payload entry. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_HERE (1 << 3) + +/** + * The value is an array with fixed length, set with the field `arrayLength`. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE (1 << 4) + +/** + * The value is a zero-/null-terminated array. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED (2 << 4) + +/** + * \brief A single or multi-dimensional array of variable length. + * + * The field `arrayOrUnionDetail` contains the index of the schema entry that + * holds the length(s). If the length entry is a scalar, then this entry is a 1D + * array. If the length entry is a fixed-size array, then the number of + * dimensions is defined with the registration of the schema. If the length + * entry is a zero-terminated array, then the array of the dimensions can be + * determined at runtime. + * For multidimensional arrays, values are stored in row-major order, with rows + * being stored consecutively in contiguous memory. The size of the entry (in + * bytes) is the product of the dimensions multiplied with size of the array + * element. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX (3 << 4) + +/** + * \brief A single or multi-dimensional array of variable length, where the + * dimensions are stored in a different payload (index) of the same event. + * + * This enables an existing address to an array to be directly passed, while the + * dimensions are defined in a separate payload (with only one payload entry). + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_PAYLOAD_INDEX (4 << 4) + +/** + * \brief The value or data that is pointed to by this payload entry value shall + * be copied by the NVTX handler. + * + * A tool may not support deep copy and just ignore this flag. + * See @ref NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY for more details. 
+ */ +#define NVTX_PAYLOAD_ENTRY_FLAG_DEEP_COPY (1 << 8) + +/** + * Notifies the NVTX handler to hide this entry in case of visualization. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_HIDE (1 << 9) + +/** + * The entry specifies the event message. Any string type can be used. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE (1 << 10) + +/** + * \brief The entry contains an event timestamp. + * + * The time source might be provided via the entry semantics field. In most + * cases, the timestamp (entry) type is @ref NVTX_PAYLOAD_ENTRY_TYPE_UINT64. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_EVENT_TIMESTAMP (2 << 10) + +/** + * These flags specify the NVTX event type to which an entry refers. + */ +#define NVTX_PAYLOAD_ENTRY_FLAG_RANGE_BEGIN (1 << 12) +#define NVTX_PAYLOAD_ENTRY_FLAG_RANGE_END (2 << 12) +#define NVTX_PAYLOAD_ENTRY_FLAG_MARK (3 << 12) +#define NVTX_PAYLOAD_ENTRY_FLAG_COUNTER (4 << 12) + +#endif /* NVTX_PAYLOAD_ENTRY_FLAGS_V1 */ +/** --------------------------------------------------------------------------- + * END: Payload schema entry flags. + * ------------------------------------------------------------------------- */ + +/** \todo: Keep this in the header? */ +/** + * @note The ‘array’ flags assume that the array is embedded. Otherwise, + * @ref NVTX_PAYLOAD_ENTRY_FLAG_POINTER has to be additionally specified. Some + * combinations may be invalid based on the `NVTX_PAYLOAD_SCHEMA_TYPE_*` this + * entry is enclosed. For instance, variable length embedded arrays are valid + * within @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC but invalid with + * @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC. See `NVTX_PAYLOAD_SCHEMA_TYPE_*` for + * additional details. + */ + +/* Helper macro to check if an entry represents an array. 
*/ +#define NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY (\ + NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE | \ + NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED | \ + NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX) /* NOTE(review): evaluates to 0x30 and so does not cover NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_PAYLOAD_INDEX (4 << 4) - confirm intended */ + +#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_TYPE(F) \ + ((F) & NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY) /* F is parenthesized so a compound argument such as (a | b) is masked as a whole */ +/** \todo end */ + + +/** --------------------------------------------------------------------------- + * Types of entries in a payload schema. + * + * @note Several of the predefined types contain the size (in bits) in their + * names. For some data types the size (in bytes) is not fixed and may differ + * for different platforms/operating systems/compilers. To provide portability, + * an array of sizes (in bytes) for type 1 to 28 ( @ref + * NVTX_PAYLOAD_ENTRY_TYPE_CHAR to @ref NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE) + * is passed to the NVTX extension initialization function + * @ref InitializeInjectionNvtxExtension via the `extInfo` field of + * @ref nvtxExtModuleInfo_t. + * ------------------------------------------------------------------------- */ +#ifndef NVTX_PAYLOAD_ENTRY_TYPES_V1 +#define NVTX_PAYLOAD_ENTRY_TYPES_V1 + +#define NVTX_PAYLOAD_ENTRY_TYPE_INVALID 0 + +/** + * Basic integer types. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR 1 +#define NVTX_PAYLOAD_ENTRY_TYPE_UCHAR 2 +#define NVTX_PAYLOAD_ENTRY_TYPE_SHORT 3 +#define NVTX_PAYLOAD_ENTRY_TYPE_USHORT 4 +#define NVTX_PAYLOAD_ENTRY_TYPE_INT 5 +#define NVTX_PAYLOAD_ENTRY_TYPE_UINT 6 +#define NVTX_PAYLOAD_ENTRY_TYPE_LONG 7 +#define NVTX_PAYLOAD_ENTRY_TYPE_ULONG 8 +#define NVTX_PAYLOAD_ENTRY_TYPE_LONGLONG 9 +#define NVTX_PAYLOAD_ENTRY_TYPE_ULONGLONG 10 + +/** + * Integer types with explicit size.
+ */ +#define NVTX_PAYLOAD_ENTRY_TYPE_INT8 11 +#define NVTX_PAYLOAD_ENTRY_TYPE_UINT8 12 +#define NVTX_PAYLOAD_ENTRY_TYPE_INT16 13 +#define NVTX_PAYLOAD_ENTRY_TYPE_UINT16 14 +#define NVTX_PAYLOAD_ENTRY_TYPE_INT32 15 +#define NVTX_PAYLOAD_ENTRY_TYPE_UINT32 16 +#define NVTX_PAYLOAD_ENTRY_TYPE_INT64 17 +#define NVTX_PAYLOAD_ENTRY_TYPE_UINT64 18 + +/** + * Floating point types + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT 19 +#define NVTX_PAYLOAD_ENTRY_TYPE_DOUBLE 20 +#define NVTX_PAYLOAD_ENTRY_TYPE_LONGDOUBLE 21 + +/** + * Size type (`size_t` in C). + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_SIZE 22 + +/** + * Any address, e.g. `void*`. If the pointer type matters, use the flag @ref + * NVTX_PAYLOAD_ENTRY_FLAG_POINTER and the respective type instead. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_ADDRESS 23 + +/** + * Special character types. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_WCHAR 24 /* wide character (since C90) */ +#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 25 /* since C2x and C++20 */ +#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 26 +#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 27 + +/** + * There is type size and alignment information for all previous types. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE (NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 + 1) + +/** + * Store raw 8-bit binary data. As with `char`, 1-byte alignment is assumed. + * Typically, a tool will display this as hex or binary. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_BYTE 32 + +/** + * These types do not have standardized equivalents. It is assumed that the + * number at the end corresponds to the bits used to store the value and that + * the alignment corresponds to standardized types of the same size. + * A tool may not support these types. 
+ */ +#define NVTX_PAYLOAD_ENTRY_TYPE_INT128 33 +#define NVTX_PAYLOAD_ENTRY_TYPE_UINT128 34 + +#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT16 42 +#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT32 43 +#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT64 44 +#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT128 45 + +#define NVTX_PAYLOAD_ENTRY_TYPE_BF16 50 +#define NVTX_PAYLOAD_ENTRY_TYPE_TF32 52 + +/*** Entry types to be used in deferred events or events without event attributes. ***/ + +/** + * \brief Types representing members of @ref nvtxEventAttributes_t. + * + * Data types are as defined by NVTXv3 core. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_CATEGORY 68 /* uint32_t */ +#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_COLORTYPE 69 /* int32_t */ +#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_COLOR 70 /* uint32_t */ + +/** + * Annotate the scope of events (see `nvtxScopeRegister`). + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_EVENT_SCOPE_ID 71 + +/** + * Thread ID as event scope (see `nvtxGetActiveThreadId` for valid values). + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_THREAD_ID_UINT32 72 +#define NVTX_PAYLOAD_ENTRY_TYPE_THREAD_ID_UINT64 73 + +/*** END: Entry types to be used in deferred events. ***/ + +/** + * This type marks the union selector member (entry index) in schemas used by + * a union with internal selector. + * See @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_UNION_SELECTOR 74 + +/** + * \brief String types. + * + * If no flags are set for the entry and `arrayOrUnionDetail > 0`, the entry is + * assumed to be a fixed-size string with the given length, embedded in the payload. + * `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE` is redundant for fixed-size strings. + * + * \todo: discuss this paragraph: + * Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED` specifies a + * zero-terminated string. If `arrayOrUnionDetail > 0`, the entry is handled as + * a zero-terminated array of fixed-size strings. 
+ * + * Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX` specifies a + * variable-length string with the length given in the entry specified by the + * field `arrayOrUnionDetail`. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING 75 /* `char*`, system LOCALE */ +#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF8 76 +#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF16 77 +#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF32 78 + +/** + * The entry value is of type @ref nvtxStringHandle_t returned by + * @ref nvtxDomainRegisterString. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE 80 + +/** + * \brief Predefined schema ID for payload data that is referenced in another payload. + * + * This schema ID can be used in @ref nvtxPayloadData_t::schema_id to indicate that the + * payload is a blob of memory which other payload entries may point into. + * A tool will not expose this payload directly. + * + * This schema ID cannot be used as schema entry type! + */ +#define NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED 1022 + +/** + * \brief Predefined schema ID for raw payload data. + * + * This schema ID can be used in @ref nvtxPayloadData_t::schema_id to indicate + * that the payload is a blob, which can be shown with an arbitrary data viewer. + * This schema ID cannot be used as schema entry type! + */ +#define NVTX_TYPE_PAYLOAD_SCHEMA_RAW 1023 + +/** + * \deprecated: Remove for official release! + * In the initial version of this header custom schema IDs started + * here. Unless predefined types require more than 16 bits we can keep this + * value to preserve backwards compatibility. The value is not used as first + * ID for custom schemas any more, but in the analysis every entry type >= this + * value is assumed to be a custom schema. + */ +#define NVTX_PAYLOAD_ENTRY_TYPE_CUSTOM_BASE 65536 + +/* Custom (static) schema IDs. */ +#define NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START (1 << 24) + +/* Dynamic schema IDs (generated by the tool) start here. 
*/ +#define NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START 4294967296 /* 2^32, i.e. 1ULL << 32 */ + +#endif /* NVTX_PAYLOAD_ENTRY_TYPES_V1 */ +/** --------------------------------------------------------------------------- + * END: Payload schema entry types. + * ------------------------------------------------------------------------- */ + + +#ifndef NVTX_PAYLOAD_SCHEMA_TYPES_V1 +#define NVTX_PAYLOAD_SCHEMA_TYPES_V1 + +/** + * \brief The payload schema type. + * + * A schema can be one of the following types. It is set with + * @ref nvtxPayloadSchemaAttr_t::type. + */ +#define NVTX_PAYLOAD_SCHEMA_TYPE_INVALID 0 +#define NVTX_PAYLOAD_SCHEMA_TYPE_STATIC 1 +#define NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC 2 +#define NVTX_PAYLOAD_SCHEMA_TYPE_UNION 3 +#define NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR 4 + +#endif /* NVTX_PAYLOAD_SCHEMA_TYPES_V1 */ + + +#ifndef NVTX_PAYLOAD_SCHEMA_FLAGS_V1 +#define NVTX_PAYLOAD_SCHEMA_FLAGS_V1 + +/** + * \brief Flags for static and dynamic schemas. + * + * The schema flags are used with @ref nvtxPayloadSchemaAttr_t::flags. + */ +#define NVTX_PAYLOAD_SCHEMA_FLAG_NONE 0 + +/** + * This flag indicates that a schema and the corresponding payloads can + * contain fields which require a deep copy. + */ +#define NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY (1 << 1) + +/** + * This flag indicates that a schema and the corresponding payload can be + * referenced by another payload of the same event. If the schema is not + * intended to be visualized directly, it is possible to use + * @ref NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED instead. + */ +#define NVTX_PAYLOAD_SCHEMA_FLAG_REFERENCED (1 << 2) + +/** + * The schema defines a counter group. An NVTX handler can expect that the schema + * contains entries with counter semantics.
+ */ +#define NVTX_PAYLOAD_SCHEMA_FLAG_COUNTER_GROUP (1 << 3) + + +#endif /* NVTX_PAYLOAD_SCHEMA_FLAGS_V1 */ + + +#ifndef NVTX_PAYLOAD_SCHEMA_ATTRS_V1 +#define NVTX_PAYLOAD_SCHEMA_ATTRS_V1 + +/** + * The values allow the valid fields in @ref nvtxPayloadSchemaAttr_t to be + * specified via setting the field `fieldMask`. + */ +#define NVTX_PAYLOAD_SCHEMA_ATTR_NAME (1 << 1) +#define NVTX_PAYLOAD_SCHEMA_ATTR_TYPE (1 << 2) +#define NVTX_PAYLOAD_SCHEMA_ATTR_FLAGS (1 << 3) +#define NVTX_PAYLOAD_SCHEMA_ATTR_ENTRIES (1 << 4) +#define NVTX_PAYLOAD_SCHEMA_ATTR_NUM_ENTRIES (1 << 5) +#define NVTX_PAYLOAD_SCHEMA_ATTR_STATIC_SIZE (1 << 6) +#define NVTX_PAYLOAD_SCHEMA_ATTR_ALIGNMENT (1 << 7) +#define NVTX_PAYLOAD_SCHEMA_ATTR_SCHEMA_ID (1 << 8) +#define NVTX_PAYLOAD_SCHEMA_ATTR_EXTENSION (1 << 9) + +#endif /* NVTX_PAYLOAD_SCHEMA_ATTRS_V1 */ + + +#ifndef NVTX_PAYLOAD_ENUM_ATTRS_V1 +#define NVTX_PAYLOAD_ENUM_ATTRS_V1 + +/** + * The values are used to set the field `fieldMask` and specify which fields in + * @ref nvtxPayloadEnumAttr_t are set. + */ +#define NVTX_PAYLOAD_ENUM_ATTR_NAME (1 << 1) +#define NVTX_PAYLOAD_ENUM_ATTR_ENTRIES (1 << 2) +#define NVTX_PAYLOAD_ENUM_ATTR_NUM_ENTRIES (1 << 3) +#define NVTX_PAYLOAD_ENUM_ATTR_SIZE (1 << 4) +#define NVTX_PAYLOAD_ENUM_ATTR_SCHEMA_ID (1 << 5) +#define NVTX_PAYLOAD_ENUM_ATTR_EXTENSION (1 << 6) + +#endif /* NVTX_PAYLOAD_ENUM_ATTRS_V1 */ + + +#ifndef NVTX_EVENT_SCOPES_V1 +#define NVTX_EVENT_SCOPES_V1 + +/** + * \brief NVTX event scopes (for deferred events and counters) + */ +#define NVTX_EVENT_SCOPE_INVALID 0 +#define NVTX_EVENT_SCOPE_NONE 1 /* Global/base/root or no scope */ + +/* Hardware events */ +#define NVTX_EVENT_SCOPE_HW_MACHINE 2 /* Node/machine name, Device? 
*/ +#define NVTX_EVENT_SCOPE_HW_SOCKET 3 +#define NVTX_EVENT_SCOPE_HW_CPU 4 +#define NVTX_EVENT_SCOPE_HW_CPU_LOGICAL 5 +/* Innermost HW execution context at registration time */ +#define NVTX_EVENT_SCOPE_HW_INNERMOST 6 + +/* Virtualized hardware, virtual machines */ +#define NVTX_EVENT_SCOPE_VM 7 + +/* Software scopes */ +#define NVTX_EVENT_SCOPE_SW_PROCESS 8 /* Process scope */ +#define NVTX_EVENT_SCOPE_SW_THREAD 9 /* Thread scope */ +/* Innermost SW execution context at registration time */ +#define NVTX_EVENT_SCOPE_SW_INNERMOST 10 + +/* Static (user-provided) scope IDs (feed forward) */ +#define NVTX_EVENT_SCOPE_ID_STATIC_START (1 << 24) + +/* Dynamically (tool) generated scope IDs */ +#define NVTX_EVENT_SCOPE_ID_DYNAMIC_START 4294967296 /* 2^32, i.e. 1ULL << 32 */ + +#endif /* NVTX_EVENT_SCOPES_V1 */ + + +#ifndef NVTX_DEFERRED_EVENTS_SORTING_V1 +#define NVTX_DEFERRED_EVENTS_SORTING_V1 +/** + * Deferred events are assumed to be in chronological order by default. + */ +#define NVTX_DEFERRED_EVENTS_SORTED 0 +#define NVTX_DEFERRED_EVENTS_SORTED_PER_EVENT_SOURCE 1 +#define NVTX_DEFERRED_EVENTS_UNSORTED 2 + +#endif /* NVTX_DEFERRED_EVENTS_SORTING_V1 */ + + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#ifndef NVTX_PAYLOAD_TYPEDEFS_V1 +#define NVTX_PAYLOAD_TYPEDEFS_V1 + +/** + * \brief Size and alignment information for predefined payload entry types. + * + * The struct contains the size and the alignment size in bytes. A respective + * array for the predefined types is passed via nvtxExtModuleInfo_t to the NVTX + * client/handler. The type (ID) is used as index into this array. + */ +typedef struct nvtxPayloadEntryTypeInfo_v1 +{ + uint16_t size; + uint16_t align; +} nvtxPayloadEntryTypeInfo_t; + +/** + * \brief Binary payload data, size and decoding information. + * + * An array of type `nvtxPayloadData_t` is attached to an NVTX event via the + * `payload.ullvalue` field of NVTX event attributes.
+ * + * The `schemaId` can be a predefined schema entry type (`NVTX_PAYLOAD_ENTRY_TYPE*`), + * a schema ID (statically specified or dynamically created) or one of + * `NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED` or `NVTX_TYPE_PAYLOAD_SCHEMA_RAW`. + * + * Setting the size of a payload to `SIZE_MAX` can be useful to reduce the + * overhead of NVTX instrumentation, when no NVTX handler is attached. However, + * a tool might not be able to detect the size of a payload and thus skip it. + * A reasonable use case is a payload that represents a null-terminated + * C string, where the NVTX handler can call `strlen()`. + */ +typedef struct nvtxPayloadData_v1 +{ + /** + * The schema ID, which defines the layout of the binary data. + */ + uint64_t schemaId; + + /** + * Size of the payload (blob) in bytes. `SIZE_MAX` (`-1`) indicates to the tool + * that it should figure out the size, which might not be possible. + */ + size_t size; + + /** + * Pointer to the binary payload data. + */ + const void* payload; +} nvtxPayloadData_t; + + +/** + * \brief Header of the payload entry's semantic field. + * + * If the semantic field of the payload schema entry is set, the first four + * fields (header) are defined with this type. A tool can iterate through the + * extensions and check whether it supports (can handle) them. + */ +typedef struct nvtxSemanticsHeader_v1 +{ + uint32_t structSize; /** Size of semantic extension struct. */ + uint16_t semanticId; + uint16_t version; + const struct nvtxSemanticsHeader_v1* next; /** linked list */ + /* Additional fields are defined by the specific semantic extension. */ +} nvtxSemanticsHeader_t; + +/** + * \brief Entry in a schema. + * + * A payload schema consists of an array of payload schema entries. It is + * registered with @ref nvtxPayloadSchemaRegister. `flags` can be set to `0` for + * simple values, 'type' is the only "required" field.
If not set explicitly, + * all other fields are zero-initialized, which means that the entry has no name + * and the offset is determined based on self-alignment rules. + * + * Example schema: + * nvtxPayloadSchemaEntry_t schema[] = { + * {0, NVTX_PAYLOAD_ENTRY_TYPE_UINT8, "one byte"}, + * {0, NVTX_PAYLOAD_ENTRY_TYPE_INT32, "four bytes"} + * }; + */ +typedef struct nvtxPayloadSchemaEntry_v1 +{ + /** + * \brief Flags to augment the basic type. + * + * This field allows additional properties of the payload entry to be + * specified. Valid values are `NVTX_PAYLOAD_ENTRY_FLAG_*`. + */ + uint64_t flags; + + /** + * \brief Predefined payload schema entry type or custom schema ID. + * + * Predefined types are `NVTX_PAYLOAD_ENTRY_TYPE_*`. Passing a schema ID + * enables nesting of schemas. + */ + uint64_t type; + + /** + * \brief Name or label of the payload entry. (Optional) + * + * A meaningful name or label can help to organize and interpret the data. + */ + const char* name; + + /** + * \brief Description of the payload entry. (Optional) + * + * A more detailed description of the data that is stored with this entry. + */ + const char* description; + + /** + * \brief String length, array length or member selector for union types. + * + * If @ref type is a C string type, this field specifies the string length. + * + * If @ref flags specify that the entry is an array, this field specifies + * the array length. See `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_*` for more details. + * + * If @ref type is a union with schema type @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION + * (external selection of the union member), this field contains the index + * (starting with 0) to an entry of integral type in the same schema. The + * associated field value specifies the selected union member. + * + * @note An array of schema type @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION is not + * supported. @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR can + * be used instead.
+ */ + uint64_t arrayOrUnionDetail; + + /** + * \brief Offset in the binary payload data (in bytes). + * + * This field specifies the byte offset from the base address of the actual + * binary data (blob) to the start address of the data of this entry. + * + * It is recommended (but not required) to provide the offset. Otherwise, + * the NVTX handler will determine the offset from natural alignment rules. + * In some cases, e.g. dynamic schema layouts, the offset cannot be set and + * has to be determined based on the data of prior entries. + * + * Setting the offset can also be used to skip entries during payload parsing. + */ + uint64_t offset; + + /** + * \brief Additional semantics of the payload entry. + * + * The field points to the first element in a linked list, which enables + * multiple semantic extensions. + */ + const nvtxSemanticsHeader_t* semantics; + + /** + * \brief Reserved for future use. Do not use it! + */ + const void* reserved; +} nvtxPayloadSchemaEntry_t; + + +/** + * \brief Header of the schema attribute extension field. + */ +typedef struct nvtxPayloadSchemaExtension_v1 +{ + uint32_t structSize; /** Size of schema extension struct. */ + uint16_t schemaExtId; + uint16_t version; + const struct nvtxPayloadSchemaExtension_v1* next; /** linked list */ + /* Additional fields are defined by the specific schema extension. */ +} nvtxPayloadSchemaExtension_t; + +/** + * \brief NVTX payload schema attributes. + */ +typedef struct nvtxPayloadSchemaAttr_v1 +{ + /** + * \brief Mask of valid fields in this struct. + * + * Use the `NVTX_PAYLOAD_SCHEMA_ATTR_*` defines. + */ + uint64_t fieldMask; + + /** + * \brief Name of the payload schema. (Optional) + */ + const char* name; + + /** + * \brief Payload schema type. (Mandatory) \anchor PAYLOAD_TYPE_FIELD + * + * Use the `NVTX_PAYLOAD_SCHEMA_TYPE_*` defines. + */ + uint64_t type; + + /** + * \brief Payload schema flags.
(Optional) + * + * Flags defined by `NVTX_PAYLOAD_SCHEMA_FLAG_*` can be used to set + * additional properties of the schema. + */ + uint64_t flags; + + /** + * \brief Entries of a payload schema. (Mandatory) \anchor ENTRIES_FIELD + * + * This field is a pointer to an array of schema entries, each describing a + * field in a data structure, e.g. in a C struct or union. + */ + const nvtxPayloadSchemaEntry_t* entries; + + /** + * \brief Number of entries in the payload schema. (Mandatory) + * + * Number of entries in the array of payload entries \ref ENTRIES_FIELD. + */ + size_t numEntries; + + /** + * \brief The binary payload size in bytes for static payload schemas. + * + * If \ref PAYLOAD_TYPE_FIELD is @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC this + * value is ignored. If this field is not specified for a schema of type + * @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC, the size can be automatically + * determined by a tool. + */ + size_t payloadStaticSize; + + /** + * \brief The byte alignment for packed structures. + * + * If not specified, this field defaults to `0`, which means that the fields + * in the data structure are not packed and natural alignment rules can be + * applied. + */ + size_t packAlign; + + /* Static/custom schema ID must be + >= NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START and + < NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START */ + uint64_t schemaId; + + /* Flexible extension for schema attributes. */ + void* extension; +} nvtxPayloadSchemaAttr_t; + +/** + * \brief This type is used to describe an enumeration. + * + * Since the value of an enum entry might not be meaningful for the analysis + * and/or visualization, a tool can show the name of enum entry instead. + * + * An array of this struct is passed to @ref nvtxPayloadEnumAttr_t::entries to be + * finally registered via @ref nvtxPayloadEnumRegister with the NVTX handler. + * + * @note EXPERIMENTAL + */ +typedef struct nvtxPayloadEnum_v1 +{ + /** + * Name of the enum value. 
+ */ + const char* name; + + /** + * Value of the enum entry. + */ + uint64_t value; + + /** + * Indicates that this entry sets a specific set of bits, which can be used + * to define bitsets. + */ + int8_t isFlag; +} nvtxPayloadEnum_t; + +/** + * \brief NVTX payload enumeration type attributes. + * + * A pointer to this struct is passed to @ref nvtxPayloadEnumRegister. + */ +typedef struct nvtxPayloadEnumAttr_v1 +{ + /** + * Mask of valid fields in this struct. See `NVTX_PAYLOAD_ENUM_ATTR_*`. + */ + uint64_t fieldMask; + + /** + * Name of the enum. (Optional) + */ + const char* name; + + /** + * Entries of the enum. (Mandatory) + */ + const nvtxPayloadEnum_t* entries; + + /** + * Number of entries in the enum. (Mandatory) + */ + size_t numEntries; + + /** + * Size of enumeration type in bytes + */ + size_t sizeOfEnum; + + /** + * Static/custom schema ID must be + * >= NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START and + * < NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START + */ + uint64_t schemaId; + + /* Flexible extension for enumeration attributes. */ + void* extension; +} nvtxPayloadEnumAttr_t; + +typedef struct nvtxScopeAttr_v1 +{ + size_t structSize; + + /** Path delimited by '/' characters, relative to parentScope. Leading + slashes are ignored. Nodes in the path may use name[key] syntax to indicate + an array of sibling nodes, which may be combined with other non-array nodes + or different arrays at the same scope. Node names should be UTF8 printable + characters, excluding '/', '[', and ']' characters which have special + meaning here. An empty C string "" and `NULL` are valid inputs and treated + equivalently. */ + const char* path; + + uint64_t parentScope; + + /** The static scope ID must be unique within the domain, + >= NVTX_EVENT_SCOPE_ID_STATIC_START, and + < NVTX_EVENT_SCOPE_ID_DYNAMIC_START. */ + uint64_t scopeId; +} nvtxScopeAttr_t; + +/** + * \brief Helper struct to submit a batch of events or counters. 
+ * \category DeferredEvents + * + * The event scope is assumed to be the execution context of the call to + * `nvtxSubmitDeferred`. It can be specified via the event scope semantics for + * payload entries in `batches` or via `NVTX_PAYLOAD_ENTRY_TYPE_EVENT_SCOPE_ID` + * in `commonData`. + * + * Timestamp arrays or start time plus interval pairs can be provided multiple + * times via `batches`. Each can be associated with a specific batch via the bind + * semantics. If there is only a single timestamp batch provided, it is assumed + * to bind to all value types of all other batches. + * By default, events are assumed to be chronologically sorted. Otherwise, array + * semantics can be used to specify the ordering. + */ +typedef struct nvtxDeferredEvents_v1 +{ + /** + * Pointer to the first element of an array of payload data. + * Each data batch is either a set of events, counters or timestamps. + */ + nvtxPayloadData_t* batches; + + /* Number of data batches. */ + size_t numBatches; + + /** + * Common data might contain an NVTX category, NVTX color, NVTX event scope + * or any other value that should be shown/associated with all events or + * counters. `0` indicates that there is no common data. + * + * If an entry in common data specifies a binding (via the binding semantics), + * the respective value is only associated with the specified payload. + */ + nvtxPayloadData_t* commonData; +} nvtxDeferredEvents_t; + +/* Forward declaration of opaque time domain registration structure */ +struct nvtxTimeDomainRegistration_st; +typedef struct nvtxTimeDomainRegistration_st nvtxTimeDomainRegistration; + +/** + * \brief Time domain handle structure. + * + * This structure is opaque to the user and is used as a handle to reference + * a time domain. This type is returned from tools when using the NVTX API to + * create a time domain. + */ +typedef nvtxTimeDomainRegistration* nvtxTimeDomainHandle_t; + +/** + * \brief Function signature to be used by `nvtxTimestampProvideSource`. 
+ * + * NVTX-instrumented code provides the timestamps via a function of this type. + */ +typedef int64_t (*nvtxTimestampProviderFn)(); + +/** + * \brief Function signature to be used by `nvtxTimestampProvideSourceEx`. + * + * NVTX-instrumented code provides the timestamps via a function of this type. + */ +typedef int64_t (*nvtxTimestampProviderExFn)(void* dataPtr); + +#endif /* NVTX_PAYLOAD_TYPEDEFS_V1 */ + +#ifndef NVTX_PAYLOAD_API_FUNCTIONS_V1 +#define NVTX_PAYLOAD_API_FUNCTIONS_V1 + +/** + * \brief Register a payload schema. + * + * @param domain NVTX domain handle. + * @param attr NVTX payload schema attributes. + */ +NVTX_DECLSPEC uint64_t NVTX_API nvtxPayloadSchemaRegister( + nvtxDomainHandle_t domain, const nvtxPayloadSchemaAttr_t* attr); + + +/** + * \brief Register an enumeration type with the payload extension. + * + * @param domain NVTX domain handle + * @param attr NVTX payload enumeration type attributes. + */ +NVTX_DECLSPEC uint64_t NVTX_API nvtxPayloadEnumRegister(nvtxDomainHandle_t domain, + const nvtxPayloadEnumAttr_t* attr); + + +/** + * \brief Register a scope for deferred counters and events. + * + * @param domain NVTX domain handle (0 for default domain) + * @param attr Event scope attributes. + * + * @return An identifier for the scope. + */ +NVTX_DECLSPEC uint64_t NVTX_API nvtxScopeRegister(nvtxDomainHandle_t domain, + const nvtxScopeAttr_t* attr); + +/** + * \brief Marks an instantaneous event in the application with the attributes + * being passed via the extended payload. + * + * An NVTX handler can assume that the payload contains the event message. + * Otherwise, it might ignore the event. + * + * @param domain NVTX domain handle + * @param payloadData pointer to an array of structured payloads. + * @param count number of payload BLOBs. 
+ */ +NVTX_DECLSPEC void NVTX_API nvtxMarkPayload(nvtxDomainHandle_t domain, + const nvtxPayloadData_t* payloadData, size_t count); + +/** + * \brief Begin a nested thread range with the attributes being passed via the + * payload. + * + * @param domain NVTX domain handle + * @param payloadData pointer to an array of structured payloads. + * @param count number of payload BLOBs. + * + * @return The level of the range being started. If an error occurs, a negative + * value is returned. + */ +NVTX_DECLSPEC int NVTX_API nvtxRangePushPayload(nvtxDomainHandle_t domain, + const nvtxPayloadData_t* payloadData, size_t count); + +/** + * \brief End a nested thread range with an additional custom payload. + * + * NVTX event attributes passed to this function (via the payloads) overwrite + * event attributes (message and color) that have been set in the push event. + * Other payload entries extend the data of the range. + * + * @param domain NVTX domain handle + * @param payloadData pointer to an array of structured payloads. + * @param count number of payload BLOBs. + * + * @return The level of the range being ended. If an error occurs a negative + * value is returned on the current thread. + */ +NVTX_DECLSPEC int NVTX_API nvtxRangePopPayload(nvtxDomainHandle_t domain, + const nvtxPayloadData_t* payloadData, size_t count); + +/** + * \brief Start a thread range with attributes passed via the extended payload. + * + * @param domain NVTX domain handle + * @param payloadData pointer to an array of structured payloads. + * @param count number of payload BLOBs. + * + * @return The correlation ID of the started range (type `nvtxRangeId_t`); pass + * it to `nvtxRangeEndPayload` to end the range. + */ +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartPayload(nvtxDomainHandle_t domain, + const nvtxPayloadData_t* payloadData, size_t count); + +/** + * \brief End a thread range and pass a custom payload. 
+ * + * NVTX event attributes passed to this function (via the payloads) overwrite + * event attributes (message and color) that have been set in the start event. + * Other payload entries extend the data of the range. + * + * @param domain NVTX domain handle + * @param id The correlation ID returned from an NVTX range start call. + * @param payloadData pointer to an array of structured payloads. + * @param count number of payload BLOBs. + */ +NVTX_DECLSPEC void NVTX_API nvtxRangeEndPayload(nvtxDomainHandle_t domain, + nvtxRangeId_t id, const nvtxPayloadData_t* payloadData, size_t count); + +/** + * \brief Start a range with event attributes and an extended payload. + * + * @param domain NVTX domain handle (0 for default domain) + * @param evtAttr pointer to NVTX event attribute. + * @param schemaId NVTX payload schema ID + * @param plAddr Pointer to the binary data (actual payload) + * @param size Size of the binary payload data in bytes. + * + * @return range ID (type is `nvtxRangeId_t`) + */ +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxPayloadRangeStart(nvtxDomainHandle_t domain, + nvtxEventAttributes_t* evtAttr, uint64_t schemaId, void* plAddr, size_t size); + +/** + * @brief Checks if an NVTX domain is enabled (unofficial and may not work) + * + * @param domain NVTX domain handle + * @return 0 if the domain is not enabled. + */ +NVTX_DECLSPEC uint8_t NVTX_API nvtxDomainIsEnabled(nvtxDomainHandle_t domain); + +/** + * \brief Report a push-pop range in a single call. + * \category NsysInternal + * + * This function is called at range pop. Thus, the NVTX handler will immediately + * take a timestamp (if timing is desired). The timestamp of the push operation + * is passed as an argument and can be retrieved via `nvtxTimestampGet()`. + * + * The NVTX handler can assume that no other push operation happened in the same + * domain in between the push and the pop time of the reported range. + * + * @param domain The domain of scoping. 
+ * @param eventAttrib The event attribute structure defining the range's + * attribute types and attribute values. + * @param pushTime The timestamp of the push operation (use `nvtxTimestampGet()`). + */ +NVTX_DECLSPEC void NVTX_API nvtxRangePushPop(nvtxDomainHandle_t domain, + const nvtxEventAttributes_t* eventAttrib, uint64_t pushTime); + +/** + * \brief Get a timestamp from the attached NVTX handler/tool. + * + * The timestamp is intended to be passed as `pushTime` to `nvtxRangePushPop`. + * The time source is assumed to be TSC. + */ +NVTX_DECLSPEC int64_t NVTX_API nvtxTimestampGet(void); + +/** + * \brief Register a time domain using an existing event scope. + * \category DeferredEvents + * + * @param eventScope ID of the existing event scope. + * @return Handle of the registered time domain (`nvtxTimeDomainHandle_t`). + */ +NVTX_DECLSPEC nvtxTimeDomainHandle_t NVTX_API nvtxTimestampDomainRegister( + uint64_t eventScope); + +/** + * \brief Let the NVTX instrumented code provide its timer. + * \category DeferredEvents + * + * This enables the tool to do the time synchronization. + * + * @param fnPtr has to be valid (callable) at all times. Its type is + * `nvtxTimestampProviderExFn`, i.e. `int64_t (*)(void* dataPtr)`. + */ +NVTX_DECLSPEC void NVTX_API nvtxTimestampProvideSource( + nvtxTimeDomainHandle_t timeDomain, nvtxTimestampProviderExFn fnPtr, void* dataPtr); + +/** + * \brief The synchronization point between two time domains is provided by the + * NVTX-instrumented code. + * \category DeferredEvents + * + * @param domain1 first time domain + * @param timestamp1 timestamp in first time domain + * @param domain2 second time domain + * @param timestamp2 timestamp in second time domain + */ +NVTX_DECLSPEC void NVTX_API nvtxTimestampProvideSyncPoint( + nvtxTimeDomainHandle_t domain1, int64_t timestamp1, + nvtxTimeDomainHandle_t domain2, int64_t timestamp2); + +/** + * \brief Submit deferred events of the same type in the given domain. + * \category DeferredEvents + * + * @param domain NVTX domain + * @param events pointer to deferred events helper struct. 
+ */ +NVTX_DECLSPEC void NVTX_API nvtxSubmitDeferred(nvtxDomainHandle_t domain, + const nvtxDeferredEvents_t* events); + +#endif /* NVTX_PAYLOAD_API_FUNCTIONS_V1 */ + +#ifndef NVTX_PAYLOAD_CALLBACK_ID_V1 +#define NVTX_PAYLOAD_CALLBACK_ID_V1 +/** + * \brief Callback Ids of API functions in the payload extension. + * + * The NVTX handler can use these values to register a handler function. When + * InitializeInjectionNvtxExtension(nvtxExtModuleInfo_t* moduleInfo) is + * executed, a handler routine 'handlenvtxPayloadRegisterSchema' can be + * registered as follows: + * \code{.c} + * moduleInfo->segments->slots[NVTX3EXT_CBID_nvtxPayloadSchemaRegister] = + * (intptr_t)YourPayloadRegisterSchemaHandlerFn; + * \endcode + */ +#define NVTX3EXT_CBID_nvtxPayloadSchemaRegister 0 +#define NVTX3EXT_CBID_nvtxPayloadEnumRegister 1 +#define NVTX3EXT_CBID_nvtxMarkPayload 2 +#define NVTX3EXT_CBID_nvtxRangePushPayload 3 +#define NVTX3EXT_CBID_nvtxRangePopPayload 4 +#define NVTX3EXT_CBID_nvtxRangeStartPayload 5 +#define NVTX3EXT_CBID_nvtxRangeEndPayload 6 +/* The following are not official and may change. */ +#define NVTX3EXT_CBID_nvtxDomainIsEnabled 7 +#define NVTX3EXT_CBID_nvtxTimestampGet 8 +#define NVTX3EXT_CBID_nvtxTimestampDomainRegister 9 +#define NVTX3EXT_CBID_nvtxTimestampProvideSource 10 +#define NVTX3EXT_CBID_nvtxTimestampProvideSyncPoint 11 +#define NVTX3EXT_CBID_nvtxScopeRegister 12 +#define NVTX3EXT_CBID_nvtxSubmitDeferred 13 +/* For internal use only. */ +#define NVTX3EXT_CBID_nvtxRangePushPop 62 +#endif /* NVTX_PAYLOAD_CALLBACK_ID_V1 */ + +/*** Helper utilities ***/ + +/** \brief Helper macro for safe double-cast of pointer to uint64_t value. 
*/ +#ifndef NVTX_POINTER_AS_PAYLOAD_ULLVALUE +# ifdef __cplusplus +# define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) \ + static_cast<uint64_t>(reinterpret_cast<uintptr_t>(p)) +# else +#define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) ((uint64_t)(uintptr_t)p) +# endif +#endif + +#ifndef NVTX_PAYLOAD_EVTATTR_SET_DATA +/** + * \brief Helper macro to attach a single payload to an NVTX event attribute. + * + * @param evtAttr NVTX event attribute (variable name) + * @param pldata_addr Address of `nvtxPayloadData_t` variable. + * @param schema_id NVTX binary payload schema ID. + * @param pl_addr Address of the (actual) payload. + * @param sz size of the (actual) payload. + */ +#define NVTX_PAYLOAD_EVTATTR_SET_DATA(evtAttr, pldata_addr, schema_id, pl_addr, sz) \ + (pldata_addr)->schemaId = schema_id; \ + (pldata_addr)->size = sz; \ + (pldata_addr)->payload = pl_addr; \ + (evtAttr).payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(pldata_addr); \ + (evtAttr).payloadType = NVTX_PAYLOAD_TYPE_EXT; \ + (evtAttr).reserved0 = 1; +#endif /* NVTX_PAYLOAD_EVTATTR_SET_DATA */ + +#ifndef NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE +/** + * \brief Helper macro to attach multiple payloads to an NVTX event attribute. + * + * @param evtAttr NVTX event attribute (variable name) + * @param pldata Array of `nvtxPayloadData_t` (a true array, not a pointer: its length is taken with `sizeof`) + */ +#define NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE(evtAttr, pldata) \ + (evtAttr).payloadType = NVTX_PAYLOAD_TYPE_EXT; \ + (evtAttr).reserved0 = sizeof(pldata)/sizeof(nvtxPayloadData_t); \ + (evtAttr).payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(pldata); +#endif /* NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE */ + +#ifndef NVTX_PAYLOAD_EVTATTR_SET +/* + * Do not use this macro directly! It is a helper to attach a single payload to + * an NVTX event attribute. + * @warning The NVTX push, start or mark operation must not be in an outer scope. 
+ */ +#define NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schema_id, pl_addr, sz) \ + nvtxPayloadData_t _NVTX_PAYLOAD_DATA_VAR[] = \ + {{schema_id, sz, pl_addr}}; \ + (evtAttr)->payload.ullValue = \ + NVTX_POINTER_AS_PAYLOAD_ULLVALUE(_NVTX_PAYLOAD_DATA_VAR); \ + (evtAttr)->payloadType = NVTX_PAYLOAD_TYPE_EXT; \ + (evtAttr)->reserved0 = 1; +#endif /* NVTX_PAYLOAD_EVTATTR_SET */ + +#ifndef nvtxPayloadRangePush +/** + * \brief Push a range with extended payload. + * + * @param domain NVTX domain handle (0 for default domain) + * @param evtAttr pointer to NVTX event attribute. + * @param schemaId NVTX payload schema ID + * @param plAddr Pointer to the binary data (actual payload) + * @param size Size of the binary payload data in bytes. + */ +#define nvtxPayloadRangePush(domain, evtAttr, schemaId, plAddr, size) \ +do { \ + NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schemaId, plAddr, size) \ + nvtxDomainRangePushEx(domain, evtAttr); \ +} while (0) +#endif /* nvtxPayloadRangePush */ + +#ifndef nvtxPayloadMark +/** + * \brief Set a marker with extended payload. + * + * @param domain NVTX domain handle (0 for default domain) + * @param evtAttr pointer to NVTX event attribute. + * @param schemaId NVTX payload schema ID + * @param plAddr Pointer to the binary data (actual payload) + * @param size Size of the binary payload data in bytes. + */ +#define nvtxPayloadMark(domain, evtAttr, schemaId, plAddr, size) \ +do { \ + NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schemaId, plAddr, size) \ + nvtxDomainMarkEx(domain, evtAttr); \ +} while (0) +#endif /* nvtxPayloadMark */ + + +/** + * Helper macro to get the ID of the currently active thread. 
+ */ +#ifndef nvtxGetActiveThreadId +#ifdef __cplusplus +#define _NVTX_PAYLOAD_CAST_TO_U64(v) static_cast(v) +#else +#define _NVTX_PAYLOAD_CAST_TO_U64(v) (uint64_t)(v) +#endif + +#if defined(_WIN32) +#include +#define nvtxGetActiveThreadId() _NVTX_PAYLOAD_CAST_TO_U64(GetCurrentThreadId()) +#elif defined(__ANDROID__) +#include +#define nvtxGetActiveThreadId() _NVTX_PAYLOAD_CAST_TO_U64(gettid()) +#elif defined(MAC_OS_X) +#define nvtxGetActiveThreadId() _NVTX_PAYLOAD_CAST_TO_U64(syscall(SYS_thread_selfid)) +#elif defined(__linux__) || defined(__CYGWIN__) +#define nvtxGetActiveThreadId() _NVTX_PAYLOAD_CAST_TO_U64(syscall(SYS_gettid)) +#endif +#undef _NVTX_PAYLOAD_CAST_TO_U64 +#endif /* nvtxGetActiveThreadId */ + +#ifdef __GNUC__ +#pragma GCC visibility push(internal) +#endif + +/* Extension types are required for the implementation and the NVTX handler. */ +#define NVTX_EXT_TYPES_GUARD +#include "nvtxExtDetail/nvtxExtTypes.h" +#undef NVTX_EXT_TYPES_GUARD + +#ifndef NVTX_NO_IMPL +#define NVTX_EXT_IMPL_PAYLOAD_GUARD +#include "nvtxExtDetail/nvtxExtImplPayload_v1.h" +#undef NVTX_EXT_IMPL_PAYLOAD_GUARD +#endif /* NVTX_NO_IMPL */ + +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ diff --git a/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtHelperMacros.h b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtHelperMacros.h new file mode 100644 index 0000000..fb1c3dc --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtHelperMacros.h @@ -0,0 +1,31 @@ +/* +* Copyright 2023 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. 
+* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_EXT_HELPER_MACROS_H +#define NVTX_EXT_HELPER_MACROS_H + +/* Combine tokens */ +#define _NVTX_EXT_CONCAT(a, b) a##b +#define NVTX_EXT_CONCAT(a, b) _NVTX_EXT_CONCAT(a, b) + +/* Resolves to the number of arguments passed. */ +#define NVTX_EXT_NUM_ARGS(...) \ + NVTX_EXT_SELECTA16(__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, throwaway) +#define NVTX_EXT_SELECTA16(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, ...) a16 + +/* Cast argument(s) to void to prevent unused variable warnings. */ +#define _NVTX_EXT_VOIDIFY1(a1) (void)a1; +#define _NVTX_EXT_VOIDIFY2(a1, a2) (void)a1; (void)a2; +#define _NVTX_EXT_VOIDIFY3(a1, a2, a3) (void)a1; (void)a2; (void)a3; +#define _NVTX_EXT_VOIDIFY4(a1, a2, a3, a4) (void)a1; (void)a2; (void)a3; (void)a4; + +/* Mark function arguments as unused. */ +#define NVTX_EXT_HELPER_UNUSED_ARGS(...) \ + NVTX_EXT_CONCAT(_NVTX_EXT_VOIDIFY, NVTX_EXT_NUM_ARGS(__VA_ARGS__))(__VA_ARGS__) + +#endif /* NVTX_EXT_HELPER_MACROS_H */ \ No newline at end of file diff --git a/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImpl.h b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImpl.h new file mode 100644 index 0000000..078ca29 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImpl.h @@ -0,0 +1,102 @@ +/* +* Copyright 2009-2020 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_EXT_IMPL_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined). 
+#endif + +#ifndef NVTX_EXT_IMPL_H +#define NVTX_EXT_IMPL_H +/* ---- Include required platform headers ---- */ + +#if defined(_WIN32) + +#include + +#else +#include + +#if defined(__ANDROID__) +#include +#endif + +#if defined(__linux__) || defined(__CYGWIN__) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#endif + +/* ---- Define macros used in this file ---- */ + +#ifdef NVTX_DEBUG_PRINT +#ifdef __ANDROID__ +#include +#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__); +#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__); +#else +#include +#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__) +#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__) +#endif +#else /* !defined(NVTX_DEBUG_PRINT) */ +#define NVTX_ERR(...) +#define NVTX_INFO(...) +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +/* +#ifdef __GNUC__ +#pragma GCC visibility push(hidden) +#endif +*/ +#define NVTX_EXTENSION_FRESH 0 +#define NVTX_EXTENSION_DISABLED 1 +#define NVTX_EXTENSION_STARTING 2 +#define NVTX_EXTENSION_LOADED 3 + +/* Function slots are local to each extension now! 
*/ +typedef struct nvtxExtGlobals1_t +{ + NvtxExtInitializeInjectionFunc_t injectionFnPtr; +} nvtxExtGlobals1_t; + +NVTX_LINKONCE_DEFINE_GLOBAL nvtxExtGlobals1_t NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1) = +{ + (NvtxExtInitializeInjectionFunc_t)0 +}; + +#define NVTX_EXT_INIT_GUARD +#include "nvtxExtInit.h" +#undef NVTX_EXT_INIT_GUARD +/* +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif +*/ +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* NVTX_EXT_IMPL_H */ \ No newline at end of file diff --git a/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImplPayload_v1.h b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImplPayload_v1.h new file mode 100644 index 0000000..fe6a616 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtImplPayload_v1.h @@ -0,0 +1,208 @@ +/* +* Copyright 2021-2023 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_EXT_IMPL_PAYLOAD_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExtPayload.h (except when NVTX_NO_IMPL is defined). +#endif + +#define NVTX_EXT_IMPL_GUARD +#include "nvtxExtImpl.h" +#undef NVTX_EXT_IMPL_GUARD + +#ifndef NVTX_EXT_IMPL_PAYLOAD_V1 +#define NVTX_EXT_IMPL_PAYLOAD_V1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Macros to create versioned symbols. 
*/ +#define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) \ + NAME##_v##VERSION##_bpl##COMPATID +#define NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L2(NAME, VERSION, COMPATID) \ + NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L3(NAME, VERSION, COMPATID) +#define NVTX_EXT_PAYLOAD_VERSIONED_ID(NAME) \ + NVTX_EXT_PAYLOAD_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION, NVTX_EXT_PAYLOAD_COMPATID) + +#ifdef NVTX_DISABLE + +#include "nvtxExtHelperMacros.h" + +#define NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_val, fn_name, signature, arg_names) \ +ret_val fn_name signature { \ + NVTX_EXT_HELPER_UNUSED_ARGS arg_names \ + return ((ret_val)(intptr_t)-1); \ +} + +#else /* NVTX_DISABLE */ + +#include "nvtxExtPayloadTypeInfo.h" + +/* + * Function slots for the payload extension. First entry is the module state, + * initialized to `0` (`NVTX_EXTENSION_FRESH`). + */ +#define NVTX_EXT_PAYLOAD_SLOT_COUNT 63 +NVTX_LINKONCE_DEFINE_GLOBAL intptr_t +NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX_EXT_PAYLOAD_SLOT_COUNT + 1] + = {0}; + +/* Avoid warnings about missing prototype. 
*/ +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(void); +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)() +{ + intptr_t* fnSlots = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots) + 1; + nvtxExtModuleSegment_t segment = { + 0, /* unused (only one segment) */ + NVTX_EXT_PAYLOAD_SLOT_COUNT, + fnSlots + }; + + nvtxExtModuleInfo_t module = { + NVTX_VERSION, sizeof(nvtxExtModuleInfo_t), + NVTX_EXT_PAYLOAD_MODULEID, NVTX_EXT_PAYLOAD_COMPATID, + 1, &segment, /* number of segments, segments */ + NULL, /* no export function needed */ + /* bake type sizes and alignment information into program binary */ + &(NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadTypeInfo)) + }; + + NVTX_INFO( "%s\n", __FUNCTION__ ); + + NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce)(&module, + NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)); +} + +#define NVTX_EXT_PAYLOAD_IMPL_FN_V1(ret_type, fn_name, signature, arg_names) \ +typedef ret_type (*fn_name##_impl_fntype)signature; \ +/*NVTX_LINKONCE_DEFINE_FUNCTION*/ ret_type NVTX_API fn_name signature { \ + intptr_t slot = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX3EXT_CBID_##fn_name + 1]; \ + if (slot != NVTX_EXTENSION_DISABLED) { \ + if (slot != NVTX_EXTENSION_FRESH) { \ + return (*(fn_name##_impl_fntype)slot) arg_names; \ + } else { \ + NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(); \ + /* Re-read function slot after extension initialization. 
*/ \ + slot = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX3EXT_CBID_##fn_name + 1]; \ + if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \ + return (*(fn_name##_impl_fntype)slot) arg_names; \ + } \ + } \ + } \ + NVTX_EXT_FN_RETURN_INVALID(ret_type) \ +} + +#define NVTX_EXT_PAYLOAD_IMPL_FN_V1_VOID(fn_name, signature, arg_names) \ +typedef void (*fn_name##_impl_fntype)signature; \ +/*NVTX_LINKONCE_DEFINE_FUNCTION*/ void NVTX_API fn_name signature { \ + intptr_t slot = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX3EXT_CBID_##fn_name + 1]; \ + if (slot != NVTX_EXTENSION_DISABLED) { \ + if (slot != NVTX_EXTENSION_FRESH) { \ + (*(fn_name##_impl_fntype)slot) arg_names; \ + } else { \ + NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadInitOnce)(); \ + /* Re-read function slot after extension initialization. */ \ + slot = NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadSlots)[NVTX3EXT_CBID_##fn_name + 1]; \ + if (slot != NVTX_EXTENSION_DISABLED && slot != NVTX_EXTENSION_FRESH) { \ + (*(fn_name##_impl_fntype)slot) arg_names; \ + } \ + } \ + } \ +} + +#endif /*NVTX_DISABLE*/ + +/* Non-void functions. 
*/ +#define NVTX_EXT_FN_RETURN_INVALID(rtype) return ((rtype)(intptr_t)-1); + +NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxPayloadSchemaRegister, + (nvtxDomainHandle_t domain, const nvtxPayloadSchemaAttr_t* attr), + (domain, attr)) + +NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxPayloadEnumRegister, + (nvtxDomainHandle_t domain, const nvtxPayloadEnumAttr_t* attr), + (domain, attr)) + +NVTX_EXT_PAYLOAD_IMPL_FN_V1(int, nvtxRangePushPayload, + (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count), + (domain, payloadData, count)) + +NVTX_EXT_PAYLOAD_IMPL_FN_V1(int, nvtxRangePopPayload, + (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count), + (domain, payloadData, count)) + +NVTX_EXT_PAYLOAD_IMPL_FN_V1(nvtxRangeId_t, nvtxRangeStartPayload, + (nvtxDomainHandle_t domain, const nvtxPayloadData_t* payloadData, size_t count), + (domain, payloadData, count)) + +/* Experimental */ +NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint8_t, nvtxDomainIsEnabled, (nvtxDomainHandle_t domain), (domain)) + +/* Experimental */ +NVTX_EXT_PAYLOAD_IMPL_FN_V1(int64_t, nvtxTimestampGet, (void), ()) + +/* Experimental */ +NVTX_EXT_PAYLOAD_IMPL_FN_V1(nvtxTimeDomainHandle_t, nvtxTimestampDomainRegister, (uint64_t eventScope), (eventScope)) + +/* Experimental */ +NVTX_EXT_PAYLOAD_IMPL_FN_V1_VOID(nvtxTimestampProvideSource, + (nvtxTimeDomainHandle_t timeDomain, nvtxTimestampProviderExFn fnPtr, void* dataPtr), + (timeDomain, fnPtr, dataPtr)) + +NVTX_EXT_PAYLOAD_IMPL_FN_V1(uint64_t, nvtxScopeRegister, (nvtxDomainHandle_t domain, + const nvtxScopeAttr_t* attr), (domain, attr)) + +#undef NVTX_EXT_FN_RETURN_INVALID +/* END: Non-void functions. */ + +/* void functions. 
*/ +#define NVTX_EXT_FN_RETURN_INVALID(rtype) +#define return + +NVTX_EXT_PAYLOAD_IMPL_FN_V1_VOID(nvtxMarkPayload, (nvtxDomainHandle_t domain, + const nvtxPayloadData_t* payloadData, size_t count), (domain, payloadData, count)) + +NVTX_EXT_PAYLOAD_IMPL_FN_V1_VOID(nvtxRangeEndPayload, (nvtxDomainHandle_t domain, + nvtxRangeId_t id, const nvtxPayloadData_t* payloadData, size_t count), + (domain, id, payloadData, count)) + +/* Experimental */ +NVTX_EXT_PAYLOAD_IMPL_FN_V1_VOID(nvtxTimestampProvideSyncPoint, + (nvtxTimeDomainHandle_t domain1, int64_t timestamp1, + nvtxTimeDomainHandle_t domain2, int64_t timestamp2), + (domain1, timestamp1, domain2, timestamp2)) + +/* Experimental */ +NVTX_EXT_PAYLOAD_IMPL_FN_V1_VOID(nvtxSubmitDeferred, + (nvtxDomainHandle_t domain, const nvtxDeferredEvents_t* events), + (domain, events)) + +#undef return +#undef NVTX_EXT_FN_RETURN_INVALID +/* END: void functions. */ + +NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxPayloadRangeStart( + nvtxDomainHandle_t domain, nvtxEventAttributes_t* evtAttr, + uint64_t schemaId, void* plAddr, size_t size) +{ + NVTX_PAYLOAD_EVTATTR_SET(evtAttr, schemaId, plAddr, size) + return nvtxDomainRangeStartEx(domain, evtAttr); +} + +NVTX_EXT_PAYLOAD_IMPL_FN_V1_VOID(nvtxRangePushPop, (nvtxDomainHandle_t domain, + const nvtxEventAttributes_t* evtAttr, uint64_t pushTime), + (domain, evtAttr, pushTime)) + +/* Keep NVTX_EXT_PAYLOAD_IMPL_FN_V1 defined for a future version of this extension. */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* NVTX_EXT_IMPL_PAYLOAD_V1 */ + diff --git a/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtInit.h b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtInit.h new file mode 100644 index 0000000..5f1cf94 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtInit.h @@ -0,0 +1,378 @@ +/* +* Copyright 2009-2023 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. 
+* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_EXT_INIT_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined). +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* ---- Platform-independent helper definitions and functions ---- */ + +/* Prefer macros over inline functions to reduce symbol resolution at link time */ + +#if defined(_WIN32) +#define NVTX_PATHCHAR wchar_t +#define NVTX_STR(x) L##x +#define NVTX_GETENV _wgetenv +#define NVTX_BUFSIZE MAX_PATH +#define NVTX_DLLHANDLE HMODULE +#define NVTX_DLLOPEN(x) LoadLibraryW(x) +#define NVTX_DLLFUNC GetProcAddress +#define NVTX_DLLCLOSE FreeLibrary +#define NVTX_YIELD() SwitchToThread() +#define NVTX_MEMBAR() MemoryBarrier() +#define NVTX_ATOMIC_WRITE_32(address, value) InterlockedExchange((volatile LONG*)address, value) +#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) old = InterlockedCompareExchange((volatile LONG*)address, exchange, comparand) +#define NVTX_ATOMIC_WRITE_PTR(address, value) InterlockedExchangePointer((volatile PVOID*)address, (PVOID)value) +#define NVTX_ATOMIC_CAS_PTR(old, address, exchange, comparand) old = (intptr_t)InterlockedCompareExchangePointer((volatile PVOID*)address, (PVOID)exchange, (PVOID)comparand) + + +#elif defined(__GNUC__) +#define NVTX_PATHCHAR char +#define NVTX_STR(x) x +#define NVTX_GETENV getenv +#define NVTX_BUFSIZE PATH_MAX +#define NVTX_DLLHANDLE void* +#define NVTX_DLLOPEN(x) dlopen(x, RTLD_LAZY) +#define NVTX_DLLFUNC dlsym +#define NVTX_DLLCLOSE dlclose +#define NVTX_YIELD() sched_yield() +#define NVTX_MEMBAR() __sync_synchronize() +/* Ensure full memory barrier for atomics, to match Windows functions. 
*/ +#define NVTX_ATOMIC_WRITE_32(address, value) __sync_synchronize(); __sync_lock_test_and_set(address, value) +#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) __sync_synchronize(); old = __sync_val_compare_and_swap(address, exchange, comparand) +#define NVTX_ATOMIC_WRITE_PTR(address, value) __sync_synchronize(); __sync_lock_test_and_set(address, value) +#define NVTX_ATOMIC_CAS_PTR(old, address, exchange, comparand) __sync_synchronize(); old = __sync_val_compare_and_swap(address, exchange, comparand) +#else +#error The library does not support your configuration! +#endif + +/* Define this to 1 for platforms where pre-injected libraries can be discovered. */ +#if defined(_WIN32) +/* TODO */ +#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0 +#else +#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0 +#endif + +/* Define this to 1 for platforms that support environment variables. */ +/* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */ +/* Try: #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */ +#define NVTX_SUPPORT_ENV_VARS 1 + +/* Define this to 1 for platforms that support dynamic/shared libraries */ +#define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1 + +/* Injection libraries implementing InitializeInjectionNvtxExtension may be statically linked, + * which will override any dynamic injection. + * NOTE(review): nvtxExtLoadInjectionLibrary consults the static pointer only when no dynamic + * entry point was found, so the "override" wording does not match the search order -- confirm. + * This is useful for platforms where dynamic + * injection is not available. Since weak symbols, not explicitly marked extern, are + * guaranteed to be initialized to zero, if no definitions are found by the linker, the + * dynamic injection process proceeds normally, if InitializeInjectionNvtxExtension_fnptr is 0.
*/ +#if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__) +#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1 +/* To statically inject an NVTX library, define InitializeInjectionNvtxExtension_fnptr as a normal + * symbol (not weak) pointing to the implementation of InitializeInjectionNvtxExtension, which + * does not need to be named "InitializeInjectionNvtxExtension" as it is necessary in a dynamic + * injection library. */ +__attribute__((weak)) NvtxExtInitializeInjectionFunc_t InitializeInjectionNvtxExtension_fnptr; +#else +#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0 +#endif + + + +/* This function tries to find or load an NVTX injection library and get the address of its + * `InitializeInjectionNvtxExtension` function. If such a function pointer is found, it is stored + * in `*out_init_fnptr`; it is not called here (nvtxExtInitOnce calls it with the module info, so + * that the injection can attach to this instance). + * If a library is loaded but does not export the entry point, the library is closed again and an + * error code is returned. Setting function slots to no-ops is done by nvtxExtInitOnce, not by + * this function. This is implemented as one + * function instead of several small functions to minimize the number of weak symbols the linker + * must resolve. The order of search is: + * 1) Pre-injected library exporting InitializeInjectionNvtxExtension + * (compiled out while NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY is 0) + * 2) Loadable library exporting InitializeInjectionNvtxExtension + * - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64) + * - On Android, libNvtxInjection??.so within the package (??
is 32 or 64) + * 3) Statically-linked injection library defining InitializeInjectionNvtxExtension_fnptr + * Returns NVTX_SUCCESS when an entry point was found, otherwise an NVTX_ERR_* code; on every + * non-success return `*out_init_fnptr` (if provided) is 0. + */ +NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)( + NvtxExtInitializeInjectionFunc_t* out_init_fnptr); +NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)( + NvtxExtInitializeInjectionFunc_t* out_init_fnptr) +{ + const char* const initFuncName = "InitializeInjectionNvtxExtension"; + NvtxExtInitializeInjectionFunc_t init_fnptr = (NvtxExtInitializeInjectionFunc_t)0; + NVTX_DLLHANDLE injectionLibraryHandle = (NVTX_DLLHANDLE)0; + + if (out_init_fnptr) + { + *out_init_fnptr = (NvtxExtInitializeInjectionFunc_t)0; + } + +#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY + /* Use POSIX global symbol chain to query for init function from any module. */ + init_fnptr = (NvtxExtInitializeInjectionFunc_t)NVTX_DLLFUNC(0, initFuncName); +#endif + +#if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY + /* Try discovering dynamic injection library to load */ + if (!init_fnptr) + { +#if NVTX_SUPPORT_ENV_VARS + /* If env var NVTX_INJECTION64_PATH is set, it should contain the path + to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */ + const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4) + ? NVTX_STR("NVTX_INJECTION32_PATH") + : NVTX_STR("NVTX_INJECTION64_PATH"); +#endif /* NVTX_SUPPORT_ENV_VARS */ + NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE]; + const NVTX_PATHCHAR* injectionLibraryPath = (const NVTX_PATHCHAR*)0; + + /* Refer to this variable explicitly in case all references to it are #if'ed out. */ + (void)injectionLibraryPathBuf; + +#if NVTX_SUPPORT_ENV_VARS + /* Disable the warning for getenv & _wgetenv -- this usage is safe because + these functions are not called again before using the returned value.
*/ +#if defined(_MSC_VER) +#pragma warning( push ) +#pragma warning( disable : 4996 ) +#endif + injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName); +#if defined(_MSC_VER) +#pragma warning( pop ) +#endif +#endif + +#if defined(__ANDROID__) + if (!injectionLibraryPath) + { + const char *bits = (sizeof(void*) == 4) ? "32" : "64"; + char cmdlineBuf[32]; + char pkgName[PATH_MAX]; + int count; + int pid; + FILE *fp; + size_t bytesRead; + size_t pos; + + pid = (int)getpid(); + count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid); + if (count <= 0 || count >= (int)sizeof(cmdlineBuf)) + { + NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + fp = fopen(cmdlineBuf, "r"); + if (!fp) + { + NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp); + fclose(fp); + if (bytesRead == 0) + { + NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + pkgName[bytesRead] = 0; + + /* String can contain colon as a process separator. In this case the + package name is before the colon. */ + pos = 0; + while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0') + { + ++pos; + } + pkgName[pos] = 0; + + count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits); + if (count <= 0 || count >= NVTX_BUFSIZE) + { + NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + + /* On Android, verify path is accessible due to aggressive file access restrictions. */ + /* For dlopen, if the filename contains a leading slash, then it is interpreted as a */ + /* relative or absolute pathname; otherwise it will follow the rules in ld.so.
*/ + if (injectionLibraryPathBuf[0] == '/') + { +#if (__ANDROID_API__ < 21) + int access_err = access(injectionLibraryPathBuf, F_OK | R_OK); +#else + int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0); +#endif + if (access_err != 0) + { + NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf); + return NVTX_ERR_INIT_ACCESS_LIBRARY; + } + } + injectionLibraryPath = injectionLibraryPathBuf; + } +#endif + + /* At this point, `injectionLibraryPath` is specified if a dynamic + injection library was specified by a tool. */ + if (injectionLibraryPath) + { + /* Load the injection library */ + injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath); + if (!injectionLibraryHandle) + { + NVTX_ERR("Failed to load injection library\n"); + return NVTX_ERR_INIT_LOAD_LIBRARY; + } + else + { + /* Attempt to get the injection library's entry-point. */ + init_fnptr = (NvtxExtInitializeInjectionFunc_t)NVTX_DLLFUNC(injectionLibraryHandle, initFuncName); + if (!init_fnptr) + { + NVTX_DLLCLOSE(injectionLibraryHandle); + NVTX_ERR("Failed to get address of function %s from injection library\n", initFuncName); + return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT; + } + } + } + } +#endif + +#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY + if (!init_fnptr) + { + /* Check weakly-defined function pointer. A statically-linked injection can define + this as a normal symbol. NOTE(review): this branch only runs when no dynamic + entry point was found above, so the static injection does not take precedence + over a dynamic one as this comment used to state -- confirm the intended order. */ + if (InitializeInjectionNvtxExtension_fnptr) + { + init_fnptr = InitializeInjectionNvtxExtension_fnptr; + } + } +#endif + + if (out_init_fnptr) + { + *out_init_fnptr = init_fnptr; + } + + /* At this point, if `init_fnptr` is not set, no tool has specified an NVTX injection library. + Non-success result is returned, so that all NVTX API functions will be set to no-ops.
*/ + if (!init_fnptr) + { + return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE; + } + + return NVTX_SUCCESS; +} + +/* One-time initialization of an extension module. The caller that swaps `*moduleState` from + * NVTX_EXTENSION_FRESH to NVTX_EXTENSION_STARTING locates and runs the injection entry point, + * disables the function slots that were not filled in, and then publishes NVTX_EXTENSION_LOADED. + * Every other caller spins (yielding) until it observes NVTX_EXTENSION_LOADED. */ +/* Avoid warnings about missing prototypes. */ +NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce) ( + nvtxExtModuleInfo_t* moduleInfo, intptr_t* moduleState); +NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxExtInitOnce) ( + nvtxExtModuleInfo_t* moduleInfo, intptr_t* moduleState) +{ + intptr_t old; + + NVTX_INFO( "%s\n", __FUNCTION__ ); + + if (*moduleState == NVTX_EXTENSION_LOADED) + { + NVTX_INFO("Module loaded\n"); + return; + } + + NVTX_ATOMIC_CAS_PTR( + old, + moduleState, + NVTX_EXTENSION_STARTING, + NVTX_EXTENSION_FRESH); + if (old == NVTX_EXTENSION_FRESH) + { + NvtxExtInitializeInjectionFunc_t init_fnptr = + NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr; + int entryPointStatus = 0; + int forceAllToNoops = 0; + size_t s; + + /* Load and initialize injection library, which will assign the function pointers. */ + if (init_fnptr == 0) + { + int result = 0; + + /* Try to load vanilla NVTX first. */ + nvtxInitialize(0); + + result = NVTX_VERSIONED_IDENTIFIER(nvtxExtLoadInjectionLibrary)(&init_fnptr); + /* At this point `init_fnptr` will be either 0 or a real function. */ + + if (result == NVTX_SUCCESS) + { + NVTX_VERSIONED_IDENTIFIER(nvtxExtGlobals1).injectionFnPtr = init_fnptr; + } + else + { + NVTX_ERR("Failed to load injection library\n"); + } + } + + if (init_fnptr != 0) + { + /* Invoke injection library's initialization function. If it returns + 0 (failure), the error is logged and every function slot is disabled below. + NOTE(review): a dynamically loaded injection library is not unloaded on this path. */ + entryPointStatus = init_fnptr(moduleInfo); + if (entryPointStatus == 0) + { + NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n"); + } + } + + /* Clean up any functions that are still uninitialized so that they are + skipped. Set all to null if injection init function failed as well.
*/ + forceAllToNoops = (init_fnptr == 0) || (entryPointStatus == 0); + for (s = 0; s < moduleInfo->segmentsCount; ++s) + { + nvtxExtModuleSegment_t* segment = moduleInfo->segments + s; + size_t i; + for (i = 0; i < segment->slotCount; ++i) + { + if (forceAllToNoops || (segment->functionSlots[i] == NVTX_EXTENSION_FRESH)) + { + segment->functionSlots[i] = NVTX_EXTENSION_DISABLED; + } + } + } + + NVTX_MEMBAR(); + + /* Signal that initialization has finished and the assigned function + pointers will be used. */ + NVTX_ATOMIC_WRITE_PTR(moduleState, NVTX_EXTENSION_LOADED); + } + else /* Spin-wait until initialization has finished. */ + { + NVTX_MEMBAR(); + while (*moduleState != NVTX_EXTENSION_LOADED) + { + NVTX_YIELD(); + NVTX_MEMBAR(); + } + } +} + +#ifdef __cplusplus +} +#endif /* __cplusplus */ diff --git a/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtPayloadTypeInfo.h b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtPayloadTypeInfo.h new file mode 100644 index 0000000..4005da3 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtPayloadTypeInfo.h @@ -0,0 +1,151 @@ +/* +* Copyright 2021-2023 NVIDIA Corporation. All rights reserved. +* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef NVTX_EXT_IMPL_PAYLOAD_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExtPayload.h (except when NVTX_NO_IMPL is defined). +#endif + +typedef void* nvtx_payload_pointer_type; + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) +#include +#include +#endif + +/* `alignof` is available as of C11 or C++11.
*/ +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || (defined(__cplusplus) && __cplusplus >= 201103L) + +#define nvtx_alignof(type) alignof(type) +#define nvtx_alignof2(type,tname) alignof(type) + +#else /* (__STDC_VERSION__ >= 201112L) || (__cplusplus >= 201103L) */ + +/* Create helper structs to determine type alignment. */ +#define MKTYPEDEF(type) typedef struct {char c; type d;} _nvtx_##type +#define MKTYPEDEF2(type,tname) typedef struct {char c; type d;} _nvtx_##tname + +MKTYPEDEF(char); +MKTYPEDEF2(unsigned char, uchar); +MKTYPEDEF(short); +MKTYPEDEF2(unsigned short, ushort); +MKTYPEDEF(int); +MKTYPEDEF2(unsigned int, uint); +MKTYPEDEF(long); +MKTYPEDEF2(unsigned long, ulong); +MKTYPEDEF2(long long, longlong); +MKTYPEDEF2(unsigned long long, ulonglong); + +MKTYPEDEF(int8_t); +MKTYPEDEF(uint8_t); +MKTYPEDEF(int16_t); +MKTYPEDEF(uint16_t); +MKTYPEDEF(int32_t); +MKTYPEDEF(uint32_t); +MKTYPEDEF(int64_t); +MKTYPEDEF(uint64_t); + +MKTYPEDEF(float); +MKTYPEDEF(double); +MKTYPEDEF2(long double, longdouble); + +MKTYPEDEF(size_t); +MKTYPEDEF(nvtx_payload_pointer_type); + +MKTYPEDEF(wchar_t); + +/* `char8_t` is available as of C++20 or C23 */ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || (defined(__cplusplus) && __cplusplus >= 201811L) + MKTYPEDEF(char8_t); +#endif + +/* `char16_t` and `char32_t` are available as of C++11 or C11 */ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || (defined(__cplusplus) && __cplusplus >= 200704L) + MKTYPEDEF(char16_t); + MKTYPEDEF(char32_t); +#endif + +/* C requires to include stddef.h to use `offsetof` */ +#ifndef __cplusplus +#include +#endif + +#define nvtx_alignof(tname) offsetof(_nvtx_##tname, d) +#define nvtx_alignof2(type, tname) offsetof(_nvtx_##tname, d) + +#endif /* __STDC_VERSION__ >= 201112L */ + +#undef MKTYPEDEF +#undef MKTYPEDEF2 + +/* + * Helper array to get the alignment for each predefined C/C++ language type. 
+ * The order of entries must match the values in`enum nvtxPayloadSchemaEntryType`. + * + * In C++, `const` variables use internal linkage by default, but we need it to + * be public (extern) since weak declarations must be public. + */ +NVTX_LINKONCE_DEFINE_GLOBAL +#ifdef __cplusplus +extern +#endif +const nvtxPayloadEntryTypeInfo_t +NVTX_EXT_PAYLOAD_VERSIONED_ID(nvtxExtPayloadTypeInfo)[NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE] = +{ + /* The first entry contains this array's length and the size of each entry in this array. */ + {NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE, sizeof(nvtxPayloadEntryTypeInfo_t)}, + + /*** C integer types ***/ + /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR */ {sizeof(char), nvtx_alignof(char)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_UCHAR */ {sizeof(unsigned char), nvtx_alignof2(unsigned char, uchar)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_SHORT */ {sizeof(short), nvtx_alignof(short)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_USHORT */ {sizeof(unsigned short), nvtx_alignof2(unsigned short, ushort)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_INT */ {sizeof(int), nvtx_alignof(int)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_UINT */ {sizeof(unsigned int), nvtx_alignof2(unsigned int, uint)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_LONG */ {sizeof(long), nvtx_alignof(long)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_ULONG */ {sizeof(unsigned long), nvtx_alignof2(unsigned long, ulong)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_LONGLONG */ {sizeof(long long), nvtx_alignof2(long long, longlong)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_ULONGLONG */ {sizeof(unsigned long long), nvtx_alignof2(unsigned long long,ulonglong)}, + + /*** Integer types with explicit size ***/ + /* NVTX_PAYLOAD_ENTRY_TYPE_INT8 */ {sizeof(int8_t), nvtx_alignof(int8_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_UINT8 */ {sizeof(uint8_t), nvtx_alignof(uint8_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_INT16 */ {sizeof(int16_t), nvtx_alignof(int16_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_UINT16 */ {sizeof(uint16_t), nvtx_alignof(uint16_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_INT32 */ {sizeof(int32_t), nvtx_alignof(int32_t)}, + 
/* NVTX_PAYLOAD_ENTRY_TYPE_UINT32 */ {sizeof(uint32_t), nvtx_alignof(uint32_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_INT64 */ {sizeof(int64_t), nvtx_alignof(int64_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_UINT64 */ {sizeof(uint64_t), nvtx_alignof(uint64_t)}, + + /*** C floating point types ***/ + /* NVTX_PAYLOAD_ENTRY_TYPE_FLOAT */ {sizeof(float), nvtx_alignof(float)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_DOUBLE */ {sizeof(double), nvtx_alignof(double)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_LONGDOUBLE */ {sizeof(long double), nvtx_alignof2(long double, longdouble)}, + + /* NVTX_PAYLOAD_ENTRY_TYPE_SIZE */ {sizeof(size_t), nvtx_alignof(size_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_ADDRESS */ {sizeof(nvtx_payload_pointer_type), nvtx_alignof(nvtx_payload_pointer_type)}, + + /*** Special character types ***/ + /* NVTX_PAYLOAD_ENTRY_TYPE_WCHAR */ {sizeof(wchar_t), nvtx_alignof(wchar_t)}, + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || (defined(__cplusplus) && __cplusplus >= 201811L) + /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 */ {sizeof(char8_t), nvtx_alignof(char8_t)}, +#else + /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 */ {0, 0}, +#endif + +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || (defined(__cplusplus) && __cplusplus >= 200704L) + /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 */ {sizeof(char16_t), nvtx_alignof(char16_t)}, + /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 */ {sizeof(char32_t), nvtx_alignof(char32_t)} +#else + /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 */ {0, 0}, + /* NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 */ {0, 0} +#endif +}; + +#undef nvtx_alignof +#undef nvtx_alignof2 \ No newline at end of file diff --git a/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtTypes.h b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtTypes.h new file mode 100644 index 0000000..98eed08 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/nvtxext/nvtx3/nvtxExtDetail/nvtxExtTypes.h @@ -0,0 +1,44 @@ +/* +* Copyright 2021 NVIDIA Corporation. All rights reserved. 
+* +* Licensed under the Apache License v2.0 with LLVM Exceptions. +* See LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +/* This header defines types which are used by the internal implementation +* of NVTX and callback subscribers. API clients do not use these types, +* so they are defined here instead of in nvToolsExt.h to clarify they are +* not part of the NVTX client API. */ + +#ifndef NVTXEXTTYPES_H +#define NVTXEXTTYPES_H + +#ifndef NVTX_EXT_TYPES_GUARD +#error Never include this file directly -- it is automatically included by nvToolsExt[EXTENSION].h. +#endif + +typedef intptr_t (NVTX_API * NvtxExtGetExportFunction_t)(uint32_t exportFunctionId); + +/* One segment of an extension module's function table: `functionSlots` points to +* `slotCount` entries. nvtxExtInitOnce (nvtxExtInit.h) sets an entry to +* NVTX_EXTENSION_DISABLED when it is still NVTX_EXTENSION_FRESH after injection, +* or unconditionally when no injection was found or its entry point returned 0. */ +typedef struct nvtxExtModuleSegment_t +{ + size_t segmentId; + size_t slotCount; + intptr_t* functionSlots; +} nvtxExtModuleSegment_t; + +/* Module description passed to the injection entry point. `segments` points to +* `segmentsCount` nvtxExtModuleSegment_t entries. */ +typedef struct nvtxExtModuleInfo_t +{ + uint16_t nvtxVer; + uint16_t structSize; + uint16_t moduleId; + uint16_t compatId; + size_t segmentsCount; + nvtxExtModuleSegment_t* segments; + NvtxExtGetExportFunction_t getExportFunction; + const void* extInfo; +} nvtxExtModuleInfo_t; + +/* Signature of the injection entry point, looked up by nvtxExtLoadInjectionLibrary under +* the name "InitializeInjectionNvtxExtension". nvtxExtInitOnce treats a return value of 0 +* as failure. */ +typedef int (NVTX_API * NvtxExtInitializeInjectionFunc_t)(nvtxExtModuleInfo_t* moduleInfo); + +#endif /* NVTXEXTTYPES_H */ \ No newline at end of file diff --git a/crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.c b/crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.c new file mode 100644 index 0000000..8831c05 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.c @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.txt for license information. + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include + +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#else +#include +#include +#if defined (_QNX_SOURCE) +#include +#include +#else +#include +#endif +#include +#include +#endif + +#if defined(__APPLE__) +#include +#endif + +/*-------------------------------------------------------------*/ +/* Path string helpers -- implement here to avoid dependencies */ + +#if defined(_WIN32) +static const char pathSep = '\\'; +#if defined(NVTXW3_TEST_PATH_UTILITIES) +static const char pathDelimiter = ';'; +#endif +static const size_t initialPathBufSize = MAX_PATH; /* Grows if not big enough */ +#define NVTXW3_DLLHANDLE HMODULE +#define NVTXW3_DLLOPEN(x) LoadLibraryA(x) +#define NVTXW3_DLLFUNC GetProcAddress +#define NVTXW3_DLLCLOSE FreeLibrary +#else +static const char pathSep = '/'; +#if defined(NVTXW3_TEST_PATH_UTILITIES) +static const char pathDelimiter = ':'; +#endif +static const size_t initialPathBufSize = 260; /* Grows if not big enough */ +#define NVTXW3_DLLHANDLE void* +#define NVTXW3_DLLOPEN(x) dlopen(x, RTLD_LAZY) +#define NVTXW3_DLLFUNC dlsym +#define NVTXW3_DLLCLOSE dlclose +#endif + +#if defined(NVTXW3_TEST_PATH_UTILITIES) +/* If native path separator is not forward slash (e.g. backslash on Windows), +* do in-place conversion of forward slashes to native path separator. 
*/ +static void ForwardSlashesToNative(char* path) +{ +#if _WIN32 + char* cur; + if (!path) return; + for (cur = path; *cur; ++cur) + { + if (*cur == '/') *cur = pathSep; + } +#else + (void)path; +#endif +} +#endif + +/* Take pointers to string buffer begin/end. End must equal begin + strlen(begin), +* or NULL, in which case it will be set to begin + strlen(begin). +* Remove trailing slashes in-place by overwriting first trailing slash with null. */ +static void StripTrailingSlashes(char* path) +{ + char* newPathEnd; + char* pathEnd = path + strlen(path); + + newPathEnd = pathEnd; + while (newPathEnd != path) + { + char* cur = newPathEnd - 1; + if (*cur != pathSep) break; + newPathEnd = cur; + } + if (newPathEnd != pathEnd) + { + *newPathEnd = '\0'; + } +} + +/* Take pointers to string buffer begin/end. End must equal begin + strlen(begin), +* or NULL, in which case it will be set to begin + strlen(begin). +* Remove leading slashes in-place by memmove-ing from first character after leading +* slashes to beginning of buffer, including null terminator. */ +#if defined(NVTXW3_TEST_PATH_UTILITIES) +static char* AfterLeadingSlashes(char* cur) +{ + for (; *cur && *cur == pathSep; ++cur); + return cur; +} +#endif +static const char* AfterLeadingSlashesConst(const char* cur) +{ + for (; *cur && *cur == pathSep; ++cur); + return cur; +} + +#if defined(NVTXW3_TEST_PATH_UTILITIES) +/* Take pointers to string buffer begin/end. End must equal begin + strlen(begin), +* or NULL, in which case it will be set to begin + strlen(begin). +* Remove leading slashes in-place by memmove-ing from first character after leading +* slashes to beginning of buffer, including null terminator. 
*/ +static void StripLeadingSlashes(char* path) +{ + char* afterSlashes = AfterLeadingSlashes(path); + if (afterSlashes != path) + { + size_t sizeAfterSlashesWithNull = strlen(afterSlashes) + 1; + memmove(path, afterSlashes, sizeAfterSlashesWithNull); + } +} +#endif + +/* Take pointers to string buffer begin/end. End must equal begin + strlen(begin), +* or NULL, in which case it will be set to begin + strlen(begin). +* Returns pointer to heap-allocated copy of input, must be freed with free(). */ +static char* AssignHeapString(char* lhs, const char* rhs) +{ + size_t lenWithNull; + + if (!rhs) return NULL; + + lenWithNull = strlen(rhs) + 1; + lhs = (char*)realloc(lhs, lenWithNull); + memcpy(lhs, rhs, lenWithNull); + return lhs; +} + +static char* AssignHeapStringFromRange(char* lhs, const char* rhsBegin, const char* rhsEnd) +{ + size_t lenWithoutNull; + + if (!rhsBegin || !rhsEnd) return NULL; + + lenWithoutNull = rhsEnd - rhsBegin; + lhs = (char*)realloc(lhs, lenWithoutNull + 1); + memcpy(lhs, rhsBegin, lenWithoutNull); + lhs[lenWithoutNull] = '\0'; + return lhs; +} + +/* Take pointers to string buffer begin/end. End must equal begin + strlen(begin), +* or NULL, in which case it will be set to begin + strlen(begin). +* Returns pointer to heap-allocated copy of input, must be freed with free(). */ +static char* MakeHeapString(const char* str) +{ + return AssignHeapString(NULL, str); +} + +static char* MakeHeapStringFromRange(const char* strBegin, const char* strEnd) +{ + return AssignHeapStringFromRange(NULL, strBegin, strEnd); +} + +#if defined(NVTXW3_TEST_PATH_UTILITIES) +static char* MakeHeapStringWithNativeSlashes(const char* str) +{ + char* buf = AssignHeapString(NULL, str); + ForwardSlashesToNative(buf); + return buf; +} + +/* Take pointer to a HeapString (lhs) and any C string (rhs), append rhs to lhs, +* reallocating the heap memory for lhs if necessary. Returns pointer to result +* HeapString, which may or may not be the same pointer passed in as lhs. 
+* HeapString must be freed with free(). */ +static char* AppendToHeapString(char* lhs, const char* rhs) +{ + size_t lenLhs, lenRhs; + lenLhs = strlen(lhs); + lenRhs = strlen(rhs); + if (lenRhs == 0) return lhs; + lhs = (char*)realloc(lhs, lenLhs + lenRhs + 1); + memcpy(lhs + lenLhs, rhs, lenRhs + 1); + return lhs; +} +#endif + +/* Take pointer to a HeapString (lhs) and any C string (rhs), append rhs to lhs, +* with a path separator between them, reallocating the heap memory for lhs if +* necessary. If rhs is null or empty, then the result is lhs unmodified. If +* lhs is null or empty and rhs is not, then the result is a path separator +* followed by rhs. Returns pointer to result HeapString, which may or may not +* be the same pointer passed in as lhs. HeapString must be freed with free(). */ +static char* AppendToHeapStringWithSep(char* lhs, const char* rhs) +{ + size_t lenLhs, lenRhs; + lenLhs = strlen(lhs); + lenRhs = strlen(rhs); + if (lenRhs == 0) return lhs; + lhs = (char*)realloc(lhs, lenLhs + lenRhs + 2); + lhs[lenLhs] = pathSep; + memcpy(lhs + lenLhs + 1, rhs, lenRhs + 1); + return lhs; +} + +/* dir is a HeapString. If dir is empty or just slashes, result will be a +* path relative to the root, i.e. beginning with a path separator. +* relativePath must be a valid relative path (not empty, not just slashes). +* Returns pointer to result HeapString, which may or may not be the same +* pointer passed in as lhs. HeapString must be freed with free(). 
*/ +static char* AppendToPathHeapString(char* dir, const char* relativePath) +{ + const char* relPathAfterLeadingSlashes; + relPathAfterLeadingSlashes = AfterLeadingSlashesConst(relativePath); + StripTrailingSlashes(dir); + return AppendToHeapStringWithSep(dir, relPathAfterLeadingSlashes); +} + +static char* LoadFileIntoHeapString(const char* filename) +{ + FILE* f; + char* buf; + int err; + long pos; + size_t size; + size_t bytesRead; + + f = fopen(filename, "rb"); + if (!f) return NULL; + err = fseek(f, 0, SEEK_END); + if (err) { fclose(f); return NULL; } + pos = ftell(f); + if (pos < 0) { fclose(f); return NULL; } + rewind(f); + size = (size_t)pos; + + buf = (char*)malloc(size + 1); + if (!buf) { fclose(f); return NULL; } + bytesRead = fread(buf, 1, size, f); + if (bytesRead < size) { fclose(f); free(buf); return NULL; } + + buf[size] = '\0'; + fclose(f); + return buf; +} + +#if defined(NVTXW3_TEST_PATH_UTILITIES) +static int HasSlashes(const char* cur) +{ + for (; *cur; ++cur) + { + if (*cur == pathSep) return 1; + } + return 0; +} + +static int HasTrailingSlash(const char* str) +{ + size_t len = strlen(str); + if (len == 0) return 0; + return str[len-1] == pathSep; +} +#endif + +static char* GetCurrentWorkingDir() +{ +#if defined(_WIN32) + DWORD size; + char* buf; + + // Returns size including space for null terminator + size = GetCurrentDirectoryA(0, NULL); + buf = (char*)malloc(size); + GetCurrentDirectoryA(size, buf); + return buf; +#else + size_t size = initialPathBufSize; + char* buf; + + buf = (char*)malloc(size); + while (!getcwd(buf, size)) + { + size *= 2; + buf = (char*)realloc(buf, size); + } + buf = (char*)realloc(buf, strlen(buf) + 1); + return buf; +#endif +} + +#if defined(NVTXW3_TEST_PATH_UTILITIES) +/* Take pointer to string buffer of possibly-relative path, and returns +* equivalent absolute path. Input path must not be empty. +* Returns pointer to heap-allocated string, must be freed with free(). 
*/ +static char* AbsolutePath(const char* path) +{ +#if defined(_WIN32) + size_t size; + char* buf; + + if (!path) return NULL; + + // Returns size including space for null terminator + size = (size_t)GetFullPathNameA(path, 0, NULL, NULL); + buf = (char*)malloc(size); + GetFullPathNameA(path, size, buf, NULL); + return buf; +#else + if (!path) return NULL; + + return path[0] == pathSep + ? MakeHeapString(path) // Absolute already + : AppendToPathHeapString(GetCurrentWorkingDir(), path); +#endif +} +#endif + +/* Take pointer to heap string of path, and modifies it in-place to be its +* parent directory, i.e. the directory containing the input file/directory. +* String is shortened, but not reallocated, permitting possibly faster +* appending of different path later. Returns the pointer passed in without +* modifying it for convenient chaining of path functions. If input path is +* NULL, NULL is returned. If input is an empty string, or root directory, +* the heap string will be set to an empty string to indicate there is no +* parent directory. Returned pointer to heap-allocated string must be +* freed with free(). */ +static char* ToParentDir(char* path) +{ + char* cur; + + if (!path) return NULL; + + StripTrailingSlashes(path); + + for (cur = path + strlen(path); cur >= path; --cur) + { + if (*cur == pathSep) + { + /* Found the last slash */ + if (cur == path) + { + /* Special case -- last slash is first character + * in buffer. Trailing slashes were trimmed first, + * so this can only occur when ParentDir should + * return the root directory. This is the only + * case where we want to keep the slash we found, + * so write the null terminator after the slash. */ + *(cur + 1) = '\0'; + } + else + { + /* Change slash to null, terminating the string + * before the last slash */ + *cur = '\0'; + } + return path; + }; + } + + /* No slashes found, so there's no parent directory. 
Assign empty + * string by nulling first character, which is safe because all heap + * strings must be at least one byte long. */ + path[0] = '\0'; + return path; +} + +#if defined(NVTXW3_TEST_PATH_UTILITIES) +/* Take pointer to string buffer of path, and returns the parent directory, +* i.e. the directory containing the input file/directory. If input path is +* NULL, empty string, or root directory, NULL is returned to indicate there +* is no parent directory, so return value must be NULL-checked. +* Returns pointer to heap-allocated string, must be freed with free(). */ +static char* ParentDir(const char* path) +{ + char* buf; + + if (!path) return NULL; + + buf = ToParentDir(MakeHeapString(path)); + + if (strlen(buf) == 0) + { + /* No slashes found, so there's no parent directory */ + free(buf); + return NULL; + } + else + { + return buf; + } +} + +static int PathExists(const char* path) +{ +#if defined(_WIN32) + DWORD result = GetFileAttributesA(path); + return result != INVALID_FILE_ATTRIBUTES; +#else + int result = access(path, F_OK); + return result != -1; +#endif +} +#endif + +/* Return a heap string containing the full path of the current process's +* executable file. Buffer allocated may be a little larger than the path +* string it contains, and is not realloc'ed to fit since typical usage of +* this function involves getting the parent directory and appending to it. +* Returned pointer to heap-allocated string must be freed with free(). 
*/ +static char* GetCurrentProcessPath() +{ + char* buf; +#if defined(_WIN32) + { + DWORD size = initialPathBufSize; + DWORD newSize; + buf = NULL; + while (1) + { + buf = (char*)realloc(buf, size); + newSize = GetModuleFileNameA(NULL, buf, size); + if (newSize < size) break; + size *= 2; + } + } +#elif defined(__APPLE__) + { + size_t size = PROC_PIDPATHINFO_MAXSIZE; + pid_t pid; + buf = (char*)malloc(size); + pid = getpid(); + size = proc_pidpath(pid, buf, size); + if (size == 0) + { + buf[0] = '\0'; + } + } +#elif defined(__QNX__) + { + size_t size = fpathconf(0, _PC_MAX_INPUT); + if (size <= 0) + { + size = 4096; + } + ++size; + buf = (char*)malloc(size); + _cmdname(buf); + } +#else + { + size_t size = initialPathBufSize; + ssize_t bytesReadSigned; + size_t bytesRead; + const char* linkName = "/proc/self/exe"; + buf = NULL; + while (1) + { + buf = (char*)realloc(buf, size); + bytesReadSigned = readlink(linkName, buf, size); + if (bytesReadSigned < 0) { free(buf); return NULL; } + bytesRead = (size_t)bytesReadSigned; + if (bytesRead < size) break; + size *= 2; + } + buf[bytesRead] = '\0'; + } +#endif + return buf; +} + +static char* GetCurrentProcessDir() +{ + return ToParentDir(GetCurrentProcessPath()); +} + +static int KVPConsumerForSimplify( + void* state, + const char* readKeyBegin, + const char* readKeyEnd, + const char* readValBegin, + const char* readValEnd) +{ + char* curWrite = *(char**)state; + size_t size; + /* Safe to cast away const here, since we are pointing at a non-const heap string */ + char* keyBegin = (char*)readKeyBegin; + char* keyEnd = (char*)readKeyEnd; + char* valBegin = (char*)readValBegin; + char* valEnd = (char*)readValEnd; + + /* Rebuild the simplified config line at the write pointer, using memmove since the + * ranges may overlap or even be the exact same range. 
*/ + size = keyEnd - keyBegin; + memmove(curWrite, keyBegin, size); + curWrite += size; + + *curWrite = '='; + ++curWrite; + + size = valEnd - valBegin; + memmove(curWrite, valBegin, size); + curWrite += size; + + *curWrite = '\n'; + ++curWrite; + + *(char**)state = curWrite; + + return 0; +} + +static char* SimplifyConfigHeapString(char* config) +{ + char* curWrite = config; + + nvtxwConsumeConfigString(config, KVPConsumerForSimplify, &curWrite); + + *curWrite = '\0'; + return (char*)realloc(config, strlen(config) + 1); +} + +typedef struct GetInitModeState_t +{ + int modeFound; + int modeStringFound; + int mode; + char* modeString; +} GetInitModeState_t; + +static int KVPConsumerForGetInitMode( + void* statePtr, + const char* keyBegin, + const char* keyEnd, + const char* valBegin, + const char* valEnd) +{ + GetInitModeState_t* state = (GetInitModeState_t*)statePtr; + const char* const keyMode = "InitMode"; + const char* const keyModeString = "InitModeString"; + const size_t keyModeLen = strlen(keyMode); + const size_t keyModeStringLen = strlen(keyModeString); + size_t keyLen; + + keyLen = keyEnd - keyBegin; + + if (!state->modeFound + && keyLen == keyModeLen + && strncmp(keyBegin, keyMode, keyLen) == 0) + { + int mode; + char* val; + val = MakeHeapStringFromRange(valBegin, valEnd); + mode = atoi(val); + free(val); + state->mode = mode; + state->modeFound = 1; + } + + if (!state->modeStringFound + && keyLen == keyModeStringLen + && strncmp(keyBegin, keyModeString, keyLen) == 0) + { + char* val; + val = MakeHeapStringFromRange(valBegin, valEnd); + state->modeString = val; + state->modeStringFound = 1; + } + + return state->modeFound && + (state->mode == NVTXW3_INIT_MODE_SEARCH_DEFAULT || state->modeStringFound); +} + +/* Returns zero for success, and writes out params mode and modeString (the latter +* is a HeapString). If mode is not detected, or if the mode requires a modeString +* and modeString is not detected, return non-zero error code. 
*/ +static int GetInitModeFromConfig(const char* config, int* mode, char** modeString) +{ + GetInitModeState_t state = {0}; + + if (!mode || !modeString) return 1; + *mode = 0; + *modeString = NULL; + + nvtxwConsumeConfigString(config, KVPConsumerForGetInitMode, &state); + + /* Always an error if mode not found */ + if (!state.modeFound) + { + free(state.modeString); + return 1; + } + + /* Except in default mode, it's an error if modeString not found */ + if (state.mode != NVTXW3_INIT_MODE_SEARCH_DEFAULT && !state.modeStringFound) + { + return 2; + } + + *mode = state.mode; + *modeString = state.modeString; + return 0; +} + +/*-------------------------------------------------------------*/ +/* Backend loader helpers */ + +static nvtxwResultCode_t InitLibraryFilename( + const char* filename, /* required */ + const char* configString, /* optional */ + nvtxwGetInterface_t* getInterfaceFunc, /* already null-checked */ + void** moduleHandle) /* optional */ +{ + /* modeString is the filename of the library to load */ + NVTXW3_DLLHANDLE hModule; + nvtxwLoadImplementation_t pfnLoadImplementation; + nvtxwGetInterface_t tempGetInterfaceFunc = NULL; + nvtxwResultCode_t result; + char* configSimple = NULL; + + *getInterfaceFunc = NULL; + if (moduleHandle) *moduleHandle = NULL; + + if (!filename) + { + return NVTXW3_RESULT_INVALID_ARGUMENT; + } + + hModule = NVTXW3_DLLOPEN(filename); + if (!hModule) + { + return NVTXW3_RESULT_LIBRARY_NOT_FOUND; + } + + pfnLoadImplementation = (nvtxwLoadImplementation_t)NVTXW3_DLLFUNC(hModule, "nvtxwLoadImplementation"); + if (!pfnLoadImplementation) + { + NVTXW3_DLLCLOSE(hModule); + return NVTXW3_RESULT_LOADER_SYMBOL_MISSING; + } + + if (configString) + { + configSimple = SimplifyConfigHeapString(MakeHeapString(configString)); + } + + result = pfnLoadImplementation(configSimple, &tempGetInterfaceFunc); + free(configSimple); + if (result != NVTXW3_RESULT_SUCCESS || !tempGetInterfaceFunc) + { + NVTXW3_DLLCLOSE(hModule); + return result; + } + + 
/* Success - now write to output params */ + *getInterfaceFunc = tempGetInterfaceFunc; + if (moduleHandle) + { + void* mod = (void*)hModule; + *moduleHandle = mod; + } + + return NVTXW3_RESULT_SUCCESS; +} + +static nvtxwResultCode_t InitSearchDefault( + const char* configString, /* optional */ + nvtxwGetInterface_t* getInterfaceFunc, /* already null-checked */ + void** moduleHandle) /* optional */ +{ + nvtxwResultCode_t result; + char* filename; + + /* 1. Directory of current process's executable */ + filename = AppendToPathHeapString(GetCurrentProcessDir(), NVTXW3_LIB_FILENAME_DEFAULT); + result = InitLibraryFilename( + filename, configString, getInterfaceFunc, moduleHandle); + free(filename); + if (result == NVTXW3_RESULT_SUCCESS) + { + return NVTXW3_RESULT_SUCCESS; + } + + /* 2. Standard search paths for dynamic libraries */ + result = InitLibraryFilename( + NVTXW3_LIB_FILENAME_DEFAULT, configString, getInterfaceFunc, moduleHandle); + if (result == NVTXW3_RESULT_SUCCESS) + { + return NVTXW3_RESULT_SUCCESS; + } + + /* 3. 
Current working directory (may not be included in standard search paths) */ + filename = AppendToPathHeapString(GetCurrentWorkingDir(), NVTXW3_LIB_FILENAME_DEFAULT); + result = InitLibraryFilename( + filename, configString, getInterfaceFunc, moduleHandle); + free(filename); + + /* No usable backend found */ + return NVTXW3_RESULT_LIBRARY_NOT_FOUND; +} + +static nvtxwResultCode_t InitLibraryDirectory( + const char* directory, /* required */ + const char* configString, /* optional */ + nvtxwGetInterface_t* getInterfaceFunc, /* already null-checked */ + void** moduleHandle) /* optional */ +{ + nvtxwResultCode_t result; + char* filename; + + if (!directory) return NVTXW3_RESULT_INVALID_ARGUMENT; + + filename = AppendToPathHeapString( + MakeHeapString(directory), NVTXW3_LIB_FILENAME_DEFAULT); + + result = InitLibraryFilename(filename, configString, getInterfaceFunc, moduleHandle); + free(filename); + + return result; +} + +static nvtxwResultCode_t InitConfigString( + const char* config, + nvtxwGetInterface_t* getInterfaceFunc, + void** moduleHandle) +{ + nvtxwResultCode_t result; + int err; + int mode = 0; + char* modeString = NULL; + + if (!config) return NVTXW3_RESULT_INVALID_ARGUMENT; + + err = GetInitModeFromConfig(config, &mode, &modeString); + if (err) + { + free(modeString); + return NVTXW3_RESULT_CONFIG_MISSING_LOADER_INFO; + } + + switch (mode) + { + case NVTXW3_INIT_MODE_SEARCH_DEFAULT : result = InitSearchDefault ( config, getInterfaceFunc, moduleHandle); break; + case NVTXW3_INIT_MODE_LIBRARY_FILENAME : result = InitLibraryFilename (modeString, config, getInterfaceFunc, moduleHandle); break; + case NVTXW3_INIT_MODE_LIBRARY_DIRECTORY: result = InitLibraryDirectory(modeString, config, getInterfaceFunc, moduleHandle); break; + default: result = NVTXW3_RESULT_UNSUPPORTED_LOADER_MODE; + } + + free(modeString); + return result; +} + +static nvtxwResultCode_t InitConfigEnvVar( + const char* configEnvVarName, + nvtxwGetInterface_t* getInterfaceFunc, + void** 
moduleHandle) +{ + const char* config; + + if (!configEnvVarName) return NVTXW3_RESULT_INVALID_ARGUMENT; + + config = getenv(configEnvVarName); + if (!config) return NVTXW3_RESULT_ENV_VAR_NOT_FOUND; + + return InitConfigString(config, getInterfaceFunc, moduleHandle); +} + +static nvtxwResultCode_t InitConfigFilename( + const char* configFilename, + nvtxwGetInterface_t* getInterfaceFunc, + void** moduleHandle) +{ + nvtxwResultCode_t result; + char* config; + + if (!configFilename) return NVTXW3_RESULT_INVALID_ARGUMENT; + + config = LoadFileIntoHeapString(configFilename); + if (!config) return NVTXW3_RESULT_CONFIG_NOT_FOUND; + + result = InitConfigString(config, getInterfaceFunc, moduleHandle); + free(config); + return result; +} + +static nvtxwResultCode_t InitConfigDirectory( + const char* configDirectory, + nvtxwGetInterface_t* getInterfaceFunc, + void** moduleHandle) +{ + nvtxwResultCode_t result; + char* configFilename; + + if (!configDirectory) return NVTXW3_RESULT_INVALID_ARGUMENT; + + configFilename = AppendToPathHeapString( + MakeHeapString(configDirectory), NVTXW3_CONFIG_FILENAME_DEFAULT); + + result = InitConfigFilename(configFilename, getInterfaceFunc, moduleHandle); + free(configFilename); + return result; +} + +/* #define NVTXW3_TEST_PATH_UTILITIES */ +#if defined(NVTXW3_TEST_PATH_UTILITIES) +#include +#endif + +NVTXW3_DECLSPEC nvtxwResultCode_t nvtxwInitialize( + nvtxwInitMode_t mode, + const char* modeString, + nvtxwGetInterface_t* getInterfaceFunc, + void** moduleHandle) +{ +#if defined(NVTXW3_TEST_PATH_UTILITIES) + TestPathUtilities(); +#endif + + if (!getInterfaceFunc) + { + return NVTXW3_RESULT_INVALID_ARGUMENT; + } + + switch (mode) + { + case NVTXW3_INIT_MODE_SEARCH_DEFAULT : return InitSearchDefault ( NULL, getInterfaceFunc, moduleHandle); + case NVTXW3_INIT_MODE_LIBRARY_FILENAME : return InitLibraryFilename (modeString, NULL, getInterfaceFunc, moduleHandle); + case NVTXW3_INIT_MODE_LIBRARY_DIRECTORY: return InitLibraryDirectory(modeString, 
NULL, getInterfaceFunc, moduleHandle); + case NVTXW3_INIT_MODE_CONFIG_FILENAME : return InitConfigFilename (modeString, getInterfaceFunc, moduleHandle); + case NVTXW3_INIT_MODE_CONFIG_DIRECTORY : return InitConfigDirectory (modeString, getInterfaceFunc, moduleHandle); + case NVTXW3_INIT_MODE_CONFIG_STRING : return InitConfigString (modeString, getInterfaceFunc, moduleHandle); + case NVTXW3_INIT_MODE_CONFIG_ENV_VAR : return InitConfigEnvVar (modeString, getInterfaceFunc, moduleHandle); + } + + return NVTXW3_RESULT_INVALID_INIT_MODE; +} + +NVTXW3_DECLSPEC void nvtxwUnload( + void* moduleHandle) +{ + nvtxwUnloadImplementation_t pfnUnload; + NVTXW3_DLLHANDLE hModule = (NVTXW3_DLLHANDLE)moduleHandle; + + if (!hModule) return; + + pfnUnload = (nvtxwUnloadImplementation_t)NVTXW3_DLLFUNC(hModule, "nvtxwUnloadImplementation"); + if (pfnUnload) + { + pfnUnload(); + } + + NVTXW3_DLLCLOSE(hModule); +} diff --git a/crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.h b/crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.h new file mode 100644 index 0000000..4b0e011 --- /dev/null +++ b/crate-tmp/src/nvtxw-sys/tools/nvtxw/c/nvtxw3.h @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Licensed under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.txt for license information. 
+ * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#if !defined(NVTXW3_API) +#define NVTXW3_API + +#include <nvtx3/nvToolsExtPayload.h> + +#include <string.h> /* For nvtxwConsumeConfigString inline implementation */ + +#ifdef __cplusplus +#define NVTXW3_DECLSPEC extern "C" +#else +#define NVTXW3_DECLSPEC extern +#endif + +typedef int32_t nvtxwResultCode_t; + +#define NVTXW3_RESULT_SUCCESS 0 +#define NVTXW3_RESULT_FAILED 1 +#define NVTXW3_RESULT_INVALID_ARGUMENT 2 +#define NVTXW3_RESULT_INVALID_INIT_MODE 3 +#define NVTXW3_RESULT_LIBRARY_NOT_FOUND 4 +#define NVTXW3_RESULT_CONFIG_NOT_FOUND 5 +#define NVTXW3_RESULT_LOADER_SYMBOL_MISSING 6 +#define NVTXW3_RESULT_LOADER_FAILED 7 +#define NVTXW3_RESULT_INTERFACE_ID_NOT_SUPPORTED 8 +#define NVTXW3_RESULT_CONFIG_MISSING_LOADER_INFO 9 +#define NVTXW3_RESULT_UNSUPPORTED_LOADER_MODE 10 +#define NVTXW3_RESULT_ENV_VAR_NOT_FOUND 11 + + +#if defined(_WIN32) +#define NVTXW3_LIB_PREFIX "" +#define NVTXW3_LIB_SUFFIX ".dll" +#else +#define NVTXW3_LIB_PREFIX "lib" +#define NVTXW3_LIB_SUFFIX ".so" +#endif + +/* Name of backend library file to use with init mode LIBRARY_DIRECTORY. +* Note the platform-dependent prefix and suffix above are added here. */ +#define NVTXW3_LIB_FILENAME_DEFAULT NVTXW3_LIB_PREFIX "nvtxw3" NVTXW3_LIB_SUFFIX + +/* Name of config file to use with init mode CONFIG_DIRECTORY. +* Unlike the library filename, this name is the same on every platform. */ +#define NVTXW3_CONFIG_FILENAME_DEFAULT "nvtxw3.ini" + +/* Init modes: nvtxwInitialize takes nvtxwInitMode_t mode, one of the #defines +* below, and a modeString, whose meaning is dependent on the mode. These modes +* provide a variety of ways to find the NVTXW backend implementation library. */ +typedef int32_t nvtxwInitMode_t; + +/* Default search mode is to look for library with default filename, as defined +* by NVTXW3_LIB_FILENAME_DEFAULT, in the following order: +* 1. Directory of current process's executable +* 2. Standard search paths for dynamic libraries +* 3. 
Current working directory (may not be included in standard search paths) +* The modeString argument is ignored. */ +#define NVTXW3_INIT_MODE_SEARCH_DEFAULT 0 + +/* The modeString argument is interpreted as a filename or pathname to the +* backend library. The string is passed directly to the platform function +* for loading dynamic libraries (dlopen/LoadLibrary), so that function's +* behavior will apply. In general, a filename with no path will try the +* standard search paths, and an absolute path will be used verbatim. */ +#define NVTXW3_INIT_MODE_LIBRARY_FILENAME 1 + +/* The modeString argument is interpreted as a directory in which to search +* for the backend library, whose filename is defined by the macro +* NVTXW3_LIB_FILENAME_DEFAULT. */ +#define NVTXW3_INIT_MODE_LIBRARY_DIRECTORY 2 + +/* The modeString argument is interpreted as a filename or pathname to a +* config file, which will be used to find the backend library. If the +* filename is not an absolute path, it will be interpreted as relative +* to the current working directory. See below for config file format. */ +#define NVTXW3_INIT_MODE_CONFIG_FILENAME 3 + +/* The modeString argument is interpreted as a directory in which to search +* for a config file, which will be used to find the backend library. The +* name of the config file is defined by NVTXW3_CONFIG_FILENAME_DEFAULT. +* See below for config file format. */ +#define NVTXW3_INIT_MODE_CONFIG_DIRECTORY 4 + +/* The modeString argument is interpreted as the config string itself. +* See below for config string format. */ +#define NVTXW3_INIT_MODE_CONFIG_STRING 5 + +/* The modeString argument is interpreted as the name of an environment +* variable that contains the config string. See below for config string +* format. */ +#define NVTXW3_INIT_MODE_CONFIG_ENV_VAR 6 + +/* Config format (for both files and flat config strings): +* +* The format is key=value pairs, delimited by new-line characters or +* | (pipe) characters. 
Values are prohibited from containing those +* characters. If an entry begins with #, the entry (up to the next +* new-line or pipe) is discarded as a comment. +* +* When the config string is provided to the SessionBegin function +* as an argument, it is preprocessed to remove comments, blank lines, +* and to convert all entry delimiters to a single \n (line feed). +* This allows the tool to have a simpler config parser, and to print +* the config in a readable format. +* +* If a config specifies the same key multiple times, only the first +* appearance should be honored, and the subsequent appearances should +* be ignored. This allows a simple scan for a particular key to loop +* from the beginning until the first occurrence is found, and not have +* to loop through the rest for repeats. Note that this means building +* a map from keys to values should not overwrite existing values if a +* found key already exists in the map. This guarantee allows adding +* extra key/value pairs to a config string by prepending (to override +* existing keys) or appending (to set values only if they weren't set +* already). +* +* Keys are tool-specific, but the loader supports two keys: +* +* - InitMode=n +* Just like the argument to nvtxwInitialize, this allows the user +* to specify how to find the backend library, using one of the +* numeric values of the NVTXW3_INIT_MODE_ constants. Currently, +* only values 0-2 are supported for init modes specified within +* a config file/string. +* +* - InitModeString=string +* Just like the argument to nvtxwInitialize, this allows the user +* to specify a mode-specific string for how to find the backend +* library. This key is ignored for mode 0 (SEARCH_DEFAULT), but +* required for other modes. Currently, only mode values 0-2 are +* supported for init modes specified within a config file/string. 
+*/ + +/*--------- Helpers for consuming config strings ----------------*/ + +/* Typedef of function pointer for callback to use with nvtxwConsumeConfigString. +* The state pointer can be used for anything -- nvtxwConsumeConfigString passes +* it directly to the callback. The begin/end pointers for the key and value are +* pointing to ranges within the input config string, so they are not +* null-terminated. If the input config string +* is known to be non-const, this callback can safely cast away const and write +* to these pointers, for example when simplifying an input config string. To +* check if a key name is a particular string, compare the lengths as well as +* the characters, since strncmp alone would also accept any key that is a +* prefix of the name: +* (size_t)(keyEnd - keyBegin) == strlen("ExampleKeyName") && +* strncmp("ExampleKeyName", keyBegin, keyEnd - keyBegin) == 0 +* In C++, you can construct a string using std::string(keyBegin, keyEnd). +* Return zero to continue consuming key/value pairs, or non-zero to stop. */ +typedef int (*nvtxwKeyValuePairConsumer_t)( + void* state, + const char* keyBegin, + const char* keyEnd, + const char* valBegin, + const char* valEnd); + +/* Parse config and call the consumer callback (see typedef above) on each +* valid key/value pair found in the config. Inline implementation provided +* here so backend implementations of NVTXW can use this function without +* having to include nvtxw3.c in their build. Users of the NVTXW API may +* also find it useful to parse/modify a config before passing it to NVTXW. 
*/ +NVTX_LINKONCE_DEFINE_FUNCTION +void nvtxwConsumeConfigString(const char* config, nvtxwKeyValuePairConsumer_t consumer, void* state) +{ + const char* curRead = config; + const char* const lineBreak = "|\n\r"; + const char* const whitespace = " \t\v"; /* Not including lineBreak characters */ + int consumerStopRequested = 0; + + if (!config || !consumer) return; + + while (*curRead && !consumerStopRequested) + { + const char* lineBegin; + const char* lineEnd; + const char* keyBegin; + const char* keyEnd; + const char* valBegin; + const char* valEnd; + + /* Read a line, trimming leading whitespace - get pointers to begin/end */ + lineBegin = curRead + strspn(curRead, whitespace); + lineEnd = lineBegin + strcspn(lineBegin, lineBreak); + + /* Set read pointer to beginning of next line, so we can continue any time */ + curRead = lineEnd + strspn(lineEnd, lineBreak); + + /* Ignore line if it's only whitespace */ + if (lineBegin == lineEnd) continue; + /* Ignore line if it's is a comment */ + if (*lineBegin == '#') continue; + + /* Determine if line has a key and value delimited by '=' */ + keyBegin = lineBegin; + keyEnd = keyBegin; + while (keyEnd < lineEnd && *keyEnd != '=') ++keyEnd; + + /* Ignore line if there's no '=' in the line */ + if (keyEnd == lineEnd) continue; + /* Ignore line if there's no key name before '=' */ + if (keyEnd == keyBegin) continue; + + /* keyEnd now points at '=' after the key */ + valBegin = keyEnd + 1; + valBegin += strspn(valBegin, whitespace); + + /* Ignore line if all characters after '=' are whitespace */ + if (valBegin == lineEnd) continue; + + valEnd = lineEnd; + + /* Got begin/end pointers for key and value. We know there are non-whitespace + * characters in both of them, and their leading whitespace was already trimmed. + * Now trim their trailing whitespace. 
*/ + while (strchr(whitespace, *(keyEnd - 1))) --keyEnd; + while (strchr(whitespace, *(valEnd - 1))) --valEnd; + + /* Now key and value begin/end pointers can be passed to the consumer */ + consumerStopRequested = consumer(state, keyBegin, keyEnd, valBegin, valEnd); + } +} + +/*--------- Initialization interface ---------*/ + +typedef int32_t nvtxwInterfaceId_t; + +typedef nvtxwResultCode_t (*nvtxwGetInterface_t)( + nvtxwInterfaceId_t interfaceId, + const void** iface); + +/* Initialize the NVTXW library by providing information on how to +* load the backend library that implements the NVTXW API. `mode` must +* be one of the NVTXW3_INIT_MODE_ constants. `modeString` is required +* for all modes besides 0 (SEARCH_DEFAULT), and has mode-specific +* interpretation. See comments for the mode constants. Backend library +* must provide an exported function symbol "nvtxwLoadImplementation", +* which must return NVTXW3_RESULT_SUCCESS and provide a pointer to its +* GetInterface function for initialization to be considered successful. +* Modes that search multiple locations will continue searching after an +* unsuccessful attempt to initialize a library. +* `getInterfaceFunc` is an out-param that must be non-null to receive +* a pointer to the backend's GetInterface function, which is used to +* make version-safe calls into the backend library. +* `moduleHandle` is an out-param that can be null. If non-null, it +* receives the platform-specific module handle of the loaded backend +* library when NVTXW3_RESULT_SUCCESS is returned. This can be passed +* to nvtxwUnload to unload the backend library. */ +NVTXW3_DECLSPEC nvtxwResultCode_t nvtxwInitialize( + nvtxwInitMode_t mode, + const char* modeString, + nvtxwGetInterface_t* getInterfaceFunc, + void** moduleHandle); + +/* A backend library may optionally provide an exported function symbol +* "nvtxwUnloadImplementation". If it does, nvtxwUnload will call this +* function before closing the module handle. 
This gives the backend a +* chance to free any memory tracked in global variables before it gets +* unloaded. Attempting to unload the backend is not necessary and not +* even recommended in common cases -- it is included to ensure clients +* of the NVTXW API have a way to cleanly pass a memory checker. */ +NVTXW3_DECLSPEC void nvtxwUnload( + void* moduleHandle); + +/*----- Typedefs for function pointers backend implements -----*/ + +typedef nvtxwResultCode_t (*nvtxwLoadImplementation_t)( + const char* configString, + nvtxwGetInterface_t* getInterfaceFunc); + +typedef void (*nvtxwUnloadImplementation_t)(); + +/*--------- Interface IDs ----------------*/ + +#define NVTXW3_INTERFACE_ID_CORE_V1 2 + +/*--------- INTERFACE_ID_CORE_V1 ---------*/ + +typedef struct nvtxwSessionHandle_t +{ + void* opaque; +} nvtxwSessionHandle_t; + +typedef struct nvtxwStreamHandle_t +{ + void* opaque; +} nvtxwStreamHandle_t; + +/* Growable struct of arguments for SessionBegin */ +typedef struct nvtxwSessionAttributes_v1 +{ + /* Guaranteed to increase when new members are added at the end */ + size_t struct_size; + + /* Provide a name for the session. + * Tools may display this name, or use it to name a file or directory + * representing the session. */ + const char* name; + + /* String containing configuration options for the session. + * Format is key=value, one per line, delimited by \n (line feed). + * Key names must not contain an = (equals sign), and values may + * contain any character except \r (carriage return), \n (line feed), + * or | (pipe). Tools shall use reasonable defaults for any config + * options not provided, and ignore any keys they do not support. + * See above for explanation of how config strings are provided. + * See tool-specific documentation for lists of supported keys. 
*/ + const char* configString; +} nvtxwSessionAttributes_t; + +/* Define whether event ordering in a stream is based on event scope */ + +/* Event ordering is defined at the stream level, independent of +* event scopes within the stream. */ +#define NVTXW3_STREAM_ORDER_INTERLEAVING_NONE (int16_t)0 + +/* Event ordering is defined at the event scope level. This means +* ordering guarantees described by the other fields only apply to +* events of the same scope within the stream. The order of events +* in different scopes is unspecified. */ +#define NVTXW3_STREAM_ORDER_INTERLEAVING_EVENT_SCOPE (int16_t)1 + + +/* Define how events are fully or partially sorted in a stream. */ + +/* No guarantees can be made about event ordering in the stream. +* Events may need to be sorted by the tool. */ +#define NVTXW3_STREAM_ORDERING_TYPE_UNKNOWN (int16_t)0 + +/* All events represent single points in time and are fully or +* partially sorted in the order in which they occurred. */ +#define NVTXW3_STREAM_ORDERING_TYPE_STRICT (int16_t)1 + +/* Events that represent single points in time are fully or +* partially sorted in the order in which they occurred, and +* events representing time ranges in order of begin time. */ +#define NVTXW3_STREAM_ORDERING_TYPE_PACKED_RANGE_START (int16_t)2 + +/* Events that represent single points in time are fully or +* partially sorted in the order in which they occurred, and +* events representing time ranges in order of end time. */ +#define NVTXW3_STREAM_ORDERING_TYPE_PACKED_RANGE_END (int16_t)3 + +/* Define how to quantify skid when events are partially sorted. Only considered +* when orderingType is not UNKNOWN. Which events in the stream this applies to +* depends on the value of orderInterleaving. Which timestamp is used for ordering +* in an event with multiple timestamps depends on the value of orderingType. */ + +/* Events are fully sorted. */ +#define NVTXW3_STREAM_ORDERING_SKID_NONE 0 + +/* Events are partially sorted. 
The orderingSkidAmount field defines "skid" as +* a number of nanoseconds. For any two events A and B in the stream or scope +* (depending on interleaving level), where A is written into the stream before +* B, the tool must handle the case where B has a lower timestamp than A, but +* can assume B's timestamp cannot be more than the "skid" number of nanoseconds +* earlier than A's timestamp. Note that timestamp values in events cannot be +* assumed to be in units of nanoseconds, so this value cannot be added directly +* to timestamp values without conversion. */ +#define NVTXW3_STREAM_ORDERING_SKID_TIME_NS 1 + +/* Events are partially sorted. The orderingSkidAmount field defines "skid" as +* a number of events. Regarding only events in a stream or scope (depending on +* interleaving level), for any event A, the next "skid" number of events after +* A may have a lower timestamp than A (by any amount of time), but no events +* written after that can have a lower timestamp than A. */ + +/* Events are partially sorted. No event in the stream is written +* more than the given number of events before any event written +* previously in the stream. Note that +* timestamps in events may not be in units of nanoseconds. */ +#define NVTXW3_STREAM_ORDERING_SKID_EVENT_COUNT 2 + +/* Growable struct of arguments for StreamOpen */ +typedef struct nvtxwStreamAttributes_v1 +{ + /* Guaranteed to increase when new members are added at the end */ + size_t struct_size; + + /* Name of a stream, used for identification from other streams. + * Tools typically will not display stream names. No two streams + * in the same session may have the same name. */ + const char* name; + + /* Name of NVTX domain to use implicitly for all events written into + * this stream. Since registered IDs are required to be unique within + * a domain, all ID registration functions called on this stream must + * not register the same ID value to mean different things. 
Multiple + * streams may use the same domain by specifying the same value for + * this string, and the tool is expected to combine registrations from + * these streams into a single set of registrations for the domain. + * If two streams share a domain, and a registration is made in one + * stream, the registered ID may be used immediately afterwards in the + * other stream, provided the usage occurs on the same thread -- it is + * implementation-defined whether or not this is supported if the usage + * occurs on a different thread. Tools are expected to combine data + * from any domains registered with the same name, even between NVTXW + * and NVTX, when merging data acquired from both APIs. */ + const char* nvtxDomainName; + + /* The default scope for all events in the stream that don't specify + * any scope. See comments below for nvtxwEventScopeAttributes_t. + * Note that "nvtxwStream" without brackets may not be used as a node + * name here -- this field is defining what that node name will mean + * in scope registrations occurring later in this stream. However, + * "nvtxwStream[name]" referencing a different stream by its name + * (see above) to use its default scope is supported, as long as that + * stream was successfully opened (and may be already closed). */ + const char* eventScopePath; + + /* Information about event ordering inside the stream. See comments + * for #defines above. */ + int16_t orderInterleaving; /* NVTXW3_STREAM_ORDER_INTERLEAVING_* */ + int16_t orderingType; /* NVTXW3_STREAM_ORDERING_TYPE_* */ + int32_t orderingSkid; /* NVTXW3_STREAM_ORDERING_SKID_* */ + int64_t orderingSkidAmount; /* Numeric value, dependent on skid type */ +} nvtxwStreamAttributes_t; + +/* Growable struct of arguments for EventScopeRegister */ +typedef struct nvtxwEventScopeAttributes_v1 +{ + /* Guaranteed to increase when new members are added at the end */ + size_t struct_size; + + /* Path delimited by / characters, relative to hierarchy root. 
+ * Nodes in the path may use name[key] syntax to indicate an + * array of sibling nodes, which may be combined with other + * non-array nodes or different arrays at the same scope. + * Leading slashes are ignored. Node names should be ASCII + * printable characters, excluding the /, [, and ] characters, + * which have special meaning here. A set of reserved node + * names with special properties is given in the documentation + * for NVTX Deferred Events. "nvtxwStream" is a reserved node + * name that can be used as a path's root node, indicating the + * path is relative to the eventScopePath set for the stream + * in which the event scope is registered. "nvtxwStream[name]" + * refers to the eventScopePath of a stream in the session with + * matching name. Note that the NVTX domain is implicitly a + * child node of the scope, since multiple domains can assign + * events to the same scope, and tools should isolate events + * from separate domains. */ + const char* path; + + /* Static event scope ID must be provided, unique within the domain, + >= NVTX_EVENT_SCOPE_ID_STATIC_START, and + < NVTX_EVENT_SCOPE_ID_DYNAMIC_START */ + uint64_t scopeId; +} nvtxwEventScopeAttributes_t; + +/* nvtxwInterfaceCore_t is a growable struct of function pointers to +* the NVTX Writer (NVTXW) API. Breaking changes will not be made to +* this interface without also changing the interface ID passed to +* nvtxwGetInterface_t, e.g. NVTXW3_INTERFACE_ID_CORE_V1. Non-breaking +* are made by adding fields to the end of the struct, ensuring the +* value of 'struct_size' increases, so the presence of a member can +* be checked by comparing struct_size with that member's offset. */ +typedef struct nvtxwInterfaceCore_v1 +{ + /* Guaranteed to increase when new members are added at the end */ + size_t struct_size; + + /* Create a session, which represents a collection of trace data + * from one or more streams. Takes a growable struct of session + * attributes (see nvtxwSessionAttributes_t). 
*/ + nvtxwResultCode_t (*SessionBegin)( + nvtxwSessionHandle_t* session, + const nvtxwSessionAttributes_t* attr); + + /* Notify the implementation that all trace data for the session + * has been provided, and the session may be destroyed. Depending + * on configuration options, ending a session may trigger behavior + * like writing an output file or opening a data viewer. */ + nvtxwResultCode_t (*SessionEnd)( + nvtxwSessionHandle_t session); + + /* Create a stream within a session. A stream is the object events + * are written to. The NVTX domain and event scope are set when + * creating a stream, allowing individual events to avoid repeating + * these fields. Since ID values for schemas, registered strings, + * etc. are only unique within a domain, all registrations that + * assign an ID are done within a stream, since the domain is fixed + * inside a stream. Other stream properties set at creation time + * are a name string, and information about the way events in the + * stream are ordered. */ + nvtxwResultCode_t (*StreamOpen)( + nvtxwStreamHandle_t* stream, + nvtxwSessionHandle_t session, + const nvtxwStreamAttributes_t* attr); + + /* Destroy the stream object. This is not expected to trigger a + * reaction in the implementation that no more events are coming; + * only ending a session is intended to have that effect. */ + nvtxwResultCode_t (*StreamClose)( + nvtxwStreamHandle_t stream); + + /* Register a scope ID to represent a scope path, so the ID can be + * used in events or schemas to efficiently indicate a scope. + * Static event scope ID must be provided, unique within the domain, + * >= NVTX_EVENT_SCOPE_ID_STATIC_START, and + * < NVTX_EVENT_SCOPE_ID_DYNAMIC_START */ + nvtxwResultCode_t (*EventScopeRegister)( + nvtxwStreamHandle_t stream, + const nvtxwEventScopeAttributes_t* attr); + + /* Register a schema ID to represent a schema, which describes the + * binary layout of a payload. 
+ * Static schema ID must be provided, unique within the domain, + * >= NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START, and + * < NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START */ + nvtxwResultCode_t (*SchemaRegister)( + nvtxwStreamHandle_t stream, + const nvtxPayloadSchemaAttr_t* attr); + + /* Register a schema ID to represent an enum type, including the + * mapping between its values and their name strings. + * Static schema ID must be provided, unique within the domain, + >= NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START, and + < NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START */ + nvtxwResultCode_t (*EnumRegister)( + nvtxwStreamHandle_t stream, + const nvtxPayloadEnumAttr_t* attr); + + /* Write a batch of payloads into the stream representing one or more + * events. A logical event with multiple payloads cannot be broken up + * across multiple calls to EventWrite. The schema definitions for + * the payloads dictate how they are interpreted as events. */ + nvtxwResultCode_t (*EventWrite)( + nvtxwStreamHandle_t stream, + const nvtxPayloadData_t* payloads, + size_t payloadCount); + +} nvtxwInterfaceCore_t; + +#endif