Commit 7dc4e964 authored by wanghan's avatar wanghan
Browse files

Initial commit: RCCL auto-tuning project

parents
/*************************************************************************
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef NCCL_NVTX_H_
#define NCCL_NVTX_H_
#include "nvtx3/nvtx3.hpp"
// NVTX3_CONSTEXPR_IF_CPP14 expands to `constexpr` when the compiler reports
// C++14 relaxed constexpr support (__cpp_constexpr >= 201304L) and to nothing
// otherwise. A definition that already exists is kept as-is instead of being
// redefined to an empty expansion.
#if !defined(NVTX3_CONSTEXPR_IF_CPP14)
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
#define NVTX3_CONSTEXPR_IF_CPP14 constexpr
#else
#define NVTX3_CONSTEXPR_IF_CPP14
#endif
#endif
// Define all NCCL-provided static schema IDs here (avoid duplicates).
// Each value is an offset that gets added to
// NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START to form the actual schema ID.
#define NVTX_SID_CommInitRank 0
#define NVTX_SID_CommInitAll 1
#define NVTX_SID_CommDestroy 2 // same schema as NVTX_SID_CommInitRank
#define NVTX_SID_CommAbort 3 // same schema as NVTX_SID_CommInitRank
#define NVTX_SID_AllGather 4
#define NVTX_SID_AllReduce 5
#define NVTX_SID_Broadcast 6
#define NVTX_SID_ReduceScatter 7
#define NVTX_SID_Reduce 8
#define NVTX_SID_Send 9
#define NVTX_SID_Recv 10
// Define static schema ID for the reduction operation.
// Parenthesized so the sum stays one operand wherever the macro is expanded
// (e.g. next to `*`, `<<` or a cast).
#define NVTX_PAYLOAD_ENTRY_NCCL_REDOP (11 + NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START)
// Declaration only; the definition lives in another translation unit.
// NOTE(review): presumably the handle of the "NCCL" NVTX domain - confirm
// against the definition.
extern const nvtxDomainHandle_t ncclNvtxDomainHandle;
// Tag type passed as the domain parameter of the nvtx3 templates used in this
// header; it exposes the domain name through its static `name` member.
struct nccl_domain {
  static constexpr char const* name = "NCCL";
};
class payload_schema {
public:
explicit payload_schema(const nvtxPayloadSchemaEntry_t entries[], size_t numEntries, const uint64_t schemaId, const char* schemaName = nullptr) noexcept
{
schema_attr.name = schemaName;
schema_attr.entries = entries;
schema_attr.numEntries = numEntries;
schema_attr.schemaId = schemaId;
nvtxPayloadSchemaRegister(nvtx3::domain::get<nccl_domain>(), &schema_attr);
}
payload_schema() = delete;
~payload_schema() = default;
payload_schema(payload_schema const&) = default;
payload_schema& operator=(payload_schema const&) = default;
payload_schema(payload_schema&&) = default;
payload_schema& operator=(payload_schema&&) = default;
private:
nvtxPayloadSchemaAttr_t schema_attr{
NVTX_PAYLOAD_SCHEMA_ATTR_TYPE |
NVTX_PAYLOAD_SCHEMA_ATTR_ENTRIES |
NVTX_PAYLOAD_SCHEMA_ATTR_NUM_ENTRIES |
NVTX_PAYLOAD_SCHEMA_ATTR_STATIC_SIZE |
NVTX_PAYLOAD_SCHEMA_ATTR_SCHEMA_ID,
nullptr,
NVTX_PAYLOAD_SCHEMA_TYPE_STATIC,
NVTX_PAYLOAD_SCHEMA_FLAG_NONE,
nullptr, 0, 0, 0};
};
// Create NVTX push/pop range with parameters. The range is named after the
// enclosing function (`__func__`) and carries P as its payload.
// The macro expands to several declarations with fixed names (`schema`,
// `nvtx3_func_name__`, `nvtx3_bpl__`, `nvtx3_func_attr__`, `nvtx3_range__`), so
// it can be used at most once per scope; the range object lives until the end
// of that scope.
// @param ID name of the operation: selects `NVTX_SID_<ID>` as the schema ID
//           offset and is stringized to form the schema name
// @param S schema (entries); must be an array, its extent is the entry count
// @param P payload (struct); must be an lvalue, its address and size are
//          attached to the event
#define NVTX3_FUNC_WITH_PARAMS(ID, S, P) \
static const payload_schema schema{S, std::extent<decltype(S)>::value, \
NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START + NVTX_SID_##ID, #ID}; \
static ::nvtx3::v1::registered_string_in<nccl_domain> const nvtx3_func_name__{__func__}; \
nvtxPayloadData_t nvtx3_bpl__[] = { \
{NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START + NVTX_SID_##ID, sizeof(P), &(P)}}; \
::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__, nvtx3_bpl__}; \
::nvtx3::v1::scoped_range_in<nccl_domain> const nvtx3_range__{nvtx3_func_attr__};
// Declaration only; the definition lives in another translation unit.
// NOTE(review): presumably registers NCCL's enum payload schemas (such as the
// reduction-operation one) with NVTX - confirm against the definition.
extern void initNvtxRegisteredEnums();
#endif
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
/** \file nvToolsExt.h
*/
/* ========================================================================= */
/** \mainpage
* \tableofcontents
* \section INTRODUCTION Introduction
*
* The NVIDIA Tools Extension library is a set of functions that a
* developer can use to provide additional information to tools.
* The additional information is used by the tool to improve
* analysis and visualization of data.
*
* The library introduces close to zero overhead if no tool is
* attached to the application. The overhead when a tool is
* attached is specific to the tool.
*
* \section INITIALIZATION_SECTION Initialization
*
* Typically the tool's library that plugs into NVTX is indirectly
* loaded via environmental properties that are platform specific.
* For some platforms or special cases, the user may instead be required
* to initialize explicitly. This can also
* be helpful to control when the API loads a tool's library instead
* of what would typically be the first function call to emit info.
* For these rare cases, see \ref INITIALIZATION for additional information.
*
* \section MARKERS_AND_RANGES Markers and Ranges
*
* Markers and ranges are used to describe events at a specific time (markers)
* or over a time span (ranges) during the execution of the application
* respectively.
*
* \subsection MARKERS Markers
*
* Markers denote specific moments in time.
*
*
* See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
* how to specify the domain.
*
* \subsection THREAD_RANGES Thread Ranges
*
* Thread ranges denote nested time ranges. Nesting is maintained per thread
* per domain and does not require any additional correlation mechanism. The
* duration of a thread range is defined by the corresponding pair of
* nvtxRangePush* to nvtxRangePop API calls.
*
* See \ref DOMAINS and \ref EVENT_ATTRIBUTES for additional information on
* how to specify the domain.
*
* \subsection PROCESS_RANGES Process Ranges
*
* Process ranges denote a time span that can expose arbitrary concurrency, as
* opposed to thread ranges that only support nesting. In addition the range
* start event can happen on a different thread than the end marker. For the
* correlation of a start/end pair a unique correlation ID is used that is
* returned from the start API call and needs to be passed into the end API
* call.
*
* \subsection EVENT_ATTRIBUTES Event Attributes
*
* \ref MARKERS_AND_RANGES can be annotated with various attributes to provide
* additional information for an event or to guide the tool's visualization of
* the data. Each of the attributes is optional and if left unused the
* attributes fall back to a default value. The attributes include:
* - color
* - category
*
* To specify any attribute other than the text message, the \ref
* EVENT_ATTRIBUTE_STRUCTURE "Event Attribute Structure" must be used.
*
* \section DOMAINS Domains
*
* Domains enable developers to scope annotations. By default all events and
* annotations are in the default domain. Additional domains can be registered.
* This allows developers to scope markers, ranges, and resources names to
* avoid conflicts.
*
* The function ::nvtxDomainCreateA or ::nvtxDomainCreateW is used to create
* a named domain.
*
* Each domain maintains its own
* - categories
* - thread range stacks
* - registered strings
*
* The function ::nvtxDomainDestroy marks the end of the domain. Destroying
* a domain unregisters and destroys all objects associated with it such as
* registered strings, resource objects, named categories, and started ranges.
*
* \section RESOURCE_NAMING Resource Naming
*
* This section covers calls that allow to annotate objects with user-provided
* names in order to allow for a better analysis of complex trace data. All of
* the functions take the handle or the ID of the object to name and the name.
* The functions can be called multiple times during the execution of an
* application, however, in that case it is implementation dependent which
* name will be reported by the tool.
*
* \subsection CATEGORY_NAMING Category Naming
*
* Some functions in this library support associating an integer category
* to enable filtering and sorting. The category naming functions allow
* the application to associate a user friendly name with the integer
* category. Support for domains has been added in NVTX_VERSION_2 to
* avoid collisions when domains are developed independently.
*
* \subsection RESOURCE_OBJECTS Resource Objects
*
* Resource objects are a generic mechanism for attaching data to an application
* resource. The identifier field makes the association to a pointer or handle,
* while the type field helps provide deeper understanding of the identifier as
* well as enabling differentiation in cases where handles generated by different
* APIs may collide. The resource object may also have an associated message to
* associate with the application resource, enabling further annotation of this
* object and how it is used.
*
* The resource object was introduced in NVTX_VERSION_2 to supersede existing naming
* functions and allow the application resource identified by those functions to be
* associated to a domain. The other naming functions are still supported for backward
* compatibility but will be associated only to the default domain.
*
* \subsection RESOURCE_NAMING_OS Resource Naming
*
* Some operating system resource creation APIs do not support providing a user friendly
* name, such as some OS thread creation APIs. This API supports resource naming
* both through resource objects and functions following the pattern
* nvtxName[RESOURCE_TYPE][A|W](identifier, name). Resource objects introduced in NVTX_VERSION 2
* supersede the other functions with a more general method of assigning names to OS resources,
* along with associating them to domains too. The older nvtxName* functions are only associated
* with the default domain.
* \section EXTENSIONS Optional Extensions
* Optional extensions will either appear within the existing sections they extend or appear
* in the "Related Pages" when they introduce new concepts.
*/
/**
* Tools Extension API version
*/
#if defined(NVTX_VERSION) && NVTX_VERSION < 3
#error "Trying to #include NVTX version 3 in a source file where an older NVTX version has already been included. If you are not directly using NVTX (the NVIDIA Tools Extension library), you are getting this error because libraries you are using have included different versions of NVTX. Suggested solutions are: (1) reorder #includes so the newest NVTX version is included first, (2) avoid using the conflicting libraries in the same .c/.cpp file, or (3) update the library using the older NVTX version to use the newer version instead."
#endif
/* Header guard */
#if !defined(NVTX_VERSION)
#define NVTX_VERSION 3
#if defined(_MSC_VER)
#define NVTX_API __stdcall
#define NVTX_INLINE_STATIC __inline static
#else /*defined(__GNUC__)*/
#define NVTX_API
#define NVTX_INLINE_STATIC inline static
#endif /* Platform */
#if defined(NVTX_NO_IMPL)
/* When omitting implementation, avoid declaring functions inline */
/* without definitions, since this causes compiler warnings. */
#define NVTX_DECLSPEC
#elif defined(NVTX_EXPORT_API)
/* Allow overriding definition of NVTX_DECLSPEC when exporting API. */
/* Default is empty, meaning non-inline with external linkage. */
#if !defined(NVTX_DECLSPEC)
#define NVTX_DECLSPEC
#endif
#else
/* Normal NVTX usage defines the NVTX API inline with static */
/* (internal) linkage. */
#define NVTX_DECLSPEC NVTX_INLINE_STATIC
#endif
#include "nvtxDetail/nvtxLinkOnce.h"
/* NVTX_VERSIONED_IDENTIFIER(NAME) produces NAME_v<NVTX_VERSION>, e.g. NAME_v3. */
/* The _L2 level exists so that NVTX_VERSION is macro-expanded to its value */
/* before _L3 pastes the tokens together with ##. */
#define NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION) NAME##_v##VERSION
#define NVTX_VERSIONED_IDENTIFIER_L2(NAME, VERSION) NVTX_VERSIONED_IDENTIFIER_L3(NAME, VERSION)
#define NVTX_VERSIONED_IDENTIFIER(NAME) NVTX_VERSIONED_IDENTIFIER_L2(NAME, NVTX_VERSION)
/**
* The nvToolsExt library depends on stdint.h. If the build tool chain in use
* does not include stdint.h then define NVTX_STDINT_TYPES_ALREADY_DEFINED
* and define the following types:
* <ul>
* <li>uint8_t
* <li>int8_t
* <li>uint16_t
* <li>int16_t
* <li>uint32_t
* <li>int32_t
* <li>uint64_t
* <li>int64_t
* <li>uintptr_t
* <li>intptr_t
* </ul>
* #define NVTX_STDINT_TYPES_ALREADY_DEFINED if you are using your own header file.
*/
#ifndef NVTX_STDINT_TYPES_ALREADY_DEFINED
#include <stdint.h>
#endif
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/**
* Result Codes
*/
#define NVTX_SUCCESS 0
#define NVTX_FAIL 1
#define NVTX_ERR_INIT_LOAD_PROPERTY 2
#define NVTX_ERR_INIT_ACCESS_LIBRARY 3
#define NVTX_ERR_INIT_LOAD_LIBRARY 4
#define NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT 5
#define NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT 6
#define NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE 7
/**
* Size of the nvtxEventAttributes_t structure.
*/
#define NVTX_EVENT_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxEventAttributes_t) ) )
/* Negative sentinel value (-2). */
/* NOTE(review): presumably reported instead of a nesting level by the range */
/* push/pop functions when push/pop tracking is not available - confirm. */
#define NVTX_NO_PUSH_POP_TRACKING ((int)-2)
typedef uint64_t nvtxRangeId_t;
/* Forward declaration of opaque domain registration structure */
struct nvtxDomainRegistration_st;
typedef struct nvtxDomainRegistration_st nvtxDomainRegistration;
/** \brief Domain Handle Structure.
* \anchor DOMAIN_HANDLE_STRUCTURE
*
* This structure is opaque to the user and is used as a handle to reference
* a domain. This type is returned from tools when using the NVTX API to
* create a domain.
*
*/
typedef nvtxDomainRegistration* nvtxDomainHandle_t;
/* Forward declaration of opaque string registration structure */
struct nvtxStringRegistration_st;
typedef struct nvtxStringRegistration_st nvtxStringRegistration;
/** \brief Registered String Handle Structure.
* \anchor REGISTERED_STRING_HANDLE_STRUCTURE
*
* This structure is opaque to the user and is used as a handle to reference
* a registered string. This type is returned from tools when using the NVTX
* API to create a registered string.
*
*/
typedef nvtxStringRegistration* nvtxStringHandle_t;
/* ========================================================================= */
/** \defgroup GENERAL General
* @{
*/
/** ---------------------------------------------------------------------------
* Color Types
* ------------------------------------------------------------------------- */
typedef enum nvtxColorType_t
{
NVTX_COLOR_UNKNOWN = 0, /**< Color attribute is unused. */
NVTX_COLOR_ARGB = 1 /**< An ARGB color is provided. */
} nvtxColorType_t;
/** ---------------------------------------------------------------------------
* Message Types
* ------------------------------------------------------------------------- */
typedef enum nvtxMessageType_t
{
NVTX_MESSAGE_UNKNOWN = 0, /**< Message payload is unused. */
NVTX_MESSAGE_TYPE_ASCII = 1, /**< A character sequence is used as payload. */
NVTX_MESSAGE_TYPE_UNICODE = 2, /**< A wide character sequence is used as payload. */
/* NVTX_VERSION_2 */
NVTX_MESSAGE_TYPE_REGISTERED = 3, /**< A unique string handle that was registered
with \ref nvtxDomainRegisterStringA() or
\ref nvtxDomainRegisterStringW(). */
} nvtxMessageType_t;
/** \brief Message value of an event.
*
* Which member is valid is indicated by the accompanying ::nvtxMessageType_t
* value (ASCII, UNICODE or REGISTERED).
*/
typedef union nvtxMessageValue_t
{
const char* ascii;
const wchar_t* unicode;
/* NVTX_VERSION_2 */
nvtxStringHandle_t registered;
} nvtxMessageValue_t;
/** @} */ /*END defgroup*/
/* ------------------------------------------------------------------------- */
/** \brief Force initialization (optional)
*
* Force NVTX library to initialize. The first call to any NVTX API function
* will automatically initialize the entire API. This can make the first call
* much slower than subsequent calls. In applications where the first call to
* NVTX may be in a performance-critical section, calling nvtxInitialize before
* any performance-critical sections will ensure NVTX initialization occurs at
* an acceptable time. Since nvtxInitialize takes no parameters and has no
* expected behavior besides initialization, it is convenient to add a call to
* nvtxInitialize in NVTX-instrumented applications that need to force earlier
* initialization without changing any other code. For example, if an app's
* first NVTX call is nvtxDomainCreate, and it is difficult to move that call
* earlier because the domain handle must be stored in an object only created
* at that point, adding a call to nvtxInitialize at the top of main() will
* ensure the later call to nvtxDomainCreate is as fast as possible.
*
* \version \NVTX_VERSION_3
*
* \param reserved - must be zero or NULL.
*
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved);
/** @} */
/** @} */ /*END defgroup*/
/* ========================================================================= */
/** \defgroup EVENT_ATTRIBUTES Event Attributes
* @{
*/
/** ---------------------------------------------------------------------------
* Payload Types
* ------------------------------------------------------------------------- */
typedef enum nvtxPayloadType_t
{
NVTX_PAYLOAD_UNKNOWN = 0, /**< Payload is unused. */
NVTX_PAYLOAD_TYPE_UNSIGNED_INT64 = 1, /**< A 64 bit unsigned integer value is used as payload. */
NVTX_PAYLOAD_TYPE_INT64 = 2, /**< A 64 bit signed integer value is used as payload. */
NVTX_PAYLOAD_TYPE_DOUBLE = 3, /**< A 64 bit floating point value is used as payload. */
/* NVTX_VERSION_2 */
NVTX_PAYLOAD_TYPE_UNSIGNED_INT32 = 4, /**< A 32 bit unsigned integer value is used as payload. */
NVTX_PAYLOAD_TYPE_INT32 = 5, /**< A 32 bit signed integer value is used as payload. */
NVTX_PAYLOAD_TYPE_FLOAT = 6 /**< A 32 bit floating point value is used as payload. */
} nvtxPayloadType_t;
/** \brief Event Attribute Structure.
* \anchor EVENT_ATTRIBUTE_STRUCTURE
*
* This structure is used to describe the attributes of an event. The layout of
* the structure is defined by a specific version of the tools extension
* library and can change between different versions of the Tools Extension
* library.
*
* \par Initializing the Attributes
*
* The caller should always perform the following three tasks when using
* attributes:
* <ul>
* <li>Zero the structure
* <li>Set the version field
* <li>Set the size field
* </ul>
*
* Zeroing the structure sets all the event attributes types and values
* to the default value.
*
* The version and size field are used by the Tools Extension
* implementation to handle multiple versions of the attributes structure.
*
* It is recommended that the caller use one of the following two methods
* to initialize the event attributes structure:
*
* \par Method 1: Initializing nvtxEventAttributes for future compatibility
* \code
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* \endcode
*
* \par Method 2: Initializing nvtxEventAttributes for a specific version
* \code
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = 1;
* eventAttrib.size = (uint16_t)(sizeof(nvtxEventAttributes_v1));
* \endcode
*
* If the caller uses Method 1 it is critical that the entire binary
* layout of the structure be configured to 0 so that all fields
* are initialized to the default value.
*
* The caller should either use both NVTX_VERSION and
* NVTX_EVENT_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
* and a versioned type (Method 2). Using a mix of the two methods
* will likely cause either source level incompatibility or binary
* incompatibility in the future.
*
* \par Settings Attribute Types and Values
*
*
* \par Example:
* \code
* // Initialize
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
*
* // Configure the Attributes
* eventAttrib.colorType = NVTX_COLOR_ARGB;
* eventAttrib.color = 0xFF880000;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "Example";
* \endcode
*
* In the example the caller does not have to set the value of
* \ref ::nvtxEventAttributes_v2::category or
* \ref ::nvtxEventAttributes_v2::payload as these fields were set to
* the default value by {0}.
* \sa
* ::nvtxDomainMarkEx
* ::nvtxDomainRangeStartEx
* ::nvtxDomainRangePushEx
*/
typedef struct nvtxEventAttributes_v2
{
/**
* \brief Version flag of the structure.
*
* Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
* supported in this header file. This can optionally be overridden to
* another version of the tools extension library.
*/
uint16_t version;
/**
* \brief Size of the structure.
*
* Needs to be set to the size in bytes of the event attribute
* structure used to specify the event.
*/
uint16_t size;
/**
* \brief ID of the category the event is assigned to.
*
* A category is a user-controlled ID that can be used to group
* events. The tool may use category IDs to improve filtering or
* enable grouping of events in the same category. The functions
* \ref ::nvtxNameCategoryA or \ref ::nvtxNameCategoryW can be used
* to name a category.
*
* Default Value is 0
*/
uint32_t category;
/** \brief Color type specified in this attribute structure.
*
* Defines the color format of the attribute structure's \ref COLOR_FIELD
* "color" field.
*
* Default Value is NVTX_COLOR_UNKNOWN
*/
int32_t colorType; /* nvtxColorType_t */
/** \brief Color assigned to this event. \anchor COLOR_FIELD
*
* The color that the tool should use to visualize the event.
*/
uint32_t color;
/**
* \brief Payload type specified in this attribute structure.
*
* Defines the payload format of the attribute structure's \ref PAYLOAD_FIELD
* "payload" field.
*
* Default Value is NVTX_PAYLOAD_UNKNOWN
*/
int32_t payloadType; /* nvtxPayloadType_t */
/**
* \brief Reserved field.
*
* NOTE(review): presumably padding that keeps the 64 bit payload union
* 8-byte aligned; leave it zero, as the recommended {0} initialization
* does - confirm.
*/
int32_t reserved0;
/**
* \brief Payload assigned to this event. \anchor PAYLOAD_FIELD
*
* A numerical value that can be used to annotate an event. The tool could
* use the payload data to reconstruct graphs and diagrams.
*/
union payload_t
{
uint64_t ullValue;
int64_t llValue;
double dValue;
/* NVTX_VERSION_2 */
uint32_t uiValue;
int32_t iValue;
float fValue;
} payload;
/** \brief Message type specified in this attribute structure.
*
* Defines the message format of the attribute structure's \ref MESSAGE_FIELD
* "message" field.
*
* Default Value is NVTX_MESSAGE_UNKNOWN
*/
int32_t messageType; /* nvtxMessageType_t */
/** \brief Message assigned to this attribute structure. \anchor MESSAGE_FIELD
*
* The text message that is attached to an event.
*/
nvtxMessageValue_t message;
} nvtxEventAttributes_v2;
/** Unversioned alias for the event attribute structure defined above. */
typedef struct nvtxEventAttributes_v2 nvtxEventAttributes_t;
/** @} */ /*END defgroup*/
/* ========================================================================= */
/** \defgroup MARKERS_AND_RANGES Markers and Ranges
*
* See \ref MARKERS_AND_RANGES for more details
*
* @{
*/
/** \name Marker */
/* ------------------------------------------------------------------------- */
/** \brief Marks an instantaneous event in the application.
*
* A marker can contain a text message or specify additional information
* using the event attributes structure. These attributes include a text
* message, color, category, and a payload. Each of the attributes is optional
* and can only be sent out using the \ref nvtxDomainMarkEx function.
*
* nvtxDomainMarkEx(NULL, event) is equivalent to calling
* nvtxMarkEx(event).
*
* \param domain - The domain of scoping the category.
* \param eventAttrib - The event attribute structure defining the marker's
* attribute types and attribute values.
*
* \sa
* ::nvtxMarkEx
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Marks an instantaneous event in the application.
*
* A marker can contain a text message or specify additional information
* using the event attributes structure. These attributes include a text
* message, color, category, and a payload. Each of the attributes is optional
* and can only be sent out using the \ref nvtxMarkEx function.
* If \ref nvtxMarkA or \ref nvtxMarkW are used to specify the marker
* or if an attribute is unspecified then a default value will be used.
*
* \param eventAttrib - The event attribute structure defining the marker's
* attribute types and attribute values.
*
* \par Example:
* \code
* // zero the structure
* nvtxEventAttributes_t eventAttrib = {0};
* // set the version and the size information
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* // configure the attributes. 0 is the default for all attributes.
* eventAttrib.colorType = NVTX_COLOR_ARGB;
* eventAttrib.color = 0xFF880000;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "Example nvtxMarkEx";
* nvtxMarkEx(&eventAttrib);
* \endcode
*
* \sa
* ::nvtxDomainMarkEx
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Marks an instantaneous event in the application.
*
* A marker created using \ref nvtxMarkA or \ref nvtxMarkW contains only a
* text message.
*
* \param message - The message associated to this marker event.
*
* \par Example:
* \code
* nvtxMarkA("Example nvtxMarkA");
* nvtxMarkW(L"Example nvtxMarkW");
* \endcode
*
* \sa
* ::nvtxDomainMarkEx
* ::nvtxMarkEx
*
* \version \NVTX_VERSION_0
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message);
NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message);
/** @} */
/** \name Process Ranges */
/* ------------------------------------------------------------------------- */
/** \brief Starts a process range in a domain.
*
* \param domain - The domain of scoping the category.
* \param eventAttrib - The event attribute structure defining the range's
* attribute types and attribute values.
*
* \return The unique ID used to correlate a pair of Start and End events.
*
* \remarks Ranges defined by Start/End can overlap.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "my range";
* nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
* // ...
* nvtxDomainRangeEnd(domain, rangeId);
* \endcode
*
* \sa
* ::nvtxDomainRangeEnd
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Starts a process range.
*
* \param eventAttrib - The event attribute structure defining the range's
* attribute types and attribute values.
*
* \return The unique ID used to correlate a pair of Start and End events.
*
* \remarks Ranges defined by Start/End can overlap.
*
* \par Example:
* \code
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib.category = 3;
* eventAttrib.colorType = NVTX_COLOR_ARGB;
* eventAttrib.color = 0xFF0088FF;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "Example Range";
* nvtxRangeId_t rangeId = nvtxRangeStartEx(&eventAttrib);
* // ...
* nvtxRangeEnd(rangeId);
* \endcode
*
* \sa
* ::nvtxRangeEnd
* ::nvtxDomainRangeStartEx
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Starts a process range.
*
* \param message - The event message associated to this range event.
*
* \return The unique ID used to correlate a pair of Start and End events.
*
* \remarks Ranges defined by Start/End can overlap.
*
* \par Example:
* \code
* nvtxRangeId_t r1 = nvtxRangeStartA("Range 1");
* nvtxRangeId_t r2 = nvtxRangeStartW(L"Range 2");
* nvtxRangeEnd(r1);
* nvtxRangeEnd(r2);
* \endcode
*
* \sa
* ::nvtxRangeEnd
* ::nvtxRangeStartEx
* ::nvtxDomainRangeStartEx
*
* \version \NVTX_VERSION_0
* @{ */
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message);
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Ends a process range.
*
* \param domain - The domain
* \param id - The correlation ID returned from a nvtxRangeStart call.
*
* \remarks This function is offered for completeness but is an alias for ::nvtxRangeEnd.
* It does not need a domain param since that is associated with the range ID at ::nvtxDomainRangeStartEx
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("my domain");
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "my range";
* nvtxRangeId_t rangeId = nvtxDomainRangeStartEx(domain, &eventAttrib);
* // ...
* nvtxDomainRangeEnd(domain, rangeId);
* \endcode
*
* \sa
* ::nvtxDomainRangeStartEx
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id);
/* ------------------------------------------------------------------------- */
/** \brief Ends a process range.
*
* \param id - The correlation ID returned from an nvtxRangeStart call.
*
* \sa
* ::nvtxDomainRangeStartEx
* ::nvtxRangeStartEx
* ::nvtxRangeStartA
* ::nvtxRangeStartW
*
* \version \NVTX_VERSION_0
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id);
/** @} */
/** \name Thread Ranges */
/* ------------------------------------------------------------------------- */
/** \brief Starts a nested thread range.
*
* \param domain - The domain of scoping.
* \param eventAttrib - The event attribute structure defining the range's
* attribute types and attribute values.
*
* \return The 0 based level of range being started. This value is scoped to the domain.
* If an error occurs, a negative value is returned.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib.colorType = NVTX_COLOR_ARGB;
* eventAttrib.color = 0xFFFF0000;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "Level 0";
* nvtxDomainRangePushEx(domain, &eventAttrib);
*
* // Re-use eventAttrib
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
* eventAttrib.message.unicode = L"Level 1";
* nvtxDomainRangePushEx(domain, &eventAttrib);
*
* nvtxDomainRangePop(domain); //level 1
* nvtxDomainRangePop(domain); //level 0
* \endcode
*
* \sa
* ::nvtxDomainRangePop
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Starts a nested thread range.
*
* \param eventAttrib - The event attribute structure defining the range's
* attribute types and attribute values.
*
* \return The 0 based level of range being started. This level is per domain.
* If an error occurs a negative value is returned.
*
* \par Example:
* \code
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib.colorType = NVTX_COLOR_ARGB;
* eventAttrib.color = 0xFFFF0000;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "Level 0";
* nvtxRangePushEx(&eventAttrib);
*
* // Re-use eventAttrib
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_UNICODE;
* eventAttrib.message.unicode = L"Level 1";
* nvtxRangePushEx(&eventAttrib);
*
* nvtxRangePop();
* nvtxRangePop();
* \endcode
*
* \sa
* ::nvtxDomainRangePushEx
* ::nvtxRangePop
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Starts a nested thread range.
*
* \param message - The event message associated to this range event.
*
* \return The 0 based level of range being started. If an error occurs a
* negative value is returned.
*
* \par Example:
* \code
* nvtxRangePushA("Level 0");
* nvtxRangePushW(L"Level 1");
* nvtxRangePop();
* nvtxRangePop();
* \endcode
*
* \sa
* ::nvtxDomainRangePushEx
* ::nvtxRangePop
*
* \version \NVTX_VERSION_0
* @{ */
NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message);
NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Ends a nested thread range.
*
* \param domain - The domain of scoping.
*
* \return The level of the range being ended. If an error occurs a negative
* value is returned on the current thread.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("example library");
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib.message.ascii = "Level 0";
* nvtxDomainRangePushEx(domain, &eventAttrib);
* eventAttrib.message.ascii = "Level 1";
* nvtxDomainRangePushEx(domain, &eventAttrib);
* nvtxDomainRangePop(domain); //level 1
* nvtxDomainRangePop(domain); //level 0
* \endcode
*
* \sa
* ::nvtxDomainRangePushEx
* ::nvtxRangePushEx
* ::nvtxRangePushA
* ::nvtxRangePushW
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Ends a nested thread range.
*
* \return The level of the range being ended. If an error occurs a negative
* value is returned on the current thread.
*
* \par Example:
* \code
* nvtxRangePushA("Level 0");
* nvtxRangePushW(L"Level 1");
* nvtxRangePop();
* nvtxRangePop();
* \endcode
*
* \sa
* ::nvtxRangePushEx
* ::nvtxRangePushA
* ::nvtxRangePushW
*
* \version \NVTX_VERSION_0
* @{ */
NVTX_DECLSPEC int NVTX_API nvtxRangePop(void);
/** @} */
/** @} */ /*END defgroup*/
/* ========================================================================= */
/** \defgroup RESOURCE_NAMING Resource Naming
*
* See \ref RESOURCE_NAMING for more details
*
* @{
*/
/* ------------------------------------------------------------------------- */
/** \name Functions for Generic Resource Naming*/
/* ------------------------------------------------------------------------- */
/* ------------------------------------------------------------------------- */
/** \cond SHOW_HIDDEN
* \brief Resource typing helpers.
*
* Classes are used to make it easy to create a series of resource types
* per API without collisions.
*
* NVTX_RESOURCE_MAKE_TYPE(CLASS, INDEX) pastes CLASS onto NVTX_RESOURCE_CLASS_
* to obtain the class ID, shifts that ID left by 16 bits and ORs in INDEX.
* INDEX is not masked, so it has to fit in the low 16 bits to leave the
* class bits intact.
*/
#define NVTX_RESOURCE_MAKE_TYPE(CLASS, INDEX) ((((uint32_t)(NVTX_RESOURCE_CLASS_ ## CLASS))<<16)|((uint32_t)(INDEX)))
/* Class ID used by the generic resource types (nvtxResourceGenericType_t). */
#define NVTX_RESOURCE_CLASS_GENERIC 1
/** \endcond */
/* ------------------------------------------------------------------------- */
/** \brief Generic resource type for when a resource class is not available.
*
* \sa
* ::nvtxDomainResourceCreate
*
* \version \NVTX_VERSION_2
*/
typedef enum nvtxResourceGenericType_t
{
NVTX_RESOURCE_TYPE_UNKNOWN = 0, /**< No identifier type set; the documented default of the resource attributes' identifierType field. */
NVTX_RESOURCE_TYPE_GENERIC_POINTER = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 1), /**< Generic pointer assumed to have no collisions with other pointers. */
NVTX_RESOURCE_TYPE_GENERIC_HANDLE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 2), /**< Generic handle assumed to have no collisions with other handles. */
NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 3), /**< OS native thread identifier. */
NVTX_RESOURCE_TYPE_GENERIC_THREAD_POSIX = NVTX_RESOURCE_MAKE_TYPE(GENERIC, 4) /**< POSIX pthread identifier. */
} nvtxResourceGenericType_t;
/** \brief Resource Attribute Structure.
* \anchor RESOURCE_ATTRIBUTE_STRUCTURE
*
* This structure is used to describe the attributes of a resource. The layout of
* the structure is defined by a specific version of the tools extension
* library and can change between different versions of the Tools Extension
* library.
*
* \par Initializing the Attributes
*
* The caller should always perform the following three tasks when using
* attributes:
* <ul>
* <li>Zero the structure
* <li>Set the version field
* <li>Set the size field
* </ul>
*
* Zeroing the structure sets all the resource attributes types and values
* to the default value.
*
* The version and size field are used by the Tools Extension
* implementation to handle multiple versions of the attributes structure.
*
* It is recommended that the caller use one of the following two methods
* to initialize the resource attributes structure:
*
* \par Method 1: Initializing nvtxResourceAttributes_t for future compatibility
* \code
* nvtxResourceAttributes_t attribs = {0};
* attribs.version = NVTX_VERSION;
* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
* \endcode
*
* \par Method 2: Initializing nvtxResourceAttributes_v0 for a specific version
* \code
* nvtxResourceAttributes_v0 attribs = {0};
* attribs.version = 2;
* attribs.size = (uint16_t)(sizeof(nvtxResourceAttributes_v0));
* \endcode
*
* If the caller uses Method 1 it is critical that the entire binary
* layout of the structure be configured to 0 so that all fields
* are initialized to the default value.
*
* The caller should either use both NVTX_VERSION and
* NVTX_RESOURCE_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
* and a versioned type (Method 2). Using a mix of the two methods
* will likely cause either source level incompatibility or binary
* incompatibility in the future.
*
* \par Settings Attribute Types and Values
*
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
*
* // Initialize
* nvtxResourceAttributes_t attribs = {0};
* attribs.version = NVTX_VERSION;
* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
*
* // Configure the Attributes
* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
* attribs.identifier.pValue = (const void*)pMutex;
* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
* attribs.message.ascii = "Single thread access to database.";
*
* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
* \endcode
*
* \sa
* ::nvtxDomainResourceCreate
*/
typedef struct nvtxResourceAttributes_v0
{
/**
* \brief Version flag of the structure.
*
* Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
* supported in this header file. This can optionally be overridden to
* another version of the tools extension library.
*/
uint16_t version;
/**
* \brief Size of the structure.
*
* Needs to be set to the size in bytes of this attribute
* structure (NVTX_RESOURCE_ATTRIB_STRUCT_SIZE for the layout declared here).
*/
uint16_t size;
/**
* \brief Identifier type specifies how to interpret the identifier field
*
* Defines the identifier format of the attribute structure's \ref RESOURCE_IDENTIFIER_FIELD
* "identifier" field.
*
* Default Value is NVTX_RESOURCE_TYPE_UNKNOWN
*/
int32_t identifierType; /* values from enums following the pattern nvtxResource[name]Type_t */
/**
* \brief Identifier for the resource.
* \anchor RESOURCE_IDENTIFIER_FIELD
*
* An identifier may be a pointer or a handle to an OS or middleware API object.
* The resource type will assist in avoiding collisions where handle values may collide.
*/
union identifier_t
{
const void* pValue; /**< Identifier given as a pointer. */
uint64_t ullValue; /**< Identifier given as a 64-bit unsigned integer. */
} identifier;
/** \brief Message type specified in this attribute structure.
*
* Defines the message format of the attribute structure's \ref RESOURCE_MESSAGE_FIELD
* "message" field.
*
* Default Value is NVTX_MESSAGE_UNKNOWN
*/
int32_t messageType; /* nvtxMessageType_t */
/** \brief Message assigned to this attribute structure. \anchor RESOURCE_MESSAGE_FIELD
*
* The text message that is attached to a resource.
*/
nvtxMessageValue_t message;
} nvtxResourceAttributes_v0;
/** \brief Unversioned name for the resource attributes structure; currently the v0 layout. */
typedef struct nvtxResourceAttributes_v0 nvtxResourceAttributes_t;
/* \cond SHOW_HIDDEN
* \version \NVTX_VERSION_2
*/
/* Size in bytes of nvtxResourceAttributes_v0, cast to uint16_t; the documented value for the structure's size field. */
#define NVTX_RESOURCE_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxResourceAttributes_v0) ) )
/* Handle type for resource objects: a pointer to struct nvtxResourceHandle, which is not defined in this part of the header. */
typedef struct nvtxResourceHandle* nvtxResourceHandle_t;
/** \endcond */
/* ------------------------------------------------------------------------- */
/** \brief Create a resource object to track and associate data with OS and middleware objects
*
* Allows users to associate an API handle or pointer with a user-provided name.
*
*
* \param domain - Domain to own the resource object
* \param attribs - Attributes to be associated with the resource
*
* \return A handle that represents the newly created resource object.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
* nvtxResourceAttributes_t attribs = {0};
* attribs.version = NVTX_VERSION;
* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
* attribs.identifier.pValue = (const void*)pMutex;
* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
* attribs.message.ascii = "Single thread access to database.";
* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
* \endcode
*
* \sa
* ::nvtxResourceAttributes_t
* ::nvtxDomainResourceDestroy
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Destroy a resource object created with ::nvtxDomainResourceCreate
*
* Destroys the resource object that associated an API handle or pointer with a user-provided name.
*
* \param resource - Handle to the resource in which to operate.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("example domain");
* nvtxResourceAttributes_t attribs = {0};
* attribs.version = NVTX_VERSION;
* attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE;
* attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_POINTER;
* attribs.identifier.pValue = (const void*)pMutex;
* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
* attribs.message.ascii = "Single thread access to database.";
* nvtxResourceHandle_t handle = nvtxDomainResourceCreate(domain, &attribs);
* nvtxDomainResourceDestroy(handle);
* \endcode
*
* \sa
* ::nvtxDomainResourceCreate
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource);
/** @} */
/** \name Functions for NVTX Category Naming*/
/* ------------------------------------------------------------------------- */
/**
* \brief Annotate an NVTX category used within a domain.
*
* Categories are used to group sets of events. Each category is identified
* through a unique ID and that ID is passed into any of the marker/range
* events to assign that event to a specific category. The nvtxDomainNameCategory
* function calls allow the user to assign a name to a category ID that is
* specific to the domain.
*
* nvtxDomainNameCategory(NULL, category, name) is equivalent to calling
* nvtxNameCategory(category, name).
*
* \param domain - The domain of scoping the category.
* \param category - The category ID to name.
* \param name - The name of the category.
*
* \remarks The category names are tracked per domain.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("example");
* nvtxDomainNameCategoryA(domain, 1, "Memory Allocation");
* nvtxDomainNameCategoryW(domain, 2, L"Memory Transfer");
* \endcode
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
/** @} */
/** \brief Annotate an NVTX category.
*
* Categories are used to group sets of events. Each category is identified
* through a unique ID and that ID is passed into any of the marker/range
* events to assign that event to a specific category. The nvtxNameCategory
* function calls allow the user to assign a name to a category ID.
*
* \param category - The category ID to name.
* \param name - The name of the category.
*
* \remarks The category names are tracked per process.
*
* \par Example:
* \code
* nvtxNameCategory(1, "Memory Allocation");
* nvtxNameCategory(2, "Memory Transfer");
* nvtxNameCategory(3, "Memory Object Lifetime");
* \endcode
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name);
/** @} */
/** \name Functions for OS Threads Naming*/
/* ------------------------------------------------------------------------- */
/** \brief Annotate an OS thread.
*
* Allows the user to name an active thread of the current process. If an
* invalid thread ID is provided or a thread ID from a different process is
* used the behavior of the tool is implementation dependent.
*
* Tools expect thread ID to be a number that uniquely identifies the thread
* at the time of the call. Note that a thread's ID can be reused after
* it is destroyed. Tools may choose how to handle aliasing of thread IDs.
*
* POSIX pthread_t type returned by pthread_self() may not comply with these
* expectations. Please use OS-specific thread ID instead of pthread_t.
*
* The thread name is associated to the default domain. To support domains
* use resource objects via ::nvtxDomainResourceCreate.
*
* \param threadId - The ID of the thread to name.
* \param name - The name of the thread.
*
* \par Examples:
* MS Windows:
* \code
* #include <windows.h>
* nvtxNameOsThread(GetCurrentThreadId(), "Current thread");
* nvtxNameOsThread(GetThreadId(SomeThreadHandle), "Other thread");
* \endcode
*
* Android:
* \code
* #include <unistd.h>
* nvtxNameOsThreadA(gettid(), "Current thread");
* nvtxNameOsThreadA(getpid(), "Main thread");
* \endcode
*
* Linux:
* \code
* #include <sys/syscall.h>
* nvtxNameOsThreadA(syscall(SYS_gettid), "Current thread");
* \endcode
* \code
* #include <unistd.h>
* nvtxNameOsThreadA(getpid(), "Main thread");
* \endcode
*
* OS X:
* \code
* #include <sys/syscall.h>
* nvtxNameOsThreadA(syscall(SYS_thread_selfid), "Current thread");
* \endcode
* \code
* #include <pthread.h>
* __uint64_t id;
* pthread_threadid_np(pthread_self(), &id);
* nvtxNameOsThreadA(id, "Current thread");
* pthread_threadid_np(somePThreadId, &id);
* nvtxNameOsThreadA(id, "Other thread");
* \endcode
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name);
/** @} */
/** @} */ /*END defgroup*/
/* ========================================================================= */
/** \defgroup STRING_REGISTRATION String Registration
*
* Registered strings are intended to increase performance by lowering instrumentation
* overhead. A string may be registered once and the handle may be passed in place of
* a string where the APIs allow.
*
* See \ref STRING_REGISTRATION for more details
*
* @{
*/
/* ------------------------------------------------------------------------- */
/** \brief Register a string.
* Registers an immutable string with NVTX. Once registered, the handle returned
* by this function can be used in the nvtxEventAttributes_t
* \ref MESSAGE_FIELD. This allows the NVTX implementation to skip copying the
* contents of the message on each event invocation.
*
* String registration is an optimization. It is recommended to use string
* registration if the string will be passed to an event many times.
*
* Strings are not unregistered, except by unregistering the entire domain.
*
* \param domain - Domain handle. If NULL then the global domain is used.
* \param string - A unique pointer to a sequence of characters.
*
* \return A handle representing the registered string.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
* nvtxStringHandle_t message = nvtxDomainRegisterStringA(domain, "registered string");
* nvtxEventAttributes_t eventAttrib = {0};
* eventAttrib.version = NVTX_VERSION;
* eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
* eventAttrib.message.registered = message;
* \endcode
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string);
NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string);
/** @} */
/** @} */ /*END defgroup*/
/* ========================================================================= */
/** \defgroup DOMAINS Domains
*
* Domains are used to group events to a developer defined scope. Middleware
* vendors may also scope their own events to avoid collisions with
* the application developer's events, so that the application developer may
* inspect both parts and easily differentiate or filter them. By default
* all events are scoped to a global domain where NULL is provided or when
* using APIs provided by versions of NVTX below v2.
*
* Domains are intended to be typically long lived objects with the intention
* of logically separating events of large modules from each other such as
* middleware libraries from each other and the main application.
*
* See \ref DOMAINS for more details
*
* @{
*/
/* ------------------------------------------------------------------------- */
/** \brief Register a NVTX domain.
*
* Domains are used to scope annotations. All NVTX_VERSION_0 and NVTX_VERSION_1
* annotations are scoped to the global domain. The function nvtxDomainCreate
* creates a new named domain.
*
* Each domain maintains its own nvtxRangePush and nvtxRangePop stack.
*
* \param name - A unique string representing the domain.
*
* \return A handle representing the domain.
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
*
* nvtxMarkA("nvtxMarkA to global domain");
*
* nvtxEventAttributes_t eventAttrib1 = {0};
* eventAttrib1.version = NVTX_VERSION;
* eventAttrib1.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib1.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib1.message.ascii = "nvtxDomainMarkEx to global domain";
* nvtxDomainMarkEx(NULL, &eventAttrib1);
*
* nvtxEventAttributes_t eventAttrib2 = {0};
* eventAttrib2.version = NVTX_VERSION;
* eventAttrib2.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
* eventAttrib2.messageType = NVTX_MESSAGE_TYPE_ASCII;
* eventAttrib2.message.ascii = "nvtxDomainMarkEx to com.nvidia.nvtx.example";
* nvtxDomainMarkEx(domain, &eventAttrib2);
* nvtxDomainDestroy(domain);
* \endcode
*
* \sa
* ::nvtxDomainDestroy
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* name);
NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Unregister a NVTX domain.
*
* Unregisters the domain handle and frees all domain specific resources.
*
* \param domain - the domain handle
*
* \par Example:
* \code
* nvtxDomainHandle_t domain = nvtxDomainCreateA("com.nvidia.nvtx.example");
* nvtxDomainDestroy(domain);
* \endcode
*
* \sa
* ::nvtxDomainCreateA
* ::nvtxDomainCreateW
*
* \version \NVTX_VERSION_2
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain);
/** @} */
/** @} */ /*END defgroup*/
/* ========================================================================= */
/** \cond SHOW_HIDDEN */
/* Character-set neutral names: with UNICODE defined every alias expands to the
 * wide-character (W) function, otherwise to the char (A) function. */
#if defined(UNICODE)
#  define nvtxMark                 nvtxMarkW
#  define nvtxRangeStart           nvtxRangeStartW
#  define nvtxRangePush            nvtxRangePushW
#  define nvtxNameCategory         nvtxNameCategoryW
#  define nvtxNameOsThread         nvtxNameOsThreadW
   /* Aliases for the NVTX_VERSION_2 domain functions */
#  define nvtxDomainCreate         nvtxDomainCreateW
#  define nvtxDomainRegisterString nvtxDomainRegisterStringW
#  define nvtxDomainNameCategory   nvtxDomainNameCategoryW
#else /* !defined(UNICODE) */
#  define nvtxMark                 nvtxMarkA
#  define nvtxRangeStart           nvtxRangeStartA
#  define nvtxRangePush            nvtxRangePushA
#  define nvtxNameCategory         nvtxNameCategoryA
#  define nvtxNameOsThread         nvtxNameOsThreadA
   /* Aliases for the NVTX_VERSION_2 domain functions */
#  define nvtxDomainCreate         nvtxDomainCreateA
#  define nvtxDomainRegisterString nvtxDomainRegisterStringA
#  define nvtxDomainNameCategory   nvtxDomainNameCategoryA
#endif /* defined(UNICODE) */
/** \endcond */
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/* NVTX_IMPL_GUARD is defined only for the duration of the detail includes below;
 * nvtxImpl.h is skipped when NVTX_NO_IMPL is defined. */
#define NVTX_IMPL_GUARD /* Ensure other headers cannot be included directly */
#include "nvtxDetail/nvtxTypes.h"
#ifndef NVTX_NO_IMPL
#include "nvtxDetail/nvtxImpl.h"
#endif /*NVTX_NO_IMPL*/
#undef NVTX_IMPL_GUARD
#endif /* !defined(NVTX_VERSION) */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "nvToolsExt.h"
#include "cuda.h"
#ifndef NVTOOLSEXT_CUDA_V3
#define NVTOOLSEXT_CUDA_V3
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* ========================================================================= */
/** \name Functions for CUDA Resource Naming
*/
/** \addtogroup RESOURCE_NAMING
* \section RESOURCE_NAMING_CUDA CUDA Resource Naming
*
* This section covers the API functions that allow annotating CUDA resources
* with user-provided names.
*
* @{
*/
/* ------------------------------------------------------------------------- */
/* \cond SHOW_HIDDEN
* \brief Class ID used to build non-colliding resource type values, which are separated by class
* \version \NVTX_VERSION_2
*/
#define NVTX_RESOURCE_CLASS_CUDA 4
/** \endcond */
/* ------------------------------------------------------------------------- */
/** \brief Resource types for CUDA
*
* Each value is NVTX_RESOURCE_MAKE_TYPE(CUDA, n): the CUDA resource class ID
* combined with a per-class index. The last enumerator carries no trailing
* comma, matching nvtxResourceGenericType_t; a trailing comma there is only
* valid from C99 / C++11 on and is diagnosed by pedantic older-dialect builds.
*/
typedef enum nvtxResourceCUDAType_t
{
    NVTX_RESOURCE_TYPE_CUDA_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDA, 1), /**< CUdevice */
    NVTX_RESOURCE_TYPE_CUDA_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 2), /**< CUcontext */
    NVTX_RESOURCE_TYPE_CUDA_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDA, 3), /**< CUstream */
    NVTX_RESOURCE_TYPE_CUDA_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 4) /**< CUevent */
} nvtxResourceCUDAType_t;
/* ------------------------------------------------------------------------- */
/** \brief Annotates a CUDA device.
*
* Allows the user to associate a CUDA device with a user-provided name.
*
* \param device - The handle of the CUDA device to name.
* \param name - The name of the CUDA device.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates a CUDA context.
*
* Allows the user to associate a CUDA context with a user-provided name.
*
* \param context - The handle of the CUDA context to name.
* \param name - The name of the CUDA context.
*
* \par Example:
* \code
* CUresult status = cuCtxCreate( &cuContext, 0, cuDevice );
* if ( CUDA_SUCCESS != status )
* goto Error;
* nvtxNameCuContext(cuContext, "CTX_NAME");
* \endcode
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates a CUDA stream.
*
* Allows the user to associate a CUDA stream with a user-provided name.
*
* \param stream - The handle of the CUDA stream to name.
* \param name - The name of the CUDA stream.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates a CUDA event.
*
* Allows the user to associate a CUDA event with a user-provided name.
*
* \param event - The handle of the CUDA event to name.
* \param name - The name of the CUDA event.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name);
/** @} */
/** @} */ /* END RESOURCE_NAMING */
/* ========================================================================= */
/* Character-set neutral names for the CUDA naming functions: wide-character (W)
 * variants when UNICODE is defined, char (A) variants otherwise. */
#if defined(UNICODE)
#  define nvtxNameCuDevice  nvtxNameCuDeviceW
#  define nvtxNameCuContext nvtxNameCuContextW
#  define nvtxNameCuStream  nvtxNameCuStreamW
#  define nvtxNameCuEvent   nvtxNameCuEventW
#else /* !defined(UNICODE) */
#  define nvtxNameCuDevice  nvtxNameCuDeviceA
#  define nvtxNameCuContext nvtxNameCuContextA
#  define nvtxNameCuStream  nvtxNameCuStreamA
#  define nvtxNameCuEvent   nvtxNameCuEventA
#endif /* defined(UNICODE) */
#ifdef __cplusplus
}
#endif /* __cplusplus */
/* Pull in the CUDA implementation header unless NVTX_NO_IMPL is defined;
 * NVTX_IMPL_GUARD_CUDA is defined only for the duration of that include. */
#ifndef NVTX_NO_IMPL
#define NVTX_IMPL_GUARD_CUDA /* Ensure other headers cannot be included directly */
#include "nvtxDetail/nvtxImplCuda_v3.h"
#undef NVTX_IMPL_GUARD_CUDA
#endif /*NVTX_NO_IMPL*/
#endif /* NVTOOLSEXT_CUDA_V3 */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "nvToolsExt.h"
#include "cuda.h"
#include "driver_types.h"
#ifndef NVTOOLSEXT_CUDART_V3
#define NVTOOLSEXT_CUDART_V3
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* ========================================================================= */
/** \name Functions for CUDA Resource Naming
*/
/** \addtogroup RESOURCE_NAMING
* \section RESOURCE_NAMING_CUDART CUDA Runtime Resource Naming
*
* This section covers the API functions that allow annotating CUDA resources
* with user-provided names.
*
* @{
*/
/* ------------------------------------------------------------------------- */
/* \cond SHOW_HIDDEN
* \brief Class ID used to build non-colliding resource type values, which are separated by class
* \version \NVTX_VERSION_2
*/
#define NVTX_RESOURCE_CLASS_CUDART 5
/** \endcond */
/* ------------------------------------------------------------------------- */
/** \brief Resource types for CUDART
*
* Each value is NVTX_RESOURCE_MAKE_TYPE(CUDART, n): the CUDART resource class
* ID combined with a per-class index (numbering here starts at index 0). The
* last enumerator carries no trailing comma, matching
* nvtxResourceGenericType_t; a trailing comma there is only valid from
* C99 / C++11 on and is diagnosed by pedantic older-dialect builds.
*/
typedef enum nvtxResourceCUDARTType_t
{
    NVTX_RESOURCE_TYPE_CUDART_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDART, 0), /**< int device */
    NVTX_RESOURCE_TYPE_CUDART_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDART, 1), /**< cudaStream_t */
    NVTX_RESOURCE_TYPE_CUDART_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDART, 2) /**< cudaEvent_t */
} nvtxResourceCUDARTType_t;
/* ------------------------------------------------------------------------- */
/** \brief Annotates a CUDA device.
*
* Allows the user to associate a CUDA device with a user-provided name.
*
* \param device - The id of the CUDA device to name.
* \param name - The name of the CUDA device.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates a CUDA stream.
*
* Allows the user to associate a CUDA stream with a user-provided name.
*
* \param stream - The handle of the CUDA stream to name.
* \param name - The name of the CUDA stream.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates a CUDA event.
*
* Allows the user to associate a CUDA event with a user-provided name.
*
* \param event - The handle of the CUDA event to name.
* \param name - The name of the CUDA event.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name);
/** @} */
/** @} */ /* END RESOURCE_NAMING */
/* ========================================================================= */
/* Character-set neutral names for the CUDA runtime naming functions:
 * wide-character (W) variants when UNICODE is defined, char (A) variants otherwise. */
#if defined(UNICODE)
#  define nvtxNameCudaDevice nvtxNameCudaDeviceW
#  define nvtxNameCudaStream nvtxNameCudaStreamW
#  define nvtxNameCudaEvent  nvtxNameCudaEventW
#else /* !defined(UNICODE) */
#  define nvtxNameCudaDevice nvtxNameCudaDeviceA
#  define nvtxNameCudaStream nvtxNameCudaStreamA
#  define nvtxNameCudaEvent  nvtxNameCudaEventA
#endif /* defined(UNICODE) */
#ifdef __cplusplus
}
#endif /* __cplusplus */
/* Pull in the CUDA runtime implementation header unless NVTX_NO_IMPL is defined;
 * NVTX_IMPL_GUARD_CUDART is defined only for the duration of that include. */
#ifndef NVTX_NO_IMPL
#define NVTX_IMPL_GUARD_CUDART /* Ensure other headers cannot be included directly */
#include "nvtxDetail/nvtxImplCudaRt_v3.h"
#undef NVTX_IMPL_GUARD_CUDART
#endif /*NVTX_NO_IMPL*/
#endif /* NVTOOLSEXT_CUDART_V3 */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "nvToolsExt.h"
#include <CL/cl.h>
#ifndef NVTOOLSEXT_OPENCL_V3
#define NVTOOLSEXT_OPENCL_V3
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* ========================================================================= */
/** \name Functions for OpenCL Resource Naming
*/
/** \addtogroup RESOURCE_NAMING
* \section RESOURCE_NAMING_OPENCL OpenCL Resource Naming
*
* This section covers the API functions that allow annotating OpenCL resources
* with user-provided names.
*
* @{
*/
/* ------------------------------------------------------------------------- */
/* \cond SHOW_HIDDEN
* \brief Class ID used to build non-colliding resource type values, which are separated by class
* \version \NVTX_VERSION_2
*/
#define NVTX_RESOURCE_CLASS_OPENCL 6
/** \endcond */
/* ------------------------------------------------------------------------- */
/** \brief Resource types for OpenCL
*
* Each value is NVTX_RESOURCE_MAKE_TYPE(OPENCL, n): the OpenCL resource class
* ID combined with a per-class index. The last enumerator carries no trailing
* comma, matching nvtxResourceGenericType_t; a trailing comma there is only
* valid from C99 / C++11 on and is diagnosed by pedantic older-dialect builds.
*/
typedef enum nvtxResourceOpenCLType_t
{
    NVTX_RESOURCE_TYPE_OPENCL_DEVICE = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 1), /**< OpenCL device */
    NVTX_RESOURCE_TYPE_OPENCL_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 2), /**< OpenCL context */
    NVTX_RESOURCE_TYPE_OPENCL_COMMANDQUEUE = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 3), /**< OpenCL command queue */
    NVTX_RESOURCE_TYPE_OPENCL_MEMOBJECT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 4), /**< OpenCL memory object */
    NVTX_RESOURCE_TYPE_OPENCL_SAMPLER = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 5), /**< OpenCL sampler */
    NVTX_RESOURCE_TYPE_OPENCL_PROGRAM = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 6), /**< OpenCL program */
    NVTX_RESOURCE_TYPE_OPENCL_EVENT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 7) /**< OpenCL event */
} nvtxResourceOpenCLType_t;
/* ------------------------------------------------------------------------- */
/** \brief Annotates an OpenCL device.
*
* Allows the user to associate an OpenCL device with a user-provided name.
*
* \param device - The handle of the OpenCL device to name.
* \param name - The name of the OpenCL device.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates an OpenCL context.
*
* Allows the user to associate an OpenCL context with a user-provided name.
*
* \param context - The handle of the OpenCL context to name.
* \param name - The name of the OpenCL context.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates an OpenCL command queue.
*
* Allows the user to associate an OpenCL command queue with a user-provided name.
*
* \param command_queue - The handle of the OpenCL command queue to name.
* \param name - The name of the OpenCL command queue.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates an OpenCL memory object.
*
* Allows the user to associate an OpenCL memory object with a user-provided name.
*
* \param memobj - The handle of the OpenCL memory object to name.
* \param name - The name of the OpenCL memory object.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates an OpenCL sampler.
*
* Allows the user to associate an OpenCL sampler with a user-provided name.
*
* \param sampler - The handle of the OpenCL sampler to name.
* \param name - The name of the OpenCL sampler.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates an OpenCL program.
*
* Allows the user to associate an OpenCL program with a user-provided name.
*
* \param program - The handle of the OpenCL program to name.
* \param name - The name of the OpenCL program.
*
* \code
* cpProgram = clCreateProgramWithSource(cxGPUContext, 1,
* (const char **) &cSourceCL, &program_length, &ciErrNum);
* shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
* nvtxNameClProgramW(cpProgram, L"PROGRAM_NAME");
* \endcode
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name);
/** @} */
/* ------------------------------------------------------------------------- */
/** \brief Annotates an OpenCL event.
*
* Allows the user to associate an OpenCL event with a user-provided name.
*
* \param evnt - The handle of the OpenCL event to name.
* \param name - The name of the OpenCL event.
*
* \version \NVTX_VERSION_1
* @{ */
NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name);
NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name);
/** @} */
/** @} */ /* END RESOURCE_NAMING */
/* ========================================================================= */
/* Character-set neutral names for the OpenCL naming functions: wide-character (W)
 * variants when UNICODE is defined, char (A) variants otherwise. */
#if defined(UNICODE)
#  define nvtxNameClDevice       nvtxNameClDeviceW
#  define nvtxNameClContext      nvtxNameClContextW
#  define nvtxNameClCommandQueue nvtxNameClCommandQueueW
#  define nvtxNameClMemObject    nvtxNameClMemObjectW
#  define nvtxNameClSampler      nvtxNameClSamplerW
#  define nvtxNameClProgram      nvtxNameClProgramW
#  define nvtxNameClEvent        nvtxNameClEventW
#else /* !defined(UNICODE) */
#  define nvtxNameClDevice       nvtxNameClDeviceA
#  define nvtxNameClContext      nvtxNameClContextA
#  define nvtxNameClCommandQueue nvtxNameClCommandQueueA
#  define nvtxNameClMemObject    nvtxNameClMemObjectA
#  define nvtxNameClSampler      nvtxNameClSamplerA
#  define nvtxNameClProgram      nvtxNameClProgramA
#  define nvtxNameClEvent        nvtxNameClEventA
#endif /* defined(UNICODE) */
#ifdef __cplusplus
}
#endif /* __cplusplus */
/* Pull in the OpenCL implementation header unless the client opted out by
 * defining NVTX_NO_IMPL. The guard macro is defined only around the include. */
#ifndef NVTX_NO_IMPL
#define NVTX_IMPL_GUARD_OPENCL /* Ensure other headers cannot be included directly */
#include "nvtxDetail/nvtxImplOpenCL_v3.h"
#undef NVTX_IMPL_GUARD_OPENCL
#endif /*NVTX_NO_IMPL*/
#endif /* NVTOOLSEXT_OPENCL_V3 */
/*
* Copyright 2021-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "nvToolsExt.h"
#ifndef NVTOOLSEXT_PAYLOAD_H
#define NVTOOLSEXT_PAYLOAD_H
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/**
* \brief A compatibility ID value used in initialization to identify version
* differences.
*/
#define NVTX_EXT_COMPATID_PAYLOAD 0x0103
/**
* \brief This module ID identifies the payload extension. It has to be unique
* among the extension modules.
*/
#define NVTX_EXT_MODULEID_PAYLOAD 2
/**
* \brief Additional value for the enum @ref nvtxPayloadType_t.
*
* This is the value the `NVTX_PAYLOAD_EVTATTR_SET*` helper macros store in the
* `payloadType` member of an event attribute when binary payloads
* (`nvtxPayloadData_t`) are attached.
*/
#define NVTX_PAYLOAD_TYPE_BINARY ((int32_t)0xDFBD0009)
/** ---------------------------------------------------------------------------
* Payload schema entry flags.
* ------------------------------------------------------------------------- */
#define NVTX_PAYLOAD_ENTRY_FLAG_UNUSED 0
/**
* Absolute pointer into a payload (entry) of the same event.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_POINTER (1 << 1)
/**
* Offset from base address of the payload.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_BASE (1 << 2)
/**
* Offset from the end of this payload entry.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_OFFSET_FROM_HERE (1 << 3)
/**
* The value is an array with fixed length, set with the field `arrayLength`.
*
* The three array kinds below are not independent bits: they are the values
* 1, 2 and 3 stored in bits 4-5.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE (1 << 4)
/**
* The value is a zero-/null-terminated array.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED (2 << 4)
/**
* \brief A single or multi-dimensional array of variable length.
*
* The field `arrayLength` contains the index of the schema entry that holds the
* length(s). If the other field points to a scalar entry then this will be a
* 1D array. If the other field points to a FIXED_SIZE array, then the number of
* dimensions is defined with the registration of the schema. If the other field
* is ZERO_TERMINATED, the array dimensions can be determined at runtime.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX (3 << 4)
/**
* A tool may not support deep copy and just ignore this flag.
* See @ref NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY for more details.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_DEEP_COPY (1 << 9)
/**
* The entry specifies the message in a deferred event. The entry type can be
* any string type. The flag is ignored for schemas that are not flagged with
* `NVTX_PAYLOAD_SCHEMA_FLAG_RANGE*` or `NVTX_PAYLOAD_SCHEMA_FLAG_MARK`.
*/
#define NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE (1 << 10)
/**
* @note The 'array' flags assume that the array is embedded. Otherwise,
* @ref NVTX_PAYLOAD_ENTRY_FLAG_POINTER has to be additionally specified. Some
* combinations may be invalid based on the `NVTX_PAYLOAD_SCHEMA_TYPE_*` this
* entry is enclosed in. For instance, variable length embedded arrays are valid
* within @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC but invalid with
* @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC. See `NVTX_PAYLOAD_SCHEMA_TYPE_*` for
* additional details.
*/
/* Bit mask covering all array-kind flags (bits 4-5). An entry represents an
 * array if `(flags & NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY) != 0`. */
#define NVTX_PAYLOAD_ENTRY_FLAG_IS_ARRAY (\
NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE | \
NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED | \
NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX)
/** ---------------------------------------------------------------------------
* Types of entries in a payload schema.
* ------------------------------------------------------------------------- */
/**
* @note Several of the predefined types contain the size (in bits) in their
* names. For some data types the size (in bytes) is not fixed and may differ
* for different platforms/operating systems/compilers. To provide portability,
* an array of sizes (in bytes) for type 1 to 28 ( @ref
* NVTX_PAYLOAD_ENTRY_TYPE_CHAR to @ref NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE)
* is passed to the NVTX extension initialization function
* @ref InitializeInjectionNvtxExtension via the `extInfo` field of
* @ref nvtxExtModuleInfo_t.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_INVALID 0
/**
* Basic integer types.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR 1
#define NVTX_PAYLOAD_ENTRY_TYPE_UCHAR 2
#define NVTX_PAYLOAD_ENTRY_TYPE_SHORT 3
#define NVTX_PAYLOAD_ENTRY_TYPE_USHORT 4
#define NVTX_PAYLOAD_ENTRY_TYPE_INT 5
#define NVTX_PAYLOAD_ENTRY_TYPE_UINT 6
#define NVTX_PAYLOAD_ENTRY_TYPE_LONG 7
#define NVTX_PAYLOAD_ENTRY_TYPE_ULONG 8
#define NVTX_PAYLOAD_ENTRY_TYPE_LONGLONG 9
#define NVTX_PAYLOAD_ENTRY_TYPE_ULONGLONG 10
/**
* Integer types with explicit size.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_INT8 11
#define NVTX_PAYLOAD_ENTRY_TYPE_UINT8 12
#define NVTX_PAYLOAD_ENTRY_TYPE_INT16 13
#define NVTX_PAYLOAD_ENTRY_TYPE_UINT16 14
#define NVTX_PAYLOAD_ENTRY_TYPE_INT32 15
#define NVTX_PAYLOAD_ENTRY_TYPE_UINT32 16
#define NVTX_PAYLOAD_ENTRY_TYPE_INT64 17
#define NVTX_PAYLOAD_ENTRY_TYPE_UINT64 18
/**
* C floating point types.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT 19
#define NVTX_PAYLOAD_ENTRY_TYPE_DOUBLE 20
#define NVTX_PAYLOAD_ENTRY_TYPE_LONGDOUBLE 21
/**
* Size type (`size_t`).
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_SIZE 22
/**
* Any address, e.g. `void*`. If the pointer type matters, use the flag @ref
* NVTX_PAYLOAD_ENTRY_FLAG_POINTER and the respective type instead.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_ADDRESS 23
/**
* Special character types.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_WCHAR 24 /* wide character (since C90) */
#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR8 25 /* since C2x and C++20 */
#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR16 26
#define NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 27
/**
* There is type size and alignment information for all previous types.
* The macro evaluates to 28, i.e. one more than the last type ID above.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_INFO_ARRAY_SIZE (NVTX_PAYLOAD_ENTRY_TYPE_CHAR32 + 1)
/**
* Store raw 8-bit binary data. As with `char`, 1-byte alignment is assumed.
* Typically a tool will display this as hex or binary.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_BYTE 32
/**
* These types do not have standardized equivalents. It is assumed that the
* number at the end corresponds to the bits used to store the value and that
* the alignment corresponds to standardized types of the same size.
* A tool may not support these types.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_INT128 33
#define NVTX_PAYLOAD_ENTRY_TYPE_UINT128 34
#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT16 42
#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT32 43
#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT64 44
#define NVTX_PAYLOAD_ENTRY_TYPE_FLOAT128 45
#define NVTX_PAYLOAD_ENTRY_TYPE_BF16 50
#define NVTX_PAYLOAD_ENTRY_TYPE_TF32 52
/**
* These types are normalized numbers stored in integers. UNORMs represent 0.0
* to 1.0 and SNORMs represent -1.0 to 1.0. The number after the name is the
* number of integer bits. Alignment is taken from the equivalent integer types:
* INT# for SNORM# and UINT# for UNORM#.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_SNORM8 61
#define NVTX_PAYLOAD_ENTRY_TYPE_UNORM8 62
#define NVTX_PAYLOAD_ENTRY_TYPE_SNORM16 63
#define NVTX_PAYLOAD_ENTRY_TYPE_UNORM16 64
#define NVTX_PAYLOAD_ENTRY_TYPE_SNORM32 65
#define NVTX_PAYLOAD_ENTRY_TYPE_UNORM32 66
#define NVTX_PAYLOAD_ENTRY_TYPE_SNORM64 67
#define NVTX_PAYLOAD_ENTRY_TYPE_UNORM64 68
/**
* String types.
*
* If `arrayOrUnionDetail` is greater than `0`, the entry is a fixed-size string
* with the provided length.
*
* `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_FIXED_SIZE` is ignored for string types. It
* just specifies once more that the entry is a fixed-size string.
*
* Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_ZERO_TERMINATED` indicates a
* zero-terminated string. If `arrayOrUnionDetail` is greater than `0`, a zero-
* terminated array of fixed-size strings is assumed.
*
* Setting the flag `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_LENGTH_INDEX` specifies the
* entry index of the entry which contains the string length. It is not possible
* to describe a variable length array of strings.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING 75 /* `char*`, system LOCALE */
#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF8 76
#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF16 77
#define NVTX_PAYLOAD_ENTRY_TYPE_CSTRING_UTF32 78
/**
* @ref nvtxStringHandle_t returned by @ref nvtxDomainRegisterString
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE 80
/**
* Entry types to be used in deferred events. Data types are as defined by
* NVTXv3 core: category -> uint32_t, color -> uint32_t, color type -> int32_t.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_CATEGORY 90
#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_COLORTYPE 91
#define NVTX_PAYLOAD_ENTRY_TYPE_NVTX_COLOR 92
/**
* This type marks the union selector member (entry index) in schemas used by
* a union with internal selector.
* See @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_UNION_SELECTOR 100
/**
* Timestamp types occupy the range from 128 to 255.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP64 128 /* data type is uint64_t */
/**
* CPU timestamp sources (IDs 129 to 172 below).
* \todo All 64 bits?
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_TSC 129
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_TSC_NONVIRTUALIZED 130
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_REALTIME 131
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_REALTIME_COARSE 132
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_MONOTONIC 133
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_MONOTONIC_RAW 134
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_MONOTONIC_COARSE 135
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_BOOTTIME 136
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_PROCESS_CPUTIME_ID 137
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPU_CLOCK_GETTIME_THREAD_CPUTIME_ID 138
/* Windows, C library and C++ clock sources start at 160. */
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_WIN_QPC 160
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_WIN_GSTAFT 161
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_WIN_GSTAFTP 162
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_C_TIME 163
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_C_CLOCK 164
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_C_TIMESPEC_GET 165
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPP_STEADY_CLOCK 166
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPP_HIGH_RESOLUTION_CLOCK 167
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPP_SYSTEM_CLOCK 168
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPP_UTC_CLOCK 169
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPP_TAI_CLOCK 170
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPP_GPS_CLOCK 171
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_CPP_FILE_CLOCK 172
/**
* \brief GPU timestamp sources (IDs start at 192).
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_GPU_GLOBALTIMER 192
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_GPU_SM_CLOCK 193
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_GPU_SM_CLOCK64 194
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_GPU_CUPTI 195
/**
* The timestamp was provided by the NVTX handler's timestamp routine.
*/
#define NVTX_PAYLOAD_ENTRY_TYPE_TIMESTAMP_TOOL_PROVIDED 224
/**
* This predefined schema ID can be used in `nvtxPayloadData_t` to indicate that
* the payload is a blob of memory which other payload entries may point into.
* A tool will not expose this payload directly.
*/
#define NVTX_TYPE_PAYLOAD_SCHEMA_REFERENCED 1022
/**
* This predefined schema ID can be used in `nvtxPayloadData_t` to indicate that
* the payload is a blob which can be shown with an arbitrary data viewer.
*/
#define NVTX_TYPE_PAYLOAD_SCHEMA_RAW 1023
/* Custom (static) schema IDs start here (1 << 24 == 16777216). */
#define NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START (1 << 24)
/* Dynamic schema IDs (generated by the tool) start here. The value does not
 * fit into 32 bits, so it must be compared against 64-bit schema IDs. */
#define NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START 4294967296 /* 1 << 32 */
/**
* \brief Size and alignment information for predefined payload entry types.
*
* The struct contains the size and the alignment size in bytes. A respective
* array for the predefined types is passed via nvtxExtModuleInfo_t to the NVTX
* client/handler. The type (ID) is used as index into this array.
*/
typedef struct nvtxPayloadEntryTypeInfo_t
{
/* Size of the type in bytes. */
uint16_t size;
/* Alignment of the type in bytes. */
uint16_t align;
} nvtxPayloadEntryTypeInfo_t;
/**
* \brief Entry in a schema.
*
* A payload schema consists of an array of payload schema entries. It is
* registered with @ref nvtxPayloadSchemaRegister. `flags` can be set to `0` for
* simple values, `type` is the only "required" field. If not set explicitly,
* all other fields are zero-initialized, which means that the entry has no name
* and the offset is determined based on self-alignment rules.
*
* Example schema (initializers are in member order: flags, type, name):
* nvtxPayloadSchemaEntry_t desc[] = {
* {0, NVTX_PAYLOAD_ENTRY_TYPE_UINT8, "one byte"},
* {0, NVTX_PAYLOAD_ENTRY_TYPE_INT32, "four bytes"}
* };
*/
typedef struct nvtxPayloadSchemaEntry_t
{
/**
* \brief Flags to augment the basic type.
*
* This field allows additional properties of the payload entry to be
* specified. Valid values are `NVTX_PAYLOAD_ENTRY_FLAG_*`.
*/
uint64_t flags;
/**
* \brief Predefined payload schema entry type or ID of a registered payload
* schema.
*/
uint64_t type;
/**
* \brief Name of the payload entry. (Optional)
*
* Providing a name is useful to give a meaning to the associated value.
*/
const char* name;
/**
* \brief Description of the payload entry. (Optional)
*/
const char* description;
/**
* \brief String or array length or union selector for union types.
*
* If @ref type is a C string type, this defines the length of the string.
*
* If @ref flags specify that the entry is an array, this field defines the
* length of the array. See `NVTX_PAYLOAD_ENTRY_FLAG_ARRAY_*` for more
* details.
*
* If @ref type implies that the entry is a union with schema type
* @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION (external selection of the union
* member), this field contains the index (starting with 0) to an entry of
* integer type in the same schema. The associated field contains the
* selected union member.
*
* @note An array of schema type @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION is not
* supported. @ref NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR can
* be used instead.
*/
uint64_t arrayOrUnionDetail;
/**
* \brief Offset in the binary payload data (in bytes).
*
* This field specifies the byte offset from the base address of the actual
* binary data (blob) to the data of this entry.
*
* This is an optional field, but it is recommended to specify this field to
* avoid issues in the automatic detection of the offset by a tool/handler.
*/
uint64_t offset;
/**
* Semantics are not yet defined.
*/
void* semantics;
/**
* Reserved for future use. Do not use it!
*/
void* reserved;
} nvtxPayloadSchemaEntry_t;
/**
* \brief Binary payload data, size and decoding information.
*
* An array of nvtxPayloadData_t is passed to the NVTX event attribute payload
* member. To attach a single payload the macro @ref NVTX_PAYLOAD_EVTATTR_SET
* can be used; @ref NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE attaches an existing
* array.
*/
typedef struct nvtxPayloadData_t
{
/**
* The schema ID, which defines the layout of the binary data.
*/
uint64_t schemaId;
/**
* Size of the binary payload (blob) in bytes.
*/
size_t size;
/**
* Pointer to the binary payload data.
*/
const void* payload;
} nvtxPayloadData_t;
/* Helper macro for a safe double-cast of a pointer to a uint64_t value.
 *
 * The pointer is first converted to uintptr_t and then widened to uint64_t.
 * A client may pre-define the macro; in that case this definition is skipped.
 *
 * The argument is parenthesized in the C variant so that an expression such as
 * `base + 1` is converted as a whole. Without the parentheses the cast would
 * bind to `base` only and the addition would be done on the integer value. */
#ifndef NVTX_POINTER_AS_PAYLOAD_ULLVALUE
# ifdef __cplusplus
# define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) \
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(p))
# else
# define NVTX_POINTER_AS_PAYLOAD_ULLVALUE(p) ((uint64_t)(uintptr_t)(p))
# endif
#endif
/* Token pasting helpers. The extra level of indirection (CONCAT -> CONCAT2)
 * makes the preprocessor expand arguments such as __LINE__ before pasting. */
#define NVTX_PAYLOAD_CONCAT2(a,b) a##b
#define NVTX_PAYLOAD_CONCAT(a,b) NVTX_PAYLOAD_CONCAT2(a,b)
/* Variable name of the form nvtxDFDB<line>, derived from the line on which
 * the macro is expanded. */
#define NVTX_DATA_VAR NVTX_PAYLOAD_CONCAT(nvtxDFDB,__LINE__)
/**
* \brief Helper macro to attach a single payload to an NVTX event attribute.
*
* The macro declares a local one-element `nvtxPayloadData_t` array (named via
* NVTX_DATA_VAR, i.e. after the current source line), stores the address of
* that array in `(EVTATTR).payload.ullValue`, sets `payloadType` to
* @ref NVTX_PAYLOAD_TYPE_BINARY and sets `reserved0` to 1.
*
* It expands to a declaration followed by statements, each already terminated
* by a semicolon. It therefore has to be used where a declaration is allowed,
* and not twice on the same source line within one scope.
*
* @note The event attribute only holds a pointer to the local array declared
* by this macro. The NVTX push, start or mark operation that consumes EVTATTR
* therefore has to be issued while that array is still alive, i.e. in the same
* or a nested scope.
* NOTE(review): the previous wording said "must not be in the same or a nested
* scope", which contradicts the lifetime of the local array - please confirm.
*/
#define NVTX_PAYLOAD_EVTATTR_SET(EVTATTR, SCHEMA_ID, PAYLOAD_ADDR, SIZE) \
nvtxPayloadData_t NVTX_DATA_VAR[] = {{SCHEMA_ID, SIZE, PAYLOAD_ADDR}}; \
(EVTATTR).payload.ullValue = \
NVTX_POINTER_AS_PAYLOAD_ULLVALUE(NVTX_DATA_VAR); \
(EVTATTR).payloadType = NVTX_PAYLOAD_TYPE_BINARY; \
(EVTATTR).reserved0 = 1;
/**
* \brief Helper macro to attach multiple payloads to an NVTX event attribute.
*
* The event attribute is passed as first argument and the payload data array
* (`nvtxPayloadData_t`) as second argument to this macro.
*
* The number of payloads stored in `reserved0` is computed with
* `sizeof(PAYLOADS)/sizeof(nvtxPayloadData_t)`, so PAYLOADS has to be an actual
* array; a pointer to the first element would yield a wrong count.
*/
#define NVTX_PAYLOAD_EVTATTR_SET_MULTIPLE(EVTATTR, PAYLOADS) \
(EVTATTR).payloadType = NVTX_PAYLOAD_TYPE_BINARY; \
(EVTATTR).reserved0 = sizeof(PAYLOADS)/sizeof(nvtxPayloadData_t); \
(EVTATTR).payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(PAYLOADS);
/**
* \brief The payload schema type.
*
* A schema can be either of these types. The value is stored in the `type`
* field of @ref nvtxPayloadSchemaAttr_t.
*/
enum nvtxPayloadSchemaType
{
NVTX_PAYLOAD_SCHEMA_TYPE_INVALID = 0,
/* Variable length embedded arrays are invalid in a static schema. */
NVTX_PAYLOAD_SCHEMA_TYPE_STATIC = 1,
/* Variable length embedded arrays are valid in a dynamic schema. */
NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC = 2,
/* Union whose member is selected externally, by another schema entry. */
NVTX_PAYLOAD_SCHEMA_TYPE_UNION = 3,
/* Union that carries its own selector entry
   (see NVTX_PAYLOAD_ENTRY_TYPE_UNION_SELECTOR). */
NVTX_PAYLOAD_SCHEMA_TYPE_UNION_WITH_INTERNAL_SELECTOR = 4
};
/**
* \brief Flags for static and dynamic schemas.
*/
enum nvtxPayloadSchemaFlags
{
NVTX_PAYLOAD_SCHEMA_FLAG_NONE = 0,
/**
* This flag indicates that a schema and the corresponding payloads can
* contain fields which require a deep copy.
*/
NVTX_PAYLOAD_SCHEMA_FLAG_DEEP_COPY = (1 << 1),
/**
* This flag indicates that a schema and the corresponding payloads can
* be referenced by another payload of the same event.
*/
NVTX_PAYLOAD_SCHEMA_FLAG_REFERENCED = (1 << 2),
/**
* The schema describes a deferred event/marker. Such a schema requires one
* timestamp entry and one string entry with the flag
* `NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE`. Category and color can be
* optionally specified with the respective entry types. The deferred event
* can contain a binary payload itself by using a custom schema ID as type
* in its schema description. Multiple occurrences of the same event can be
* described by specifying an array of timestamps.
*/
NVTX_PAYLOAD_SCHEMA_FLAG_DEFERRED_EVENT = (1 << 3),
/**
* The schema describes a deferred range. Such a schema requires
* one start timestamp, one end timestamp and one string entry with the flag
* `NVTX_PAYLOAD_ENTRY_FLAG_EVENT_MESSAGE`. Category and color can be
* optionally specified with the respective entry types. The deferred range
* can contain a binary payload itself by using a custom schema ID as type
* in its schema description.
*
* Timestamps can be provided in different ways:
* - A single range has two timestamp entries with the first (smaller entry
* index) being used as the start/push timestamp.
* - If the range schema contains one array of timestamps, the tool assumes
* that the array contains alternating start and end timestamps.
* - If two timestamp arrays are specified the first entry (with the
* smaller entry index) is assumed to contain the start timestamps. Both
* arrays have to be of the same size.
*/
NVTX_PAYLOAD_SCHEMA_FLAG_DEFERRED_RANGE = (2 << 3)
};
/**
* The values allow the valid fields in @ref nvtxPayloadSchemaAttr_t to be
* specified via setting the field `fieldMask`. The values are single bits that
* can be OR-ed together; bit 0 is not used.
* NOTE(review): each bit presumably corresponds to the like-named member
* (STATIC_SIZE -> `payloadStaticSize`, ALIGNMENT -> `packAlign`) - confirm.
*/
#define NVTX_PAYLOAD_SCHEMA_ATTR_NAME (1 << 1)
#define NVTX_PAYLOAD_SCHEMA_ATTR_TYPE (1 << 2)
#define NVTX_PAYLOAD_SCHEMA_ATTR_FLAGS (1 << 3)
#define NVTX_PAYLOAD_SCHEMA_ATTR_ENTRIES (1 << 4)
#define NVTX_PAYLOAD_SCHEMA_ATTR_NUM_ENTRIES (1 << 5)
#define NVTX_PAYLOAD_SCHEMA_ATTR_STATIC_SIZE (1 << 6)
#define NVTX_PAYLOAD_SCHEMA_ATTR_ALIGNMENT (1 << 7)
#define NVTX_PAYLOAD_SCHEMA_ATTR_SCHEMA_ID (1 << 8)
/**
* NVTX payload schema attributes, passed to @ref nvtxPayloadSchemaRegister.
*/
typedef struct nvtxPayloadSchemaAttr_t
{
/**
* \brief Mask of valid fields in this structure.
*
* The `NVTX_PAYLOAD_SCHEMA_ATTR_*` values have to be used.
*/
uint64_t fieldMask;
/**
* \brief Name of the payload schema. (Optional)
*/
const char* name;
/**
* \brief Payload schema type. (Mandatory) \anchor PAYLOAD_TYPE_FIELD
*
* A value from `enum nvtxPayloadSchemaType` has to be used.
*/
uint64_t type;
/**
* \brief Payload schema flags. (Optional)
*
* Flags defined in `enum nvtxPayloadSchemaFlags` can be used to set
* additional properties of the schema.
*/
uint64_t flags;
/**
* \brief Entries of a payload schema. (Mandatory) \anchor ENTRIES_FIELD
*
* This field is a pointer to an array of schema entries, each describing a
* field in a data structure, e.g. in a C struct or union.
*/
const nvtxPayloadSchemaEntry_t* entries;
/**
* \brief Number of entries in the payload schema. (Mandatory)
*
* Number of entries in the array of payload entries \ref ENTRIES_FIELD.
*/
size_t numEntries;
/**
* \brief The binary payload size in bytes for static payload schemas.
*
* If \ref PAYLOAD_TYPE_FIELD is @ref NVTX_PAYLOAD_SCHEMA_TYPE_DYNAMIC this
* value is ignored. If this field is not specified for a schema of type
* @ref NVTX_PAYLOAD_SCHEMA_TYPE_STATIC, the size can be automatically
* determined by a tool.
*/
size_t payloadStaticSize;
/**
* \brief The byte alignment for packed structures.
*
* If not specified, this field defaults to `0`, which means that the fields
* in the data structure are not packed and natural alignment rules can be
* applied.
*/
size_t packAlign;
/**
* \brief Static/custom schema ID.
*
* Must be >= NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START and
* < NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START.
*/
uint64_t schemaId;
} nvtxPayloadSchemaAttr_t;
/**
* \brief Register a payload schema.
*
* @param domain NVTX domain handle.
* @param attr NVTX payload schema attributes.
*
* @return A `uint64_t` value. NOTE(review): presumably the ID of the
* registered schema - confirm against the implementation header.
*/
NVTX_DECLSPEC uint64_t NVTX_API nvtxPayloadSchemaRegister(
nvtxDomainHandle_t domain, const nvtxPayloadSchemaAttr_t* attr);
/**
* \brief Enumeration entry.
*
* Since the value of an enum entry might not be meaningful for the analysis,
* a tool can show the name of the enum entry instead. An array of these
* entries is referenced by the `entries` field of @ref nvtxPayloadEnumAttr_t.
*
* @note EXPERIMENTAL
*/
typedef struct nvtxPayloadEnum_t
{
/**
* Name of the enum value.
*/
const char* name;
/**
* Value of the enum entry.
*/
uint64_t value;
/**
* Indicates that this entry sets a specific set of bits, which can be used
* to easily define bitsets.
*/
int8_t isFlag;
} nvtxPayloadEnum_t;
/**
* The values are used to set the field `fieldMask` and specify which fields in
* `nvtxPayloadEnumAttr_t` are set. The values are single bits that can be
* OR-ed together; bit 0 is not used.
*/
#define NVTX_PAYLOAD_ENUM_ATTR_NAME (1 << 1)
#define NVTX_PAYLOAD_ENUM_ATTR_ENTRIES (1 << 2)
#define NVTX_PAYLOAD_ENUM_ATTR_NUM_ENTRIES (1 << 3)
#define NVTX_PAYLOAD_ENUM_ATTR_SIZE (1 << 4)
#define NVTX_PAYLOAD_ENUM_ATTR_SCHEMA_ID (1 << 5)
/**
* NVTX payload enumeration type attributes, passed to
* @ref nvtxPayloadEnumRegister.
*/
typedef struct nvtxPayloadEnumAttr_t {
/**
* Mask of valid fields in this struct.
* The `NVTX_PAYLOAD_ENUM_ATTR_*` values have to be used.
*/
uint64_t fieldMask;
/**
* Name of the enum. (Optional)
*/
const char* name;
/**
* Entries of the enum. (Mandatory)
*/
const nvtxPayloadEnum_t* entries;
/**
* Number of entries in the enum. (Mandatory)
*/
size_t numEntries;
/**
* Size of enumeration type in bytes
*/
size_t sizeOfEnum;
/**
* Static/custom schema ID must be
* >= NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START and
* < NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_DYNAMIC_START
*/
uint64_t schemaId;
} nvtxPayloadEnumAttr_t;
/**
* \brief Register an enumeration type with the payload extension.
*
* @param domain NVTX domain handle
* @param attr NVTX payload enumeration type attributes.
*
* @return A `uint64_t` value. NOTE(review): presumably the schema ID assigned
* to the enumeration type - confirm against the implementation header.
*
* NOTE(review): unlike nvtxPayloadSchemaRegister, this declaration does not
* carry the NVTX_API calling-convention macro - confirm that this is intended.
*/
NVTX_DECLSPEC uint64_t nvtxPayloadEnumRegister(nvtxDomainHandle_t domain,
const nvtxPayloadEnumAttr_t* attr);
/**
* \brief Callback IDs of API functions in the payload extension.
*
* The NVTX handler can use these values to register a handler function. When
* InitializeInjectionNvtxExtension(nvtxExtModuleInfo_t* moduleInfo) is
* executed, a handler routine 'handlenvtxPayloadRegisterSchema' can be
* registered as follows:
* moduleInfo->segments->slots[NVTX3EXT_CBID_nvtxPayloadSchemaRegister] =
* (intptr_t)handlenvtxPayloadRegisterSchema;
*/
typedef enum NvtxExtPayloadCallbackId
{
NVTX3EXT_CBID_nvtxPayloadSchemaRegister = 0,
NVTX3EXT_CBID_nvtxPayloadEnumRegister = 1,
/* Equals the number of callback IDs defined above. */
NVTX3EXT_CBID_PAYLOAD_FN_NUM = 2
} NvtxExtPayloadCallbackId;
/* With GCC-compatible compilers, everything declared by the headers included
 * below gets internal visibility; the pragma is popped again afterwards. */
#ifdef __GNUC__
#pragma GCC visibility push(internal)
#endif
#define NVTX_EXT_TYPES_GUARD /* Ensure other headers cannot be included directly */
#include "nvtxExtDetail/nvtxExtTypes.h"
#undef NVTX_EXT_TYPES_GUARD
/* The implementation headers are skipped when the client defines NVTX_NO_IMPL. */
#ifndef NVTX_NO_IMPL
#define NVTX_EXT_IMPL_PAYLOAD_GUARD /* Ensure other headers cannot be included directly */
#include "nvtxExtDetail/nvtxExtPayloadTypeInfo.h"
#include "nvtxExtDetail/nvtxExtImplPayload_v1.h"
#undef NVTX_EXT_IMPL_PAYLOAD_GUARD
#endif /*NVTX_NO_IMPL*/
#ifdef __GNUC__
#pragma GCC visibility pop
#endif
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* NVTOOLSEXT_PAYLOAD_H */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "nvToolsExt.h"
#ifndef NVTOOLSEXT_SYNC_V3
#define NVTOOLSEXT_SYNC_V3
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* \cond SHOW_HIDDEN
* \brief Size in bytes of nvtxSyncUserAttributes_v0, narrowed to uint16_t.
* The usage example on the synchronization page assigns it to the `size`
* member of the attribute structure.
* \version \NVTX_VERSION_2
*/
#define NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxSyncUserAttributes_v0) ) )
/** \endcond */
/**
* \page PAGE_SYNCHRONIZATION Synchronization
*
* This section covers a subset of the API that allows users to track additional
* synchronization details of their application. Naming OS synchronization primitives
* may allow users to better understand the data collected by traced synchronization
* APIs. Additionally, a user-defined synchronization object allows users to
* tell the tools when they are building their own synchronization system
* that does not rely on the OS to provide behaviors and instead uses techniques like
* atomic operations and spinlocks.
*
* See module \ref SYNCHRONIZATION for details.
*
* \par Example:
* \code
* class MyMutex
* {
* volatile long bLocked;
* nvtxSyncUser_t hSync;
* public:
* MyMutex(const char* name, nvtxDomainHandle_t d){
* bLocked = 0;
*
* nvtxSyncUserAttributes_t attribs = { 0 };
* attribs.version = NVTX_VERSION;
* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
* attribs.message.ascii = name;
* hSync = nvtxDomainSyncUserCreate(d, &attribs);
* }
*
* ~MyMutex() {
* nvtxDomainSyncUserDestroy(hSync);
* }
*
* bool Lock() {
* nvtxDomainSyncUserAcquireStart(hSync);
* bool acquired = __sync_bool_compare_and_swap(&bLocked, 0, 1);//atomic compiler intrinsic
* if (acquired) {
* nvtxDomainSyncUserAcquireSuccess(hSync);
* }
* else {
* nvtxDomainSyncUserAcquireFailed(hSync);
* }
* return acquired;
* }
* void Unlock() {
* nvtxDomainSyncUserReleasing(hSync);
* bLocked = false;
* }
* };
* \endcode
*
* \version \NVTX_VERSION_2
*/
/* ------------------------------------------------------------------------- */
/* \cond SHOW_HIDDEN
* \brief Resource class values used to build non-colliding resource type
* values, one class per kind of synchronization API.
* NOTE(review): these are presumably selected by the first argument of
* NVTX_RESOURCE_MAKE_TYPE (SYNC_OS / SYNC_PTHREAD) - confirm in nvToolsExt.h.
* \version \NVTX_VERSION_2
*/
#define NVTX_RESOURCE_CLASS_SYNC_OS 2 /**< Synchronization objects that are OS specific. */
#define NVTX_RESOURCE_CLASS_SYNC_PTHREAD 3 /**< Synchronization objects that are from the POSIX Threads API (pthread)*/
/** \endcond */
/* ------------------------------------------------------------------------- */
/** \defgroup SYNCHRONIZATION Synchronization
* See page \ref PAGE_SYNCHRONIZATION.
* @{
*/
/** \brief Resource type values for OSs with POSIX Thread API support.
*
* Every enumerator is produced by NVTX_RESOURCE_MAKE_TYPE from the
* SYNC_PTHREAD resource class and a per-primitive index (1 through 6).
* The trailing comment on each enumerator names the pthread type it stands for.
*/
typedef enum nvtxResourceSyncPosixThreadType_t
{
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 1), /* pthread_mutex_t */
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_CONDITION = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 2), /* pthread_cond_t */
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_RWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 3), /* pthread_rwlock_t */
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_BARRIER = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 4), /* pthread_barrier_t */
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 5), /* pthread_spinlock_t */
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_ONCE = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 6) /* pthread_once_t */
} nvtxResourceSyncPosixThreadType_t;
/** \brief Resource type values for Windows OSs.
*
* Every enumerator is produced by NVTX_RESOURCE_MAKE_TYPE from the SYNC_OS
* resource class and a per-primitive index (1 through 5).
*
* \note ::nvtxResourceSyncLinuxType_t is built from the same SYNC_OS class
* with overlapping indices, so enumerators of the two enumerations expand
* from identical macro arguments. Presumably the target OS determines which
* enumeration applies -- confirm before mixing them.
*/
typedef enum nvtxResourceSyncWindowsType_t
{
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_EVENT = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_CRITICAL_SECTION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SRWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5)
} nvtxResourceSyncWindowsType_t;
/** \brief Resource type values for Linux and Linux derived OSs such as Android.
*
* Every enumerator is produced by NVTX_RESOURCE_MAKE_TYPE from the SYNC_OS
* resource class and a per-primitive index (1 through 7).
*
* \note ::nvtxResourceSyncWindowsType_t is built from the same SYNC_OS class
* with overlapping indices, so enumerators of the two enumerations expand
* from identical macro arguments. Presumably the target OS determines which
* enumeration applies -- confirm before mixing them.
*
* \sa
* ::nvtxResourceSyncPosixThreadType_t
*/
typedef enum nvtxResourceSyncLinuxType_t
{
NVTX_RESOURCE_TYPE_SYNC_LINUX_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
NVTX_RESOURCE_TYPE_SYNC_LINUX_FUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
NVTX_RESOURCE_TYPE_SYNC_LINUX_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
NVTX_RESOURCE_TYPE_SYNC_LINUX_COMPLETION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
NVTX_RESOURCE_TYPE_SYNC_LINUX_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5),
NVTX_RESOURCE_TYPE_SYNC_LINUX_SEQLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 6),
NVTX_RESOURCE_TYPE_SYNC_LINUX_RCU = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 7)
} nvtxResourceSyncLinuxType_t;
/** \brief Resource type values for Android come from Linux.
*
* This is a plain alias of the Linux enumeration, not a distinct type, so the
* NVTX_RESOURCE_TYPE_SYNC_LINUX_* enumerators are the values to use.
*
* \sa
* ::nvtxResourceSyncLinuxType_t
* ::nvtxResourceSyncPosixThreadType_t
*/
typedef enum nvtxResourceSyncLinuxType_t nvtxResourceSyncAndroidType_t;
/** \brief User Defined Synchronization Object Handle.
* \anchor SYNCUSER_HANDLE_STRUCTURE
*
* This structure is opaque to the user and is used as a handle to reference
* a user defined synchronization object. The tools will return a pointer through the API for the application
* to hold on its behalf to reference the object in the future.
*
* Handles are returned by ::nvtxDomainSyncUserCreate and accepted by the other
* nvtxDomainSyncUser* functions declared in this header.
*/
typedef struct nvtxSyncUser* nvtxSyncUser_t;
/** \brief User Defined Synchronization Object Attributes Structure.
* \anchor USERDEF_SYNC_ATTRIBUTES_STRUCTURE
*
* This structure is used to describe the attributes of a user defined synchronization
* object. The layout of the structure is defined by a specific version of the tools
* extension library and can change between different versions of the Tools Extension
* library.
*
* \par Initializing the Attributes
*
* The caller should always perform the following three tasks when using
* attributes:
* <ul>
* <li>Zero the structure
* <li>Set the version field
* <li>Set the size field
* </ul>
*
* Zeroing the structure sets all the event attributes types and values
* to the default value.
*
* The version and size field are used by the Tools Extension
* implementation to handle multiple versions of the attributes structure.
*
* It is recommended that the caller use one of the following two methods
* to initialize the attributes structure:
*
* \par Method 1: Initializing nvtxSyncUserAttributes_t for future compatibility
* \code
* nvtxSyncUserAttributes_t attribs = {0};
* attribs.version = NVTX_VERSION;
* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
* \endcode
*
* \par Method 2: Initializing nvtxSyncUserAttributes_t for a specific version
* \code
* nvtxSyncUserAttributes_t attribs = {0};
* attribs.version = 1;
* attribs.size = (uint16_t)(sizeof(nvtxSyncUserAttributes_t));
* \endcode
*
* If the caller uses Method 1 it is critical that the entire binary
* layout of the structure be configured to 0 so that all fields
* are initialized to the default value.
*
* The caller should either use both NVTX_VERSION and
* NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
* and a versioned type (Method 2). Using a mix of the two methods
* will likely cause either source level incompatibility or binary
* incompatibility in the future.
*
* \par Settings Attribute Types and Values
*
*
* \par Example:
* \code
* // Initialize
* nvtxSyncUserAttributes_t attribs = {0};
* attribs.version = NVTX_VERSION;
* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
*
* // Configure the Attributes
* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
* attribs.message.ascii = "Example";
* \endcode
*
* \sa
* ::nvtxDomainSyncUserCreate
*/
typedef struct nvtxSyncUserAttributes_v0
{
/**
* \brief Version flag of the structure.
*
* Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
* supported in this header file. This can optionally be overridden to
* another version of the tools extension library.
*/
uint16_t version;
/**
* \brief Size of the structure.
*
* Needs to be set to the size in bytes of this attributes structure.
* NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE provides that value for the structure
* version declared in this header.
*/
uint16_t size;
/** \brief Message type specified in this attribute structure.
*
* Defines the message format of the attribute structure's \ref nvtxSyncUserAttributes_v0::message
* "message" field.
*
* Stored as int32_t; the values are those of nvtxMessageType_t.
*
* Default Value is NVTX_MESSAGE_UNKNOWN
*/
int32_t messageType; /* nvtxMessageType_t */
/** \brief Message assigned to this attribute structure.
*
* The text message that is attached to the synchronization object. The
* examples in this header use it to carry the object's name (for instance
* through the `ascii` member together with NVTX_MESSAGE_TYPE_ASCII).
*/
nvtxMessageValue_t message;
} nvtxSyncUserAttributes_v0;
/* Unversioned name for the attributes structure; currently the v0 layout. */
typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
/* ------------------------------------------------------------------------- */
/** \brief Create a user defined synchronization object.
*
* This is used to track non-OS synchronization working with spinlocks and atomics.
*
* \param domain - Domain to own the resource
* \param attribs - A structure to assign multiple attributes to the object.
* See ::nvtxSyncUserAttributes_t for how the structure should be initialized
* (zeroed, with the version and size fields set).
*
* \return A handle that represents the newly created user defined synchronization object.
*
* \sa
* ::nvtxDomainSyncUserCreate
* ::nvtxDomainSyncUserDestroy
* ::nvtxDomainSyncUserAcquireStart
* ::nvtxDomainSyncUserAcquireFailed
* ::nvtxDomainSyncUserAcquireSuccess
* ::nvtxDomainSyncUserReleasing
*
* \version \NVTX_VERSION_2
*/
NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
/* ------------------------------------------------------------------------- */
/** \brief Destroy a user defined synchronization object.
*
* This is used to track non-OS synchronization working with spinlocks and atomics.
*
* \param handle - A handle to the object to operate on, as returned by
* ::nvtxDomainSyncUserCreate.
*
* \sa
* ::nvtxDomainSyncUserCreate
* ::nvtxDomainSyncUserDestroy
* ::nvtxDomainSyncUserAcquireStart
* ::nvtxDomainSyncUserAcquireFailed
* ::nvtxDomainSyncUserAcquireSuccess
* ::nvtxDomainSyncUserReleasing
*
* \version \NVTX_VERSION_2
*/
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle);
/* ------------------------------------------------------------------------- */
/** \brief Signal to tools that an attempt to acquire a user defined synchronization object is starting.
*
* In the example on \ref PAGE_SYNCHRONIZATION this is called right before the
* acquisition attempt and is followed by either
* ::nvtxDomainSyncUserAcquireSuccess or ::nvtxDomainSyncUserAcquireFailed.
*
* \param handle - A handle to the object to operate on.
*
* \sa
* ::nvtxDomainSyncUserCreate
* ::nvtxDomainSyncUserDestroy
* ::nvtxDomainSyncUserAcquireStart
* ::nvtxDomainSyncUserAcquireFailed
* ::nvtxDomainSyncUserAcquireSuccess
* ::nvtxDomainSyncUserReleasing
*
* \version \NVTX_VERSION_2
*/
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle);
/* ------------------------------------------------------------------------- */
/** \brief Signal to tools of failure in acquiring a user defined synchronization object.
*
* This should be called after \ref nvtxDomainSyncUserAcquireStart.
*
* \param handle - A handle to the object to operate on.
*
* \sa
* ::nvtxDomainSyncUserCreate
* ::nvtxDomainSyncUserDestroy
* ::nvtxDomainSyncUserAcquireStart
* ::nvtxDomainSyncUserAcquireFailed
* ::nvtxDomainSyncUserAcquireSuccess
* ::nvtxDomainSyncUserReleasing
*
* \version \NVTX_VERSION_2
*/
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle);
/* ------------------------------------------------------------------------- */
/** \brief Signal to tools of success in acquiring a user defined synchronization object.
*
* This should be called after \ref nvtxDomainSyncUserAcquireStart.
*
* \param handle - A handle to the object to operate on.
*
* \sa
* ::nvtxDomainSyncUserCreate
* ::nvtxDomainSyncUserDestroy
* ::nvtxDomainSyncUserAcquireStart
* ::nvtxDomainSyncUserAcquireFailed
* ::nvtxDomainSyncUserAcquireSuccess
* ::nvtxDomainSyncUserReleasing
*
* \version \NVTX_VERSION_2
*/
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle);
/* ------------------------------------------------------------------------- */
/** \brief Signal to tools of releasing a reservation on a user defined synchronization object.
*
* This should be called after \ref nvtxDomainSyncUserAcquireSuccess.
* In the example on \ref PAGE_SYNCHRONIZATION it is called immediately before
* the lock flag is cleared.
*
* \param handle - A handle to the object to operate on.
*
* \sa
* ::nvtxDomainSyncUserCreate
* ::nvtxDomainSyncUserDestroy
* ::nvtxDomainSyncUserAcquireStart
* ::nvtxDomainSyncUserAcquireFailed
* ::nvtxDomainSyncUserAcquireSuccess
* ::nvtxDomainSyncUserReleasing
*
* \version \NVTX_VERSION_2
*/
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle);
/** @} */ /*END defgroup*/
#ifdef __cplusplus
}
#endif /* __cplusplus */
/* Pull in the implementation header unless the includer defined NVTX_NO_IMPL.
* NVTX_IMPL_GUARD_SYNC is defined only for the duration of the #include;
* presumably the implementation header checks for it -- confirm there.
*/
#ifndef NVTX_NO_IMPL
#define NVTX_IMPL_GUARD_SYNC /* Ensure other headers cannot be included directly */
#include "nvtxDetail/nvtxImplSync_v3.h"
#undef NVTX_IMPL_GUARD_SYNC
#endif /*NVTX_NO_IMPL*/
#endif /* NVTOOLSEXT_SYNC_V3 */
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Temporary helper #defines, #undef'ed at end of header.
* They hold the semantic version (major.minor) of the NVTX C++ wrappers
* provided by this header, and are compared against the
* NVTX3_CPP_INLINED_VERSION_* macros to decide which version supplies the
* unversioned symbols.
*/
#define NVTX3_CPP_VERSION_MAJOR 1
#define NVTX3_CPP_VERSION_MINOR 0
/* This section handles the decision of whether to provide unversioned symbols.
* If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is #defined, unversioned symbols are
* not provided, and explicit-version symbols such as nvtx3::v1::scoped_range
* and NVTX3_V1_FUNC_RANGE must be used. By default, the first #include of this
* header will define the unversioned symbols such as nvtx3::scoped_range and
* NVTX3_FUNC_RANGE. Subsequently including a different major version of this
* header without #defining NVTX3_CPP_REQUIRE_EXPLICIT_VERSION triggers an error
* since the symbols would conflict. Subsequently including a different
* minor version within the same major version is allowed. Functionality of
* minor versions is cumulative, regardless of include order.
*
* Since NVTX3_CPP_REQUIRE_EXPLICIT_VERSION allows all combinations of versions
* to coexist without problems within a translation unit, the recommended best
* practice for instrumenting header-based libraries with NVTX C++ Wrappers
* is to #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before including nvtx3.hpp,
* #undef it afterward, and only use explicit-version symbols. This is not
* necessary in common cases, such as instrumenting a standalone application, or
* static/shared libraries in .cpp files or headers private to those projects.
*/
/* clang-format off */
#if !defined(NVTX3_CPP_REQUIRE_EXPLICIT_VERSION)
/* Define macro used by all definitions in this header to indicate the
* unversioned symbols should be defined in addition to the versioned ones.
*/
#define NVTX3_INLINE_THIS_VERSION
#if !defined(NVTX3_CPP_INLINED_VERSION_MAJOR)
/* First occurrence of this header in the translation unit. Define macros
* indicating which version shall be used for unversioned symbols.
*
* The values are written as literals that mirror NVTX3_CPP_VERSION_MAJOR and
* NVTX3_CPP_VERSION_MINOR (named in the trailing comments) rather than
* referencing them. Presumably this is because those helpers are #undef'ed at
* the end of the header while these macros must stay usable afterwards.
*/
/**
* @brief Semantic major version number for NVTX C++ wrappers of unversioned symbols
*
* Breaking changes may occur between major versions, and different major versions
* cannot provide unversioned symbols in the same translation unit (.cpp file).
*
* Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
*
* Not to be confused with the version number of the NVTX core library.
*/
#define NVTX3_CPP_INLINED_VERSION_MAJOR 1 // NVTX3_CPP_VERSION_MAJOR
/**
* @brief Semantic minor version number for NVTX C++ wrappers of unversioned symbols
*
* No breaking changes occur between minor versions -- minor version changes within
* a major version are purely additive.
*
* Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
*
* Not to be confused with the version number of the NVTX core library.
*/
#define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
#elif NVTX3_CPP_INLINED_VERSION_MAJOR != NVTX3_CPP_VERSION_MAJOR
/* Unsupported case -- cannot define unversioned symbols for different major versions
* in the same translation unit.
*/
#error \
"Two different major versions of the NVTX C++ Wrappers are being included in a single .cpp file, with unversioned symbols enabled in both. Only one major version can enable unversioned symbols in a .cpp file. To disable unversioned symbols, #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before #including nvtx3.hpp, and use the explicit-version symbols instead -- this is the preferred way to use nvtx3.hpp from a header file."
#elif (NVTX3_CPP_INLINED_VERSION_MAJOR == NVTX3_CPP_VERSION_MAJOR) && \
(NVTX3_CPP_INLINED_VERSION_MINOR < NVTX3_CPP_VERSION_MINOR)
/* An older minor version of the same major version already defined unversioned
* symbols. The new features provided in this header will be inlined, so
* redefine the minor version macro to this header's version.
*/
#undef NVTX3_CPP_INLINED_VERSION_MINOR
#define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
// else, already have this version or newer, nothing to do
#endif /* NVTX3_CPP_INLINED_VERSION_MAJOR checks */
#endif /* !defined(NVTX3_CPP_REQUIRE_EXPLICIT_VERSION) */
/* clang-format on */
/**
* @file nvtx3.hpp
*
* @brief Provides C++ constructs making the NVTX library safer and easier to
* use with zero overhead.
*/
/**
* \mainpage
* \tableofcontents
*
* \section QUICK_START Quick Start
*
* To add NVTX ranges to your code, use the `nvtx3::scoped_range` RAII object. A
* range begins when the object is created, and ends when the object is
* destroyed.
*
* \code{.cpp}
* #include "nvtx3.hpp"
* void some_function() {
* // Begins an NVTX range with the message "some_function"
* // The range ends when some_function() returns and `r` is destroyed
* nvtx3::scoped_range r{"some_function"};
*
* for(int i = 0; i < 6; ++i) {
* nvtx3::scoped_range loop{"loop range"};
* std::this_thread::sleep_for(std::chrono::seconds{1});
* }
* } // Range ends when `r` is destroyed
* \endcode
*
* The example code above generates the following timeline view in Nsight
* Systems:
*
* \image html
* https://raw.githubusercontent.com/NVIDIA/NVTX/release-v3/docs/images/example_range.png
*
* Alternatively, use the \ref MACROS like `NVTX3_FUNC_RANGE()` to add
* ranges to your code that automatically use the name of the enclosing function
* as the range's message.
*
* \code{.cpp}
* #include "nvtx3.hpp"
* void some_function() {
* // Creates a range with a message "some_function" that ends when the
* // enclosing function returns
* NVTX3_FUNC_RANGE();
* ...
* }
* \endcode
*
*
* \section Overview
*
* The NVTX library provides a set of functions for users to annotate their code
* to aid in performance profiling and optimization. These annotations provide
* information to tools like Nsight Systems to improve visualization of
* application timelines.
*
* \ref RANGES are one of the most commonly used NVTX constructs for annotating
* a span of time. For example, imagine a user wanted to see every time a
* function, `my_function`, is called and how long it takes to execute. This can
* be accomplished with an NVTX range created on the entry to the function and
* terminated on return from `my_function` using the push/pop C APIs:
*
* \code{.cpp}
* void my_function(...) {
* nvtxRangePushA("my_function"); // Begins NVTX range
* // do work
* nvtxRangePop(); // Ends NVTX range
* }
* \endcode
*
* One of the challenges with using the NVTX C API is that it requires manually
* terminating the end of the range with `nvtxRangePop`. This can be challenging
* if `my_function()` has multiple returns or can throw exceptions as it
* requires calling `nvtxRangePop()` before all possible return points.
*
* NVTX C++ solves this inconvenience through the "RAII" technique by providing
* a `nvtx3::scoped_range` class that begins a range at construction and ends
* the range on destruction. The above example then becomes:
*
* \code{.cpp}
* void my_function(...) {
* nvtx3::scoped_range r{"my_function"}; // Begins NVTX range
* // do work
* } // Range ends on exit from `my_function` when `r` is destroyed
* \endcode
*
* The range object `r` is deterministically destroyed whenever `my_function`
* returns---ending the NVTX range without manual intervention. For more
* information, see \ref RANGES and `nvtx3::scoped_range_in`.
*
* Another inconvenience of the NVTX C APIs are the several constructs where the
* user is expected to initialize an object at the beginning of an application
* and reuse that object throughout the lifetime of the application. For example
* see domains, categories, and registered messages.
*
* Example:
* \code{.cpp}
* nvtxDomainHandle_t D = nvtxDomainCreateA("my domain");
* // Reuse `D` throughout the rest of the application
* \endcode
*
* This can be problematic if the user application or library does not have an
* explicit initialization function called before all other functions to
* ensure that these long-lived objects are initialized before being used.
*
* NVTX C++ makes use of the "construct on first use" technique to alleviate
* this inconvenience. In short, a function local static object is constructed
* upon the first invocation of a function and returns a reference to that
* object on all future invocations. See the documentation for `nvtx3::domain`,
* `nvtx3::named_category`, `nvtx3::registered_string`, and
* https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use for more
* information.
*
* Using construct on first use, the above example becomes:
* \code{.cpp}
* struct my_domain{ static constexpr char const* name{"my domain"}; };
*
* // The first invocation of `domain::get` for the type `my_domain` will
* // construct a `nvtx3::domain` object and return a reference to it. Future
* // invocations simply return a reference.
* nvtx3::domain const& D = nvtx3::domain::get<my_domain>();
* \endcode
* For more information about NVTX and how it can be used, see
* https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx and
* https://devblogs.nvidia.com/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/
*
* \section RANGES Ranges
*
* Ranges are used to describe a span of time during the execution of an
* application. Common examples are using ranges to annotate the time it takes
* to execute a function or an iteration of a loop.
*
* NVTX C++ uses RAII to automate the generation of ranges that are tied to the
* lifetime of objects, similar to `std::lock_guard` in the C++ Standard
* Library.
*
* \subsection scoped_range Scoped Range
*
* `nvtx3::scoped_range_in` is a class that begins a range upon construction
* and ends the range at destruction. This is one of the most commonly used
* constructs in NVTX C++ and is useful for annotating spans of time on a
* particular thread. These ranges can be nested to arbitrary depths.
*
* `nvtx3::scoped_range` is an alias for a `nvtx3::scoped_range_in` in the
* global NVTX domain. For more information about Domains, see \ref DOMAINS.
*
* Various attributes of a range can be configured by constructing a
* `nvtx3::scoped_range_in` with a `nvtx3::event_attributes` object. For
* more information, see \ref ATTRIBUTES.
*
* Example:
*
* \code{.cpp}
* void some_function() {
* // Creates a range for the duration of `some_function`
* nvtx3::scoped_range r{};
*
* while(true) {
* // Creates a range for every loop iteration
* // `loop_range` is nested inside `r`
* nvtx3::scoped_range loop_range{};
* }
* }
* \endcode
*
* \subsection unique_range Unique Range
*
* `nvtx3::unique_range` is similar to `nvtx3::scoped_range`, with a few key differences:
* - `unique_range` objects can be destroyed in any order whereas `scoped_range` objects must be
* destroyed in exact reverse creation order
* - `unique_range` can start and end on different threads
* - `unique_range` is moveable
* - `unique_range` objects can be constructed as heap objects
*
* There is extra overhead associated with `unique_range` constructs and therefore use of
* `nvtx3::scoped_range_in` should be preferred.
*
* \section MARKS Marks
*
* `nvtx3::mark` annotates an instantaneous point in time with a "marker".
*
* Unlike a "range" which has a beginning and an end, a marker is a single event
* in an application, such as detecting a problem:
*
* \code{.cpp}
* bool success = do_operation(...);
* if (!success) {
* nvtx3::mark("operation failed!");
* }
* \endcode
*
* \section DOMAINS Domains
*
* Similar to C++ namespaces, domains allow for scoping NVTX events. By default,
* all NVTX events belong to the "global" domain. Libraries and applications
* should scope their events to use a custom domain to differentiate where the
* events originate from.
*
* It is common for a library or application to have only a single domain and
* for the name of that domain to be known at compile time. Therefore, Domains
* in NVTX C++ are represented by _tag types_.
*
* For example, to define a custom domain, simply define a new concrete type
* (a `class` or `struct`) with a `static` member called `name` that contains
* the desired name of the domain.
*
* \code{.cpp}
* struct my_domain{ static constexpr char const* name{"my domain"}; };
* \endcode
*
* For any NVTX C++ construct that can be scoped to a domain, the type
* `my_domain` can be passed as an explicit template argument to scope it to
* the custom domain.
*
* The tag type `nvtx3::domain::global` represents the global NVTX domain.
*
* \code{.cpp}
* // By default, `scoped_range_in` belongs to the global domain
* nvtx3::scoped_range_in<> r0{};
*
* // Alias for a `scoped_range_in` in the global domain
* nvtx3::scoped_range r1{};
*
* // `r` belongs to the custom domain
* nvtx3::scoped_range_in<my_domain> r{};
* \endcode
*
* When using a custom domain, it is recommended to define type aliases for NVTX
* constructs in the custom domain.
* \code{.cpp}
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
* using my_registered_string = nvtx3::registered_string_in<my_domain>;
* using my_named_category = nvtx3::named_category_in<my_domain>;
* \endcode
*
* See `nvtx3::domain` for more information.
*
* \section ATTRIBUTES Event Attributes
*
* NVTX events can be customized with various attributes to provide additional
* information (such as a custom message) or to control visualization of the
* event (such as the color used). These attributes can be specified per-event
* via arguments to a `nvtx3::event_attributes` object.
*
* NVTX events can be customized via four "attributes":
* - \ref COLOR : color used to visualize the event in tools.
* - \ref MESSAGES : Custom message string.
* - \ref PAYLOAD : User-defined numerical value.
* - \ref CATEGORY : Intra-domain grouping.
*
* It is possible to construct a `nvtx3::event_attributes` from any number of
* attribute objects (nvtx3::color, nvtx3::message, nvtx3::payload,
* nvtx3::category) in any order. If an attribute is not specified, a tool
* specific default value is used. See `nvtx3::event_attributes` for more
* information.
*
* \code{.cpp}
* // Set message, same as passing nvtx3::message{"message"}
* nvtx3::event_attributes attr{"message"};
*
* // Set message and color
* nvtx3::event_attributes attr{"message", nvtx3::rgb{127, 255, 0}};
*
* // Set message, color, payload, category
* nvtx3::event_attributes attr{"message",
* nvtx3::rgb{127, 255, 0},
* nvtx3::payload{42},
* nvtx3::category{1}};
*
* // Same as above -- can use any order of arguments
* nvtx3::event_attributes attr{nvtx3::payload{42},
* nvtx3::category{1},
* "message",
* nvtx3::rgb{127, 255, 0}};
*
* // Multiple arguments of the same type are allowed, but only the first is
* // used -- in this example, payload is set to 42:
* nvtx3::event_attributes attr{ nvtx3::payload{42}, nvtx3::payload{7} };
*
* // Using the nvtx3 namespace in a local scope makes the syntax more succinct:
* using namespace nvtx3;
* event_attributes attr{"message", rgb{127, 255, 0}, payload{42}, category{1}};
* \endcode
*
* \subsection MESSAGES message
*
* `nvtx3::message` sets the message string for an NVTX event.
*
* Example:
* \code{.cpp}
* // Create an `event_attributes` with the message "my message"
* nvtx3::event_attributes attr{nvtx3::message{"my message"}};
*
* // strings and string literals implicitly assumed to be a `nvtx3::message`
* nvtx3::event_attributes attr{"my message"};
* \endcode
*
* \subsubsection REGISTERED_MESSAGE Registered Messages
*
* Associating a `nvtx3::message` with an event requires copying the contents of
* the message every time the message is used, i.e., copying the entire message
* string. This may cause non-trivial overhead in performance sensitive code.
*
* To eliminate this overhead, NVTX allows registering a message string,
* yielding a "handle" that is inexpensive to copy that may be used in place of
* a message string. When visualizing the events, tools such as Nsight Systems
* will take care of mapping the message handle to its string.
*
* A message should be registered once and the handle reused throughout the rest
* of the application. This can be done by either explicitly creating static
* `nvtx3::registered_string` objects, or using the
* `nvtx3::registered_string::get` construct on first use helper (recommended).
*
* Similar to \ref DOMAINS, `nvtx3::registered_string::get` requires defining a
* custom tag type with a static `message` member whose value will be the
* contents of the registered string.
*
* Example:
* \code{.cpp}
* // Explicitly constructed, static `registered_string` in my_domain:
* static nvtx3::registered_string_in<my_domain> static_message{"my message"};
*
* // Or use construct on first use:
* // Define a tag type with a `message` member string to register
* struct my_message{ static constexpr char const* message{ "my message" }; };
*
* // Uses construct on first use to register the contents of
* // `my_message::message`
* auto& msg = nvtx3::registered_string_in<my_domain>::get<my_message>();
* \endcode
*
* \subsection COLOR color
*
* Associating a `nvtx3::color` with an event allows controlling how the event
* is visualized in a tool such as Nsight Systems. This is a convenient way to
* visually differentiate among different events.
*
* \code{.cpp}
* // Define a color via rgb color values
* nvtx3::color c{nvtx3::rgb{127, 255, 0}};
* nvtx3::event_attributes attr{c};
*
* // rgb color values can be passed directly to an `event_attributes`
* nvtx3::event_attributes attr1{nvtx3::rgb{127,255,0}};
* \endcode
*
* \subsection CATEGORY category
*
* A `nvtx3::category` is simply an integer id that allows for fine-grain
* grouping of NVTX events. For example, one might use separate categories for
* IO, memory allocation, compute, etc.
*
* \code{.cpp}
* nvtx3::event_attributes{nvtx3::category{1}};
* \endcode
*
* \subsubsection NAMED_CATEGORIES Named Categories
*
* Associates a `name` string with a category `id` to help differentiate among
* categories.
*
* For any given category id `Id`, a `named_category{Id, "name"}` should only
* be constructed once and reused throughout an application. This can be done by
* either explicitly creating static `nvtx3::named_category` objects, or using
* the `nvtx3::named_category::get` construct on first use helper (recommended).
*
* Similar to \ref DOMAINS, `nvtx3::named_category::get` requires defining a
* custom tag type with static `name` and `id` members.
*
* \code{.cpp}
* // Explicitly constructed, static `named_category` in my_domain:
* static nvtx3::named_category_in<my_domain> static_category{42, "my category"};
*
* // Or use construct on first use:
* // Define a tag type with `name` and `id` members
* struct my_category {
* static constexpr char const* name{"my category"}; // category name
* static constexpr uint32_t id{42}; // category id
* };
*
* // Use construct on first use to name the category id `42`
* // with name "my category":
* auto& cat = nvtx3::named_category_in<my_domain>::get<my_category>();
*
* // Event attributes `attr` associated with category id `42`
* nvtx3::event_attributes attr{cat};
* \endcode
*
* \subsection PAYLOAD payload
*
* Allows associating a user-defined numerical value with an event.
*
* \code{.cpp}
* // Constructs a payload from the `int32_t` value 42
* nvtx3::event_attributes attr{nvtx3::payload{42}};
* \endcode
*
*
* \section EXAMPLE Example
*
* Putting it all together:
* \code{.cpp}
* // Define a custom domain tag type
* struct my_domain{ static constexpr char const* name{"my domain"}; };
*
* // Define a named category tag type
* struct my_category{
* static constexpr char const* name{"my category"};
* static constexpr uint32_t id{42};
* };
*
* // Define a registered string tag type
* struct my_message{ static constexpr char const* message{"my message"}; };
*
* // For convenience, use aliases for domain scoped objects
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
* using my_registered_string = nvtx3::registered_string_in<my_domain>;
* using my_named_category = nvtx3::named_category_in<my_domain>;
*
* // Default values for all attributes
* nvtx3::event_attributes attr{};
* my_scoped_range r0{attr};
*
* // Custom (unregistered) message, and unnamed category
* nvtx3::event_attributes attr1{"message", nvtx3::category{2}};
* my_scoped_range r1{attr1};
*
* // Alternatively, pass arguments of `event_attributes` ctor directly to
* // `my_scoped_range`
* my_scoped_range r2{"message", nvtx3::category{2}};
*
* // construct on first use a registered string
* auto& msg = my_registered_string::get<my_message>();
*
* // construct on first use a named category
* auto& cat = my_named_category::get<my_category>();
*
* // Use registered string and named category with a custom payload
* my_scoped_range r3{msg, cat, nvtx3::payload{42}};
*
* // Any number of arguments in any order
* my_scoped_range r{nvtx3::rgb{127, 255,0}, msg};
*
* \endcode
* \section MACROS Convenience Macros
*
* Oftentimes users want to quickly and easily add NVTX ranges to their library
* or application to aid in profiling and optimization.
*
* A convenient way to do this is to use the \ref NVTX3_FUNC_RANGE and
* \ref NVTX3_FUNC_RANGE_IN macros. These macros take care of constructing an
* `nvtx3::scoped_range_in` with the name of the enclosing function as the
* range's message.
*
* \code{.cpp}
* void some_function() {
* // Automatically generates an NVTX range for the duration of the function
* // using "some_function" as the event's message.
* NVTX3_FUNC_RANGE();
* }
* \endcode
*
*/
/* Temporary helper #defines, removed with #undef at end of header */
/* Some compilers do not correctly support SFINAE, which is used in this API
* to detect common usage errors and provide clearer error messages (by using
* static_assert) than the compiler would produce otherwise. These compilers
* will generate errors while compiling this file such as:
*
* error: ‘name’ is not a member of ‘nvtx3::v1::domain::global’
*
* The following compiler versions are known to have this problem, and so are
* set by default to disable the SFINAE-based checks:
*
* - All MSVC versions prior to VS2017 Update 7 (15.7)
* - GCC 8.1-8.3 (the problem was fixed in GCC 8.4)
*
* If you find your compiler hits this problem, you can work around it by
* defining NVTX3_USE_CHECKED_OVERLOADS_FOR_GET to 0 before including this
* header, or you can add a check for your compiler version to this #if.
* Also, please report the issue on the NVTX github page.
*/
#if !defined(NVTX3_USE_CHECKED_OVERLOADS_FOR_GET)
/* No override from the includer: choose a default from the compiler version.
 * The checks below correspond to the compilers listed above (_MSC_VER < 1914
 * for MSVC before VS2017 15.7; GCC major version 8 with minor version < 4). */
#if defined(_MSC_VER) && _MSC_VER < 1914 \
|| defined(__GNUC__) && __GNUC__ == 8 && __GNUC_MINOR__ < 4
#define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 0
#else
#define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 1
#endif
/* Records that the value above was chosen by this header rather than by the
 * includer. */
#define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE
#endif
/* Within this header, nvtx3::NVTX3_VERSION_NAMESPACE resolves to nvtx3::vX,
* where "X" is the major version number. */
/* Two-step paste: NVTX3_NAMESPACE_FOR receives the version macro as an
 * ordinary argument, so it is expanded to its value before NVTX3_CONCAT
 * applies ##. Pasting directly would produce "vNVTX3_CPP_VERSION_MAJOR". */
#define NVTX3_CONCAT(A, B) A##B
#define NVTX3_NAMESPACE_FOR(VERSION) NVTX3_CONCAT(v, VERSION)
#define NVTX3_VERSION_NAMESPACE NVTX3_NAMESPACE_FOR(NVTX3_CPP_VERSION_MAJOR)
/* Avoid duplicating #if defined(NVTX3_INLINE_THIS_VERSION) for namespaces
* in each minor version by making a macro to use unconditionally, which
* resolves to "inline" or nothing as appropriate. */
#if defined(NVTX3_INLINE_THIS_VERSION)
/* The version namespace below is opened as an `inline namespace`. */
#define NVTX3_INLINE_IF_REQUESTED inline
#else
/* Expands to nothing: the version namespace is an ordinary nested namespace. */
#define NVTX3_INLINE_IF_REQUESTED
#endif
/* Enables the use of constexpr when support for C++14 constexpr is present.
*
* Initialization of a class member that is a union to a specific union member
* can only be done in the body of a constructor, not in a member initializer
* list. A constexpr constructor must have an empty body until C++14, so there
* is no way to make an initializer of a member union constexpr in C++11. This
* macro allows making functions constexpr in C++14 or newer, but non-constexpr
* in C++11 compilation. It is used here on constructors that initialize their
* member unions.
*/
/* 201304L is the __cpp_constexpr feature-test value for C++14 relaxed
 * constexpr. If the compiler does not define __cpp_constexpr at all, the
 * identifier evaluates to 0 in this #if and the non-constexpr branch is used. */
#if __cpp_constexpr >= 201304L
#define NVTX3_CONSTEXPR_IF_CPP14 constexpr
#else
#define NVTX3_CONSTEXPR_IF_CPP14
#endif
/* Use a macro for static asserts, which defaults to static_assert, but that
* testing tools can replace with a logging function. For example:
* #define NVTX3_STATIC_ASSERT(c, m) \
* do { if (!(c)) printf("static_assert would fail: %s\n", m); } while (0)
*/
#if !defined(NVTX3_STATIC_ASSERT)
/* Note: the default expansion already ends with ';', so a call site written
 * as `NVTX3_STATIC_ASSERT(c, m);` expands to the assertion followed by an
 * empty statement. */
#define NVTX3_STATIC_ASSERT(condition, message) static_assert(condition, message);
/* Records that the macro was defined by this header rather than by the
 * includer. */
#define NVTX3_STATIC_ASSERT_DEFINED_HERE
#endif
/* Implementation sections, enclosed in guard macros for each minor version */
#ifndef NVTX3_CPP_DEFINITIONS_V1_0
#define NVTX3_CPP_DEFINITIONS_V1_0
#include "nvToolsExt.h"
#include "nvToolsExtPayload.h"
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <cstddef>
namespace nvtx3 {
NVTX3_INLINE_IF_REQUESTED namespace NVTX3_VERSION_NAMESPACE
{
namespace detail {

/// Always derives from `std::false_type`, but depends on a template
/// parameter so that a static_assert using it is only evaluated when the
/// enclosing template is instantiated.
template <typename Ignored>
struct always_false : std::false_type {};

/// Primary template: `Type::name` is not a valid expression.
template <typename Type, typename = void>
struct has_name : std::false_type {};

/// Selected when the expression `Type::name` is well-formed.
template <typename Type>
struct has_name<Type, decltype((void)Type::name, void())> : std::true_type {};

/// Primary template: `Type::id` is not a valid expression.
template <typename Type, typename = void>
struct has_id : std::false_type {};

/// Selected when the expression `Type::id` is well-formed.
template <typename Type>
struct has_id<Type, decltype((void)Type::id, void())> : std::true_type {};

/// Primary template: `Type::message` is not a valid expression.
template <typename Type, typename = void>
struct has_message : std::false_type {};

/// Selected when the expression `Type::message` is well-formed.
template <typename Type>
struct has_message<Type, decltype((void)Type::message, void())> : std::true_type {};

/// Primary template: `Type` converts to neither kind of C string.
template <typename Type, typename = void>
struct is_c_string : std::false_type {};

/// Selected when `Type` is implicitly convertible to `char const*` or to
/// `wchar_t const*`.
template <typename Type>
struct is_c_string<
  Type,
  typename std::enable_if<
    std::is_convertible<Type, char const*   >::value ||
    std::is_convertible<Type, wchar_t const*>::value
  >::type>
  : std::true_type {};

/// True when `Type`, after `std::decay` (references and cv-qualifiers
/// removed), is exactly `uint32_t`.
template <typename Type>
using is_uint32 = std::is_same<typename std::decay<Type>::type, uint32_t>;

}  // namespace detail
/**
 * @brief A `domain` is a named scope for NVTX events that keeps them apart
 * from events emitted in other `domain`s.
 *
 * NVTX constructs that do not name a domain are placed in the "global"
 * domain. A library or application can use its own `domain` to separate its
 * events from everyone else's.
 *
 * A domain is expected to be long-lived and unique to a library or
 * application, so its name is assumed to be known at compile time. For that
 * reason every NVTX construct that can be tied to a domain takes the domain
 * as a *type* `D`, passed as an explicit template parameter. Use
 * `domain::global` to select the global NVTX domain.
 *
 * Users never construct a `domain` directly. To use a custom domain, define
 * a type `D` with a member `D::name` that resolves to a `char const*` or a
 * `wchar_t const*`; the value of `D::name` names and uniquely identifies the
 * domain.
 *
 * The first NVTX construct used with `D` creates a function-local static
 * `domain` object ("construct on first use"); every later construct used
 * with `D` refers to that same object. See `domain::get`.
 *
 * Example:
 * \code{.cpp}
 * // `my_domain::name` names the `domain` identified by `my_domain`.
 * struct my_domain{ static constexpr char const* name{"my_domain"}; };
 *
 * // `r` is grouped with every other NVTX construct that uses `my_domain`.
 * nvtx3::scoped_range_in<my_domain> r{};
 *
 * // An alias keeps the custom domain out of the call sites
 * using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
 * my_scoped_range my_range{};
 *
 * // `domain::global` selects the global NVTX domain
 * nvtx3::scoped_range_in<domain::global> r2{};
 *
 * // `nvtx3::scoped_range` is a convenience alias for a range in the
 * // global domain
 * nvtx3::scoped_range r3{};
 * \endcode
 */
class domain {
 public:
  // A domain is neither copyable nor movable.
  domain(domain const&) = delete;
  domain& operator=(domain const&) = delete;
  domain(domain&&) = delete;
  domain& operator=(domain&&) = delete;

  /**
   * @brief Tag type selecting the "global" NVTX domain.
   *
   * Pass this type wherever a function or class expects a type identifying a
   * domain to request the global domain.
   *
   * Events in the global domain are grouped together across all libraries
   * and applications.
   */
  struct global {
  };

#if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
  /**
   * @brief Returns a reference to the function-local static `domain` object
   * associated with the type `D`.
   *
   * Implements the "construct on first use" idiom: the first call to
   * `domain::get<D>()` constructs the `domain`, and every later call returns
   * a reference to that same object. See
   * https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
   *
   * Nothing in this header requires users to call `domain::get` themselves;
   * it is invoked automatically when objects such as a `scoped_range_in` or
   * a `category` are constructed. Advanced users mixing in the NVTX C API
   * may still find it convenient for obtaining a domain handle.
   *
   * This function is threadsafe as of C++11. If several threads call
   * `domain::get<D>` concurrently, exactly one constructs the `domain`
   * and the others receive a reference once construction has finished.
   *
   * The domain's name comes from the type `D` passed as an explicit template
   * parameter. `D` must contain a member `D::name` that resolves to a
   * `char const*` or a `wchar_t const*`; its value names and uniquely
   * identifies the `domain`.
   *
   * Example:
   * \code{.cpp}
   * // `my_domain::name` names the `domain` identified by `my_domain`.
   * struct my_domain{ static constexpr char const* name{"my domain"}; };
   *
   * auto& D1 = domain::get<my_domain>(); // First call constructs a
   *                                      // `domain` named "my domain"
   *
   * auto& D2 = domain::get<my_domain>(); // Later calls return the
   *                                      // already-constructed `domain`
   * \endcode
   *
   * @tparam D Type containing the `D::name` member that names the `domain`.
   * @return Reference to the `domain` corresponding to the type `D`.
   */
  template <typename D = global,
            typename std::enable_if<
              detail::is_c_string<decltype(D::name)>::value
            , int>::type = 0>
  static domain const& get() noexcept
  {
    static domain const instance(D::name);
    return instance;
  }

  /**
   * @brief Overload of `domain::get` that produces a clear compile error
   * when `D` has a `name` member that is not directly convertible to
   * `char const*` or `wchar_t const*`.
   */
  template <typename D = global,
            typename std::enable_if<
              !detail::is_c_string<decltype(D::name)>::value
            , int>::type = 0>
  static domain const& get() noexcept
  {
    NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
      "Type used to identify an NVTX domain must contain a static constexpr member "
      "called 'name' of type const char* or const wchar_t* -- 'name' member is not "
      "convertible to either of those types");
    // The body must still be well-formed so that the static_assert is reached.
    static domain const placeholder;
    return placeholder;
  }

  /**
   * @brief Overload of `domain::get` that produces a clear compile error
   * when `D` has no `name` member at all.
   */
  template <typename D = global,
            typename std::enable_if<
              !detail::has_name<D>::value
            , int>::type = 0>
  static domain const& get() noexcept
  {
    NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
      "Type used to identify an NVTX domain must contain a static constexpr member "
      "called 'name' of type const char* or const wchar_t* -- 'name' member is missing");
    // The body must still be well-formed so that the static_assert is reached.
    static domain const placeholder;
    return placeholder;
  }
#else
  // Unchecked variant for compilers where the SFINAE-based overloads above
  // are disabled: a bad `D` fails with whatever error the compiler emits.
  template <typename D = global>
  static domain const& get() noexcept
  {
    static domain const instance(D::name);
    return instance;
  }
#endif

  /**
   * @brief Conversion operator to `nvtxDomainHandle_t`.
   *
   * Lets a `domain` object be passed directly to an API that expects a
   * native `nvtxDomainHandle_t`.
   */
  operator nvtxDomainHandle_t() const noexcept { return _domain; }

 private:
  /**
   * @brief Creates a domain named by the narrow string `name`.
   *
   * Private because `domain` objects are meant to be created only through
   * `domain::get`.
   *
   * @param name A unique name identifying the domain
   */
  explicit domain(char const* name) noexcept : _domain(nvtxDomainCreateA(name)) {}

  /**
   * @brief Creates a domain named by the wide string `name`.
   *
   * Private because `domain` objects are meant to be created only through
   * `domain::get`.
   *
   * @param name A unique name identifying the domain
   */
  explicit domain(wchar_t const* name) noexcept : _domain(nvtxDomainCreateW(name)) {}

  /**
   * @brief Creates a domain named by the contents of `name`; forwards to the
   * `char const*` constructor.
   *
   * Private because `domain` objects are meant to be created only through
   * `domain::get`.
   *
   * @param name A unique name identifying the domain
   */
  explicit domain(std::string const& name) noexcept : domain(name.c_str()) {}

  /**
   * @brief Creates a domain named by the contents of `name`; forwards to the
   * `wchar_t const*` constructor.
   *
   * Private because `domain` objects are meant to be created only through
   * `domain::get`.
   *
   * @param name A unique name identifying the domain
   */
  explicit domain(std::wstring const& name) noexcept : domain(name.c_str()) {}

  /**
   * @brief Default constructor creates a `domain` representing the "global"
   * NVTX domain (the handle keeps its value-initialized default).
   *
   * All events not associated with a custom `domain` are grouped in the
   * "global" NVTX domain.
   */
  domain() noexcept {}

  /**
   * @brief Intentionally does not call nvtxDomainDestroy on the `domain`.
   *
   * No currently-available tools attempt to free domain resources when
   * nvtxDomainDestroy is called, because of the thread-safety and efficiency
   * challenges of freeing thread-local storage that belongs to other
   * threads. Libraries may also be disallowed from introducing static
   * destructors. Since destroying the domain is likely to have no effect,
   * the destructor deliberately leaves the domain alone.
   *
   * If domain destruction is required, call nvtxDomainDestroy manually on
   * the domain's handle.
   *
   * NOTE(review): the constructors and this destructor are private, so a
   * class deriving from `domain` cannot construct or destroy its base
   * without an access change; the manual call is the route available here.
   */
  ~domain() = default;

  nvtxDomainHandle_t const _domain{};  ///< The `domain`s NVTX handle
};
/**
 * @brief Specialization of `domain::get` for the global NVTX domain.
 *
 * For `domain::global` there is no `name` member to read; a default
 * constructed `domain` object is returned instead, which represents the
 * "global" domain.
 *
 * All NVTX events in the global domain across all libraries and applications
 * will be grouped together.
 *
 * @return Reference to the `domain` corresponding to the global NVTX domain.
 */
template <>
inline domain const& domain::get<domain::global>() noexcept
{
  static domain const global_instance;
  return global_instance;
}
/**
 * @brief Red, green, and blue channel values of an RGB color to use as an
 * event attribute (no transparency information is carried).
 */
struct rgb {
  /// Integer type holding a single channel value
  using component_type = uint8_t;

  /**
   * @brief Builds an `rgb` from its three channel values.
   *
   * Valid values are in the range `[0,255]`.
   *
   * @param red_ Value of the red channel
   * @param green_ Value of the green channel
   * @param blue_ Value of the blue channel
   */
  constexpr rgb(component_type red_, component_type green_, component_type blue_) noexcept
    : red(red_), green(green_), blue(blue_)
  {
  }

  component_type red{};    ///< Red channel value
  component_type green{};  ///< Green channel value
  component_type blue{};   ///< Blue channel value
};
/**
 * @brief Alpha, red, green, and blue channel values of an ARGB color to use
 * as an event attribute.
 *
 * Extends `rgb` with an alpha (opacity) channel.
 */
struct argb final : rgb {
  /**
   * @brief Builds an `argb` from its four channel values.
   *
   * Valid values are in the range `[0,255]`.
   *
   * @param alpha_ Value of the alpha channel (opacity)
   * @param red_ Value of the red channel
   * @param green_ Value of the green channel
   * @param blue_ Value of the blue channel
   */
  constexpr argb(component_type alpha_,
                 component_type red_,
                 component_type green_,
                 component_type blue_) noexcept
    : rgb(red_, green_, blue_), alpha(alpha_)
  {
  }

  component_type alpha{};  ///< Alpha channel value
};
/**
 * @brief A custom color that can be attached to an NVTX event through its
 * `event_attributes`.
 *
 * Giving NVTX events distinct colors is a convenient way to tell them apart
 * in a visualization tool such as Nsight Systems.
 */
class color {
 public:
  /// Integer type holding the packed color value
  using value_type = uint32_t;

  /**
   * @brief Builds a `color` directly from a packed hex code.
   *
   * `hex_code` is expected to be a 4 byte argb hex code, laid out from most
   * to least significant byte as:
   *  - alpha channel (opacity) (0-255)
   *  - red channel (0-255)
   *  - green channel (0-255)
   *  - blue channel (0-255)
   *
   * @param hex_code The hex code used to construct the `color`
   */
  constexpr explicit color(value_type hex_code) noexcept : _value(hex_code) {}

  /**
   * @brief Builds a `color` from the alpha, red, green, and blue components
   * of `argb_`.
   *
   * @param argb_ The alpha, red, green, blue components of the desired `color`
   */
  constexpr color(argb argb_) noexcept
    : color(from_bytes_msb_to_lsb(argb_.alpha, argb_.red, argb_.green, argb_.blue))
  {
  }

  /**
   * @brief Builds a `color` from the red, green, and blue components of
   * `rgb_`.
   *
   * The alpha channel (opacity) is set to its maximum value, 0xFF.
   *
   * @param rgb_ The red, green, blue components of the desired `color`
   */
  constexpr color(rgb rgb_) noexcept
    : color(from_bytes_msb_to_lsb(0xFF, rgb_.red, rgb_.green, rgb_.blue))
  {
  }

  /**
   * @brief Returns the `color`s argb hex code.
   */
  constexpr value_type get_value() const noexcept { return _value; }

  /**
   * @brief Returns the NVTX color type of the color.
   */
  constexpr nvtxColorType_t get_type() const noexcept { return _type; }

  // A color always carries a value: no default construction, but freely
  // copyable and movable.
  color() = delete;
  ~color() = default;
  color(color const&) = default;
  color& operator=(color const&) = default;
  color(color&&) = default;
  color& operator=(color&&) = default;

 private:
  /**
   * @brief Packs four bytes into an unsigned 4 byte integer, with `byte3` as
   * the most significant byte and `byte0` as the least significant byte.
   */
  constexpr static value_type from_bytes_msb_to_lsb(
    uint8_t byte3,
    uint8_t byte2,
    uint8_t byte1,
    uint8_t byte0) noexcept
  {
    // Widen each byte to 32 bits before shifting it into position.
    return (static_cast<uint32_t>(byte3) << 24) |
           (static_cast<uint32_t>(byte2) << 16) |
           (static_cast<uint32_t>(byte1) << 8) |
           static_cast<uint32_t>(byte0);
  }

  value_type _value{};                     ///< color's argb color code
  nvtxColorType_t _type{NVTX_COLOR_ARGB};  ///< NVTX color type code
};
/**
 * @brief Object for grouping NVTX events within a domain.
 *
 * A `category` is nothing more than an integer id that allows fine-grained
 * grouping of NVTX events, e.g. separate categories for IO, memory
 * allocation, compute, etc.
 *
 * Example:
 * \code{.cpp}
 * nvtx3::category cat1{1};
 *
 * // Range `r1` belongs to the category identified by the value `1`.
 * nvtx3::scoped_range r1{cat1};
 *
 * // Range `r2` belongs to the same category as `r1`
 * nvtx3::scoped_range r2{nvtx3::category{1}};
 * \endcode
 *
 * To associate a name string with a category id, see `named_category`.
 */
class category {
 public:
  /// Integer type of a `category`'s id.
  using id_type = uint32_t;

  /**
   * @brief Builds an unnamed `category` identified only by `id`.
   *
   * All `category`s in a domain sharing the same `id` are equivalent.
   *
   * @param[in] id The `category`'s identifying value
   */
  constexpr explicit category(id_type id) noexcept : id_(id) {}

  /**
   * @brief Returns the id of the category.
   */
  constexpr id_type get_id() const noexcept { return id_; }

  // A category always has an id: no default construction, but freely
  // copyable and movable.
  category() = delete;
  ~category() = default;
  category(category const&) = default;
  category& operator=(category const&) = default;
  category(category&&) = default;
  category& operator=(category&&) = default;

 private:
  id_type id_{};  ///< category's unique identifier
};
/**
 * @brief A `category` with an associated name string.
 *
 * Attaches a `name` string to a category `id` to make categories easier to
 * tell apart.
 *
 * For any given category id `Id`, a `named_category(Id, "name")` should only
 * be constructed once and then reused throughout an application. Either
 * create static `named_category` objects explicitly, or use the
 * `named_category_in::get` construct-on-first-use helper (recommended).
 *
 * Creating two or more `named_category` objects with the same value for `id`
 * in the same domain results in undefined behavior.
 *
 * Similarly, behavior is undefined when a `named_category` and `category`
 * share the same value of `id`.
 *
 * Example:
 * \code{.cpp}
 * // Explicitly constructed, static `named_category` in global domain:
 * static nvtx3::named_category static_category{42, "my category"};
 *
 * // Range `r` associated with category id `42`
 * nvtx3::scoped_range r{static_category};
 *
 * // OR use construct on first use:
 *
 * // Define a type with `name` and `id` members
 * struct my_category {
 *    static constexpr char const* name{"my category"}; // category name
 *    static constexpr uint32_t id{42}; // category id
 * };
 *
 * // Use construct on first use to name the category id `42`
 * // with name "my category" in `my_domain`
 * auto& cat = named_category_in<my_domain>::get<my_category>();
 *
 * // Range `r2` in `my_domain`, associated with category id `42`
 * nvtx3::scoped_range_in<my_domain> r2{cat};
 * \endcode
 *
 * `named_category_in<D>`'s association of a name to a category id is local to
 * the domain specified by the type `D`. An id may have a different name in
 * another domain.
 *
 * @tparam D Type containing `name` member used to identify the `domain` to
 * which the `named_category_in` belongs. Else, `domain::global` to indicate
 * that the global NVTX domain should be used.
 */
template <typename D = domain::global>
class named_category_in final : public category {
 public:
#if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
  /**
   * @brief Returns a global instance of a `named_category_in` held as a
   * function-local static.
   *
   * Builds a `named_category_in<D>` whose name and id come from the type
   * `C`: `C::name` supplies the name and `C::id` supplies the category id.
   *
   * Useful for constructing a named `category` exactly once and reusing the
   * same instance throughout an application.
   *
   * Example:
   * \code{.cpp}
   * // Define a type with `name` and `id` members
   * struct my_category {
   *    static constexpr char const* name{"my category"}; // category name
   *    static constexpr uint32_t id{42}; // category id
   * };
   *
   * // Use construct on first use to name the category id `42`
   * // with name "my category" in `my_domain`
   * auto& cat = named_category_in<my_domain>::get<my_category>();
   *
   * // Range `r` in `my_domain`, associated with category id `42`
   * nvtx3::scoped_range_in<my_domain> r{cat};
   * \endcode
   *
   * Uses the "construct on first use" idiom to safely ensure the `category`
   * object is initialized exactly once. See
   * https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
   *
   * @tparam C Type containing a member `C::name` that resolves to either a
   * `char const*` or `wchar_t const*` and `C::id`.
   */
  template <typename C,
            typename std::enable_if<
              detail::is_c_string<decltype(C::name)>::value &&
              detail::is_uint32<decltype(C::id)>::value
            , int>::type = 0>
  static named_category_in const& get() noexcept
  {
    static named_category_in const instance(C::id, C::name);
    return instance;
  }

  /**
   * @brief Overload of `named_category_in::get` that produces a clear compile
   * error when `C` has the required `name` and `id` members, but they are
   * not the required types. `name` must be directly convertible to
   * `char const*` or `wchar_t const*`, and `id` must be `uint32_t`.
   */
  template <typename C,
            typename std::enable_if<
              !detail::is_c_string<decltype(C::name)>::value ||
              !detail::is_uint32<decltype(C::id)>::value
            , int>::type = 0>
  static named_category_in const& get() noexcept
  {
    NVTX3_STATIC_ASSERT(detail::is_c_string<decltype(C::name)>::value,
      "Type used to name an NVTX category must contain a static constexpr member "
      "called 'name' of type const char* or const wchar_t* -- 'name' member is not "
      "convertible to either of those types");
    NVTX3_STATIC_ASSERT(detail::is_uint32<decltype(C::id)>::value,
      "Type used to name an NVTX category must contain a static constexpr member "
      "called 'id' of type uint32_t -- 'id' member is the wrong type");
    // The body must still be well-formed so that the static_asserts are reached.
    static named_category_in const placeholder;
    return placeholder;
  }

  /**
   * @brief Overload of `named_category_in::get` that produces a clear compile
   * error when `C` does not have the required `name` and `id` members.
   */
  template <typename C,
            typename std::enable_if<
              !detail::has_name<C>::value ||
              !detail::has_id<C>::value
            , int>::type = 0>
  static named_category_in const& get() noexcept
  {
    NVTX3_STATIC_ASSERT(detail::has_name<C>::value,
      "Type used to name an NVTX category must contain a static constexpr member "
      "called 'name' of type const char* or const wchar_t* -- 'name' member is missing");
    NVTX3_STATIC_ASSERT(detail::has_id<C>::value,
      "Type used to name an NVTX category must contain a static constexpr member "
      "called 'id' of type uint32_t -- 'id' member is missing");
    // The body must still be well-formed so that the static_asserts are reached.
    static named_category_in const placeholder;
    return placeholder;
  }
#else
  // Unchecked variant for compilers where the SFINAE-based overloads above
  // are disabled: a bad `C` fails with whatever error the compiler emits.
  template <typename C>
  static named_category_in const& get() noexcept
  {
    static named_category_in const instance(C::id, C::name);
    return instance;
  }
#endif

 private:
  // Default constructor is only used internally for static_assert(false) cases.
  named_category_in() noexcept : category(0) {}

 public:
  /**
   * @brief Builds a `named_category_in` with the given `id` and narrow-string
   * `name`.
   *
   * The name `name` will be registered with `id`.
   *
   * Every unique value of `id` should only be named once.
   *
   * @param[in] id The category id to name
   * @param[in] name The name to associate with `id`
   */
  named_category_in(id_type id, char const* name) noexcept : category(id)
  {
#ifndef NVTX_DISABLE
    nvtxDomainNameCategoryA(domain::get<D>(), this->get_id(), name);
#else
    // Nothing is registered when NVTX is disabled; silence unused warnings.
    (void)id;
    (void)name;
#endif
  }

  /**
   * @brief Builds a `named_category_in` with the given `id` and wide-string
   * `name`.
   *
   * The name `name` will be registered with `id`.
   *
   * Every unique value of `id` should only be named once.
   *
   * @param[in] id The category id to name
   * @param[in] name The name to associate with `id`
   */
  named_category_in(id_type id, wchar_t const* name) noexcept : category(id)
  {
#ifndef NVTX_DISABLE
    nvtxDomainNameCategoryW(domain::get<D>(), this->get_id(), name);
#else
    // Nothing is registered when NVTX is disabled; silence unused warnings.
    (void)id;
    (void)name;
#endif
  }
};
/**
 * @brief Alias for a `named_category_in` in the global NVTX domain.
 *
 * Equivalent to writing `named_category_in<domain::global>`.
 */
using named_category = named_category_in<domain::global>;
/**
 * @brief A message registered with NVTX.
 *
 * Normally, associating a `message` with an NVTX event requires copying the
 * contents of the message string. This may cause non-trivial overhead in
 * highly performance sensitive regions of code.
 *
 * Message registration is an optimization that lowers the overhead of
 * associating a message with an NVTX event. Registering a message yields a
 * handle that is inexpensive to copy and may be used in place of a message
 * string.
 *
 * A particular message should only be registered once and the handle
 * reused throughout the rest of the application. Either create static
 * `registered_string_in` objects explicitly, or use the
 * `registered_string_in::get` construct-on-first-use helper (recommended).
 *
 * Example:
 * \code{.cpp}
 * // Explicitly constructed, static `registered_string` in my_domain:
 * static registered_string_in<my_domain> static_message{"message"};
 *
 * // "message" is associated with the range `r`
 * nvtx3::scoped_range_in<my_domain> r{static_message};
 *
 * // Or use construct on first use:
 *
 * // Define a type with a `message` member that defines the contents of the
 * // registered string
 * struct my_message{ static constexpr char const* message{ "my message" }; };
 *
 * // Uses construct on first use to register the contents of
 * // `my_message::message`
 * auto& msg = registered_string_in<my_domain>::get<my_message>();
 *
 * // "my message" is associated with the range `r2`
 * nvtx3::scoped_range_in<my_domain> r2{msg};
 * \endcode
 *
 * `registered_string_in`s are local to a particular domain specified via
 * the type `D`.
 *
 * @tparam D Type containing `name` member used to identify the `domain` to
 * which the `registered_string_in` belongs. Else, `domain::global` to indicate
 * that the global NVTX domain should be used.
 */
template <typename D = domain::global>
class registered_string_in {
 public:
#if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
  /**
   * @brief Returns a global instance of a `registered_string_in` held as a
   * function-local static.
   *
   * A convenient way to register a message with NVTX without having to
   * register it explicitly.
   *
   * The first invocation constructs a `registered_string_in` whose contents
   * are given by `M::message`. Every later invocation returns a reference to
   * the object constructed by the first one.
   *
   * Example:
   * \code{.cpp}
   * // Define a type with a `message` member that defines the contents of the
   * // registered string
   * struct my_message{ static constexpr char const* message{ "my message" };
   * };
   *
   * // Uses construct on first use to register the contents of
   * // `my_message::message`
   * auto& msg = registered_string_in<my_domain>::get<my_message>();
   *
   * // "my message" is associated with the range `r`
   * nvtx3::scoped_range_in<my_domain> r{msg};
   * \endcode
   *
   * @tparam M Type required to contain a member `M::message` that
   * resolves to either a `char const*` or `wchar_t const*` used as the
   * registered string's contents.
   * @return Reference to a `registered_string_in` associated with the type `M`.
   */
  template <typename M,
            typename std::enable_if<
              detail::is_c_string<decltype(M::message)>::value
            , int>::type = 0>
  static registered_string_in const& get() noexcept
  {
    static registered_string_in const instance(M::message);
    return instance;
  }

  /**
   * @brief Overload of `registered_string_in::get` that produces a clear
   * compile error when `M` has a `message` member that is not directly
   * convertible to either `char const*` or `wchar_t const*`.
   */
  template <typename M,
            typename std::enable_if<
              !detail::is_c_string<decltype(M::message)>::value
            , int>::type = 0>
  static registered_string_in const& get() noexcept
  {
    NVTX3_STATIC_ASSERT(detail::always_false<M>::value,
      "Type used to register an NVTX string must contain a static constexpr member "
      "called 'message' of type const char* or const wchar_t* -- 'message' member is "
      "not convertible to either of those types");
    // The body must still be well-formed so that the static_assert is reached.
    static registered_string_in const placeholder;
    return placeholder;
  }

  /**
   * @brief Overload of `registered_string_in::get` that produces a clear
   * compile error when `M` does not have a `message` member.
   */
  template <typename M,
            typename std::enable_if<
              !detail::has_message<M>::value
            , int>::type = 0>
  static registered_string_in const& get() noexcept
  {
    NVTX3_STATIC_ASSERT(detail::always_false<M>::value,
      "Type used to register an NVTX string must contain a static constexpr member "
      "called 'message' of type const char* or const wchar_t* -- 'message' member "
      "is missing");
    // The body must still be well-formed so that the static_assert is reached.
    static registered_string_in const placeholder;
    return placeholder;
  }
#else
  // Unchecked variant for compilers where the SFINAE-based overloads above
  // are disabled: a bad `M` fails with whatever error the compiler emits.
  template <typename M>
  static registered_string_in const& get() noexcept
  {
    static registered_string_in const instance(M::message);
    return instance;
  }
#endif

  /**
   * @brief Registers the narrow string `msg` with NVTX in domain `D` and
   * stores the resulting handle.
   *
   * A particular message should only be registered once and the handle
   * reused throughout the rest of the application.
   *
   * @param msg The contents of the message
   */
  explicit registered_string_in(char const* msg) noexcept
    : handle_(nvtxDomainRegisterStringA(domain::get<D>(), msg))
  {
  }

  /**
   * @brief Registers the contents of `msg` with NVTX; forwards to the
   * `char const*` constructor.
   *
   * A particular message should only be registered once and the handle
   * reused throughout the rest of the application.
   *
   * @param msg The contents of the message
   */
  explicit registered_string_in(std::string const& msg) noexcept
    : registered_string_in(msg.c_str())
  {
  }

  /**
   * @brief Registers the wide string `msg` with NVTX in domain `D` and
   * stores the resulting handle.
   *
   * A particular message should only be registered once and the handle
   * reused throughout the rest of the application.
   *
   * @param msg The contents of the message
   */
  explicit registered_string_in(wchar_t const* msg) noexcept
    : handle_(nvtxDomainRegisterStringW(domain::get<D>(), msg))
  {
  }

  /**
   * @brief Registers the contents of `msg` with NVTX; forwards to the
   * `wchar_t const*` constructor.
   *
   * A particular message should only be registered once and the handle
   * reused throughout the rest of the application.
   *
   * @param msg The contents of the message
   */
  explicit registered_string_in(std::wstring const& msg) noexcept
    : registered_string_in(msg.c_str())
  {
  }

  /**
   * @brief Returns the registered string's handle.
   */
  nvtxStringHandle_t get_handle() const noexcept { return handle_; }

 private:
  // Default constructor is only used internally for static_assert(false) cases.
  registered_string_in() noexcept {}

 public:
  // Only the handle is held, so copies and moves are defaulted.
  ~registered_string_in() = default;
  registered_string_in(registered_string_in const&) = default;
  registered_string_in& operator=(registered_string_in const&) = default;
  registered_string_in(registered_string_in&&) = default;
  registered_string_in& operator=(registered_string_in&&) = default;

 private:
  nvtxStringHandle_t handle_{};  ///< The handle returned from
                                 ///< registering the message with NVTX
};
/**
* @brief Alias for a `registered_string_in` in the global NVTX domain.
*
* Equivalent to spelling out `registered_string_in<domain::global>`.
*/
using registered_string = registered_string_in<domain::global>;
/**
* @brief Allows associating a message string with an NVTX event via
* its `EventAttribute`s.
*
* Associating a `message` with an NVTX event through its `event_attributes`
* allows for naming events to easily differentiate them from other events.
*
* Every time an NVTX event is created with an associated `message`, the
* contents of the message string must be copied. This may cause non-trivial
* overhead in highly performance sensitive sections of code. Use of a
* `nvtx3::registered_string` is recommended in these situations.
*
* Example:
* \code{.cpp}
* // Creates an `event_attributes` with message "message 0"
* nvtx3::event_attributes attr0{nvtx3::message{"message 0"}};
*
* // `range0` contains message "message 0"
* nvtx3::scoped_range range0{attr0};
*
* // `std::string` and string literals are implicitly assumed to be
* // the contents of an `nvtx3::message`
* // Creates an `event_attributes` with message "message 1"
* nvtx3::event_attributes attr1{"message 1"};
*
* // `range1` contains message "message 1"
* nvtx3::scoped_range range1{attr1};
*
* // `range2` contains message "message 2"
* nvtx3::scoped_range range2{nvtx3::message{"message 2"}};
*
* // `std::string` and string literals are implicitly assumed to be
* // the contents of an `nvtx3::message`
* // `range3` contains message "message 3"
* nvtx3::scoped_range range3{"message 3"};
* \endcode
*/
class message {
 public:
  using value_type = nvtxMessageValue_t;

  /**
   * @brief Creates a `message` referring to the narrow string `msg`.
   *
   * Only the pointer is stored; the characters are not copied.
   *
   * @param msg The contents of the message
   */
  NVTX3_CONSTEXPR_IF_CPP14 message(char const* msg) noexcept
    : type_(NVTX_MESSAGE_TYPE_ASCII)
  {
    value_.ascii = msg;
  }

  /**
   * @brief Creates a `message` referring to the contents of `msg`.
   *
   * Delegates to the `char const*` overload using `msg.c_str()`, so `msg`
   * must outlive the `message`.
   *
   * @param msg The contents of the message
   */
  message(std::string const& msg) noexcept : message(msg.c_str()) {}

  /**
   * @brief Construction from a `std::string` r-value is disallowed.
   *
   * `message` is a non-owning type and cannot take ownership of an r-value;
   * allowing this overload would leave a dangling pointer behind.
   */
  message(std::string&&) = delete;

  /**
   * @brief Creates a `message` referring to the wide string `msg`.
   *
   * Only the pointer is stored; the characters are not copied.
   *
   * @param msg The contents of the message
   */
  NVTX3_CONSTEXPR_IF_CPP14 message(wchar_t const* msg) noexcept
    : type_(NVTX_MESSAGE_TYPE_UNICODE)
  {
    value_.unicode = msg;
  }

  /**
   * @brief Creates a `message` referring to the contents of `msg`.
   *
   * Delegates to the `wchar_t const*` overload using `msg.c_str()`, so `msg`
   * must outlive the `message`.
   *
   * @param msg The contents of the message
   */
  message(std::wstring const& msg) noexcept : message(msg.c_str()) {}

  /**
   * @brief Construction from a `std::wstring` r-value is disallowed.
   *
   * `message` is a non-owning type and cannot take ownership of an r-value;
   * allowing this overload would leave a dangling pointer behind.
   */
  message(std::wstring&&) = delete;

  /**
   * @brief Creates a `message` from a `registered_string_in`.
   *
   * @tparam D Type containing `name` member used to identify the `domain`
   * to which the `registered_string_in` belongs. Else, `domain::global` to
   * indicate that the global NVTX domain should be used.
   * @param msg The message that has already been registered with NVTX.
   */
  template <typename D>
  NVTX3_CONSTEXPR_IF_CPP14 message(registered_string_in<D> const& msg) noexcept
    : type_(NVTX_MESSAGE_TYPE_REGISTERED)
  {
    value_.registered = msg.get_handle();
  }

  /**
   * @brief Creates a `message` directly from the NVTX C API type and value.
   *
   * @param type nvtxMessageType_t enum value indicating type of the payload
   * @param value nvtxMessageValue_t union containing message
   */
  constexpr message(nvtxMessageType_t const& type, nvtxMessageValue_t const& value) noexcept
    : type_(type), value_(value)
  {
  }

  /**
   * @brief Creates a `message` from an NVTX C API registered string handle.
   *
   * @param handle nvtxStringHandle_t value of registered string handle
   */
  NVTX3_CONSTEXPR_IF_CPP14 message(nvtxStringHandle_t handle) noexcept
    : type_(NVTX_MESSAGE_TYPE_REGISTERED)
  {
    value_.registered = handle;
  }

  /**
   * @brief Returns a copy of the union holding the value of the message.
   */
  constexpr value_type get_value() const noexcept { return value_; }

  /**
   * @brief Returns the tag describing which kind of value the union holds.
   */
  constexpr nvtxMessageType_t get_type() const noexcept { return type_; }

 private:
  nvtxMessageType_t type_{};    ///< message type
  nvtxMessageValue_t value_{};  ///< message contents
};
/**
* @brief A numerical value that can be associated with an NVTX event via
* its `event_attributes`.
*
* Example:
* \code{.cpp}
* // Constructs a payload from the int32_t value 42
* nvtx3::event_attributes attr{nvtx3::payload{42}};
*
* // `range0` will have an int32_t payload of 42
* nvtx3::scoped_range range0{attr};
*
* // range1 has double payload of 3.14
* nvtx3::scoped_range range1{nvtx3::payload{3.14}};
* \endcode
*/
class payload {
 public:
  using value_type = typename nvtxEventAttributes_v2::payload_t;

  /**
   * @brief Builds a `payload` holding a signed, 8 byte integer.
   *
   * @param value Value to use as contents of the payload
   */
  NVTX3_CONSTEXPR_IF_CPP14 explicit payload(int64_t value) noexcept
    : type_(NVTX_PAYLOAD_TYPE_INT64), value_{}
  {
    value_.llValue = value;
  }

  /**
   * @brief Builds a `payload` holding a signed, 4 byte integer.
   *
   * @param value Value to use as contents of the payload
   */
  NVTX3_CONSTEXPR_IF_CPP14 explicit payload(int32_t value) noexcept
    : type_(NVTX_PAYLOAD_TYPE_INT32), value_{}
  {
    value_.iValue = value;
  }

  /**
   * @brief Builds a `payload` holding an unsigned, 8 byte integer.
   *
   * @param value Value to use as contents of the payload
   */
  NVTX3_CONSTEXPR_IF_CPP14 explicit payload(uint64_t value) noexcept
    : type_(NVTX_PAYLOAD_TYPE_UNSIGNED_INT64), value_{}
  {
    value_.ullValue = value;
  }

  /**
   * @brief Builds a `payload` holding an unsigned, 4 byte integer.
   *
   * @param value Value to use as contents of the payload
   */
  NVTX3_CONSTEXPR_IF_CPP14 explicit payload(uint32_t value) noexcept
    : type_(NVTX_PAYLOAD_TYPE_UNSIGNED_INT32), value_{}
  {
    value_.uiValue = value;
  }

  /**
   * @brief Builds a `payload` holding a single-precision floating point
   * value.
   *
   * @param value Value to use as contents of the payload
   */
  NVTX3_CONSTEXPR_IF_CPP14 explicit payload(float value) noexcept
    : type_(NVTX_PAYLOAD_TYPE_FLOAT), value_{}
  {
    value_.fValue = value;
  }

  /**
   * @brief Builds a `payload` holding a double-precision floating point
   * value.
   *
   * @param value Value to use as contents of the payload
   */
  NVTX3_CONSTEXPR_IF_CPP14 explicit payload(double value) noexcept
    : type_(NVTX_PAYLOAD_TYPE_DOUBLE), value_{}
  {
    value_.dValue = value;
  }

  /**
   * @brief Builds a `payload` directly from the NVTX C API type and value.
   *
   * @param type nvtxPayloadType_t enum value indicating type of the payload
   * @param value nvtxEventAttributes_t::payload_t union containing payload
   */
  constexpr payload(nvtxPayloadType_t const& type, value_type const& value) noexcept
    : type_(type), value_(value)
  {
  }

  /**
   * @brief Returns a copy of the union holding the value of the payload.
   */
  constexpr value_type get_value() const noexcept { return value_; }

  /**
   * @brief Returns the tag describing which type the union holds.
   */
  constexpr nvtxPayloadType_t get_type() const noexcept { return type_; }

 private:
  nvtxPayloadType_t type_;  ///< Type of the payload value
  value_type value_;        ///< Union holding the payload value
};
/**
* @brief Describes the attributes of a NVTX event.
*
* NVTX events can be customized via four "attributes":
*
* - color: color used to visualize the event in tools such as Nsight
* Systems. See `color`.
* - message: Custom message string. See `message`.
* - payload: User-defined numerical value. See `payload`.
* - category: Intra-domain grouping. See `category`.
*
* These component attributes are specified via an `event_attributes` object.
* See `nvtx3::color`, `nvtx3::message`, `nvtx3::payload`, and
* `nvtx3::category` for how these individual attributes are constructed.
*
* While it is possible to specify all four attributes, it is common to want
* to only specify a subset of attributes and use default values for the
* others. For convenience, `event_attributes` can be constructed from any
* number of attribute components in any order.
*
* Example:
* \code{.cpp}
* // Set message, same as using nvtx3::message{"message"}
* event_attributes attr{"message"};
*
* // Set message and color
* event_attributes attr{"message", nvtx3::rgb{127, 255, 0}};
*
* // Set message, color, payload, category
* event_attributes attr{"message",
* nvtx3::rgb{127, 255, 0},
* nvtx3::payload{42},
* nvtx3::category{1}};
*
* // Same as above -- can use any order of arguments
* event_attributes attr{nvtx3::payload{42},
* nvtx3::category{1},
* "message",
* nvtx3::rgb{127, 255, 0}};
*
* // Multiple arguments of the same type are allowed, but only the first is
* // used -- in this example, payload is set to 42:
* event_attributes attr{ nvtx3::payload{42}, nvtx3::payload{7} };
*
* // Range `r` will be customized according to the attributes in `attr`
* nvtx3::scoped_range r{attr};
*
* // For convenience, `event_attributes` constructor arguments may be passed
* // to the `scoped_range_in` constructor -- they are forwarded to the
* // `event_attributes` constructor
* nvtx3::scoped_range r{nvtx3::payload{42}, nvtx3::category{1}, "message"};
*
* // Using the nvtx3 namespace in a local scope makes the syntax more succinct:
* using namespace nvtx3;
* scoped_range r{payload{42}, category{1}, "message"};
* \endcode
*
*/
class event_attributes {
 public:
  using value_type = nvtxEventAttributes_t;

  /**
   * @brief Default constructor creates an `event_attributes` with no
   * category, color, payload, nor message.
   *
   * This is also the terminal step of the variadic constructors below, each
   * of which delegates here once its argument pack is exhausted.
   */
  constexpr event_attributes() noexcept
    : attributes_{
        NVTX_VERSION,                   // version
        sizeof(nvtxEventAttributes_t),  // size
        0,                              // category
        NVTX_COLOR_UNKNOWN,             // color type
        0,                              // color value
        NVTX_PAYLOAD_UNKNOWN,           // payload type
        0,                              // reserved 4B
        0,                              // payload value (union)
        NVTX_MESSAGE_UNKNOWN,           // message type
        0                               // message value (union)
      }
  {
  }

  /**
   * @brief Variadic constructor where the first argument is a `category`.
   *
   * The remaining arguments are handled first (by delegation); afterwards the
   * category id from `c` is written, so an earlier argument overrides a later
   * one of the same kind.
   */
  template <typename... Args>
  NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(category const& c, Args const&... args) noexcept
    : event_attributes(args...)
  {
    attributes_.category = c.get_id();
  }

  /**
   * @brief Variadic constructor where the first argument is a `color`.
   *
   * The remaining arguments are handled first (by delegation); afterwards the
   * color type and value from `c` are written.
   */
  template <typename... Args>
  NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(color const& c, Args const&... args) noexcept
    : event_attributes(args...)
  {
    attributes_.colorType = c.get_type();
    attributes_.color     = c.get_value();
  }

  /**
   * @brief Variadic constructor where the first argument is a `payload`.
   *
   * The remaining arguments are handled first (by delegation); afterwards the
   * payload type and value from `p` are written.
   */
  template <typename... Args>
  NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(payload const& p, Args const&... args) noexcept
    : event_attributes(args...)
  {
    attributes_.payloadType = p.get_type();
    attributes_.payload     = p.get_value();
  }

  /**
   * @brief Variadic constructor where the first argument is a `message`.
   *
   * The remaining arguments are handled first (by delegation); afterwards the
   * message type and value from `m` are written.
   */
  template <typename... Args>
  NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(message const& m, Args const&... args) noexcept
    : event_attributes(args...)
  {
    attributes_.messageType = m.get_type();
    attributes_.message     = m.get_value();
  }

  /**
   * @brief Variadic constructor where the first argument is a binary payload.
   *
   * The remaining arguments are handled first (by delegation); afterwards the
   * payload fields are overwritten so that the event carries the binary
   * payload pointed to by `bpl`.
   */
  template <typename... Args>
  NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(nvtxPayloadData_t const* bpl, Args const&... args) noexcept
    : event_attributes(args...)
  {
    attributes_.payload.ullValue = NVTX_POINTER_AS_PAYLOAD_ULLVALUE(bpl);
    attributes_.reserved0        = 1;  // NCCL uses only a single binary payload per event.
    attributes_.payloadType      = NVTX_PAYLOAD_TYPE_BINARY;
  }

  ~event_attributes()                                  = default;
  event_attributes(event_attributes const&)            = default;
  event_attributes& operator=(event_attributes const&) = default;
  event_attributes(event_attributes&&)                 = default;
  event_attributes& operator=(event_attributes&&)      = default;

  /**
   * @brief Get raw pointer to underlying NVTX attributes object.
   *
   * The pointer refers to a member of this object and is therefore only
   * valid while this `event_attributes` is alive.
   */
  constexpr value_type const* get() const noexcept { return &attributes_; }

 private:
  value_type attributes_{};  ///< The NVTX attributes structure
};
/**
* @brief A RAII object for creating a NVTX range local to a thread within a
* domain.
*
* When constructed, begins a nested NVTX range on the calling thread in the
* specified domain. Upon destruction, ends the NVTX range.
*
* Behavior is undefined if a `scoped_range_in` object is
* created/destroyed on different threads.
*
* `scoped_range_in` is neither moveable nor copyable.
*
* `scoped_range_in`s may be nested within other ranges.
*
* The domain of the range is specified by the template type parameter `D`.
* By default, the `domain::global` is used, which scopes the range to the
* global NVTX domain. The convenience alias `scoped_range` is provided for
* ranges scoped to the global domain.
*
* A custom domain can be defined by creating a type, `D`, with a static
* member `D::name` whose value is used to name the domain associated with
* `D`. `D::name` must resolve to either `char const*` or `wchar_t const*`
*
* Example:
* \code{.cpp}
* // Define a type `my_domain` with a member `name` used to name the domain
* // associated with the type `my_domain`.
* struct my_domain{
* static constexpr char const* name{"my domain"};
* };
* \endcode
*
* Usage:
* \code{.cpp}
* nvtx3::scoped_range_in<my_domain> r1{"range 1"}; // Range in my domain
*
* // Three equivalent ways to make a range in the global domain:
* nvtx3::scoped_range_in<nvtx3::domain::global> r2{"range 2"};
* nvtx3::scoped_range_in<> r3{"range 3"};
* nvtx3::scoped_range r4{"range 4"};
*
* // Create an alias to succinctly make ranges in my domain:
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
*
* my_scoped_range r5{"range 5"};
* \endcode
*/
template <class D = domain::global>
class scoped_range_in {
 public:
  /**
   * @brief Construct a `scoped_range_in` with the specified
   * `event_attributes`
   *
   * Pushes a range described by `attr` in domain `D`, unless NVTX is
   * compiled out via `NVTX_DISABLE`.
   *
   * Example:
   * \code{cpp}
   * nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
   * nvtx3::scoped_range range{attr}; // Creates a range with message contents
   *                                  // "msg" and green color
   * \endcode
   *
   * @param[in] attr `event_attributes` that describes the desired attributes
   * of the range.
   */
  explicit scoped_range_in(event_attributes const& attr) noexcept
  {
#ifdef NVTX_DISABLE
    (void)attr;
#else
    nvtxDomainRangePushEx(domain::get<D>(), attr.get());
#endif
  }

  /**
   * @brief Constructs a `scoped_range_in` from the constructor arguments
   * of an `event_attributes`.
   *
   * The arguments `args...` are used to build a temporary
   * `event_attributes`, which is then handed to the constructor above.
   *
   * For more detail, see `event_attributes` documentation.
   *
   * Example:
   * \code{cpp}
   * // Creates a range with message "message" and green color
   * nvtx3::scoped_range r{"message", nvtx3::rgb{127,255,0}};
   * \endcode
   *
   * @param[in] args Arguments used to construct an `event_attributes`
   * associated with this range.
   */
  template <typename... Args>
  explicit scoped_range_in(Args const&... args) noexcept
    : scoped_range_in{event_attributes{args...}}
  {
  }

  /**
   * @brief Default constructor creates a `scoped_range_in` with no
   * message, color, payload, nor category.
   */
  scoped_range_in() noexcept : scoped_range_in{event_attributes{}} {}

  /**
   * @brief Delete `operator new` to disallow heap allocated objects.
   *
   * `scoped_range_in` must follow RAII semantics to guarantee proper push/pop
   * semantics.
   */
  void* operator new(std::size_t) = delete;

  // Neither copyable nor movable: each object corresponds to exactly one
  // push/pop pair.
  scoped_range_in(scoped_range_in const&)            = delete;
  scoped_range_in& operator=(scoped_range_in const&) = delete;
  scoped_range_in(scoped_range_in&&)                 = delete;
  scoped_range_in& operator=(scoped_range_in&&)      = delete;

  /**
   * @brief Destroy the scoped_range_in, ending the NVTX range event.
   */
  ~scoped_range_in() noexcept
  {
#if !defined(NVTX_DISABLE)
    nvtxDomainRangePop(domain::get<D>());
#endif
  }
};
/**
* @brief Alias for a `scoped_range_in` in the global NVTX domain.
*
* Equivalent to spelling out `scoped_range_in<domain::global>`.
*/
using scoped_range = scoped_range_in<domain::global>;
namespace detail {
/// @cond internal
/**
 * @brief Scoped range whose start is deferred until `begin()` is called.
 *
 * The destructor pops the range only if `begin()` actually pushed one. When
 * `NVTX_DISABLE` is defined, `begin()` and the destructor do nothing.
 *
 * @tparam D Domain type used for the push/pop calls.
 */
template <typename D = domain::global>
class optional_scoped_range_in
{
 public:
  optional_scoped_range_in() = default;

  /**
   * @brief Pushes a range described by `attr`; later calls are ignored.
   *
   * @param[in] attr `event_attributes` describing the range.
   */
  void begin(event_attributes const& attr) noexcept
  {
#ifndef NVTX_DISABLE
    // This class is not meant to be part of the public NVTX C++ API and should
    // only be used in the `NVTX3_FUNC_RANGE_IF` and `NVTX3_FUNC_RANGE_IF_IN`
    // macros. However, to prevent developers from misusing this class, make
    // sure to not start multiple ranges.
    if (initialized) { return; }
    nvtxDomainRangePushEx(domain::get<D>(), attr.get());
    initialized = true;
#else
    // Matches `scoped_range_in`: avoid an unused-parameter warning when NVTX
    // is compiled out.
    (void)attr;
#endif
  }

  /// Pops the range if, and only if, `begin()` pushed one.
  ~optional_scoped_range_in() noexcept
  {
#ifndef NVTX_DISABLE
    if (initialized) { nvtxDomainRangePop(domain::get<D>()); }
#endif
  }

  // Heap allocation, copying and moving are disallowed.
  void* operator new(std::size_t) = delete;
  optional_scoped_range_in(optional_scoped_range_in const&) = delete;
  optional_scoped_range_in& operator=(optional_scoped_range_in const&) = delete;
  optional_scoped_range_in(optional_scoped_range_in&&) = delete;
  optional_scoped_range_in& operator=(optional_scoped_range_in&&) = delete;

 private:
#ifndef NVTX_DISABLE
  bool initialized = false;  ///< True once `begin()` has pushed a range
#endif
};
/// @endcond
} // namespace detail
/**
* @brief Handle used for correlating explicit range start and end events.
*
* A handle is "null" if it does not correspond to any range.
*
*/
struct range_handle {
/// Type used for the handle's value
using value_type = nvtxRangeId_t;
/**
* @brief Construct a `range_handle` from the given id.
*
*/
constexpr explicit range_handle(value_type id) noexcept : _range_id{id} {}
/**
* @brief Constructs a null range handle.
*
* A null range_handle corresponds to no range. Calling `end_range` on a
* null handle is undefined behavior when a tool is active.
*
*/
constexpr range_handle() noexcept = default;
/**
* @brief Checks whether this handle is null
*
* Provides contextual conversion to `bool`.
*
* \code{cpp}
* range_handle handle{};
* if (handle) {...}
* \endcode
*
*/
constexpr explicit operator bool() const noexcept { return get_value() != null_range_id; };
/**
* @brief Implicit conversion from `nullptr` constructs a null handle.
*
* Satisfies the "NullablePointer" requirement to make `range_handle` comparable with `nullptr`.
*
*/
constexpr range_handle(std::nullptr_t) noexcept {}
/**
* @brief Returns the `range_handle`'s value
*
* @return value_type The handle's value
*/
constexpr value_type get_value() const noexcept { return _range_id; }
private:
/// Sentinel value for a null handle that corresponds to no range
static constexpr value_type null_range_id = nvtxRangeId_t{0};
value_type _range_id{null_range_id}; ///< The underlying NVTX range id
};
/**
* @brief Compares two range_handles for equality
*
* @param lhs The first range_handle to compare
* @param rhs The second range_handle to compare
*/
inline constexpr bool operator==(range_handle lhs, range_handle rhs) noexcept
{
  // Two handles are equal exactly when their underlying ids are equal.
  return rhs.get_value() == lhs.get_value();
}
/**
* @brief Compares two range_handles for inequality
*
* @param lhs The first range_handle to compare
* @param rhs The second range_handle to compare
*/
inline constexpr bool operator!=(range_handle lhs, range_handle rhs) noexcept
{
  // Two handles differ exactly when their underlying ids differ.
  return lhs.get_value() != rhs.get_value();
}
/**
* @brief Manually begin an NVTX range.
*
* Explicitly begins an NVTX range and returns a unique handle. To end the
* range, pass the handle to `end_range_in<D>()`.
*
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
*
* `start_range_in/end_range_in` are the most explicit and lowest level APIs
* provided for creating ranges. Use of `nvtx3::unique_range_in` should be
* preferred unless one is unable to tie the range to the lifetime of an object.
*
* Example:
* \code{.cpp}
* nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
* // Manually begin a range
* nvtx3::range_handle h = nvtx3::start_range_in<my_domain>(attr);
* ...
* nvtx3::end_range_in<my_domain>(h); // End the range
* \endcode
*
* @tparam D Type containing `name` member used to identify the `domain`
* to which the range belongs. Else, `domain::global` to indicate that the
* global NVTX domain should be used.
* @param[in] attr `event_attributes` that describes the desired attributes
* of the range.
* @return Unique handle to be passed to `end_range_in` to end the range.
*/
template <typename D = domain::global>
inline range_handle start_range_in(event_attributes const& attr) noexcept
{
#ifdef NVTX_DISABLE
  // NVTX compiled out: nothing is started and a null handle is returned.
  (void)attr;
  return {};
#else
  // Start the range in domain `D` and wrap the id returned by the C API.
  auto const id = nvtxDomainRangeStartEx(domain::get<D>(), attr.get());
  return range_handle{id};
#endif
}
/**
* @brief Manually begin an NVTX range.
*
* Explicitly begins an NVTX range and returns a unique handle. To end the
* range, pass the handle to `end_range_in<D>()`.
*
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
*
* `start_range_in/end_range_in` are the most explicit and lowest level APIs
* provided for creating ranges. Use of `nvtx3::unique_range_in` should be
* preferred unless one is unable to tie the range to the lifetime of an object.
*
* This overload uses `args...` to construct an `event_attributes` to
* associate with the range. For more detail, see `event_attributes`.
*
* Example:
* \code{cpp}
* // Manually begin a range
* nvtx3::range_handle h = nvtx3::start_range_in<D>("msg", nvtx3::rgb{127,255,0});
* ...
* nvtx3::end_range_in<D>(h); // Ends the range
* \endcode
*
* @tparam D Type containing `name` member used to identify the `domain`
* to which the range belongs. Else, `domain::global` to indicate that the
* global NVTX domain should be used.
* @param[in] args Variadic parameter pack of the arguments for an `event_attributes`.
* @return Unique handle to be passed to `end_range_in` to end the range.
*/
template <typename D = domain::global, typename... Args>
inline range_handle start_range_in(Args const&... args) noexcept
{
#ifdef NVTX_DISABLE
  // NVTX compiled out: a null handle is returned.
  return {};
#else
  // Build the attributes from `args...` and defer to the attribute overload.
  return start_range_in<D>(event_attributes{args...});
#endif
}
/**
* @brief Manually begin an NVTX range in the global domain.
*
* Explicitly begins an NVTX range and returns a unique handle. To end the
* range, pass the handle to `end_range()`.
*
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
*
* `start_range/end_range` are the most explicit and lowest level APIs
* provided for creating ranges. Use of `nvtx3::unique_range` should be
* preferred unless one is unable to tie the range to the lifetime of an object.
*
* Example:
* \code{.cpp}
* nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
* // Manually begin a range
* nvtx3::range_handle h = nvtx3::start_range(attr);
* ...
* nvtx3::end_range(h); // End the range
* \endcode
*
* @param[in] attr `event_attributes` that describes the desired attributes
* of the range.
* @return Unique handle to be passed to `end_range` to end the range.
*/
inline range_handle start_range(event_attributes const& attr) noexcept
{
#ifdef NVTX_DISABLE
  // NVTX compiled out: a null handle is returned.
  (void)attr;
  return {};
#else
  // Global-domain convenience wrapper around `start_range_in`.
  return start_range_in<domain::global>(attr);
#endif
}
/**
* @brief Manually begin an NVTX range in the global domain.
*
* Explicitly begins an NVTX range and returns a unique handle. To end the
* range, pass the handle to `end_range_in<D>()`.
*
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
*
* `start_range_in/end_range_in` are the most explicit and lowest level APIs
* provided for creating ranges. Use of `nvtx3::unique_range_in` should be
* preferred unless one is unable to tie the range to the lifetime of an object.
*
* This overload uses `args...` to construct an `event_attributes` to
* associate with the range. For more detail, see `event_attributes`.
*
* Example:
* \code{cpp}
* // Manually begin a range
* nvtx3::range_handle h = nvtx3::start_range("msg", nvtx3::rgb{127,255,0});
* ...
* nvtx3::end_range(h); // Ends the range
* \endcode
*
* @param[in] args Variadic parameter pack of the arguments for an `event_attributes`.
* @return Unique handle to be passed to `end_range` to end the range.
*/
template <typename... Args>
inline range_handle start_range(Args const&... args) noexcept
{
#ifdef NVTX_DISABLE
  // NVTX compiled out: a null handle is returned.
  return {};
#else
  // Global-domain convenience wrapper; `args...` are passed on unchanged.
  return start_range_in<domain::global>(args...);
#endif
}
/**
* @brief Manually end the range associated with the handle `r` in domain `D`.
*
* Explicitly ends the NVTX range indicated by the handle `r` returned from a
* prior call to `start_range_in<D>`. The range may end on a different thread
* from where it began.
*
* @tparam D Type containing `name` member used to identify the `domain` to
* which the range belongs. Else, `domain::global` to indicate that the global
* NVTX domain should be used.
* @param r Handle to a range started by a prior call to `start_range_in`.
*
* @warning The domain type specified as template parameter to this function
* must be the same that was specified on the associated `start_range_in` call.
*/
template <typename D = domain::global>
inline void end_range_in(range_handle r) noexcept
{
#ifdef NVTX_DISABLE
  // NVTX compiled out: nothing to end.
  (void)r;
#else
  // Unwrap the id and end the corresponding range in domain `D`.
  auto const id = r.get_value();
  nvtxDomainRangeEnd(domain::get<D>(), id);
#endif
}
/**
* @brief Manually end the range associated with the handle `r` in the global
* domain.
*
* Explicitly ends the NVTX range indicated by the handle `r` returned from a
* prior call to `start_range`. The range may end on a different thread from
* where it began.
*
* @param r Handle to a range started by a prior call to `start_range`.
*
* @warning The range identified by `r` must have been started in the global
* domain, i.e. by the associated `start_range` call.
*/
inline void end_range(range_handle r) noexcept
{
#ifdef NVTX_DISABLE
  // NVTX compiled out: nothing to end.
  (void)r;
#else
  // Global-domain convenience wrapper around `end_range_in`.
  end_range_in<domain::global>(r);
#endif
}
/**
* @brief A RAII object for creating a NVTX range within a domain that can
* be created and destroyed on different threads.
*
* When constructed, begins a NVTX range in the specified domain. Upon
* destruction, ends the NVTX range.
*
* Similar to `nvtx3::scoped_range_in`, with a few key differences:
* - `unique_range` objects can be destroyed in any order whereas `scoped_range` objects must be
* destroyed in exact reverse creation order
* - `unique_range` can start and end on different threads
* - `unique_range` is moveable
* - `unique_range` objects can be constructed as heap objects
*
* There is extra overhead associated with `unique_range` constructs and therefore use of
* `nvtx3::scoped_range_in` should be preferred.
*
* @tparam D Type containing `name` member used to identify the `domain`
* to which the `unique_range_in` belongs. Else, `domain::global` to
* indicate that the global NVTX domain should be used.
*/
template <typename D = domain::global>
class unique_range_in {
 public:
  /**
   * @brief Construct a new unique_range_in object with the specified event attributes
   *
   * The range is started via `start_range_in<D>` and its handle is owned by
   * this object.
   *
   * Example:
   * \code{cpp}
   * nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
   * nvtx3::unique_range_in<my_domain> range{attr}; // Creates a range with message contents
   *                                                // "msg" and green color
   * \endcode
   *
   * @param[in] attr `event_attributes` that describes the desired attributes
   * of the range.
   */
  explicit unique_range_in(event_attributes const& attr) noexcept
    : handle_(start_range_in<D>(attr))
  {
  }

  /**
   * @brief Constructs a `unique_range_in` from the constructor arguments
   * of an `event_attributes`.
   *
   * The arguments `args...` are used to build a temporary
   * `event_attributes`, which is then handed to the constructor above.
   *
   * For more detail, see `event_attributes` documentation.
   *
   * Example:
   * \code{.cpp}
   * // Creates a range with message "message" and green color
   * nvtx3::unique_range_in<> r{"message", nvtx3::rgb{127,255,0}};
   * \endcode
   *
   * @param[in] args Variadic parameter pack of arguments to construct an `event_attributes`
   * associated with this range.
   */
  template <typename... Args>
  explicit unique_range_in(Args const&... args) noexcept
    : unique_range_in{event_attributes{args...}}
  {
  }

  /**
   * @brief Default constructor creates a `unique_range_in` with no
   * message, color, payload, nor category.
   */
  constexpr unique_range_in() noexcept : unique_range_in{event_attributes{}} {}

  /**
   * @brief Destroy the `unique_range_in` ending the range.
   *
   * Defaulted: ending the range is performed by the deleter of `handle_`.
   */
  ~unique_range_in() noexcept = default;

  /**
   * @brief Move constructor allows taking ownership of the NVTX range from
   * another `unique_range_in`.
   *
   * @param other The range to take ownership of
   */
  unique_range_in(unique_range_in&& other) noexcept = default;

  /**
   * @brief Move assignment operator allows taking ownership of an NVTX range
   * from another `unique_range_in`.
   *
   * @param other The range to take ownership of
   */
  unique_range_in& operator=(unique_range_in&& other) noexcept = default;

  /// Copy construction is not allowed to prevent multiple objects from owning
  /// the same range handle
  unique_range_in(unique_range_in const&) = delete;

  /// Copy assignment is not allowed to prevent multiple objects from owning the
  /// same range handle
  unique_range_in& operator=(unique_range_in const&) = delete;

 private:
  /// Deleter for `handle_`: ends the range in domain `D`.
  struct end_range_handle {
    using pointer = range_handle;  /// Override the pointer type of the unique_ptr

    void operator()(range_handle h) const noexcept
    {
      end_range_in<D>(h);
    }
  };

  /// Range handle used to correlate the start/end of the range
  std::unique_ptr<range_handle, end_range_handle> handle_;
};
/**
* @brief Alias for a `unique_range_in` in the global NVTX domain.
*
* Equivalent to spelling out `unique_range_in<domain::global>`.
*/
using unique_range = unique_range_in<domain::global>;
/**
* @brief Annotates an instantaneous point in time with a "marker", using the
* attributes specified by `attr`.
*
* Unlike a "range" which has a beginning and an end, a marker is a single event
* in an application, such as detecting a problem:
*
* \code{.cpp}
* bool success = do_operation(...);
* if (!success) {
* nvtx3::event_attributes attr{"operation failed!", nvtx3::rgb{255,0,0}};
* nvtx3::mark_in<my_domain>(attr);
* }
* \endcode
*
* Note that nvtx3::mark_in<D> is a function, not a class like scoped_range_in<D>.
*
* @tparam D Type containing `name` member used to identify the `domain`
* to which the mark belongs. Else, `domain::global` to
* indicate that the global NVTX domain should be used.
* @param[in] attr `event_attributes` that describes the desired attributes
* of the mark.
*/
template <typename D = domain::global>
inline void mark_in(event_attributes const& attr) noexcept
{
#ifdef NVTX_DISABLE
  // NVTX compiled out: no marker is emitted.
  (void)(attr);
#else
  // Emit the marker described by `attr` in domain `D`.
  nvtxDomainMarkEx(domain::get<D>(), attr.get());
#endif
}
/**
* @brief Annotates an instantaneous point in time with a "marker", using the
* arguments to construct an `event_attributes`.
*
* Unlike a "range" which has a beginning and an end, a marker is a single event
* in an application, such as detecting a problem:
*
* \code{.cpp}
* bool success = do_operation(...);
* if (!success) {
* nvtx3::mark_in<my_domain>("operation failed!", nvtx3::rgb{255,0,0});
* }
* \endcode
*
* Note that nvtx3::mark_in<D> is a function, not a class like scoped_range_in<D>.
*
* Forwards the arguments `args...` to construct an `event_attributes` object.
* The attributes are then associated with the marker. For more detail, see
* the `event_attributes` documentation.
*
* @tparam D Type containing `name` member used to identify the `domain`
* to which the mark belongs. Else `domain::global` to
* indicate that the global NVTX domain should be used.
* @param[in] args Variadic parameter pack of arguments to construct an `event_attributes`
* associated with this range.
*
*/
template <typename D = domain::global, typename... Args>
inline void mark_in(Args const&... args) noexcept
{
#ifndef NVTX_DISABLE
mark_in<D>(event_attributes{args...});
#endif
}
/**
 * @brief Annotates an instantaneous point in time with a "marker", using the
 * attributes specified by `attr`, in the global domain.
 *
 * Unlike a "range" which has a beginning and an end, a marker is a single event
 * in an application, such as detecting a problem:
 *
 * \code{.cpp}
 * bool success = do_operation(...);
 * if (!success) {
 *   nvtx3::event_attributes attr{"operation failed!", nvtx3::rgb{255,0,0}};
 *   nvtx3::mark(attr);
 * }
 * \endcode
 *
 * Note that nvtx3::mark is a function, not a class like scoped_range.
 *
 * Equivalent to `mark_in<domain::global>(attr)`. When `NVTX_DISABLE` is
 * defined the function does nothing.
 *
 * @param[in] attr `event_attributes` that describes the desired attributes
 * of the mark.
 */
inline void mark(event_attributes const& attr) noexcept
{
#ifndef NVTX_DISABLE
  mark_in<domain::global>(attr);
#else
  // Mirror mark_in: keep the parameter referenced so builds with
  // NVTX_DISABLE do not report it as unused.
  (void)(attr);
#endif
}
/**
* @brief Annotates an instantaneous point in time with a "marker", using the
* arguments to construct an `event_attributes`, in the global domain.
*
* Unlike a "range" which has a beginning and an end, a marker is a single event
* in an application, such as detecting a problem:
*
* \code{.cpp}
* bool success = do_operation(...);
* if (!success) {
* nvtx3::mark("operation failed!", nvtx3::rgb{255,0,0});
* }
* \endcode
*
* Note that nvtx3::mark is a function, not a class like scoped_range.
*
* Forwards the arguments `args...` to construct an `event_attributes` object.
* The attributes are then associated with the marker. For more detail, see
* the `event_attributes` documentation.
*
* @param[in] args Variadic parameter pack of arguments to construct an
* `event_attributes` associated with this range.
*
*/
template <typename... Args>
inline void mark(Args const&... args) noexcept
{
#ifndef NVTX_DISABLE
mark_in<domain::global>(args...);
#endif
}
} // namespace NVTX3_VERSION_NAMESPACE
} // namespace nvtx3
#ifndef NVTX_DISABLE
/**
 * @brief Convenience macro for generating a range in the specified `domain`
 * from the lifetime of a function
 *
 * This macro is useful for generating an NVTX range in `domain` from
 * the entry point of a function to its exit. It is intended to be the first
 * line of the function.
 *
 * Constructs a static `registered_string_in<D>` using the name of the
 * immediately enclosing function returned by `__func__`, a static
 * `event_attributes` holding that registered string, and a non-static
 * `nvtx3::scoped_range_in<D>` using those attributes.
 *
 * The macro expands to three declarations with fixed names
 * (`nvtx3_func_name__`, `nvtx3_func_attr__`, `nvtx3_range__`), so it can be
 * used at most once in any given scope.
 *
 * Example:
 * \code{.cpp}
 * struct my_domain{static constexpr char const* name{"my_domain"};};
 *
 * void foo(...) {
 *   NVTX3_FUNC_RANGE_IN(my_domain); // Range begins on entry to foo()
 *   // do stuff
 *   ...
 * } // Range ends on return from foo()
 * \endcode
 *
 * @param[in] D Type containing `name` member used to identify the
 * `domain` to which the `registered_string_in` belongs. Else,
 * `domain::global` to indicate that the global NVTX domain should be used.
 */
#define NVTX3_V1_FUNC_RANGE_IN(D) \
static ::nvtx3::v1::registered_string_in<D> const nvtx3_func_name__{__func__}; \
static ::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \
::nvtx3::v1::scoped_range_in<D> const nvtx3_range__{nvtx3_func_attr__};
/**
 * @brief Convenience macro for generating a range in the specified `domain`
 * from the lifetime of a function if the given boolean expression evaluates
 * to true.
 *
 * Similar to `NVTX3_V1_FUNC_RANGE_IN(D)`, the only difference being that
 * `NVTX3_V1_FUNC_RANGE_IF_IN(D, C)` only generates a range if the given boolean
 * expression evaluates to true.
 *
 * The expansion declares `optional_nvtx3_range__` in the enclosing scope and
 * begins it inside `if (C) { ... }`; the static registered string and
 * attributes live inside that `if` block. Because the variable name is fixed,
 * the macro can be used at most once in any given scope.
 *
 * Note that when `NVTX_DISABLE` is defined this macro expands to nothing, so
 * `C` is not evaluated at all in that configuration.
 *
 * @param[in] D Type containing `name` member used to identify the
 * `domain` to which the `registered_string_in` belongs. Else,
 * `domain::global` to indicate that the global NVTX domain should be used.
 *
 * @param[in] C Boolean expression used to determine if a range should be
 * generated.
 */
#define NVTX3_V1_FUNC_RANGE_IF_IN(D, C) \
::nvtx3::v1::detail::optional_scoped_range_in<D> optional_nvtx3_range__; \
if (C) { \
static ::nvtx3::v1::registered_string_in<D> const nvtx3_func_name__{__func__}; \
static ::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \
optional_nvtx3_range__.begin(nvtx3_func_attr__); \
}
#else
#define NVTX3_V1_FUNC_RANGE_IN(D)
#define NVTX3_V1_FUNC_RANGE_IF_IN(D, C)
#endif // NVTX_DISABLE
/**
 * @brief Convenience macro for generating a range in the global domain from the
 * lifetime of a function.
 *
 * This macro is useful for generating an NVTX range in the global domain from
 * the entry point of a function to its exit. It is intended to be the first
 * line of the function.
 *
 * Expands to `NVTX3_V1_FUNC_RANGE_IN(::nvtx3::v1::domain::global)`; see that
 * macro for what is constructed. When `NVTX_DISABLE` is defined the expansion
 * is empty.
 *
 * Example:
 * \code{.cpp}
 * void foo(...) {
 *   NVTX3_FUNC_RANGE(); // Range begins on entry to foo()
 *   // do stuff
 *   ...
 * } // Range ends on return from foo()
 * \endcode
 */
#define NVTX3_V1_FUNC_RANGE() NVTX3_V1_FUNC_RANGE_IN(::nvtx3::v1::domain::global)
/**
 * @brief Convenience macro for generating a range in the global domain from the
 * lifetime of a function if the given boolean expression evaluates to true.
 *
 * Similar to `NVTX3_V1_FUNC_RANGE()`, the only difference being that
 * `NVTX3_V1_FUNC_RANGE_IF(C)` only generates a range if the given boolean
 * expression evaluates to true.
 *
 * Expands to `NVTX3_V1_FUNC_RANGE_IF_IN(::nvtx3::v1::domain::global, C)`.
 * When `NVTX_DISABLE` is defined the expansion is empty, so `C` is not
 * evaluated.
 *
 * @param[in] C Boolean expression used to determine if a range should be
 * generated.
 */
#define NVTX3_V1_FUNC_RANGE_IF(C) NVTX3_V1_FUNC_RANGE_IF_IN(::nvtx3::v1::domain::global, C)
/* When inlining this version, versioned macros must have unversioned aliases.
 * For each NVTX3_Vx_ #define, make an NVTX3_ alias of it here.
 *
 * The aliases are object-like macros: an invocation such as
 * `NVTX3_FUNC_RANGE_IN(D)` is first replaced by the versioned name, which then
 * consumes the argument list that follows it. */
#if defined(NVTX3_INLINE_THIS_VERSION)
/* clang format off */
#define NVTX3_FUNC_RANGE NVTX3_V1_FUNC_RANGE
#define NVTX3_FUNC_RANGE_IF NVTX3_V1_FUNC_RANGE_IF
#define NVTX3_FUNC_RANGE_IN NVTX3_V1_FUNC_RANGE_IN
#define NVTX3_FUNC_RANGE_IF_IN NVTX3_V1_FUNC_RANGE_IF_IN
/* clang format on */
#endif
#endif // NVTX3_CPP_DEFINITIONS_V1_0
/* Add functionality for new minor versions here, by copying the above section enclosed
* in #ifndef NVTX3_CPP_DEFINITIONS_Vx_y, and incrementing the minor version. This code
* is an example of how additions for version 1.2 would look, indented for clarity. Note
* that the versioned symbols and macros are always provided, and the unversioned symbols
* are only provided if NVTX3_INLINE_THIS_VERSION was defined at the top of this header.
*
* \code{.cpp}
* #ifndef NVTX3_CPP_DEFINITIONS_V1_2
* #define NVTX3_CPP_DEFINITIONS_V1_2
* namespace nvtx3 {
* NVTX3_INLINE_IF_REQUESTED namespace NVTX3_VERSION_NAMESPACE {
* class new_class {};
* inline void new_function() {}
* }
* }
*
* // Macros must have the major version in their names:
* #define NVTX3_V1_NEW_MACRO_A() ...
* #define NVTX3_V1_NEW_MACRO_B() ...
*
* // If inlining, make aliases for the macros with the version number omitted
* #if defined(NVTX3_INLINE_THIS_VERSION)
* #define NVTX3_NEW_MACRO_A NVTX3_V1_NEW_MACRO_A
* #define NVTX3_NEW_MACRO_B NVTX3_V1_NEW_MACRO_B
* #endif
* #endif // NVTX3_CPP_DEFINITIONS_V1_2
* \endcode
*/
/* Undefine all temporarily-defined unversioned macros, which would conflict with
 * subsequent includes of different versions of this header. */
#undef NVTX3_CPP_VERSION_MAJOR
#undef NVTX3_CPP_VERSION_MINOR
#undef NVTX3_CONCAT
#undef NVTX3_NAMESPACE_FOR
#undef NVTX3_VERSION_NAMESPACE
#undef NVTX3_INLINE_IF_REQUESTED
#undef NVTX3_CONSTEXPR_IF_CPP14
#if defined(NVTX3_INLINE_THIS_VERSION)
#undef NVTX3_INLINE_THIS_VERSION
#endif
/* The two macros below are only removed when the matching *_DEFINED_HERE
 * marker is set; presumably the marker records that this header, rather than
 * the including code, supplied the definition -- confirm at the definition
 * site earlier in this header. */
#if defined(NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE)
#undef NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE
#undef NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
#endif
#if defined(NVTX3_STATIC_ASSERT_DEFINED_HERE)
#undef NVTX3_STATIC_ASSERT_DEFINED_HERE
#undef NVTX3_STATIC_ASSERT
#endif
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
#endif
/* ---- Include required platform headers ---- */
#if defined(_WIN32)
#include <Windows.h>
#else
#include <unistd.h>
#if defined(__ANDROID__)
#include <android/api-level.h>
#endif
#if defined(__linux__) || defined(__CYGWIN__)
#include <sched.h>
#endif
#include <limits.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <pthread.h>
#include <stdlib.h>
#include <wchar.h>
#endif
/* ---- Define macros used in this file ---- */
/* Values for nvtxGlobals_t::initState. The global instance is statically
 * initialized to NVTX_INIT_STATE_FRESH; presumably STARTED and COMPLETE track
 * the progress of the one-time initialization -- confirm in nvtxInit.h. */
#define NVTX_INIT_STATE_FRESH 0
#define NVTX_INIT_STATE_STARTED 1
#define NVTX_INIT_STATE_COMPLETE 2
/* Diagnostic output helpers. NVTX_ERR / NVTX_INFO take printf-style arguments
 * and only produce output when NVTX_DEBUG_PRINT is defined: through the
 * Android log on Android, through stderr elsewhere. Without NVTX_DEBUG_PRINT
 * they expand to nothing.
 *
 * None of the variants carries a trailing semicolon, so the caller always
 * writes `NVTX_ERR(...);` and the macros behave like a single statement on
 * every platform (e.g. inside an unbraced if/else). */
#ifdef NVTX_DEBUG_PRINT
#ifdef __ANDROID__
#include <android/log.h>
#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__)
#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__)
#else
#include <stdio.h>
/* The prefix is pasted onto the format string by literal concatenation, so the
 * first argument must be a string literal. */
#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__)
#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__)
#endif
#else /* !defined(NVTX_DEBUG_PRINT) */
#define NVTX_ERR(...)
#define NVTX_INFO(...)
#endif
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#ifdef __GNUC__
#pragma GCC visibility push(hidden)
#endif
/* ---- Forward declare all functions referenced in globals ---- */
/* nvtxEtiGetModuleFunctionTable and nvtxEtiSetInjectionNvtxVersion are stored
 * in the static initializer of the nvtxGlobals instance below, so they have to
 * be declared before it; their definitions follow later in this file. */
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void);
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
NvtxCallbackModule module,
NvtxFunctionTable* out_table,
unsigned int* out_size);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(
uint32_t version);
NVTX_LINKONCE_FWDDECL_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(
uint32_t exportTableId);
#include "nvtxInitDecls.h"
/* ---- Define all globals ---- */
/* All mutable state of the NVTX implementation, gathered in one struct: the
 * initialization state, the export tables, one function-pointer slot per API
 * function, and per-module tables holding the addresses of those slots.
 *
 * The single instance is filled by a positional initializer, so the member
 * order here and the entry order there must be kept in sync. */
typedef struct nvtxGlobals_t
{
/* One of the NVTX_INIT_STATE_* values. */
volatile unsigned int initState;
/* Export tables; nvtxGetExportTable returns the addresses of these members. */
NvtxExportTableCallbacks etblCallbacks;
NvtxExportTableVersionInfo etblVersionInfo;
/* Implementation function pointers */
/* The public API wrappers read these slots and call through them when they
 * are non-null. Slots typed *_fakeimpl_fntype presumably use stand-in
 * signatures so that this header does not depend on CUDA/OpenCL types --
 * confirm against the typedefs. */
nvtxMarkEx_impl_fntype nvtxMarkEx_impl_fnptr;
nvtxMarkA_impl_fntype nvtxMarkA_impl_fnptr;
nvtxMarkW_impl_fntype nvtxMarkW_impl_fnptr;
nvtxRangeStartEx_impl_fntype nvtxRangeStartEx_impl_fnptr;
nvtxRangeStartA_impl_fntype nvtxRangeStartA_impl_fnptr;
nvtxRangeStartW_impl_fntype nvtxRangeStartW_impl_fnptr;
nvtxRangeEnd_impl_fntype nvtxRangeEnd_impl_fnptr;
nvtxRangePushEx_impl_fntype nvtxRangePushEx_impl_fnptr;
nvtxRangePushA_impl_fntype nvtxRangePushA_impl_fnptr;
nvtxRangePushW_impl_fntype nvtxRangePushW_impl_fnptr;
nvtxRangePop_impl_fntype nvtxRangePop_impl_fnptr;
nvtxNameCategoryA_impl_fntype nvtxNameCategoryA_impl_fnptr;
nvtxNameCategoryW_impl_fntype nvtxNameCategoryW_impl_fnptr;
nvtxNameOsThreadA_impl_fntype nvtxNameOsThreadA_impl_fnptr;
nvtxNameOsThreadW_impl_fntype nvtxNameOsThreadW_impl_fnptr;
nvtxNameCuDeviceA_fakeimpl_fntype nvtxNameCuDeviceA_impl_fnptr;
nvtxNameCuDeviceW_fakeimpl_fntype nvtxNameCuDeviceW_impl_fnptr;
nvtxNameCuContextA_fakeimpl_fntype nvtxNameCuContextA_impl_fnptr;
nvtxNameCuContextW_fakeimpl_fntype nvtxNameCuContextW_impl_fnptr;
nvtxNameCuStreamA_fakeimpl_fntype nvtxNameCuStreamA_impl_fnptr;
nvtxNameCuStreamW_fakeimpl_fntype nvtxNameCuStreamW_impl_fnptr;
nvtxNameCuEventA_fakeimpl_fntype nvtxNameCuEventA_impl_fnptr;
nvtxNameCuEventW_fakeimpl_fntype nvtxNameCuEventW_impl_fnptr;
nvtxNameClDeviceA_fakeimpl_fntype nvtxNameClDeviceA_impl_fnptr;
nvtxNameClDeviceW_fakeimpl_fntype nvtxNameClDeviceW_impl_fnptr;
nvtxNameClContextA_fakeimpl_fntype nvtxNameClContextA_impl_fnptr;
nvtxNameClContextW_fakeimpl_fntype nvtxNameClContextW_impl_fnptr;
nvtxNameClCommandQueueA_fakeimpl_fntype nvtxNameClCommandQueueA_impl_fnptr;
nvtxNameClCommandQueueW_fakeimpl_fntype nvtxNameClCommandQueueW_impl_fnptr;
nvtxNameClMemObjectA_fakeimpl_fntype nvtxNameClMemObjectA_impl_fnptr;
nvtxNameClMemObjectW_fakeimpl_fntype nvtxNameClMemObjectW_impl_fnptr;
nvtxNameClSamplerA_fakeimpl_fntype nvtxNameClSamplerA_impl_fnptr;
nvtxNameClSamplerW_fakeimpl_fntype nvtxNameClSamplerW_impl_fnptr;
nvtxNameClProgramA_fakeimpl_fntype nvtxNameClProgramA_impl_fnptr;
nvtxNameClProgramW_fakeimpl_fntype nvtxNameClProgramW_impl_fnptr;
nvtxNameClEventA_fakeimpl_fntype nvtxNameClEventA_impl_fnptr;
nvtxNameClEventW_fakeimpl_fntype nvtxNameClEventW_impl_fnptr;
nvtxNameCudaDeviceA_impl_fntype nvtxNameCudaDeviceA_impl_fnptr;
nvtxNameCudaDeviceW_impl_fntype nvtxNameCudaDeviceW_impl_fnptr;
nvtxNameCudaStreamA_fakeimpl_fntype nvtxNameCudaStreamA_impl_fnptr;
nvtxNameCudaStreamW_fakeimpl_fntype nvtxNameCudaStreamW_impl_fnptr;
nvtxNameCudaEventA_fakeimpl_fntype nvtxNameCudaEventA_impl_fnptr;
nvtxNameCudaEventW_fakeimpl_fntype nvtxNameCudaEventW_impl_fnptr;
nvtxDomainMarkEx_impl_fntype nvtxDomainMarkEx_impl_fnptr;
nvtxDomainRangeStartEx_impl_fntype nvtxDomainRangeStartEx_impl_fnptr;
nvtxDomainRangeEnd_impl_fntype nvtxDomainRangeEnd_impl_fnptr;
nvtxDomainRangePushEx_impl_fntype nvtxDomainRangePushEx_impl_fnptr;
nvtxDomainRangePop_impl_fntype nvtxDomainRangePop_impl_fnptr;
nvtxDomainResourceCreate_impl_fntype nvtxDomainResourceCreate_impl_fnptr;
nvtxDomainResourceDestroy_impl_fntype nvtxDomainResourceDestroy_impl_fnptr;
nvtxDomainNameCategoryA_impl_fntype nvtxDomainNameCategoryA_impl_fnptr;
nvtxDomainNameCategoryW_impl_fntype nvtxDomainNameCategoryW_impl_fnptr;
nvtxDomainRegisterStringA_impl_fntype nvtxDomainRegisterStringA_impl_fnptr;
nvtxDomainRegisterStringW_impl_fntype nvtxDomainRegisterStringW_impl_fnptr;
nvtxDomainCreateA_impl_fntype nvtxDomainCreateA_impl_fnptr;
nvtxDomainCreateW_impl_fntype nvtxDomainCreateW_impl_fnptr;
nvtxDomainDestroy_impl_fntype nvtxDomainDestroy_impl_fnptr;
nvtxInitialize_impl_fntype nvtxInitialize_impl_fnptr;
nvtxDomainSyncUserCreate_impl_fntype nvtxDomainSyncUserCreate_impl_fnptr;
nvtxDomainSyncUserDestroy_impl_fntype nvtxDomainSyncUserDestroy_impl_fnptr;
nvtxDomainSyncUserAcquireStart_impl_fntype nvtxDomainSyncUserAcquireStart_impl_fnptr;
nvtxDomainSyncUserAcquireFailed_impl_fntype nvtxDomainSyncUserAcquireFailed_impl_fnptr;
nvtxDomainSyncUserAcquireSuccess_impl_fntype nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
nvtxDomainSyncUserReleasing_impl_fntype nvtxDomainSyncUserReleasing_impl_fnptr;
/* Tables of function pointers -- Extra null added to the end to ensure
 * a crash instead of silent corruption if a tool reads off the end. */
/* Each entry is the address of one of the *_impl_fnptr slots above.
 * nvtxEtiGetModuleFunctionTable reports the size of a table as its element
 * count minus one, i.e. without the extra trailing slot. */
NvtxFunctionPointer* functionTable_CORE [NVTX_CBID_CORE_SIZE + 1];
NvtxFunctionPointer* functionTable_CUDA [NVTX_CBID_CUDA_SIZE + 1];
NvtxFunctionPointer* functionTable_OPENCL[NVTX_CBID_OPENCL_SIZE + 1];
NvtxFunctionPointer* functionTable_CUDART[NVTX_CBID_CUDART_SIZE + 1];
NvtxFunctionPointer* functionTable_CORE2 [NVTX_CBID_CORE2_SIZE + 1];
NvtxFunctionPointer* functionTable_SYNC [NVTX_CBID_SYNC_SIZE + 1];
} nvtxGlobals_t;
/* The one instance of nvtxGlobals_t. The initializer is positional: its
 * entries must appear in exactly the order in which the members of
 * nvtxGlobals_t are declared. */
NVTX_LINKONCE_DEFINE_GLOBAL nvtxGlobals_t NVTX_VERSIONED_IDENTIFIER(nvtxGlobals) =
{
/* initState */
NVTX_INIT_STATE_FRESH,
/* etblCallbacks */
{
sizeof(NvtxExportTableCallbacks),
NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)
},
/* etblVersionInfo */
{
sizeof(NvtxExportTableVersionInfo),
NVTX_VERSION,
0,
NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)
},
/* Implementation function pointers */
/* Every slot starts out pointing at the matching *_impl_init function
 * (presumably these run the one-time initialization before forwarding the
 * call -- see nvtxInitDefs.h). */
NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init),
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init),
/* Tables of function pointers */
/* Each table begins and ends with a null entry; the entries in between are
 * the addresses of the *_impl_fnptr slots of this same object. */
/* functionTable_CORE */
{
0,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr,
0
},
/* functionTable_CUDA */
{
0,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr,
0
},
/* functionTable_OPENCL */
{
0,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr,
0
},
/* functionTable_CUDART */
{
0,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr,
0
},
/* functionTable_CORE2 */
{
0,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr,
0
},
/* functionTable_SYNC */
{
0,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr,
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr,
0
}
};
/* ---- Define static inline implementations of core API functions ---- */
#include "nvtxImplCore.h"
/* ---- Define implementations of export table functions ---- */
/* Export-table callback: looks up the function-pointer table that belongs to
 * `module`.
 *
 * Returns 1 on success and, for each output pointer that is non-null, stores
 * the table address in *out_table and the table's element count minus one
 * (i.e. without the extra trailing slot) in *out_size.
 * Returns 0 and leaves the outputs untouched when `module` is not one of the
 * modules handled here. */
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
    NvtxCallbackModule module,
    NvtxFunctionTable* out_table,
    unsigned int* out_size)
{
    NvtxFunctionTable selected = (NvtxFunctionTable)0;
    unsigned int tableBytes = 0;

    if (module == NVTX_CB_MODULE_CORE)
    {
        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE;
        tableBytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE);
    }
    else if (module == NVTX_CB_MODULE_CUDA)
    {
        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA;
        tableBytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA);
    }
    else if (module == NVTX_CB_MODULE_OPENCL)
    {
        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL;
        tableBytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL);
    }
    else if (module == NVTX_CB_MODULE_CUDART)
    {
        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART;
        tableBytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART);
    }
    else if (module == NVTX_CB_MODULE_CORE2)
    {
        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2;
        tableBytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2);
    }
    else if (module == NVTX_CB_MODULE_SYNC)
    {
        selected = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC;
        tableBytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC);
    }
    else
    {
        /* Unknown module: report failure without writing any output. */
        return 0;
    }

    if (out_size != 0)
    {
        /* Element count, minus the extra trailing entry. */
        *out_size = (tableBytes / (unsigned int)sizeof(NvtxFunctionPointer*)) - 1;
    }
    if (out_table != 0)
    {
        *out_table = selected;
    }
    return 1;
}
/* Returns the address of the export table identified by `exportTableId`
 * (NVTX_ETID_CALLBACKS or NVTX_ETID_VERSIONINFO), or 0 for any other id. */
NVTX_LINKONCE_DEFINE_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(uint32_t exportTableId)
{
    const void* exportTable = 0;

    if (exportTableId == NVTX_ETID_CALLBACKS)
    {
        exportTable = &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblCallbacks;
    }
    else if (exportTableId == NVTX_ETID_VERSIONINFO)
    {
        exportTable = &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblVersionInfo;
    }

    return exportTable;
}
/* Export-table callback that receives a version number. This implementation
 * deliberately ignores it. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(
    uint32_t version)
{
    /* Reserved for custom implementations to resolve problems with tools */
    (void)(version);
}
/* ---- Define implementations of init versions of all API functions ---- */
#include "nvtxInitDefs.h"
/* ---- Define implementations of initialization functions ---- */
#include "nvtxInit.h"
#ifdef __GNUC__
#pragma GCC visibility pop
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
/* Forwards `eventAttrib` to the currently installed nvtxMarkEx implementation.
 * Does nothing if that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxMarkEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr;
    if (!impl)
        return;
    impl(eventAttrib);
#endif /*NVTX_DISABLE*/
}
/* Forwards the char-string `message` to the currently installed nvtxMarkA
 * implementation. Does nothing if that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxMarkA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr;
    if (!impl)
        return;
    impl(message);
#endif /*NVTX_DISABLE*/
}
/* Forwards the wide-character `message` to the currently installed nvtxMarkW
 * implementation. Does nothing if that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxMarkW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr;
    if (!impl)
        return;
    impl(message);
#endif /*NVTX_DISABLE*/
}
/* Forwards `eventAttrib` to the currently installed nvtxRangeStartEx
 * implementation and returns its result. Returns (nvtxRangeId_t)0 if that slot
 * is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangeStartEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr;
    if (impl)
        return impl(eventAttrib);
#endif /*NVTX_DISABLE*/
    return (nvtxRangeId_t)0;
}
/* Forwards the char-string `message` to the currently installed
 * nvtxRangeStartA implementation and returns its result. Returns
 * (nvtxRangeId_t)0 if that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangeStartA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr;
    if (impl)
        return impl(message);
#endif /*NVTX_DISABLE*/
    return (nvtxRangeId_t)0;
}
/* Forwards the wide-character `message` to the currently installed
 * nvtxRangeStartW implementation and returns its result. Returns
 * (nvtxRangeId_t)0 if that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangeStartW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr;
    if (impl)
        return impl(message);
#endif /*NVTX_DISABLE*/
    return (nvtxRangeId_t)0;
}
/* Forwards the range `id` to the currently installed nvtxRangeEnd
 * implementation. Does nothing if that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangeEnd_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr;
    if (!impl)
        return;
    impl(id);
#endif /*NVTX_DISABLE*/
}
/* Forwards `eventAttrib` to the currently installed nvtxRangePushEx
 * implementation and returns its result. Returns NVTX_NO_PUSH_POP_TRACKING if
 * that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangePushEx_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr;
    if (impl)
        return impl(eventAttrib);
#endif /*NVTX_DISABLE*/
    return (int)NVTX_NO_PUSH_POP_TRACKING;
}
/* Forwards the char-string `message` to the currently installed nvtxRangePushA
 * implementation and returns its result. Returns NVTX_NO_PUSH_POP_TRACKING if
 * that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangePushA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr;
    if (impl)
        return impl(message);
#endif /*NVTX_DISABLE*/
    return (int)NVTX_NO_PUSH_POP_TRACKING;
}
/* Forwards the wide-character `message` to the currently installed
 * nvtxRangePushW implementation and returns its result. Returns
 * NVTX_NO_PUSH_POP_TRACKING if that slot is null or NVTX_DISABLE is defined. */
NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangePushW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr;
    if (impl)
        return impl(message);
#endif /*NVTX_DISABLE*/
    return (int)NVTX_NO_PUSH_POP_TRACKING;
}
/* Calls the currently installed nvtxRangePop implementation and returns its
 * result. Returns NVTX_NO_PUSH_POP_TRACKING if that slot is null or
 * NVTX_DISABLE is defined. */
NVTX_DECLSPEC int NVTX_API nvtxRangePop(void)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxRangePop_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr;
    if (impl)
        return impl();
#endif /*NVTX_DISABLE*/
    return (int)NVTX_NO_PUSH_POP_TRACKING;
}
/* Forwards `category` and the char-string `name` to the currently installed
 * nvtxNameCategoryA implementation. Does nothing if that slot is null or
 * NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxNameCategoryA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr;
    if (!impl)
        return;
    impl(category, name);
#endif /*NVTX_DISABLE*/
}
/* Forwards `category` and the wide-character `name` to the currently installed
 * nvtxNameCategoryW implementation. Does nothing if that slot is null or
 * NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxNameCategoryW_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr;
    if (!impl)
        return;
    impl(category, name);
#endif /*NVTX_DISABLE*/
}
/* Forwards `threadId` and the char-string `name` to the currently installed
 * nvtxNameOsThreadA implementation. Does nothing if that slot is null or
 * NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name)
{
#ifndef NVTX_DISABLE
    /* Copy the slot into a local so the null check and the call use the same value. */
    nvtxNameOsThreadA_impl_fntype impl = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr;
    if (!impl)
        return;
    impl(threadId, name);
#endif /*NVTX_DISABLE*/
}
/* Wide-character variant: forwards to the nvtxNameOsThreadW handler held in
 * nvtxGlobals; does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameOsThreadW_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(threadId, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainMarkEx handler held in nvtxGlobals; does nothing
 * when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
{
#ifndef NVTX_DISABLE
    nvtxDomainMarkEx_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(domain, eventAttrib);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainRangeStartEx handler held in nvtxGlobals. When no
 * handler is set (or NVTX_DISABLE is defined), returns a zero range id. */
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
{
#ifndef NVTX_DISABLE
    nvtxDomainRangeStartEx_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr;
    if (handler)
    {
        return handler(domain, eventAttrib);
    }
#endif /* NVTX_DISABLE */
    return (nvtxRangeId_t)0;
}
/* Forwards to the nvtxDomainRangeEnd handler held in nvtxGlobals; does nothing
 * when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id)
{
#ifndef NVTX_DISABLE
    nvtxDomainRangeEnd_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(domain, id);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainRangePushEx handler held in nvtxGlobals. When no
 * handler is set (or NVTX_DISABLE is defined), returns NVTX_NO_PUSH_POP_TRACKING. */
NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
{
#ifndef NVTX_DISABLE
    nvtxDomainRangePushEx_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr;
    if (handler)
    {
        return handler(domain, eventAttrib);
    }
#endif /* NVTX_DISABLE */
    return (int)NVTX_NO_PUSH_POP_TRACKING;
}
/* Forwards to the nvtxDomainRangePop handler held in nvtxGlobals. When no
 * handler is set (or NVTX_DISABLE is defined), returns NVTX_NO_PUSH_POP_TRACKING. */
NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain)
{
#ifndef NVTX_DISABLE
    nvtxDomainRangePop_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr;
    if (handler)
    {
        return handler(domain);
    }
#endif /* NVTX_DISABLE */
    return (int)NVTX_NO_PUSH_POP_TRACKING;
}
/* Forwards to the nvtxDomainResourceCreate handler held in nvtxGlobals. When no
 * handler is set (or NVTX_DISABLE is defined), returns a zero resource handle. */
NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
{
#ifndef NVTX_DISABLE
    nvtxDomainResourceCreate_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr;
    if (handler)
    {
        return handler(domain, attribs);
    }
#endif /* NVTX_DISABLE */
    return (nvtxResourceHandle_t)0;
}
/* Forwards to the nvtxDomainResourceDestroy handler held in nvtxGlobals; does
 * nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource)
{
#ifndef NVTX_DISABLE
    nvtxDomainResourceDestroy_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(resource);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainNameCategoryA handler held in nvtxGlobals; does
 * nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxDomainNameCategoryA_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(domain, category, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxDomainNameCategoryW handler held
 * in nvtxGlobals; does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxDomainNameCategoryW_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(domain, category, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainRegisterStringA handler held in nvtxGlobals. When no
 * handler is set (or NVTX_DISABLE is defined), returns a zero string handle. */
NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string)
{
#ifndef NVTX_DISABLE
    nvtxDomainRegisterStringA_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr;
    if (handler)
    {
        return handler(domain, string);
    }
#endif /* NVTX_DISABLE */
    return (nvtxStringHandle_t)0;
}
/* Wide-character variant: forwards to the nvtxDomainRegisterStringW handler held
 * in nvtxGlobals. When no handler is set (or NVTX_DISABLE is defined), returns a
 * zero string handle. */
NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string)
{
#ifndef NVTX_DISABLE
    nvtxDomainRegisterStringW_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr;
    if (handler)
    {
        return handler(domain, string);
    }
#endif /* NVTX_DISABLE */
    return (nvtxStringHandle_t)0;
}
/* Forwards to the nvtxDomainCreateA handler held in nvtxGlobals. When no handler
 * is set (or NVTX_DISABLE is defined), returns a zero domain handle. */
NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* message)
{
#ifndef NVTX_DISABLE
    nvtxDomainCreateA_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr;
    if (handler)
    {
        return handler(message);
    }
#endif /* NVTX_DISABLE */
    return (nvtxDomainHandle_t)0;
}
/* Wide-character variant: forwards to the nvtxDomainCreateW handler held in
 * nvtxGlobals. When no handler is set (or NVTX_DISABLE is defined), returns a
 * zero domain handle. */
NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* message)
{
#ifndef NVTX_DISABLE
    nvtxDomainCreateW_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr;
    if (handler)
    {
        return handler(message);
    }
#endif /* NVTX_DISABLE */
    return (nvtxDomainHandle_t)0;
}
/* Forwards to the nvtxDomainDestroy handler held in nvtxGlobals; does nothing
 * when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain)
{
#ifndef NVTX_DISABLE
    nvtxDomainDestroy_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(domain);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxInitialize handler held in nvtxGlobals, passing `reserved`
 * through unchanged; does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved)
{
#ifndef NVTX_DISABLE
    nvtxInitialize_impl_fntype handler = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(reserved);
#endif /* NVTX_DISABLE */
}
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD_CUDART
#error Never include this file directly -- it is automatically included by nvToolsExtCudaRt.h (except when NVTX_NO_IMPL is defined).
#endif
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Signatures of the CUDA runtime naming handlers. The wrappers below cast the
 * matching *_impl_fnptr slot of nvtxGlobals to these types before calling it. */
typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
typedef void (NVTX_API * nvtxNameCudaStreamA_impl_fntype)(cudaStream_t stream, const char* name);
typedef void (NVTX_API * nvtxNameCudaStreamW_impl_fntype)(cudaStream_t stream, const wchar_t* name);
typedef void (NVTX_API * nvtxNameCudaEventA_impl_fntype)(cudaEvent_t event, const char* name);
typedef void (NVTX_API * nvtxNameCudaEventW_impl_fntype)(cudaEvent_t event, const wchar_t* name);
/* Forwards to the nvtxNameCudaDeviceA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCudaDeviceA_impl_fntype handler = (nvtxNameCudaDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(device, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameCudaDeviceW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCudaDeviceW_impl_fntype handler = (nvtxNameCudaDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(device, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameCudaStreamA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCudaStreamA_impl_fntype handler = (nvtxNameCudaStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(stream, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameCudaStreamW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCudaStreamW_impl_fntype handler = (nvtxNameCudaStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(stream, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameCudaEventA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCudaEventA_impl_fntype handler = (nvtxNameCudaEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(event, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameCudaEventW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCudaEventW_impl_fntype handler = (nvtxNameCudaEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(event, name);
#endif /* NVTX_DISABLE */
}
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD_CUDA
#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
#endif
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Signatures of the CUDA driver API naming handlers. The wrappers below cast the
 * matching *_impl_fnptr slot of nvtxGlobals to these types before calling it. */
typedef void (NVTX_API * nvtxNameCuDeviceA_impl_fntype)(CUdevice device, const char* name);
typedef void (NVTX_API * nvtxNameCuDeviceW_impl_fntype)(CUdevice device, const wchar_t* name);
typedef void (NVTX_API * nvtxNameCuContextA_impl_fntype)(CUcontext context, const char* name);
typedef void (NVTX_API * nvtxNameCuContextW_impl_fntype)(CUcontext context, const wchar_t* name);
typedef void (NVTX_API * nvtxNameCuStreamA_impl_fntype)(CUstream stream, const char* name);
typedef void (NVTX_API * nvtxNameCuStreamW_impl_fntype)(CUstream stream, const wchar_t* name);
typedef void (NVTX_API * nvtxNameCuEventA_impl_fntype)(CUevent event, const char* name);
typedef void (NVTX_API * nvtxNameCuEventW_impl_fntype)(CUevent event, const wchar_t* name);
/* Forwards to the nvtxNameCuDeviceA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuDeviceA_impl_fntype handler = (nvtxNameCuDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(device, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameCuDeviceW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuDeviceW_impl_fntype handler = (nvtxNameCuDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(device, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameCuContextA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuContextA_impl_fntype handler = (nvtxNameCuContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(context, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameCuContextW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuContextW_impl_fntype handler = (nvtxNameCuContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(context, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameCuStreamA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuStreamA_impl_fntype handler = (nvtxNameCuStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(stream, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameCuStreamW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuStreamW_impl_fntype handler = (nvtxNameCuStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(stream, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameCuEventA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuEventA_impl_fntype handler = (nvtxNameCuEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(event, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameCuEventW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameCuEventW_impl_fntype handler = (nvtxNameCuEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(event, name);
#endif /* NVTX_DISABLE */
}
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD_OPENCL
#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
#endif
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Signatures of the OpenCL naming handlers. The wrappers below cast the matching
 * *_impl_fnptr slot of nvtxGlobals to these types before calling it. */
typedef void (NVTX_API * nvtxNameClDeviceA_impl_fntype)(cl_device_id device, const char* name);
typedef void (NVTX_API * nvtxNameClDeviceW_impl_fntype)(cl_device_id device, const wchar_t* name);
typedef void (NVTX_API * nvtxNameClContextA_impl_fntype)(cl_context context, const char* name);
typedef void (NVTX_API * nvtxNameClContextW_impl_fntype)(cl_context context, const wchar_t* name);
typedef void (NVTX_API * nvtxNameClCommandQueueA_impl_fntype)(cl_command_queue command_queue, const char* name);
typedef void (NVTX_API * nvtxNameClCommandQueueW_impl_fntype)(cl_command_queue command_queue, const wchar_t* name);
typedef void (NVTX_API * nvtxNameClMemObjectA_impl_fntype)(cl_mem memobj, const char* name);
typedef void (NVTX_API * nvtxNameClMemObjectW_impl_fntype)(cl_mem memobj, const wchar_t* name);
typedef void (NVTX_API * nvtxNameClSamplerA_impl_fntype)(cl_sampler sampler, const char* name);
typedef void (NVTX_API * nvtxNameClSamplerW_impl_fntype)(cl_sampler sampler, const wchar_t* name);
typedef void (NVTX_API * nvtxNameClProgramA_impl_fntype)(cl_program program, const char* name);
typedef void (NVTX_API * nvtxNameClProgramW_impl_fntype)(cl_program program, const wchar_t* name);
typedef void (NVTX_API * nvtxNameClEventA_impl_fntype)(cl_event evnt, const char* name);
typedef void (NVTX_API * nvtxNameClEventW_impl_fntype)(cl_event evnt, const wchar_t* name);
/* Forwards to the nvtxNameClDeviceA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClDeviceA_impl_fntype handler = (nvtxNameClDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(device, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameClDeviceW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClDeviceW_impl_fntype handler = (nvtxNameClDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(device, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameClContextA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClContextA_impl_fntype handler = (nvtxNameClContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(context, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameClContextW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClContextW_impl_fntype handler = (nvtxNameClContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(context, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameClCommandQueueA handler held in nvtxGlobals (cast to
 * its typed signature); does nothing when no handler is set or NVTX_DISABLE is
 * defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClCommandQueueA_impl_fntype handler = (nvtxNameClCommandQueueA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(command_queue, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameClCommandQueueW handler held
 * in nvtxGlobals (cast to its typed signature); does nothing when no handler is
 * set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClCommandQueueW_impl_fntype handler = (nvtxNameClCommandQueueW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(command_queue, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameClMemObjectA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClMemObjectA_impl_fntype handler = (nvtxNameClMemObjectA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(memobj, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameClMemObjectW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClMemObjectW_impl_fntype handler = (nvtxNameClMemObjectW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(memobj, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameClSamplerA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClSamplerA_impl_fntype handler = (nvtxNameClSamplerA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(sampler, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameClSamplerW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClSamplerW_impl_fntype handler = (nvtxNameClSamplerW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(sampler, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameClProgramA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClProgramA_impl_fntype handler = (nvtxNameClProgramA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(program, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameClProgramW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClProgramW_impl_fntype handler = (nvtxNameClProgramW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(program, name);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxNameClEventA handler held in nvtxGlobals (cast to its
 * typed signature); does nothing when no handler is set or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClEventA_impl_fntype handler = (nvtxNameClEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(evnt, name);
#endif /* NVTX_DISABLE */
}
/* Wide-character variant: forwards to the nvtxNameClEventW handler held in
 * nvtxGlobals (cast to its typed signature); does nothing when no handler is set
 * or NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name)
{
#ifndef NVTX_DISABLE
    nvtxNameClEventW_impl_fntype handler = (nvtxNameClEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(evnt, name);
#endif /* NVTX_DISABLE */
}
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD_SYNC
#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
#endif
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Signatures of the user-defined synchronization object handlers. The wrappers
 * below cast the matching *_impl_fnptr slot of nvtxGlobals to these types before
 * calling it. */
typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
/* Forwards to the nvtxDomainSyncUserCreate handler held in nvtxGlobals (cast to
 * its typed signature). When no handler is set (or NVTX_DISABLE is defined),
 * returns a zero nvtxSyncUser_t. */
NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs)
{
#ifndef NVTX_DISABLE
    nvtxDomainSyncUserCreate_impl_fntype handler = (nvtxDomainSyncUserCreate_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
    if (handler)
    {
        return handler(domain, attribs);
    }
#endif /* NVTX_DISABLE */
    return (nvtxSyncUser_t)0;
}
/* Forwards to the nvtxDomainSyncUserDestroy handler held in nvtxGlobals (cast to
 * its typed signature); does nothing when no handler is set or NVTX_DISABLE is
 * defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle)
{
#ifndef NVTX_DISABLE
    nvtxDomainSyncUserDestroy_impl_fntype handler = (nvtxDomainSyncUserDestroy_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(handle);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainSyncUserAcquireStart handler held in nvtxGlobals
 * (cast to its typed signature); does nothing when no handler is set or
 * NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle)
{
#ifndef NVTX_DISABLE
    nvtxDomainSyncUserAcquireStart_impl_fntype handler = (nvtxDomainSyncUserAcquireStart_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(handle);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainSyncUserAcquireFailed handler held in nvtxGlobals
 * (cast to its typed signature); does nothing when no handler is set or
 * NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle)
{
#ifndef NVTX_DISABLE
    nvtxDomainSyncUserAcquireFailed_impl_fntype handler = (nvtxDomainSyncUserAcquireFailed_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(handle);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainSyncUserAcquireSuccess handler held in nvtxGlobals
 * (cast to its typed signature); does nothing when no handler is set or
 * NVTX_DISABLE is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle)
{
#ifndef NVTX_DISABLE
    nvtxDomainSyncUserAcquireSuccess_impl_fntype handler = (nvtxDomainSyncUserAcquireSuccess_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(handle);
#endif /* NVTX_DISABLE */
}
/* Forwards to the nvtxDomainSyncUserReleasing handler held in nvtxGlobals (cast
 * to its typed signature); does nothing when no handler is set or NVTX_DISABLE
 * is defined. */
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle)
{
#ifndef NVTX_DISABLE
    nvtxDomainSyncUserReleasing_impl_fntype handler = (nvtxDomainSyncUserReleasing_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
    if (!handler)
    {
        return;
    }
    handler(handle);
#endif /* NVTX_DISABLE */
}
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
#endif
/* ---- Platform-independent helper definitions and functions ---- */
/* Prefer macros over inline functions to reduce symbol resolution at link time */
/* Per-platform aliases for the path character type, environment lookup,
 * dynamic-library loading, thread yielding, memory barriers and 32-bit atomics.
 *
 * NVTX_ATOMIC_CAS_32(old, address, exchange, comparand): if *address equals
 * comparand, store exchange into *address; in all cases assign the value that
 * *address held beforehand to old. */
#if defined(_WIN32)
#define NVTX_PATHCHAR wchar_t
#define NVTX_STR(x) L##x
#define NVTX_GETENV _wgetenv
#define NVTX_BUFSIZE MAX_PATH
#define NVTX_DLLHANDLE HMODULE
#define NVTX_DLLOPEN(x) LoadLibraryW(x)
#define NVTX_DLLFUNC GetProcAddress
#define NVTX_DLLCLOSE FreeLibrary
#define NVTX_YIELD() SwitchToThread()
#define NVTX_MEMBAR() MemoryBarrier()
#define NVTX_ATOMIC_WRITE_32(address, value) InterlockedExchange((volatile LONG*)address, value)
#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) old = InterlockedCompareExchange((volatile LONG*)address, exchange, comparand)
#elif defined(__GNUC__)
#define NVTX_PATHCHAR char
#define NVTX_STR(x) x
#define NVTX_GETENV getenv
#define NVTX_BUFSIZE PATH_MAX
#define NVTX_DLLHANDLE void*
#define NVTX_DLLOPEN(x) dlopen(x, RTLD_LAZY)
#define NVTX_DLLFUNC dlsym
#define NVTX_DLLCLOSE dlclose
#define NVTX_YIELD() sched_yield()
#define NVTX_MEMBAR() __sync_synchronize()
/* Ensure full memory barrier for atomics, to match Windows functions.
 * NOTE: both macros below expand to two statements, so use them only as
 * standalone statements (never as the unbraced body of an if/else/loop). */
#define NVTX_ATOMIC_WRITE_32(address, value) __sync_synchronize(); __sync_lock_test_and_set(address, value)
/* __sync_val_compare_and_swap takes (ptr, oldval, newval), i.e. the comparand
 * comes BEFORE the exchange value -- the opposite of InterlockedCompareExchange. */
#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) __sync_synchronize(); old = __sync_val_compare_and_swap(address, comparand, exchange)
#else
#error The library does not support your configuration!
#endif
/* Define this to 1 for platforms where pre-injected libraries can be discovered. */
#if defined(_WIN32)
/* TODO */
#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
#else
#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
#endif
/* Define this to 1 for platforms that support environment variables */
/* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */
/* Try: #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
#define NVTX_SUPPORT_ENV_VARS 1
/* Define this to 1 for platforms that support dynamic/shared libraries */
#define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1
/* Injection libraries implementing InitializeInjectionNvtx2 may be statically linked.
 * This is useful for platforms where dynamic injection is not available. Since weak
 * symbols not explicitly marked extern are guaranteed to be initialized to zero if no
 * definitions are found by the linker, the dynamic injection process proceeds normally
 * if InitializeInjectionNvtx2_fnptr is 0. Note that nvtxInitializeInjectionLibrary
 * consults this pointer only when the dynamic search did not yield an init function. */
#if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__)
#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1
/* To statically inject an NVTX library, define InitializeInjectionNvtx2_fnptr as a normal
 * symbol (not weak) pointing to the implementation of InitializeInjectionNvtx2 (which
 * does not need to be named "InitializeInjectionNvtx2" as is necessary in a dynamic
 * injection library). */
__attribute__((weak)) NvtxInitializeInjectionNvtxFunc_t InitializeInjectionNvtx2_fnptr;
#else
#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0
#endif
/* This function tries to find or load an NVTX injection library and get the
* address of its InitializeInjection2 function. If such a function pointer
* is found, it is called, and passed the address of this NVTX instance's
* nvtxGetExportTable function, so the injection can attach to this instance.
* If the initialization fails for any reason, any dynamic library loaded will
* be freed, and all NVTX implementation functions will be set to no-ops. If
* initialization succeeds, NVTX functions not attached to the tool will be set
* to no-ops. This is implemented as one function instead of several small
* functions to minimize the number of weak symbols the linker must resolve.
* Order of search is:
* - Pre-injected library exporting InitializeInjectionNvtx2
* - Loadable library exporting InitializeInjectionNvtx2
* - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64)
* - On Android, libNvtxInjection??.so within the package (?? is 32 or 64)
* - Statically-linked injection library defining InitializeInjectionNvtx2_fnptr
*/
/* Returns NVTX_SUCCESS when an injection init function was found and returned
 * non-zero. Otherwise returns one of the NVTX_ERR_* codes used below:
 * NVTX_ERR_INIT_ACCESS_LIBRARY, NVTX_ERR_INIT_LOAD_LIBRARY,
 * NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT,
 * NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE or
 * NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT. */
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void);
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void)
{
const char* const initFuncName = "InitializeInjectionNvtx2";
NvtxInitializeInjectionNvtxFunc_t init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)0;
NVTX_DLLHANDLE injectionLibraryHandle = (NVTX_DLLHANDLE)0;
int entryPointStatus = 0;
#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
/* Use POSIX global symbol chain to query for init function from any module */
init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(0, initFuncName);
#endif
#if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
/* Try discovering dynamic injection library to load */
if (!init_fnptr)
{
#if NVTX_SUPPORT_ENV_VARS
/* If env var NVTX_INJECTION64_PATH is set, it should contain the path
* to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */
const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4)
? NVTX_STR("NVTX_INJECTION32_PATH")
: NVTX_STR("NVTX_INJECTION64_PATH");
#endif /* NVTX_SUPPORT_ENV_VARS */
NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE];
const NVTX_PATHCHAR* injectionLibraryPath = (const NVTX_PATHCHAR*)0;
/* Refer to this variable explicitly in case all references to it are #if'ed out */
(void)injectionLibraryPathBuf;
#if NVTX_SUPPORT_ENV_VARS
/* Disable the warning for getenv & _wgetenv -- this usage is safe because
* these functions are not called again before using the returned value. */
#if defined(_MSC_VER)
#pragma warning( push )
#pragma warning( disable : 4996 )
#endif
injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName);
#if defined(_MSC_VER)
#pragma warning( pop )
#endif
#endif
#if defined(__ANDROID__)
/* No path from the environment: derive one from this process's package name,
* read from /proc/<pid>/cmdline. Any failure in this branch returns
* NVTX_ERR_INIT_ACCESS_LIBRARY immediately. */
if (!injectionLibraryPath)
{
const char *bits = (sizeof(void*) == 4) ? "32" : "64";
char cmdlineBuf[32];
char pkgName[PATH_MAX];
int count;
int pid;
FILE *fp;
size_t bytesRead;
size_t pos;
pid = (int)getpid();
count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid);
if (count <= 0 || count >= (int)sizeof(cmdlineBuf))
{
NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid);
return NVTX_ERR_INIT_ACCESS_LIBRARY;
}
fp = fopen(cmdlineBuf, "r");
if (!fp)
{
NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf);
return NVTX_ERR_INIT_ACCESS_LIBRARY;
}
/* Read at most sizeof(pkgName) - 1 bytes so the terminator written below fits */
bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp);
fclose(fp);
if (bytesRead == 0)
{
NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf);
return NVTX_ERR_INIT_ACCESS_LIBRARY;
}
pkgName[bytesRead] = 0;
/* String can contain colon as a process separator. In this case the package name is before the colon. */
pos = 0;
while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0')
{
++pos;
}
pkgName[pos] = 0;
count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits);
if (count <= 0 || count >= NVTX_BUFSIZE)
{
NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits);
return NVTX_ERR_INIT_ACCESS_LIBRARY;
}
/* On Android, verify path is accessible due to aggressive file access restrictions. */
/* For dlopen, if the filename contains a leading slash, then it is interpreted as a */
/* relative or absolute pathname; otherwise it will follow the rules in ld.so. */
if (injectionLibraryPathBuf[0] == '/')
{
#if (__ANDROID_API__ < 21)
int access_err = access(injectionLibraryPathBuf, F_OK | R_OK);
#else
int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0);
#endif
if (access_err != 0)
{
NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf);
return NVTX_ERR_INIT_ACCESS_LIBRARY;
}
}
injectionLibraryPath = injectionLibraryPathBuf;
}
#endif
/* At this point, injectionLibraryPath is specified if a dynamic
* injection library was specified by a tool. */
if (injectionLibraryPath)
{
/* Load the injection library */
injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath);
if (!injectionLibraryHandle)
{
NVTX_ERR("Failed to load injection library\n");
return NVTX_ERR_INIT_LOAD_LIBRARY;
}
else
{
/* Attempt to get the injection library's entry-point */
init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(injectionLibraryHandle, initFuncName);
if (!init_fnptr)
{
NVTX_DLLCLOSE(injectionLibraryHandle);
NVTX_ERR("Failed to get address of function InitializeInjectionNvtx2 from injection library\n");
return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT;
}
}
}
}
#endif
#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
if (!init_fnptr)
{
/* Check weakly-defined function pointer, which a statically-linked injection can
* define as a normal symbol. NOTE(review): this check runs only when the dynamic
* search above left init_fnptr unset, so as written a dynamic injection is the one
* that takes precedence, and a failed dynamic load returns before reaching here. */
if (InitializeInjectionNvtx2_fnptr)
{
init_fnptr = InitializeInjectionNvtx2_fnptr;
}
}
#endif
/* At this point, if init_fnptr is not set, then no tool has specified
* an NVTX injection library -- return non-success result so all NVTX
* API functions will be set to no-ops. */
if (!init_fnptr)
{
return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE;
}
/* Invoke injection library's initialization function. If it returns
* 0 (failure) and a dynamic injection was loaded, unload it. */
entryPointStatus = init_fnptr(NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable));
if (entryPointStatus == 0)
{
NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n");
if (injectionLibraryHandle)
{
NVTX_DLLCLOSE(injectionLibraryHandle);
}
return NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT;
}
return NVTX_SUCCESS;
}
/* Runs NVTX initialization for the caller that moves initState from FRESH to
 * STARTED; every other caller waits until initState reads COMPLETE. Returns
 * immediately when initState is already COMPLETE on entry. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void)
{
    unsigned int previousState;

    /* Fast path: initialization already finished. */
    if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState == NVTX_INIT_STATE_COMPLETE)
    {
        return;
    }

    /* Request the FRESH -> STARTED transition; the macro assigns previousState. */
    NVTX_ATOMIC_CAS_32(
        previousState,
        &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
        NVTX_INIT_STATE_STARTED,
        NVTX_INIT_STATE_FRESH);

    if (previousState != NVTX_INIT_STATE_FRESH)
    {
        /* Someone else is (or was) initializing: yield until the state is COMPLETE. */
        NVTX_MEMBAR();
        while (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState != NVTX_INIT_STATE_COMPLETE)
        {
            NVTX_YIELD();
            NVTX_MEMBAR();
        }
        return;
    }

    {
        /* Load & initialize the injection library -- it assigns the function pointers. */
        int injectionResult = NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)();

        /* Null out every pointer if injection init failed; otherwise only the
         * ones the injection did not assign. */
        int forceAllToNoops = injectionResult != NVTX_SUCCESS;
        NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(forceAllToNoops);

        /* Publish completion so the assigned function pointers are used from now on. */
        NVTX_ATOMIC_WRITE_32(
            &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
            NVTX_INIT_STATE_COMPLETE);
    }
}
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
#endif
/* Forward declarations of the "_impl_init" variant of each NVTX API function.
 * Each declaration mirrors the parameter list and return type of the API
 * function it is named after. */

/* Markers, ranges, and category/OS-thread naming */
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id);
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message);
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message);
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name);
/* nvtxNameCu* functions (nvtx_CU* handle types) */
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name);
/* nvtxNameCl* functions (nvtx_cl_* handle types) */
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name);
/* nvtxNameCuda* functions (int device index and nvtx_cuda*_t handle types) */
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name);
/* Domain-scoped functions (most take an nvtxDomainHandle_t) plus nvtxInitialize */
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message);
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved);
/* nvtxDomainSyncUser* functions (nvtxSyncUser_t handles) */
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle);
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle);
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef NVTX_IMPL_GUARD
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
#endif
/* Initialization-time stand-in for nvtxMarkEx: makes sure nvtxInitOnce has
 * run, then issues the nvtxMarkEx call again with the same argument. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxMarkEx(eventAttrib);
}
/* Initialization-time stand-in for nvtxMarkA: makes sure nvtxInitOnce has
 * run, then issues the nvtxMarkA call again with the same argument. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxMarkA(message);
}
/* Initialization-time stand-in for nvtxMarkW: makes sure nvtxInitOnce has
 * run, then issues the nvtxMarkW call again with the same argument. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxMarkW(message);
}
/* Initialization-time stand-in for nvtxRangeStartEx: makes sure nvtxInitOnce
 * has run, then issues the nvtxRangeStartEx call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib)
{
    nvtxRangeId_t rangeId;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    rangeId = nvtxRangeStartEx(eventAttrib);
    return rangeId;
}
/* Initialization-time stand-in for nvtxRangeStartA: makes sure nvtxInitOnce
 * has run, then issues the nvtxRangeStartA call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message)
{
    nvtxRangeId_t rangeId;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    rangeId = nvtxRangeStartA(message);
    return rangeId;
}
/* Initialization-time stand-in for nvtxRangeStartW: makes sure nvtxInitOnce
 * has run, then issues the nvtxRangeStartW call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message)
{
    nvtxRangeId_t rangeId;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    rangeId = nvtxRangeStartW(message);
    return rangeId;
}
/* Initialization-time stand-in for nvtxRangeEnd: makes sure nvtxInitOnce has
 * run, then issues the nvtxRangeEnd call again with the same argument. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxRangeEnd(id);
}
/* Initialization-time stand-in for nvtxRangePushEx: makes sure nvtxInitOnce
 * has run, then issues the nvtxRangePushEx call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib)
{
    int result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxRangePushEx(eventAttrib);
    return result;
}
/* Initialization-time stand-in for nvtxRangePushA: makes sure nvtxInitOnce
 * has run, then issues the nvtxRangePushA call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message)
{
    int result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxRangePushA(message);
    return result;
}
/* Initialization-time stand-in for nvtxRangePushW: makes sure nvtxInitOnce
 * has run, then issues the nvtxRangePushW call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message)
{
    int result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxRangePushW(message);
    return result;
}
/* Initialization-time stand-in for nvtxRangePop: makes sure nvtxInitOnce has
 * run, then issues the nvtxRangePop call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void)
{
    int result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxRangePop();
    return result;
}
/* Initialization-time stand-in for nvtxNameCategoryA: makes sure nvtxInitOnce
 * has run, then issues the nvtxNameCategoryA call again with the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxNameCategoryA(category, name);
}
/* Initialization-time stand-in for nvtxNameCategoryW: makes sure nvtxInitOnce
 * has run, then issues the nvtxNameCategoryW call again with the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxNameCategoryW(category, name);
}
/* Initialization-time stand-in for nvtxNameOsThreadA: makes sure nvtxInitOnce
 * has run, then issues the nvtxNameOsThreadA call again with the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxNameOsThreadA(threadId, name);
}
/* Initialization-time stand-in for nvtxNameOsThreadW: makes sure nvtxInitOnce
 * has run, then issues the nvtxNameOsThreadW call again with the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxNameOsThreadW(threadId, name);
}
/* Initialization-time stand-in for nvtxDomainMarkEx: makes sure nvtxInitOnce
 * has run, then issues the nvtxDomainMarkEx call again with the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxDomainMarkEx(domain, eventAttrib);
}
/* Initialization-time stand-in for nvtxDomainRangeStartEx: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainRangeStartEx call again and
 * returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
{
    nvtxRangeId_t rangeId;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    rangeId = nvtxDomainRangeStartEx(domain, eventAttrib);
    return rangeId;
}
/* Initialization-time stand-in for nvtxDomainRangeEnd: makes sure nvtxInitOnce
 * has run, then issues the nvtxDomainRangeEnd call again with the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxDomainRangeEnd(domain, id);
}
/* Initialization-time stand-in for nvtxDomainRangePushEx: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainRangePushEx call again and
 * returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
{
    int result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxDomainRangePushEx(domain, eventAttrib);
    return result;
}
/* Initialization-time stand-in for nvtxDomainRangePop: makes sure nvtxInitOnce
 * has run, then issues the nvtxDomainRangePop call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain)
{
    int result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxDomainRangePop(domain);
    return result;
}
/* Initialization-time stand-in for nvtxDomainResourceCreate: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainResourceCreate call again and
 * returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
{
    nvtxResourceHandle_t result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxDomainResourceCreate(domain, attribs);
    return result;
}
/* Initialization-time stand-in for nvtxDomainResourceDestroy: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainResourceDestroy call again
 * with the same argument. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxDomainResourceDestroy(resource);
}
/* Initialization-time stand-in for nvtxDomainNameCategoryA: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainNameCategoryA call again with
 * the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxDomainNameCategoryA(domain, category, name);
}
/* Initialization-time stand-in for nvtxDomainNameCategoryW: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainNameCategoryW call again with
 * the same arguments. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxDomainNameCategoryW(domain, category, name);
}
/* Initialization-time stand-in for nvtxDomainRegisterStringA: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainRegisterStringA call again
 * and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string)
{
    nvtxStringHandle_t result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxDomainRegisterStringA(domain, string);
    return result;
}
/* Initialization-time stand-in for nvtxDomainRegisterStringW: makes sure
 * nvtxInitOnce has run, then issues the nvtxDomainRegisterStringW call again
 * and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string)
{
    nvtxStringHandle_t result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxDomainRegisterStringW(domain, string);
    return result;
}
/* Initialization-time stand-in for nvtxDomainCreateA: makes sure nvtxInitOnce
 * has run, then issues the nvtxDomainCreateA call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message)
{
    nvtxDomainHandle_t result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxDomainCreateA(message);
    return result;
}
/* Initialization-time stand-in for nvtxDomainCreateW: makes sure nvtxInitOnce
 * has run, then issues the nvtxDomainCreateW call again and returns its result. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message)
{
    nvtxDomainHandle_t result;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    result = nvtxDomainCreateW(message);
    return result;
}
/* Initialization-time stand-in for nvtxDomainDestroy: makes sure nvtxInitOnce
 * has run, then issues the nvtxDomainDestroy call again with the same argument. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxDomainDestroy(domain);
}
/* Initialization-time stand-in for nvtxInitialize: makes sure nvtxInitOnce has
 * run, then issues the nvtxInitialize call again with the same argument. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved)
{
    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();

    nvtxInitialize(reserved);
}
/* Initialization-time stand-in for nvtxNameCuDeviceA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuDeviceA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name)
{
    nvtxNameCuDeviceA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(device, name);
}
/* Initialization-time stand-in for nvtxNameCuDeviceW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuDeviceW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name)
{
    nvtxNameCuDeviceW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(device, name);
}
/* Initialization-time stand-in for nvtxNameCuContextA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuContextA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name)
{
    nvtxNameCuContextA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(context, name);
}
/* Initialization-time stand-in for nvtxNameCuContextW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuContextW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name)
{
    nvtxNameCuContextW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(context, name);
}
/* Initialization-time stand-in for nvtxNameCuStreamA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuStreamA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name)
{
    nvtxNameCuStreamA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(stream, name);
}
/* Initialization-time stand-in for nvtxNameCuStreamW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuStreamW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name)
{
    nvtxNameCuStreamW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(stream, name);
}
/* Initialization-time stand-in for nvtxNameCuEventA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuEventA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name)
{
    nvtxNameCuEventA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(event, name);
}
/* Initialization-time stand-in for nvtxNameCuEventW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCuEventW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name)
{
    nvtxNameCuEventW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(event, name);
}
/* Initialization-time stand-in for nvtxNameCudaDeviceA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCudaDeviceA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name)
{
    nvtxNameCudaDeviceA_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(device, name);
}
/* Initialization-time stand-in for nvtxNameCudaDeviceW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCudaDeviceW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name)
{
    nvtxNameCudaDeviceW_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(device, name);
}
/* Initialization-time stand-in for nvtxNameCudaStreamA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCudaStreamA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name)
{
    nvtxNameCudaStreamA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(stream, name);
}
/* Initialization-time stand-in for nvtxNameCudaStreamW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCudaStreamW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name)
{
    nvtxNameCudaStreamW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(stream, name);
}
/* Initialization-time stand-in for nvtxNameCudaEventA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCudaEventA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name)
{
    nvtxNameCudaEventA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(event, name);
}
/* Initialization-time stand-in for nvtxNameCudaEventW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameCudaEventW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name)
{
    nvtxNameCudaEventW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(event, name);
}
/* Initialization-time stand-in for nvtxNameClDeviceA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClDeviceA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name)
{
    nvtxNameClDeviceA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(device, name);
}
/* Initialization-time stand-in for nvtxNameClDeviceW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClDeviceW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name)
{
    nvtxNameClDeviceW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(device, name);
}
/* Initialization-time stand-in for nvtxNameClContextA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClContextA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name)
{
    nvtxNameClContextA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(context, name);
}
/* Initialization-time stand-in for nvtxNameClContextW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClContextW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name)
{
    nvtxNameClContextW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(context, name);
}
/* Initialization-time stand-in for nvtxNameClCommandQueueA: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxNameClCommandQueueA_impl_fnptr slot if it is non-null;
 * otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name)
{
    nvtxNameClCommandQueueA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(command_queue, name);
}
/* Initialization-time stand-in for nvtxNameClCommandQueueW: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxNameClCommandQueueW_impl_fnptr slot if it is non-null;
 * otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name)
{
    nvtxNameClCommandQueueW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(command_queue, name);
}
/* Initialization-time stand-in for nvtxNameClMemObjectA: makes sure
 * nvtxInitOnce has run, then calls the nvtxGlobals.nvtxNameClMemObjectA_impl_fnptr
 * slot if it is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name)
{
    nvtxNameClMemObjectA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(memobj, name);
}
/* Initialization-time stand-in for nvtxNameClMemObjectW: makes sure
 * nvtxInitOnce has run, then calls the nvtxGlobals.nvtxNameClMemObjectW_impl_fnptr
 * slot if it is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name)
{
    nvtxNameClMemObjectW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(memobj, name);
}
/* Initialization-time stand-in for nvtxNameClSamplerA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClSamplerA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name)
{
    nvtxNameClSamplerA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(sampler, name);
}
/* Initialization-time stand-in for nvtxNameClSamplerW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClSamplerW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name)
{
    nvtxNameClSamplerW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(sampler, name);
}
/* Initialization-time stand-in for nvtxNameClProgramA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClProgramA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name)
{
    nvtxNameClProgramA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(program, name);
}
/* Initialization-time stand-in for nvtxNameClProgramW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClProgramW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name)
{
    nvtxNameClProgramW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(program, name);
}
/* Initialization-time stand-in for nvtxNameClEventA: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClEventA_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name)
{
    nvtxNameClEventA_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(evnt, name);
}
/* Initialization-time stand-in for nvtxNameClEventW: makes sure nvtxInitOnce
 * has run, then calls the nvtxGlobals.nvtxNameClEventW_impl_fnptr slot if it
 * is non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name)
{
    nvtxNameClEventW_fakeimpl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(evnt, name);
}
/* Initialization-time stand-in for nvtxDomainSyncUserCreate: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxDomainSyncUserCreate_impl_fnptr slot and returns its result.
 * Returns a zero nvtxSyncUser_t when the slot is null. */
NVTX_LINKONCE_DEFINE_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs)
{
    nvtxDomainSyncUserCreate_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
    return target ? target(domain, attribs) : (nvtxSyncUser_t)0;
}
/* Initialization-time stand-in for nvtxDomainSyncUserDestroy: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxDomainSyncUserDestroy_impl_fnptr slot if it is non-null;
 * otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle)
{
    nvtxDomainSyncUserDestroy_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(handle);
}
/* Initialization-time stand-in for nvtxDomainSyncUserAcquireStart: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxDomainSyncUserAcquireStart_impl_fnptr slot if it is non-null;
 * otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle)
{
    nvtxDomainSyncUserAcquireStart_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(handle);
}
/* Initialization-time stand-in for nvtxDomainSyncUserAcquireFailed: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxDomainSyncUserAcquireFailed_impl_fnptr slot if it is
 * non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle)
{
    nvtxDomainSyncUserAcquireFailed_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(handle);
}
/* Initialization-time stand-in for nvtxDomainSyncUserAcquireSuccess: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxDomainSyncUserAcquireSuccess_impl_fnptr slot if it is
 * non-null; otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle)
{
    nvtxDomainSyncUserAcquireSuccess_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(handle);
}
/* Initialization-time stand-in for nvtxDomainSyncUserReleasing: makes sure
 * nvtxInitOnce has run, then calls the
 * nvtxGlobals.nvtxDomainSyncUserReleasing_impl_fnptr slot if it is non-null;
 * otherwise does nothing. */
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle)
{
    nvtxDomainSyncUserReleasing_impl_fntype target;

    NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
    target = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
    if (!target)
    {
        return;
    }
    target(handle);
}
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops);
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops)
{
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr = NULL;
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init) || forceAllToNoops)
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr = NULL;
}
/*
* Copyright 2009-2022 NVIDIA Corporation. All rights reserved.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
/* Include guard: everything below is processed at most once per translation
 * unit. */
#ifndef __NVTX_LINKONCE_H__
#define __NVTX_LINKONCE_H__
/* This header defines macros to permit making definitions of global variables
* and functions in C/C++ header files which may be included multiple times in
* a translation unit or linkage unit. It allows authoring header-only libraries
* which can be used by multiple other header-only libraries (either as the same
* copy or multiple copies), and does not require any build changes, such as
* adding another .c file, linking a static library, or deploying a dynamic
* library. Globals defined with these macros have the property that they have
* the same address, pointing to a single instance, for the entire linkage unit.
* It is expected but not guaranteed that each linkage unit will have a separate
* instance.
*
* In some situations it is desirable to declare a variable without initializing
* it, refer to it in code or other variables' initializers, and then initialize
* it later. Similarly, functions can be prototyped, have their address taken,
* and then have their body defined later. In such cases, use the FWDDECL macros
* when forward-declaring LINKONCE global variables without initializers and
* function prototypes, and then use the DEFINE macros when later defining them.
* Although in many cases the FWDDECL macro is equivalent to the DEFINE macro,
* following this pattern makes code maximally portable.
*/
/* Macros produced by this header (all are specifier prefixes placed in front
 * of a declaration):
 *   NVTX_LINKONCE_DEFINE_GLOBAL    - prefix for defining a link-once global.
 *   NVTX_LINKONCE_DEFINE_FUNCTION  - prefix for defining a link-once function.
 *   NVTX_LINKONCE_FWDDECL_GLOBAL   - prefix for forward-declaring such a global.
 *   NVTX_LINKONCE_FWDDECL_FUNCTION - prefix for prototyping such a function.
 *
 * NVTX_LINKONCE_WEAK and NVTX_LINKONCE_HIDDEN are building blocks for the
 * macros above. They are not available on every toolchain: WEAK is not
 * defined in the MSVC branch, and HIDDEN is defined only in the final
 * (generic GCC/clang) branch.
 *
 * The order of the tests below matters: MinGW is checked before MSVC, and
 * the Cygwin+clang case is checked before the plain Cygwin case. */
/* MinGW: globals use __declspec(selectany) in both C and C++. Functions carry
 * the ".gnu.linkonce.0." section attribute; in C++ they are additionally
 * declared extern "C" inline. */
#if defined(__MINGW32__) /* MinGW */
#define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0.")))
#if defined(__cplusplus)
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline NVTX_LINKONCE_WEAK
#else
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
#endif
/* MSVC: no attribute helper macros are defined here. Globals use
 * __declspec(selectany) (with extern "C" added in C++); functions use
 * extern "C" inline in C++ and __inline in C. */
#elif defined(_MSC_VER) /* MSVC */
#if defined(__cplusplus)
#define NVTX_LINKONCE_DEFINE_GLOBAL extern "C" __declspec(selectany)
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
#else
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
#define NVTX_LINKONCE_DEFINE_FUNCTION __inline
#endif
/* Clang on Cygwin: both globals and functions use the ".gnu.linkonce.0."
 * section attribute. C++ functions additionally get extern "C"; unlike the
 * other C++ branches, no inline specifier is applied here. */
#elif defined(__CYGWIN__) && defined(__clang__) /* Clang on Cygwin */
#define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0.")))
#if defined(__cplusplus)
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_WEAK
#else
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
#endif
/* Any other Cygwin compiler: C++ uses __declspec(selectany) for globals and
 * extern "C" inline for functions (NVTX_LINKONCE_WEAK is defined but unused
 * in that case); C uses __attribute__((weak)) for both. */
#elif defined(__CYGWIN__) /* Assume GCC or compatible */
#define NVTX_LINKONCE_WEAK __attribute__((weak))
#if defined(__cplusplus)
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
#else
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
#endif
/* Everything else: every symbol gets hidden visibility. Globals are also
 * weak in both languages; functions are extern "C" inline in C++ and weak
 * in C. */
#else /* All others: Assume GCC, clang, or compatible */
#define NVTX_LINKONCE_WEAK __attribute__((weak))
#define NVTX_LINKONCE_HIDDEN __attribute__((visibility("hidden")))
#if defined(__cplusplus)
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_HIDDEN inline
#else
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
#endif
#endif
/* Forward-declaration forms reuse the DEFINE prefixes selected above: the
 * global form appends `extern`, and the function form is identical to
 * NVTX_LINKONCE_DEFINE_FUNCTION. */
#define NVTX_LINKONCE_FWDDECL_GLOBAL NVTX_LINKONCE_DEFINE_GLOBAL extern
#define NVTX_LINKONCE_FWDDECL_FUNCTION NVTX_LINKONCE_DEFINE_FUNCTION
#endif /* __NVTX_LINKONCE_H__ */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment