nvtx.pyx 6.62 KB
Newer Older
root's avatar
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# distutils: language = c++

"""
Wrapper for NVIDIA Tools Extension Library (NVTX)

"""
from libc cimport string

# C-level declarations taken from the NVTX shim header. The whole block is
# declared ``nogil``.
cdef extern from '../../cupy_tx.h' nogil:
    # Compared against 1 and 2 by the wrappers below to decide whether the
    # attribute-based ``*Ex`` entry points are used.
    cdef int NVTX_VERSION
    # Values stored in ``nvtxEventAttributes_v1.colorType``.
    cdef enum nvtxColorType_t:
        NVTX_COLOR_UNKNOWN
        NVTX_COLOR_ARGB
    # Values stored in ``nvtxEventAttributes_v1.messageType``.
    cdef enum nvtxMessageType_t:
        NVTX_MESSAGE_UNKNOWN
        NVTX_MESSAGE_TYPE_ASCII
        NVTX_MESSAGE_TYPE_UNICODE
    # Fixed-width integer aliases used by the struct/union layouts below.
    ctypedef   signed char         int8_t
    ctypedef   signed short       int16_t
    ctypedef   signed int         int32_t
    ctypedef   signed long long   int64_t
    ctypedef unsigned char        uint8_t
    ctypedef unsigned short      uint16_t
    ctypedef unsigned int        uint32_t
    ctypedef unsigned long long  uint64_t
    # Declared as ``void`` so ``wchar_t*`` is just an opaque pointer here; the
    # visible code only ever assigns the ``ascii`` member of ``message_t``.
    ctypedef          void        wchar_t
    # Payload slot of the attribute struct; the wrappers in this file leave it
    # zeroed (they memset the struct and never assign ``payload``).
    cdef union payload_t:
        uint64_t  ullValue
        int64_t    llValue
        double      dValue
    # Message slot of the attribute struct: one pointer, interpreted according
    # to ``messageType``.
    cdef union message_t:
        char* ascii
        wchar_t* unicode
    # Event description passed by pointer to the ``*Ex`` functions.
    cdef struct nvtxEventAttributes_v1:
        uint16_t   version      # set to NVTX_VERSION by the callers below
        uint16_t   size         # set to sizeof(attrib) by the callers below
        uint32_t   category
        int32_t    colorType    # an nvtxColorType_t value
        uint32_t   color        # ARGB value when colorType is NVTX_COLOR_ARGB
        int32_t    payloadType
        int32_t    reserved0
        payload_t  payload
        int32_t    messageType  # an nvtxMessageType_t value
        message_t  message
    ctypedef nvtxEventAttributes_v1 nvtxEventAttributes_t
    # Identifier returned by nvtxRangeStartEx() and consumed by nvtxRangeEnd().
    ctypedef unsigned long long range_id_t
    # Markers: plain-message form and attribute-struct form.
    void nvtxMarkA(const char *message)
    void nvtxMarkEx(const nvtxEventAttributes_t *eventAttrib)
    # Nested (push/pop) ranges.
    int nvtxRangePushA(const char *message)
    int nvtxRangePushEx(const nvtxEventAttributes_t *eventAttrib)
    int nvtxRangePop()
    # Ranges identified by an explicit id (start/end).
    range_id_t nvtxRangeStartEx(const nvtxEventAttributes_t *eventAttrib)
    void nvtxRangeEnd(range_id_t)

# Palette used by Mark() and RangePush(): ``id_color`` is reduced modulo
# ``num_colors`` and used as an index into ``colors``.
cdef int num_colors = 10
# Each entry is forwarded as an NVTX_COLOR_ARGB value; all ten have the top
# byte set to 0xFF.
cdef uint32_t colors[10]
colors[0] = 0xFF00FF00
colors[1] = 0xFF007FFF
colors[2] = 0xFF7F00FF
colors[3] = 0xFFFF0000
colors[4] = 0xFF7FFF00
colors[5] = 0xFF00FF7F
colors[6] = 0xFF0000FF
colors[7] = 0xFFFF007F
colors[8] = 0xFFFF7F00
colors[9] = 0xFF7F7F7F

# NOTE(review): presumably a flag importers check to see whether NVTX support
# is present -- nothing in the visible part of this file reads it; confirm
# against callers.
available = True


cdef nvtxEventAttributes_t make_event_attributes(message, color):
    """
    Builds a zero-initialised ``nvtxEventAttributes_t`` describing an event.

    Args:
        message (str or None): Text of the event; it is encoded with
            ``message.encode()``. ``None`` marks the message type as
            ``NVTX_MESSAGE_UNKNOWN``.
        color (int or None): ARGB color of the event. ``None`` marks the
            color type as ``NVTX_COLOR_UNKNOWN``.

    Returns:
        nvtxEventAttributes_t: The populated struct, returned by value.

    NOTE(review): ``message.ascii`` is set to point into the encoded bytes
    object, which is a local of this function. That local is released when
    the function returns, so the pointer inside the returned struct should
    not be relied upon by the caller -- callers that need the message must
    keep their own encoded bytes alive.
    """
    cdef nvtxEventAttributes_t attrib
    cdef bytes encoded

    # Start from an all-zero struct, then stamp the header fields.
    string.memset(&attrib, 0, sizeof(attrib))
    attrib.size = sizeof(attrib)
    attrib.version = NVTX_VERSION

    if color is not None:
        attrib.color = color
        attrib.colorType = NVTX_COLOR_ARGB
    else:
        attrib.colorType = NVTX_COLOR_UNKNOWN

    if message is not None:
        encoded = message.encode()
        attrib.message.ascii = encoded
        attrib.messageType = NVTX_MESSAGE_TYPE_ASCII
    else:
        attrib.messageType = NVTX_MESSAGE_UNKNOWN

    return attrib


cpdef MarkC(message, uint32_t color=0):
    """
    Emits a marker, i.e. an instantaneous event, with an explicit color.

    A marker describes something that happened at one specific point in time
    while the application was running.

    Args:
        message (str): Name of the marker.
        color (uint32): ARGB color code of the marker.
    """
    cdef nvtxEventAttributes_t attrib
    cdef bytes encoded = message.encode()

    if NVTX_VERSION == 1 or NVTX_VERSION == 2:
        # Attribute-based call: zero the struct, then fill in only the
        # header, message and color fields.
        string.memset(&attrib, 0, sizeof(attrib))
        attrib.size = sizeof(attrib)
        attrib.version = NVTX_VERSION
        attrib.colorType = NVTX_COLOR_ARGB
        attrib.color = color
        attrib.messageType = NVTX_MESSAGE_TYPE_ASCII
        # ``encoded`` stays alive until this function returns, so the
        # pointer is valid for the duration of the call below.
        attrib.message.ascii = encoded
        nvtxMarkEx(&attrib)
    else:
        # Any other NVTX version: plain, uncolored marker.
        nvtxMarkA(<const char*>encoded)


cpdef Mark(message, int id_color=-1):
    """
    Emits a marker, i.e. an instantaneous event, using a palette color.

    A marker describes something that happened at one specific point in time
    while the application was running.

    Args:
        message (str): Name of the marker.
        id_color (int): Index of the palette color for the marker. It is
            taken modulo the palette size; a negative value emits the marker
            without a color.
    """
    cdef uint32_t argb
    cdef bytes encoded = message.encode()

    if id_color >= 0 and (NVTX_VERSION == 1 or NVTX_VERSION == 2):
        # Resolve the palette entry and let MarkC() build the attributes.
        argb = colors[id_color % num_colors]
        MarkC(message, argb)
    else:
        nvtxMarkA(<const char*>encoded)


cpdef RangePushC(message, uint32_t color=0):
    """
    Opens a nested range with an explicit color.

    A range describes an event that spans a period of time while the
    application runs. This is particularly handy when profiling with Nsight
    Systems, where user-defined ranges help relate the timeline to CuPy's
    internal CUDA kernels. A range lasts from a ``RangePushC()`` call to the
    matching ``RangePop()`` call, and such pairs may be nested.

    .. code-block:: python

        from cupy.cuda.nvtx import RangePushC, RangePop

        RangePushC("Nested Powers of A", 0xFF00FF00)
        for i in range(N):
            RangePushC("Iter {}: Double A".format(i), 0xFF0000FF)
            A = 2*A
            RangePop()
        RangePop()

    Args:
        message (str): Name of the range.
        color (uint32): ARGB color of the range.
    """
    cdef nvtxEventAttributes_t attrib
    cdef bytes encoded = message.encode()

    if NVTX_VERSION == 1 or NVTX_VERSION == 2:
        # Attribute-based call: zero the struct, then fill in only the
        # header, message and color fields.
        string.memset(&attrib, 0, sizeof(attrib))
        attrib.size = sizeof(attrib)
        attrib.version = NVTX_VERSION
        attrib.colorType = NVTX_COLOR_ARGB
        attrib.color = color
        attrib.messageType = NVTX_MESSAGE_TYPE_ASCII
        # ``encoded`` stays alive until this function returns, so the
        # pointer is valid for the duration of the call below.
        attrib.message.ascii = encoded
        nvtxRangePushEx(&attrib)
    else:
        # Any other NVTX version: plain, uncolored range.
        nvtxRangePushA(<const char*>encoded)


cpdef RangePush(message, int id_color=-1):
    """
    Opens a nested range using a palette color.

    A range describes an event that spans a period of time while the
    application runs. This is particularly handy when profiling with Nsight
    Systems, where user-defined ranges help relate the timeline to CuPy's
    internal CUDA kernels. A range lasts from a ``RangePush()`` call to the
    matching ``RangePop()`` call, and such pairs may be nested.

    .. code-block:: python

        from cupy.cuda.nvtx import RangePush, RangePop

        RangePush("Nested Powers of A")
        for i in range(N):
            RangePush("Iter {}: Double A".format(i))
            A = 2*A
            RangePop()
        RangePop()

    Args:
        message (str): Name of the range.
        id_color (int): Index of the palette color for the range. It is
            taken modulo the palette size; a negative value opens the range
            without a color.
    """
    cdef uint32_t argb
    cdef bytes encoded = message.encode()

    if id_color >= 0 and (NVTX_VERSION == 1 or NVTX_VERSION == 2):
        # Resolve the palette entry and let RangePushC() build the attributes.
        argb = colors[id_color % num_colors]
        RangePushC(message, argb)
    else:
        nvtxRangePushA(<const char*>encoded)


cpdef RangePop():
    """
    Ends a nested range started by a ``RangePush*()`` call.

    Calls ``nvtxRangePop()`` and discards its integer result; this function
    itself returns ``None``.
    """
    nvtxRangePop()


cpdef unsigned long long RangeStart(message, color) except? 0:
    """
    Starts a range that is identified by an id rather than by nesting.

    The returned id is what ``RangeEnd()`` expects in order to close the
    range.

    Args:
        message (str or None): Name of the range. ``None`` starts the range
            without a message.
        color (int or None): ARGB color of the range. ``None`` starts the
            range without a color.

    Returns:
        int: The range id reported by ``nvtxRangeStartEx()``.
    """
    # The attribute struct is built here rather than in a helper on purpose:
    # ``attrib.message.ascii`` is a raw pointer into ``b_message``, so the
    # bytes object must remain alive until nvtxRangeStartEx() has returned.
    # Returning the struct from a helper would release the bytes first and
    # leave the pointer dangling.
    cdef bytes b_message
    cdef nvtxEventAttributes_t attrib

    string.memset(&attrib, 0, sizeof(attrib))
    attrib.version = NVTX_VERSION
    attrib.size = sizeof(attrib)

    if color is None:
        attrib.colorType = NVTX_COLOR_UNKNOWN
    else:
        attrib.color = color
        attrib.colorType = NVTX_COLOR_ARGB

    if message is None:
        attrib.messageType = NVTX_MESSAGE_UNKNOWN
    else:
        b_message = message.encode()
        attrib.messageType = NVTX_MESSAGE_TYPE_ASCII
        attrib.message.ascii = b_message

    return nvtxRangeStartEx(&attrib)


cpdef RangeEnd(unsigned long long range_id):
    """
    Ends the range identified by ``range_id``.

    Args:
        range_id (int): Identifier handed straight to ``nvtxRangeEnd()``;
            presumably the value previously returned by ``RangeStart()``.
    """
    nvtxRangeEnd(range_id)