_runtime_enum.pxd 12.6 KB
Newer Older
root's avatar
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
cpdef enum:
    cudaMemoryTypeHost = 1
    cudaMemoryTypeDevice = 2

    cudaIpcMemLazyEnablePeerAccess = 1

    cudaMemAttachGlobal = 1
    cudaMemAttachHost = 2
    cudaMemAttachSingle = 4

    cudaMemAdviseSetReadMostly = 1
    cudaMemAdviseUnsetReadMostly = 2
    cudaMemAdviseSetPreferredLocation = 3
    cudaMemAdviseUnsetPreferredLocation = 4
    cudaMemAdviseSetAccessedBy = 5
    cudaMemAdviseUnsetAccessedBy = 6

    CUDA_R_32F = 0  # 32 bit real
    CUDA_R_64F = 1  # 64 bit real
    CUDA_R_16F = 2  # 16 bit real
    CUDA_R_8I = 3  # 8 bit real as a signed integer
    CUDA_C_32F = 4  # 32 bit complex
    CUDA_C_64F = 5  # 64 bit complex
    CUDA_C_16F = 6  # 16 bit complex
    CUDA_C_8I = 7  # 8 bit complex as a pair of signed integers
    CUDA_R_8U = 8  # 8 bit real as a signed integer
    CUDA_C_8U = 9  # 8 bit complex as a pair of signed integers

    # CUDA Limits
    cudaLimitStackSize = 0x00
    cudaLimitPrintfFifoSize = 0x01
    cudaLimitMallocHeapSize = 0x02
    cudaLimitDevRuntimeSyncDepth = 0x03
    cudaLimitDevRuntimePendingLaunchCount = 0x04
    cudaLimitMaxL2FetchGranularity = 0x05

    # cudaChannelFormatKind
    cudaChannelFormatKindSigned = 0
    cudaChannelFormatKindUnsigned = 1
    cudaChannelFormatKindFloat = 2
    cudaChannelFormatKindNone = 3

    # CUDA array flags
    cudaArrayDefault = 0
    # cudaArrayLayered = 1
    cudaArraySurfaceLoadStore = 2
    # cudaArrayCubemap = 4
    # cudaArrayTextureGather = 8

    # cudaResourceType
    cudaResourceTypeArray = 0
    cudaResourceTypeMipmappedArray = 1
    cudaResourceTypeLinear = 2
    cudaResourceTypePitch2D = 3

    # cudaTextureAddressMode
    cudaAddressModeWrap = 0
    cudaAddressModeClamp = 1
    cudaAddressModeMirror = 2
    cudaAddressModeBorder = 3

    # cudaTextureFilterMode
    cudaFilterModePoint = 0
    cudaFilterModeLinear = 1

    # cudaTextureReadMode
    cudaReadModeElementType = 0
    cudaReadModeNormalizedFloat = 1

    # cudaMemPoolAttr
    # ----- added since 11.2 -----
    cudaMemPoolReuseFollowEventDependencies = 0x1
    cudaMemPoolReuseAllowOpportunistic = 0x2
    cudaMemPoolReuseAllowInternalDependencies = 0x3
    cudaMemPoolAttrReleaseThreshold = 0x4
    # ----- added since 11.3 -----
    cudaMemPoolAttrReservedMemCurrent = 0x5
    cudaMemPoolAttrReservedMemHigh = 0x6
    cudaMemPoolAttrUsedMemCurrent = 0x7
    cudaMemPoolAttrUsedMemHigh = 0x8

    # cudaMemAllocationType
    cudaMemAllocationTypePinned = 0x1

    # cudaMemAllocationHandleType
    cudaMemHandleTypeNone = 0x0
    cudaMemHandleTypePosixFileDescriptor = 0x1
    # cudaMemHandleTypeWin32 = 0x2
    # cudaMemHandleTypeWin32Kmt = 0x4

    # cudaMemLocationType
    cudaMemLocationTypeDevice = 1


# This was a legacy mistake: the prefix "cuda" should have been removed
# so that we can directly assign their C counterparts here. Now because
# of backward compatibility and no flexible Cython macro (IF/ELSE), we
# have to duplicate the enum. (CUDA and HIP use different values!)
IF CUPY_HIP_VERSION > 0:
    # separate in groups of 10 for easier counting...
    cpdef enum:
        cudaDevAttrMaxThreadsPerBlock = 0
        cudaDevAttrMaxBlockDimX
        cudaDevAttrMaxBlockDimY
        cudaDevAttrMaxBlockDimZ
        cudaDevAttrMaxGridDimX
        cudaDevAttrMaxGridDimY
        cudaDevAttrMaxGridDimZ
        cudaDevAttrMaxSharedMemoryPerBlock
        cudaDevAttrTotalConstantMemory
        cudaDevAttrWarpSize

        cudaDevAttrMaxRegistersPerBlock
        cudaDevAttrClockRate
        cudaDevAttrMemoryClockRate
        cudaDevAttrGlobalMemoryBusWidth
        cudaDevAttrMultiProcessorCount
        cudaDevAttrComputeMode
        cudaDevAttrL2CacheSize
        cudaDevAttrMaxThreadsPerMultiProcessor
        # The following are exposed as "deviceAttributeCo..."
        # cudaDevAttrComputeCapabilityMajor
        # cudaDevAttrComputeCapabilityMinor

        cudaDevAttrConcurrentKernels = 20
        cudaDevAttrPciBusId
        cudaDevAttrPciDeviceId
        cudaDevAttrMaxSharedMemoryPerMultiprocessor
        cudaDevAttrIsMultiGpuBoard
        cudaDevAttrIntegrated
        cudaDevAttrCooperativeLaunch
        cudaDevAttrCooperativeMultiDeviceLaunch
        cudaDevAttrMaxTexture1DWidth
        cudaDevAttrMaxTexture2DWidth

        cudaDevAttrMaxTexture2DHeight
        cudaDevAttrMaxTexture3DWidth
        cudaDevAttrMaxTexture3DHeight
        cudaDevAttrMaxTexture3DDepth
        # The following attributes do not exist in CUDA and cause segfualts
        # if we try to access them
        # hipDeviceAttributeHdpMemFlushCntl
        # hipDeviceAttributeHdpRegFlushCntl
        cudaDevAttrMaxPitch = 36
        cudaDevAttrTextureAlignment
        cudaDevAttrTexturePitchAlignment
        cudaDevAttrKernelExecTimeout

        cudaDevAttrCanMapHostMemory
        cudaDevAttrEccEnabled
        cudaDevAttrMemoryPoolsSupported = 0
        # The following attributes do not exist in CUDA
        # hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc
        # hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim
        # hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim
        # hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem

        # The rest do not have HIP correspondence...
        # TODO(leofang): should we expose them anyway, with a value -1 to
        # indicate they cannot be used in HIP?
        # cudaDevAttrGpuOverlap
        # cudaDevAttrMaxTexture2DLayeredWidth
        # cudaDevAttrMaxTexture2DLayeredHeight
        # cudaDevAttrMaxTexture2DLayeredLayers
        # cudaDevAttrSurfaceAlignment
        # cudaDevAttrTccDriver
        # cudaDevAttrAsyncEngineCount
        # cudaDevAttrUnifiedAddressing
        # cudaDevAttrMaxTexture1DLayeredWidth
        # cudaDevAttrMaxTexture1DLayeredLayers
        # cudaDevAttrMaxTexture2DGatherWidth
        # cudaDevAttrMaxTexture2DGatherHeight
        # cudaDevAttrMaxTexture3DWidthAlt
        # cudaDevAttrMaxTexture3DHeightAlt
        # cudaDevAttrMaxTexture3DDepthAlt
        # cudaDevAttrPciDomainId
        # cudaDevAttrMaxTextureCubemapWidth
        # cudaDevAttrMaxTextureCubemapLayeredWidth
        # cudaDevAttrMaxTextureCubemapLayeredLayers
        # cudaDevAttrMaxSurface1DWidth
        # cudaDevAttrMaxSurface2DWidth
        # cudaDevAttrMaxSurface2DHeight
        # cudaDevAttrMaxSurface3DWidth
        # cudaDevAttrMaxSurface3DHeight
        # cudaDevAttrMaxSurface3DDepth
        # cudaDevAttrMaxSurface1DLayeredWidth
        # cudaDevAttrMaxSurface1DLayeredLayers
        # cudaDevAttrMaxSurface2DLayeredWidth
        # cudaDevAttrMaxSurface2DLayeredHeight
        # cudaDevAttrMaxSurface2DLayeredLayers
        # cudaDevAttrMaxSurfaceCubemapWidth
        # cudaDevAttrMaxSurfaceCubemapLayeredWidth
        # cudaDevAttrMaxSurfaceCubemapLayeredLayers
        # cudaDevAttrMaxTexture1DLinearWidth
        # cudaDevAttrMaxTexture2DLinearWidth
        # cudaDevAttrMaxTexture2DLinearHeight
        # cudaDevAttrMaxTexture2DLinearPitch
        # cudaDevAttrMaxTexture2DMipmappedWidth
        # cudaDevAttrMaxTexture2DMipmappedHeight
        # cudaDevAttrMaxTexture1DMipmappedWidth
        # cudaDevAttrStreamPrioritiesSupported
        # cudaDevAttrGlobalL1CacheSupported
        # cudaDevAttrLocalL1CacheSupported
        # cudaDevAttrMaxRegistersPerMultiprocessor
        # cudaDevAttrMultiGpuBoardGroupID
        # cudaDevAttrHostNativeAtomicSupported
        # cudaDevAttrSingleToDoublePrecisionPerfRatio
        # cudaDevAttrComputePreemptionSupported
        # cudaDevAttrCanUseHostPointerForRegisteredMem
        # cudaDevAttrReserved92
        # cudaDevAttrReserved93
        # cudaDevAttrReserved94
        # cudaDevAttrMaxSharedMemoryPerBlockOptin
        # cudaDevAttrCanFlushRemoteWrites
        # cudaDevAttrHostRegisterSupported
    IF CUPY_HIP_VERSION >= 310:
        cpdef enum:
            # hipDeviceAttributeAsicRevision  # does not exist in CUDA
            cudaDevAttrManagedMemory = 47
            cudaDevAttrDirectManagedMemAccessFromHost
            cudaDevAttrConcurrentManagedAccess

            cudaDevAttrPageableMemoryAccess
            cudaDevAttrPageableMemoryAccessUsesHostPageTables
ELSE:
    # For CUDA/RTD
    cpdef enum:
        cudaDevAttrMaxThreadsPerBlock = 1
        cudaDevAttrMaxBlockDimX
        cudaDevAttrMaxBlockDimY
        cudaDevAttrMaxBlockDimZ
        cudaDevAttrMaxGridDimX
        cudaDevAttrMaxGridDimY
        cudaDevAttrMaxGridDimZ
        cudaDevAttrMaxSharedMemoryPerBlock
        cudaDevAttrTotalConstantMemory
        cudaDevAttrWarpSize
        cudaDevAttrMaxPitch
        cudaDevAttrMaxRegistersPerBlock
        cudaDevAttrClockRate
        cudaDevAttrTextureAlignment
        cudaDevAttrGpuOverlap
        cudaDevAttrMultiProcessorCount
        cudaDevAttrKernelExecTimeout
        cudaDevAttrIntegrated
        cudaDevAttrCanMapHostMemory
        cudaDevAttrComputeMode
        cudaDevAttrMaxTexture1DWidth
        cudaDevAttrMaxTexture2DWidth
        cudaDevAttrMaxTexture2DHeight
        cudaDevAttrMaxTexture3DWidth
        cudaDevAttrMaxTexture3DHeight
        cudaDevAttrMaxTexture3DDepth
        cudaDevAttrMaxTexture2DLayeredWidth
        cudaDevAttrMaxTexture2DLayeredHeight
        cudaDevAttrMaxTexture2DLayeredLayers
        cudaDevAttrSurfaceAlignment
        cudaDevAttrConcurrentKernels
        cudaDevAttrEccEnabled
        cudaDevAttrPciBusId
        cudaDevAttrPciDeviceId
        cudaDevAttrTccDriver
        cudaDevAttrMemoryClockRate
        cudaDevAttrGlobalMemoryBusWidth
        cudaDevAttrL2CacheSize
        cudaDevAttrMaxThreadsPerMultiProcessor
        cudaDevAttrAsyncEngineCount
        cudaDevAttrUnifiedAddressing
        cudaDevAttrMaxTexture1DLayeredWidth
        cudaDevAttrMaxTexture1DLayeredLayers  # = 43; 44 is missing
        cudaDevAttrMaxTexture2DGatherWidth = 45
        cudaDevAttrMaxTexture2DGatherHeight
        cudaDevAttrMaxTexture3DWidthAlt
        cudaDevAttrMaxTexture3DHeightAlt
        cudaDevAttrMaxTexture3DDepthAlt
        cudaDevAttrPciDomainId
        cudaDevAttrTexturePitchAlignment
        cudaDevAttrMaxTextureCubemapWidth
        cudaDevAttrMaxTextureCubemapLayeredWidth
        cudaDevAttrMaxTextureCubemapLayeredLayers
        cudaDevAttrMaxSurface1DWidth
        cudaDevAttrMaxSurface2DWidth
        cudaDevAttrMaxSurface2DHeight
        cudaDevAttrMaxSurface3DWidth
        cudaDevAttrMaxSurface3DHeight
        cudaDevAttrMaxSurface3DDepth
        cudaDevAttrMaxSurface1DLayeredWidth
        cudaDevAttrMaxSurface1DLayeredLayers
        cudaDevAttrMaxSurface2DLayeredWidth
        cudaDevAttrMaxSurface2DLayeredHeight
        cudaDevAttrMaxSurface2DLayeredLayers
        cudaDevAttrMaxSurfaceCubemapWidth
        cudaDevAttrMaxSurfaceCubemapLayeredWidth
        cudaDevAttrMaxSurfaceCubemapLayeredLayers
        cudaDevAttrMaxTexture1DLinearWidth
        cudaDevAttrMaxTexture2DLinearWidth
        cudaDevAttrMaxTexture2DLinearHeight
        cudaDevAttrMaxTexture2DLinearPitch
        cudaDevAttrMaxTexture2DMipmappedWidth
        cudaDevAttrMaxTexture2DMipmappedHeight
        # The following are exposed as "deviceAttributeCo..."
        # cudaDevAttrComputeCapabilityMajor  # = 75
        # cudaDevAttrComputeCapabilityMinor  # = 76
        cudaDevAttrMaxTexture1DMipmappedWidth = 77
        cudaDevAttrStreamPrioritiesSupported
        cudaDevAttrGlobalL1CacheSupported
        cudaDevAttrLocalL1CacheSupported
        cudaDevAttrMaxSharedMemoryPerMultiprocessor
        cudaDevAttrMaxRegistersPerMultiprocessor
        cudaDevAttrManagedMemory
        cudaDevAttrIsMultiGpuBoard
        cudaDevAttrMultiGpuBoardGroupID
        cudaDevAttrHostNativeAtomicSupported
        cudaDevAttrSingleToDoublePrecisionPerfRatio
        cudaDevAttrPageableMemoryAccess
        cudaDevAttrConcurrentManagedAccess
        cudaDevAttrComputePreemptionSupported
        cudaDevAttrCanUseHostPointerForRegisteredMem
        cudaDevAttrReserved92
        cudaDevAttrReserved93
        cudaDevAttrReserved94
        cudaDevAttrCooperativeLaunch
        cudaDevAttrCooperativeMultiDeviceLaunch
        cudaDevAttrMaxSharedMemoryPerBlockOptin
        cudaDevAttrCanFlushRemoteWrites
        cudaDevAttrHostRegisterSupported
        cudaDevAttrPageableMemoryAccessUsesHostPageTables
        cudaDevAttrDirectManagedMemAccessFromHost  # = 101
        # added since CUDA 11.0
        cudaDevAttrMaxBlocksPerMultiprocessor = 106
        cudaDevAttrReservedSharedMemoryPerBlock = 111
        # added since CUDA 11.1
        cudaDevAttrSparseCudaArraySupported = 112
        cudaDevAttrHostRegisterReadOnlySupported = 113
        # added since CUDA 11.2
        cudaDevAttrMaxTimelineSemaphoreInteropSupported = 114
        cudaDevAttrMemoryPoolsSupported = 115
        # added since CUDA 11.3
        cudaDevAttrGPUDirectRDMASupported
        cudaDevAttrGPUDirectRDMAFlushWritesOptions
        cudaDevAttrGPUDirectRDMAWritesOrdering
        cudaDevAttrMemoryPoolSupportedHandleTypes