cupy_hip_runtime.h 13 KB
Newer Older
root's avatar
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
#ifndef INCLUDE_GUARD_HIP_CUPY_RUNTIME_H
#define INCLUDE_GUARD_HIP_CUPY_RUNTIME_H

#include <hip/hip_runtime_api.h>
#include "cupy_hip_common.h"

extern "C" {

bool hip_environment = true;

// Error handling
const char* cudaGetErrorName(cudaError_t hipError) {
    return hipGetErrorName(hipError);
}

const char* cudaGetErrorString(cudaError_t hipError) {
    return hipGetErrorString(hipError);
}

cudaError_t cudaGetLastError() {
    return hipGetLastError();
}


// Initialization
cudaError_t cudaDriverGetVersion(int *driverVersion) {
    return hipDriverGetVersion(driverVersion);
}

cudaError_t cudaRuntimeGetVersion(int *runtimeVersion) {
    return hipRuntimeGetVersion(runtimeVersion);
}


// CUdevice operations
cudaError_t cudaGetDevice(int *deviceId) {
    return hipGetDevice(deviceId);
}

cudaError_t cudaDeviceGetAttribute(int* pi, cudaDeviceAttr attr,
                                   int deviceId) {
    return hipDeviceGetAttribute(pi, attr, deviceId);
}

cudaError_t cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) {
    return hipDeviceGetByPCIBusId(device, pciBusId);
}

cudaError_t cudaDeviceGetPCIBusId(char *pciBusId, int len, int device) {
    return hipDeviceGetPCIBusId(pciBusId, len, device);
}

cudaError_t cudaGetDeviceCount(int *count) {
    return hipGetDeviceCount(count);
}

cudaError_t cudaSetDevice(int deviceId) {
    return hipSetDevice(deviceId);
}

cudaError_t cudaDeviceSynchronize() {
    return hipDeviceSynchronize();
}

cudaError_t cudaDeviceCanAccessPeer(int* canAccessPeer, int deviceId,
                                    int peerDeviceId) {
    return hipDeviceCanAccessPeer(canAccessPeer, deviceId, peerDeviceId);
}

cudaError_t cudaDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
    return hipDeviceEnablePeerAccess(peerDeviceId, flags);
}

cudaError_t cudaDeviceDisablePeerAccess(int peerDeviceId) {
    return hipDeviceDisablePeerAccess(peerDeviceId);
}

cudaError_t cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) {
    return hipDeviceGetLimit(pValue, limit);
}

cudaError_t cudaDeviceSetLimit(cudaLimit limit, size_t value) {
    // see https://github.com/ROCm-Developer-Tools/HIP/issues/1632
    return hipErrorUnknown;
}

// IPC operations
cudaError_t cudaIpcCloseMemHandle(void* devPtr) {
    return hipIpcCloseMemHandle(devPtr);
}

cudaError_t cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) {
    return hipErrorUnknown;

    // TODO(leofang): this is supported after ROCm-Developer-Tools/HIP#1996 is released;
    // as of ROCm 3.5.0 it is still not supported
    //return hipIpcGetEventHandle(handle, event);
}

cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) {
    return hipIpcGetMemHandle(handle, devPtr);
}

cudaError_t cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) {
    return hipErrorUnknown;

    // TODO(leofang): this is supported after ROCm-Developer-Tools/HIP#1996 is released;
    // as of ROCm 3.5.0 it is still not supported
    //return hipIpcOpenEventHandle(event, handle);
}

cudaError_t cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) {
    return hipIpcOpenMemHandle(devPtr, handle, flags);
}

// Memory management
enum cudaMemAllocationType {};  // stub
enum cudaMemAllocationHandleType {};  // stub
enum cudaMemLocationType {};  // stub
struct cudaMemLocation {  // stub
    int id;
    cudaMemLocationType type;
};
struct cudaMemPoolProps {  // stub
    cudaMemAllocationType allocType;
    cudaMemAllocationHandleType handleTypes;
    struct cudaMemLocation location;
    unsigned char reserved[64];
    void* win32SecurityAttributes;
};

cudaError_t cudaMalloc(void** ptr, size_t size) {
    return hipMalloc(ptr, size);
}

cudaError_t cudaMalloc3DArray(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMallocArray(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMallocAsync(...) {
    return hipErrorUnknown;
}

cudaError_t cudaHostAlloc(void** ptr, size_t size, unsigned int flags) {
    return hipHostMalloc(ptr, size, flags);
}

cudaError_t cudaHostRegister(...) {
    return hipErrorUnknown;
}

cudaError_t cudaHostUnregister(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMallocManaged(void** ptr, size_t size, unsigned int flags) {
#if HIP_VERSION >= 40300000
    return hipMallocManaged(ptr, size, flags);
#else
    return hipErrorUnknown;
#endif
}

int cudaFree(void* ptr) {
    return hipFree(ptr);
}

cudaError_t cudaFreeArray(...) {
    return hipErrorUnknown;
}

cudaError_t cudaFreeHost(void* ptr) {
    return hipHostFree(ptr);
}

cudaError_t cudaFreeAsync(...) {
    return hipErrorUnknown;
}

int cudaMemGetInfo(size_t* free, size_t* total) {
    return hipMemGetInfo(free, total);
}

cudaError_t cudaMemcpy(void* dst, const void* src, size_t sizeBytes,
                       hipMemcpyKind kind) {
    return hipMemcpy(dst, src, sizeBytes, kind);
}

cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
                            cudaMemcpyKind kind, cudaStream_t stream) {
    return hipMemcpyAsync(dst, src, sizeBytes, kind, stream);
}

cudaError_t cudaMemcpyPeer(void* dst, int dstDeviceId, const void* src,
                           int srcDeviceId, size_t sizeBytes) {
    return hipMemcpyPeer(dst, dstDeviceId, src, srcDeviceId, sizeBytes);
}

cudaError_t cudaMemcpyPeerAsync(void* dst, int dstDevice, const void* src,
                                int srcDevice, size_t sizeBytes,
                                cudaStream_t stream) {
    return hipMemcpyPeerAsync(dst, dstDevice, src, srcDevice, sizeBytes,
                              stream);
}

cudaError_t cudaMemcpy2D(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemcpy2DAsync(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemcpy2DFromArray(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemcpy2DFromArrayAsync(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemcpy2DToArray(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemcpy2DToArrayAsync(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemcpy3D(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemcpy3DAsync(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemset(void* dst, int value, size_t sizeBytes) {
    return hipMemset(dst, value, sizeBytes);
}

cudaError_t cudaMemsetAsync(void* dst, int value, size_t sizeBytes,
                            cudaStream_t stream) {
    return hipMemsetAsync(dst, value, sizeBytes, stream);
}

cudaError_t cudaMemAdvise(const void *devPtr, size_t count,
                          cudaMemoryAdvise advice, int device) {
#if HIP_VERSION >= 40300000
    return hipMemAdvise(devPtr, count, advice, device);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaMemPrefetchAsync(const void *devPtr, size_t count,
				 int dstDevice, cudaStream_t stream) {
#if HIP_VERSION >= 40300000
    return hipMemPrefetchAsync(devPtr, count, dstDevice, stream);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaPointerGetAttributes(cudaPointerAttributes *attributes,
                                     const void* ptr) {
    cudaError_t status = hipPointerGetAttributes(attributes, ptr);
    if (status == cudaSuccess) {
        switch (attributes->memoryType) {
            case 0 /* hipMemoryTypeHost */:
                attributes->memoryType = (hipMemoryType)1; /* cudaMemoryTypeHost */
                return status;
            case 1 /* hipMemoryTypeDevice */:
                attributes->memoryType = (hipMemoryType)2; /* cudaMemoryTypeDevice */
                return status;
            default:
                /* we don't care the rest of possibilities */
                return status;
        }
    } else {
        return status;
    }
}

cudaError_t cudaGetDeviceProperties(cudaDeviceProp *prop, int device) {
    return hipGetDeviceProperties(prop, device);
}

cudaError_t cudaMallocFromPoolAsync(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemPoolCreate(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemPoolDestroy(...) {
    return hipErrorUnknown;
}

cudaError_t cudaDeviceGetDefaultMemPool(...) {
    return hipErrorUnknown;
}

cudaError_t cudaDeviceGetMemPool(...) {
    return hipErrorUnknown;
}

cudaError_t cudaDeviceSetMemPool(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemPoolTrimTo(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemPoolGetAttribute(...) {
    return hipErrorUnknown;
}

cudaError_t cudaMemPoolSetAttribute(...) {
    return hipErrorUnknown;
}


// Stream and Event
#if HIP_VERSION >= 40300000
typedef hipStreamCaptureMode cudaStreamCaptureMode;
typedef hipStreamCaptureStatus cudaStreamCaptureStatus;
#else
enum cudaStreamCaptureMode {};
enum cudaStreamCaptureStatus {};
#endif

cudaError_t cudaStreamCreate(cudaStream_t *stream) {
    return hipStreamCreate(stream);
}

cudaError_t cudaStreamCreateWithFlags(cudaStream_t *stream,
                                      unsigned int flags) {
    return hipStreamCreateWithFlags(stream, flags);
}

cudaError_t cudaStreamDestroy(cudaStream_t stream) {
    return hipStreamDestroy(stream);
}

cudaError_t cudaStreamSynchronize(cudaStream_t stream) {
    return hipStreamSynchronize(stream);
}

cudaError_t cudaStreamAddCallback(cudaStream_t stream,
                                  cudaStreamCallback_t callback,
                                  void *userData, unsigned int flags) {
    return hipStreamAddCallback(stream, callback, userData, flags);
}

cudaError_t cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) {
    return hipErrorUnknown;
}

cudaError_t cudaStreamQuery(cudaStream_t stream) {
    return hipStreamQuery(stream);
}

cudaError_t cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event,
                                unsigned int flags) {
    return hipStreamWaitEvent(stream, event, flags);
}

cudaError_t cudaEventCreate(cudaEvent_t* event) {
    return hipEventCreate(event);
}

cudaError_t cudaEventCreateWithFlags(cudaEvent_t* event, unsigned flags) {
    return hipEventCreateWithFlags(event, flags);
}

cudaError_t cudaEventDestroy(cudaEvent_t event) {
    return hipEventDestroy(event);
}

cudaError_t cudaEventElapsedTime(float *ms, cudaEvent_t start,
                                 cudaEvent_t stop){
    return hipEventElapsedTime(ms, start, stop);
}

cudaError_t cudaEventQuery(cudaEvent_t event) {
    return hipEventQuery(event);
}

cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream) {
    return hipEventRecord(event, stream);
}

cudaError_t cudaEventSynchronize(cudaEvent_t event) {
    return hipEventSynchronize(event);
}

cudaError_t cudaStreamBeginCapture(cudaStream_t stream,
                                   cudaStreamCaptureMode mode) {
#if HIP_VERSION >= 40300000
    return hipStreamBeginCapture(stream, mode);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t* pGraph) {
#if HIP_VERSION >= 40300000
    return hipStreamEndCapture(stream, pGraph);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaStreamIsCapturing(cudaStream_t stream,
                                  cudaStreamCaptureStatus* pCaptureStatus) {
#if HIP_VERSION >= 50000000
    return hipStreamIsCapturing(stream, pCaptureStatus);
#else
    return hipErrorUnknown;
#endif
}


// Texture
cudaError_t cudaCreateTextureObject(...) {
    return cudaSuccess;
}

cudaError_t cudaDestroyTextureObject(...) {
    return cudaSuccess;
}

cudaError_t cudaGetChannelDesc(...) {
    return cudaSuccess;
}

cudaError_t cudaGetTextureObjectResourceDesc(...) {
    return cudaSuccess;
}

cudaError_t cudaGetTextureObjectTextureDesc(...) {
    return cudaSuccess;
}

cudaExtent make_cudaExtent(...) {
    cudaExtent ex = {};
    return ex;
}

cudaPitchedPtr make_cudaPitchedPtr(...) {
    cudaPitchedPtr ptr = {};
    return ptr;
}

cudaPos make_cudaPos(...) {
    cudaPos pos = {};
    return pos;
}

// Surface
cudaError_t cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject,
                                    const cudaResourceDesc* pResDesc) {
    return hipCreateSurfaceObject(pSurfObject, pResDesc);
}

cudaError_t cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) {
    return hipDestroySurfaceObject(surfObject);
}

// CUDA Graph
cudaError_t cudaGraphInstantiate(
	cudaGraphExec_t* pGraphExec,
	cudaGraph_t graph,
	cudaGraphNode_t* pErrorNode,
	char* pLogBuffer,
	size_t bufferSize) {
#if HIP_VERSION >= 40300000
    return hipGraphInstantiate(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaGraphDestroy(cudaGraph_t graph) {
#if HIP_VERSION >= 40300000
    return hipGraphDestroy(graph);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaGraphExecDestroy(cudaGraphExec_t graphExec) {
#if HIP_VERSION >= 40300000
    return hipGraphExecDestroy(graphExec);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) {
#if HIP_VERSION >= 40300000
    return hipGraphLaunch(graphExec, stream);
#else
    return hipErrorUnknown;
#endif
}

cudaError_t cudaGraphUpload(...) {
    return hipErrorUnknown;
}

} // extern "C"

#endif // #ifndef INCLUDE_GUARD_HIP_CUPY_RUNTIME_H