# distutils: language = c++

"""Thin wrapper of CUDA Driver API.

There are four differences compared to the original C API.

1. Not all functions are ported.
2. Errors are translated into CUDADriverError exceptions.
3. The 'cu' prefix of each API is omitted and the next character is set to
   lower case.
4. The resulting values are returned directly instead of references.

"""
cimport cython  # NOQA
from libc.stdint cimport intptr_t


###############################################################################
# Extern and Constants
###############################################################################

# The driver declarations come from one of two sources, selected at build
# time by CUPY_USE_CUDA_PYTHON.
IF CUPY_USE_CUDA_PYTHON:
    from cuda.ccuda cimport *
ELSE:
    include '_driver_extern.pxi'
    pass  # for cython-lint

cdef extern from '../../cupy_backend.h' nogil:
    # Build-time version
    # Note: CUDA_VERSION is defined either in CUDA Python or _driver_extern.pxi
    enum: HIP_VERSION

# Provide access to constants from Python.
from cupy_backends.cuda.api._driver_enum import *

# NOTE(review): every wrapper below exchanges driver handles with Python as
# plain ``intptr_t`` integers, yet this copy of the file passes them to and
# from driver-typed variables (Context, Module, Function, LinkState, ...)
# without any explicit ``<Type>`` cast. If those types are pointers, Cython
# requires casts at each such site. The bare parentheses in ``linkComplete``
# suggest the casts were lost from this text -- confirm against the
# authoritative source before building.


###############################################################################
# Error handling
###############################################################################

class CUDADriverError(RuntimeError):
    """Exception raised when a driver call reports a non-zero status.

    The message has the form ``'<name>: <description>'``, where both parts
    are obtained from ``cuGetErrorName`` and ``cuGetErrorString`` for the
    given status. The raw status code is kept in the ``status`` attribute.
    """

    def __init__(self, Result status):
        """Builds the error message for ``status`` and stores the code."""
        self.status = status
        cdef const char *name
        cdef const char *msg
        # NOTE(review): the return codes of these two lookups are ignored;
        # confirm that ``name``/``msg`` cannot be left unset or NULL for a
        # status value the driver does not recognize.
        cuGetErrorName(status, &name)
        cuGetErrorString(status, &msg)
        # Copy the C strings into Python bytes objects before decoding.
        cdef bytes s_name = name, s_msg = msg
        super(CUDADriverError, self).__init__(
            '%s: %s' % (s_name.decode(), s_msg.decode()))

    def __reduce__(self):
        """Supports pickling by rebuilding the error from its status code."""
        return (type(self), (self.status,))


@cython.profile(False)
cpdef inline check_status(int status):
    """Raises CUDADriverError if ``status`` is non-zero; otherwise no-op."""
    if status != 0:
        raise CUDADriverError(status)


# Variant of check_status for attribute queries: CUDA_ERROR_INVALID_VALUE is
# not raised but reported to the caller by writing -1 into ``pi``. Any other
# non-zero status raises CUDADriverError.
@cython.profile(False)
cdef inline void check_attribute_status(int status, int* pi) except *:
    # set attribute to -1 on older versions of CUDA where it was undefined
    if status == CUDA_ERROR_INVALID_VALUE:
        pi[0] = -1
    elif status != 0:
        raise CUDADriverError(status)


###############################################################################
# Build-time version
###############################################################################

cpdef get_build_version():
    """Returns the CUDA_VERSION / HIP_VERSION constant.

    Note that when built with CUDA Python support, CUDA_VERSION will become a
    constant:

    https://github.com/NVIDIA/cuda-python/blob/v11.4.0/cuda/ccuda.pxd#L2268

    In CuPy codebase, use `runtime.runtimeGetVersion()` instead of this
    function to change the behavior based on the target CUDA version.
    """

    # The versions are mutually exclusive
    if CUPY_CUDA_VERSION > 0:
        return CUDA_VERSION
    elif CUPY_HIP_VERSION > 0:
        return HIP_VERSION
    else:
        # Neither backend version is set.
        return 0


cpdef bint _is_cuda_python():
    """Returns the build-time CUPY_USE_CUDA_PYTHON flag."""
    return CUPY_USE_CUDA_PYTHON


###############################################################################
# Primary context management
###############################################################################

cpdef devicePrimaryCtxRelease(Device dev):
    """Calls cuDevicePrimaryCtxRelease for ``dev`` with the GIL released.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuDevicePrimaryCtxRelease(dev)
    check_status(status)


###############################################################################
# Context management
###############################################################################

# In the wrappers below, ``except? 0`` (or ``except? -1`` / ``except? -2``)
# names the C return value that makes Cython check for a pending exception.

cpdef intptr_t ctxGetCurrent() except? 0:
    """Returns the handle obtained from cuCtxGetCurrent.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Context ctx
    with nogil:
        status = cuCtxGetCurrent(&ctx)
    check_status(status)
    return ctx


cpdef ctxSetCurrent(intptr_t ctx):
    """Passes the context handle ``ctx`` to cuCtxSetCurrent.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuCtxSetCurrent(ctx)
    check_status(status)


cpdef intptr_t ctxCreate(Device dev) except? 0:
    """Creates a context on ``dev`` via cuCtxCreate and returns its handle.

    The flags argument passed to the driver is always 0.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Context ctx
    cdef unsigned int flags = 0
    with nogil:
        status = cuCtxCreate(&ctx, flags, dev)
    check_status(status)
    return ctx


cpdef ctxDestroy(intptr_t ctx):
    """Passes the context handle ``ctx`` to cuCtxDestroy.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuCtxDestroy(ctx)
    check_status(status)


cpdef int ctxGetDevice() except? -1:
    """Returns the device reported by cuCtxGetDevice.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Device dev
    with nogil:
        status = cuCtxGetDevice(&dev)
    check_status(status)
    return dev


###############################################################################
# Module load and kernel execution
###############################################################################

cpdef intptr_t linkCreate() except? 0:
    """Creates a link state via cuLinkCreate and returns its handle.

    The first three arguments passed to the driver are all 0.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef LinkState state
    with nogil:
        status = cuLinkCreate(0, 0, 0, &state)
    check_status(status)
    return state


cpdef linkAddData(intptr_t state, int input_type, bytes data, unicode name):
    """Adds an in-memory input to the link state via cuLinkAddData.

    Args:
        state: Link state handle.
        input_type: Integer input-type code forwarded to the driver.
        data: Input image.
        name: Name for the input; encoded with ``str.encode()`` defaults.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    # Borrowed pointer into ``data``; valid while ``data`` is referenced.
    cdef const char* data_ptr = data
    # The size handed to the driver is one byte larger than the payload
    # (presumably to include the NUL terminator that follows the contents
    # of a bytes buffer -- confirm).
    cdef size_t data_size = len(data) + 1
    cdef bytes b_name = name.encode()
    cdef const char* b_name_ptr = b_name
    with nogil:
        # The three trailing zeros are forwarded as-is.
        status = cuLinkAddData(
            state, input_type, data_ptr, data_size,
            b_name_ptr, 0, 0, 0)
    check_status(status)


cpdef linkAddFile(intptr_t state, int input_type, unicode path):
    """Adds a file input to the link state via cuLinkAddFile.

    Args:
        state: Link state handle.
        input_type: Integer input-type code forwarded to the driver.
        path: File path; encoded with ``str.encode()`` defaults.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef bytes b_path = path.encode()
    cdef const char* b_path_ptr = b_path
    with nogil:
        # The three trailing zeros are forwarded as-is.
        status = cuLinkAddFile(state, input_type, b_path_ptr, 0, 0, 0)
    check_status(status)


cpdef bytes linkComplete(intptr_t state):
    """Completes the link via cuLinkComplete and returns the output image.

    Returns:
        A ``bytes`` object holding the first ``sizeOut`` bytes of the buffer
        reported by the driver.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef void* cubinOut
    cdef size_t sizeOut
    with nogil:
        status = cuLinkComplete(state, &cubinOut, &sizeOut)
    check_status(status)
    # NOTE(review): ``cubinOut`` is declared ``void*``; slicing it needs a
    # character-pointer cast, which appears to be missing inside the
    # parentheses -- confirm.
    return bytes((cubinOut)[:sizeOut])


cpdef linkDestroy(intptr_t state):
    """Passes the link state handle ``state`` to cuLinkDestroy.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuLinkDestroy(state)
    check_status(status)


cpdef intptr_t moduleLoad(str filename) except? 0:
    """Loads a module from ``filename`` via cuModuleLoad; returns its handle.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Module module
    # ``b_filename`` keeps the encoded bytes alive while the borrowed
    # pointer is used inside the nogil section.
    cdef bytes b_filename = filename.encode()
    cdef char* b_filename_ptr = b_filename
    with nogil:
        status = cuModuleLoad(&module, b_filename_ptr)
    check_status(status)
    return module


cpdef intptr_t moduleLoadData(bytes image) except? 0:
    """Loads a module from the in-memory ``image`` via cuModuleLoadData.

    Returns:
        The module handle.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Module module
    # Borrowed pointer into ``image``; valid while ``image`` is referenced.
    cdef char* image_ptr = image
    with nogil:
        status = cuModuleLoadData(&module, image_ptr)
    check_status(status)
    return module


cpdef moduleUnload(intptr_t module):
    """Passes the module handle ``module`` to cuModuleUnload.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuModuleUnload(module)
    check_status(status)


cpdef intptr_t moduleGetFunction(intptr_t module, str funcname) except? 0:
    """Looks up ``funcname`` in ``module`` via cuModuleGetFunction.

    Returns:
        The function handle.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Function func
    cdef bytes b_funcname = funcname.encode()
    cdef char* b_funcname_ptr = b_funcname
    with nogil:
        status = cuModuleGetFunction(&func, module, b_funcname_ptr)
    check_status(status)
    return func


cpdef intptr_t moduleGetGlobal(intptr_t module, str varname) except? 0:
    """Looks up the global ``varname`` in ``module`` via cuModuleGetGlobal.

    Returns:
        The value the driver wrote for the variable's address. The size that
        the driver also reports is not returned.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Deviceptr var
    cdef size_t size
    cdef bytes b_varname = varname.encode()
    cdef char* b_varname_ptr = b_varname
    with nogil:
        status = cuModuleGetGlobal(&var, &size, module, b_varname_ptr)
    check_status(status)
    return var


cpdef launchKernel(
        intptr_t f, unsigned int grid_dim_x, unsigned int grid_dim_y,
        unsigned int grid_dim_z, unsigned int block_dim_x,
        unsigned int block_dim_y, unsigned int block_dim_z,
        unsigned int shared_mem_bytes, intptr_t stream, intptr_t kernel_params,
        intptr_t extra):
    """Forwards all arguments, in order, to cuLaunchKernel.

    ``f``, ``stream``, ``kernel_params`` and ``extra`` are received as
    integers.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuLaunchKernel(
            f, grid_dim_x, grid_dim_y, grid_dim_z,
            block_dim_x, block_dim_y, block_dim_z,
            shared_mem_bytes, stream,
            kernel_params, extra)
    check_status(status)


cpdef launchCooperativeKernel(
        intptr_t f, unsigned int grid_dim_x, unsigned int grid_dim_y,
        unsigned int grid_dim_z, unsigned int block_dim_x,
        unsigned int block_dim_y, unsigned int block_dim_z,
        unsigned int shared_mem_bytes, intptr_t stream,
        intptr_t kernel_params):
    """Forwards all arguments, in order, to cuLaunchCooperativeKernel.

    Takes the same parameters as ``launchKernel`` except that there is no
    ``extra`` argument.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuLaunchCooperativeKernel(
            f, grid_dim_x, grid_dim_y, grid_dim_z,
            block_dim_x, block_dim_y, block_dim_z,
            shared_mem_bytes, stream,
            kernel_params)
    check_status(status)


###############################################################################
# Function attributes
###############################################################################

# -1 is reserved by check_attribute_status
cpdef int funcGetAttribute(int attribute, intptr_t f) except? -2:
    """Queries ``attribute`` of function ``f`` via cuFuncGetAttribute.

    Returns:
        The attribute value, or -1 when the driver reports
        CUDA_ERROR_INVALID_VALUE for the query.

    Raises:
        CUDADriverError: For any other non-zero driver status.
    """
    cdef int pi
    with nogil:
        status = cuFuncGetAttribute(
            &pi,
            attribute,
            f)
    # May overwrite ``pi`` with -1 instead of raising.
    check_attribute_status(status, &pi)
    return pi


cpdef funcSetAttribute(intptr_t f, int attribute, int value):
    """Sets ``attribute`` of function ``f`` to ``value``.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    with nogil:
        status = cuFuncSetAttribute(
            f,
            attribute,
            value)
    check_status(status)


###############################################################################
# Occupancy
###############################################################################

# NOTE(review): unlike the other int-returning wrappers in this file, the
# function below declares no ``except`` clause; depending on the Cython
# version, an exception raised by check_status may not propagate to the
# caller -- confirm.
cpdef int occupancyMaxActiveBlocksPerMultiprocessor(
        intptr_t func, int blockSize, size_t dynamicSMemSize):
    """Returns the block count from the same-named driver call.

    Args:
        func: Function handle.
        blockSize: Forwarded to the driver unchanged.
        dynamicSMemSize: Forwarded to the driver unchanged.
    """
    cdef int numBlocks
    with nogil:
        status = cuOccupancyMaxActiveBlocksPerMultiprocessor(
            &numBlocks, func, blockSize, dynamicSMemSize)
    check_status(status)
    return numBlocks


cpdef occupancyMaxPotentialBlockSize(intptr_t func, size_t dynamicSMemSize,
                                     int blockSizeLimit):
    """Returns ``(minGridSize, blockSize)`` from the same-named driver call.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    # CUoccupancyB2DSize is set to NULL as there is no way to pass in a
    # unary function from Python.
    cdef int minGridSize, blockSize
    with nogil:
        status = cuOccupancyMaxPotentialBlockSize(
            &minGridSize, &blockSize, func, NULL,
            dynamicSMemSize, blockSizeLimit)
    check_status(status)
    return minGridSize, blockSize


###############################################################################
# Stream management
###############################################################################

cpdef intptr_t streamGetCtx(intptr_t stream) except? 0:
    """Returns the context handle reported by cuStreamGetCtx for ``stream``.

    Raises:
        CUDADriverError: If the driver returns a non-zero status.
    """
    cdef Context ctx
    with nogil:
        status = cuStreamGetCtx(stream, &ctx)
    check_status(status)
    return ctx