# distutils: language = c++

"""Thin wrapper of NVRTC API.

There are four differences compared to the original C API.

1. Not all functions are ported.
2. Errors are translated into NVRTCError exceptions.
3. The 'nvrtc' prefix of each API is omitted and the next character is set to
   lower case.
4. The resulting values are returned directly instead of references.

"""
import sys as _sys  # no-cython-lint

cimport cython  # NOQA
from libcpp cimport vector

from cupy_backends.cuda.api cimport runtime
from cupy_backends.cuda._softlink cimport SoftLink


###############################################################################
# Extern
###############################################################################

IF CUPY_USE_CUDA_PYTHON:
    from cuda.cnvrtc cimport *
ELSE:
    # NOTE(review): this file was reconstructed from a whitespace-collapsed
    # source, so the original nesting is not visible.  Everything down to the
    # soft-linked function pointers is assumed to belong to this ELSE branch
    # -- confirm against the upstream layout.
    cdef extern from '../../cupy_rtc.h' nogil:
        const char *nvrtcGetErrorString(Result result)
        int nvrtcVersion(int *major, int *minor)
        int nvrtcCreateProgram(
            Program* prog, const char* src, const char* name, int numHeaders,
            const char** headers, const char** includeNames)
        int nvrtcDestroyProgram(Program *prog)
        int nvrtcCompileProgram(Program prog, int numOptions,
                                const char** options)
        int nvrtcGetPTXSize(Program prog, size_t *ptxSizeRet)
        int nvrtcGetPTX(Program prog, char *ptx)
        int nvrtcGetCUBINSize(Program prog, size_t *cubinSizeRet)
        int nvrtcGetCUBIN(Program prog, char *cubin)
        int nvrtcGetProgramLogSize(Program prog, size_t* logSizeRet)
        int nvrtcGetProgramLog(Program prog, char* log)
        int nvrtcAddNameExpression(Program, const char*)
        int nvrtcGetLoweredName(Program, const char*, const char**)

    # Signature used for every entry point that is looked up through
    # ``SoftLink`` instead of being declared in the header above.
    ctypedef int (*f_type)(...) nogil  # NOQA

    # Pick the shared library name from the CUDA version CuPy was built for
    # and from the running platform.
    IF 11020 <= CUPY_CUDA_VERSION < 12000:
        if _sys.platform == 'linux':
            _libname = 'libnvrtc.so.11.2'
        else:
            _libname = 'nvrtc64_112_0.dll'
    ELIF 12000 <= CUPY_CUDA_VERSION < 13000:
        if _sys.platform == 'linux':
            _libname = 'libnvrtc.so.12'
        else:
            _libname = 'nvrtc64_120_0.dll'
    ELSE:
        _libname = None

    cdef SoftLink _lib = SoftLink(_libname, 'nvrtc')
    # APIs added after CUDA 11.2+.
    cdef f_type nvrtcGetNumSupportedArchs = <f_type>_lib.get('GetNumSupportedArchs')  # NOQA
    cdef f_type nvrtcGetSupportedArchs = <f_type>_lib.get('GetSupportedArchs')  # NOQA
    cdef f_type nvrtcGetNVVMSize = <f_type>_lib.get('GetNVVMSize')
    cdef f_type nvrtcGetNVVM = <f_type>_lib.get('GetNVVM')


###############################################################################
# Error handling
###############################################################################

class NVRTCError(RuntimeError):
    """Exception raised when an NVRTC call returns a non-zero status.

    Attributes:
        status (int): The raw status code returned by NVRTC.
    """

    def __init__(self, status):
        self.status = status
        cdef bytes msg = nvrtcGetErrorString(<Result>status)
        super(NVRTCError, self).__init__(
            '{} ({})'.format(msg.decode(), status))

    def __reduce__(self):
        # Rebuild from the status code only; the message is derived from it
        # again in ``__init__``.
        return (type(self), (self.status,))


@cython.profile(False)
cpdef inline check_status(int status):
    """Raise :class:`NVRTCError` if ``status`` is not zero."""
    if status != 0:
        raise NVRTCError(status)


cpdef tuple getVersion():
    """Return the NVRTC version as a ``(major, minor)`` tuple."""
    cdef int status
    cdef int major, minor
    with nogil:
        status = nvrtcVersion(&major, &minor)
    check_status(status)
    return major, minor


cpdef tuple getSupportedArchs():
    """Return the architectures reported by NVRTC as a tuple of ints.

    Raises:
        RuntimeError: On HIP, or when the CUDA runtime is older than 11.2.
        NVRTCError: If an NVRTC call fails.
    """
    cdef int status, num_archs
    cdef vector.vector[int] archs
    if runtime._is_hip_environment:
        raise RuntimeError("HIP does not support getSupportedArchs")
    if runtime.runtimeGetVersion() < 11020:
        raise RuntimeError("getSupportedArchs is supported since CUDA 11.2")
    with nogil:
        status = nvrtcGetNumSupportedArchs(&num_archs)
        if status == 0:
            # Only query the list once its length is known to be valid.
            archs.resize(num_archs)
            status = nvrtcGetSupportedArchs(archs.data())
    check_status(status)
    return tuple(archs)


###############################################################################
# Program
###############################################################################

cpdef intptr_t createProgram(unicode src, unicode name, headers,
                             include_names) except? 0:
    """Create an NVRTC program and return its handle as an integer.

    Args:
        src (str): Source code of the program.
        name (str): Program name. An empty string is passed to NVRTC as
            ``NULL``.
        headers: Sequence of header contents, convertible to ``const char*``.
        include_names: Sequence of include names, one per entry of
            ``headers``.

    Returns:
        The program handle converted to ``intptr_t``.

    Raises:
        NVRTCError: If ``nvrtcCreateProgram`` fails.
    """
    cdef int status
    cdef Program prog
    cdef bytes b_src = src.encode()
    cdef const char* src_ptr = b_src
    cdef bytes b_name = name.encode()
    cdef const char* name_ptr
    if len(name) > 0:
        name_ptr = b_name
    else:
        name_ptr = NULL
    cdef int num_headers = len(headers)
    cdef vector.vector[const char*] header_vec
    cdef vector.vector[const char*] include_name_vec
    cdef const char** header_vec_ptr = NULL
    cdef const char** include_name_vec_ptr = NULL
    assert num_headers == len(include_names)
    # The vectors only borrow the buffers; ``headers`` and ``include_names``
    # keep the underlying objects referenced for the duration of the call.
    for i in headers:
        header_vec.push_back(<const char*>i)
    for i in include_names:
        include_name_vec.push_back(<const char*>i)
    if num_headers > 0:
        header_vec_ptr = header_vec.data()
        include_name_vec_ptr = include_name_vec.data()
    with nogil:
        status = nvrtcCreateProgram(
            &prog, src_ptr, name_ptr, num_headers, header_vec_ptr,
            include_name_vec_ptr)
    check_status(status)
    return <intptr_t>prog


cpdef destroyProgram(intptr_t prog):
    """Destroy the program identified by the handle ``prog``.

    Raises:
        NVRTCError: If ``nvrtcDestroyProgram`` fails.
    """
    cdef int status
    cdef Program p = <Program>prog
    with nogil:
        status = nvrtcDestroyProgram(&p)
    check_status(status)


cpdef compileProgram(intptr_t prog, options):
    """Compile the program ``prog`` with the given options.

    Args:
        prog: Program handle returned by :func:`createProgram`.
        options: Sequence of ``str`` compiler options.

    Raises:
        NVRTCError: If ``nvrtcCompileProgram`` fails.
    """
    cdef int status
    cdef int option_num = len(options)
    cdef vector.vector[const char*] option_vec
    # Keep the encoded options in a list so that the pointers stored in
    # ``option_vec`` stay valid until the call returns.
    cdef option_list = [opt.encode() for opt in options]
    cdef const char** option_vec_ptr = NULL
    for i in option_list:
        option_vec.push_back(<const char*>i)
    if option_num > 0:
        option_vec_ptr = option_vec.data()
    with nogil:
        status = nvrtcCompileProgram(<Program>prog, option_num,
                                     option_vec_ptr)
    check_status(status)


cpdef bytes getPTX(intptr_t prog):
    """Return the PTX of ``prog`` as bytes (without the trailing NULL).

    An empty bytes object is returned when NVRTC reports a size of zero.

    Raises:
        NVRTCError: If an NVRTC call fails.
    """
    cdef int status
    cdef size_t ptxSizeRet
    cdef vector.vector[char] ptx
    cdef char* ptx_ptr = NULL
    with nogil:
        status = nvrtcGetPTXSize(<Program>prog, &ptxSizeRet)
    check_status(status)
    if ptxSizeRet == 0:
        return b''
    ptx.resize(ptxSizeRet)
    ptx_ptr = ptx.data()
    with nogil:
        status = nvrtcGetPTX(<Program>prog, ptx_ptr)
    check_status(status)

    # Strip the trailing NULL.
    return ptx_ptr[:ptxSizeRet-1]


cpdef bytes getCUBIN(intptr_t prog):
    """Return the CUBIN of ``prog`` as bytes (without the trailing NULL).

    Raises:
        RuntimeError: On HIP, when the CUDA runtime is older than 11.1, or
            when NVRTC reports a CUBIN size of at most one byte.
        NVRTCError: If an NVRTC call fails.
    """
    cdef int status
    cdef size_t cubinSizeRet = 0
    cdef vector.vector[char] cubin
    cdef char* cubin_ptr = NULL
    if runtime._is_hip_environment:
        raise RuntimeError("HIP does not support getCUBIN")
    if runtime.runtimeGetVersion() < 11010:
        raise RuntimeError("getCUBIN is supported since CUDA 11.1")
    with nogil:
        status = nvrtcGetCUBINSize(<Program>prog, &cubinSizeRet)
    check_status(status)
    if cubinSizeRet <= 1:
        # On CUDA 11.1, cubinSizeRet=1 if -arch=compute_XX is used, but the
        # spec says it should be 0 in this case...
        raise RuntimeError('cubin is requested, but the real arch (sm_XX) is '
                           'not provided')
    cubin.resize(cubinSizeRet)
    cubin_ptr = cubin.data()
    with nogil:
        status = nvrtcGetCUBIN(<Program>prog, cubin_ptr)
    check_status(status)

    # Strip the trailing NULL.
    return cubin_ptr[:cubinSizeRet-1]


cpdef bytes getNVVM(intptr_t prog):
    """Return the NVVM IR of ``prog`` as bytes (without the trailing NULL).

    An empty bytes object is returned when NVRTC reports a size of zero.

    Raises:
        RuntimeError: On HIP, or when the CUDA runtime is older than 11.4.
        NVRTCError: If an NVRTC call fails.
    """
    cdef int status
    cdef size_t nvvmSizeRet = 0
    cdef vector.vector[char] nvvm
    cdef char* nvvm_ptr = NULL
    if runtime._is_hip_environment:
        raise RuntimeError("HIP does not support getNVVM")
    if runtime.runtimeGetVersion() < 11040:
        raise RuntimeError("getNVVM is supported since CUDA 11.4")
    # The entry points are variadic function pointers, so the handle must be
    # passed with its real type.
    with nogil:
        status = nvrtcGetNVVMSize(<Program>prog, &nvvmSizeRet)
    check_status(status)
    if nvvmSizeRet == 0:
        # ``nvvmSizeRet`` is unsigned: without this guard the ``- 1`` below
        # would wrap around to a huge slice length.
        return b''
    nvvm.resize(nvvmSizeRet)
    nvvm_ptr = nvvm.data()
    with nogil:
        status = nvrtcGetNVVM(<Program>prog, nvvm_ptr)
    check_status(status)

    # Strip the trailing NULL.
    return nvvm_ptr[:nvvmSizeRet-1]


cpdef unicode getProgramLog(intptr_t prog):
    """Return the compilation log of ``prog`` decoded as UTF-8.

    An empty string is returned when NVRTC reports a log size of zero.

    Raises:
        NVRTCError: If an NVRTC call fails.
    """
    cdef int status
    cdef size_t logSizeRet
    cdef vector.vector[char] log
    cdef char* log_ptr = NULL
    with nogil:
        status = nvrtcGetProgramLogSize(<Program>prog, &logSizeRet)
    check_status(status)
    if logSizeRet == 0:
        return ''
    log.resize(logSizeRet)
    log_ptr = log.data()
    with nogil:
        status = nvrtcGetProgramLog(<Program>prog, log_ptr)
    check_status(status)

    # Strip the trailing NULL.
    return log_ptr[:logSizeRet-1].decode('UTF-8')


cpdef addNameExpression(intptr_t prog, str name):
    """Register the name expression ``name`` with the program ``prog``.

    Raises:
        NVRTCError: If ``nvrtcAddNameExpression`` fails.
    """
    cdef int status
    cdef bytes b_name = name.encode()
    cdef const char* c_name = b_name
    with nogil:
        status = nvrtcAddNameExpression(<Program>prog, c_name)
    check_status(status)


cpdef str getLoweredName(intptr_t prog, str name):
    """Return the lowered (mangled) name for the name expression ``name``.

    Raises:
        NVRTCError: If ``nvrtcGetLoweredName`` fails.
    """
    cdef int status
    cdef bytes b_name = name.encode()
    cdef const char* c_name = b_name
    cdef const char* mangled_name = NULL
    with nogil:
        status = nvrtcGetLoweredName(<Program>prog, c_name, &mangled_name)
    check_status(status)
    # Copy the C string into a Python bytes object before decoding.
    b_name = mangled_name
    return b_name.decode('UTF-8')