# distutils: language = c++

"""Thin wrapper of CUBLAS."""

cimport cython # NOQA

from cupy_backends.cuda.api cimport runtime
from cupy_backends.cuda cimport stream as stream_module


###############################################################################
# Extern
###############################################################################

# Complex scalar structs as seen by Cython: two floats (cuComplex) or two
# doubles (cuDoubleComplex), with members named x and y.
cdef extern from '../../cupy_complex.h':
    ctypedef struct cuComplex 'cuComplex':
        float x, y

    ctypedef struct cuDoubleComplex 'cuDoubleComplex':
        double x, y

# C entry points pulled in from cupy_blas.h.  The whole block is declared
# nogil, and every function returns an int.
# NOTE(review): Handle, PointerMode, Math, Operation, ... are not declared in
# this file; presumably they come from the matching .pxd -- confirm.
cdef extern from '../../cupy_blas.h' nogil:
    ctypedef void* Stream 'cudaStream_t'
    ctypedef int DataType 'cudaDataType'

    # Context
    int cublasCreate(Handle* handle)
    int cublasDestroy(Handle handle)
    int cublasGetVersion(Handle handle, int* version)
    int cublasGetPointerMode(Handle handle, PointerMode* mode)
    int cublasSetPointerMode(Handle handle, PointerMode mode)

    # Stream
    int cublasSetStream(Handle handle, Stream streamId)
    int cublasGetStream(Handle handle, Stream* streamId)

    # Math Mode
    int cublasSetMathMode(Handle handle, Math mode)
    int cublasGetMathMode(Handle handle, Math* mode)

    # BLAS Level 1
    int cublasIsamax(Handle handle, int n, float* x, int incx, int* result)
    int cublasIdamax(Handle handle, int n, double* x, int incx, int* result)
    int cublasIcamax(Handle handle, int n, cuComplex* x, int incx,
                     int* result)
    int cublasIzamax(Handle handle, int n, cuDoubleComplex* x, int incx,
                     int* result)
    int cublasIsamin(Handle handle, int n, float* x, int incx, int* result)
    int cublasIdamin(Handle handle, int n, double* x, int incx, int* result)
    int cublasIcamin(Handle handle, int n, cuComplex* x, int incx,
                     int* result)
    int cublasIzamin(Handle handle, int n, cuDoubleComplex* x, int incx,
                     int* result)
    int cublasSasum(Handle handle, int n, float* x, int incx, float* result)
    int cublasDasum(Handle handle, int n, double* x, int incx, double* result)
    int cublasScasum(Handle handle, int n, cuComplex* x, int incx,
                     float* result)
    int cublasDzasum(Handle handle, int n, cuDoubleComplex* x, int
incx, double* result)
    # (The line above closes the cublasDzasum declaration.)
    # Remaining BLAS Level 1 declarations: axpy, dot, nrm2 and scal families,
    # one entry per element type (float, double, cuComplex, cuDoubleComplex).
    int cublasSaxpy(Handle handle, int n, float* alpha, float* x, int incx,
                    float* y, int incy)
    int cublasDaxpy(Handle handle, int n, double* alpha, double* x, int incx,
                    double* y, int incy)
    int cublasCaxpy(Handle handle, int n, cuComplex* alpha, cuComplex* x,
                    int incx, cuComplex* y, int incy)
    int cublasZaxpy(Handle handle, int n, cuDoubleComplex* alpha,
                    cuDoubleComplex* x, int incx, cuDoubleComplex* y,
                    int incy)
    int cublasSdot(Handle handle, int n, float* x, int incx,
                   float* y, int incy, float* result)
    int cublasDdot(Handle handle, int n, double* x, int incx,
                   double* y, int incy, double* result)
    int cublasCdotu(Handle handle, int n, cuComplex* x, int incx,
                    cuComplex* y, int incy, cuComplex* result)
    int cublasCdotc(Handle handle, int n, cuComplex* x, int incx,
                    cuComplex* y, int incy, cuComplex* result)
    int cublasZdotu(Handle handle, int n, cuDoubleComplex* x, int incx,
                    cuDoubleComplex* y, int incy, cuDoubleComplex* result)
    int cublasZdotc(Handle handle, int n, cuDoubleComplex* x, int incx,
                    cuDoubleComplex* y, int incy, cuDoubleComplex* result)
    int cublasSnrm2(Handle handle, int n, float* x, int incx, float* result)
    int cublasDnrm2(Handle handle, int n, double* x, int incx, double* result)
    int cublasScnrm2(Handle handle, int n, cuComplex* x, int incx,
                     float* result)
    int cublasDznrm2(Handle handle, int n, cuDoubleComplex* x, int incx,
                     double* result)
    int cublasSscal(Handle handle, int n, float* alpha, float* x, int incx)
    int cublasDscal(Handle handle, int n, double* alpha, double* x, int incx)
    int cublasCscal(Handle handle, int n, cuComplex* alpha,
                    cuComplex* x, int incx)
    int cublasCsscal(Handle handle, int n, float* alpha,
                     cuComplex* x, int incx)
    int cublasZscal(Handle handle, int n, cuDoubleComplex* alpha,
                    cuDoubleComplex* x, int incx)
    int cublasZdscal(Handle handle, int n, double* alpha,
                     cuDoubleComplex* x, int incx)

    # BLAS Level 2
    int cublasSgemv(
        Handle handle, Operation trans, int m, int n, float* alpha,
        float* A, int lda, float* x, int incx,
float* beta, float* y, int incy)
    # (The line above closes the cublasSgemv declaration.)
    # Remaining BLAS Level 2 declarations: gemv, ger/geru/gerc and sbmv.
    # Only the sbmv entries are declared with const-qualified inputs.
    int cublasDgemv(
        Handle handle, Operation trans, int m, int n, double* alpha,
        double* A, int lda, double* x, int incx, double* beta,
        double* y, int incy)
    int cublasCgemv(
        Handle handle, Operation trans, int m, int n, cuComplex* alpha,
        cuComplex* A, int lda, cuComplex* x, int incx, cuComplex* beta,
        cuComplex* y, int incy)
    int cublasZgemv(
        Handle handle, Operation trans, int m, int n,
        cuDoubleComplex* alpha, cuDoubleComplex* A, int lda,
        cuDoubleComplex* x, int incx, cuDoubleComplex* beta,
        cuDoubleComplex* y, int incy)
    int cublasSger(
        Handle handle, int m, int n, float* alpha, float* x, int incx,
        float* y, int incy, float* A, int lda)
    int cublasDger(
        Handle handle, int m, int n, double* alpha, double* x,
        int incx, double* y, int incy, double* A, int lda)
    int cublasCgeru(
        Handle handle, int m, int n, cuComplex* alpha, cuComplex* x,
        int incx, cuComplex* y, int incy, cuComplex* A, int lda)
    int cublasCgerc(
        Handle handle, int m, int n, cuComplex* alpha, cuComplex* x,
        int incx, cuComplex* y, int incy, cuComplex* A, int lda)
    int cublasZgeru(
        Handle handle, int m, int n, cuDoubleComplex* alpha,
        cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy,
        cuDoubleComplex* A, int lda)
    int cublasZgerc(
        Handle handle, int m, int n, cuDoubleComplex* alpha,
        cuDoubleComplex* x, int incx, cuDoubleComplex* y, int incy,
        cuDoubleComplex* A, int lda)
    int cublasSsbmv(
        Handle handle, FillMode uplo, int n, int k,
        const float* alpha, const float* A, int lda,
        const float* x, int incx, const float* beta, float* y, int incy)
    int cublasDsbmv(
        Handle handle, FillMode uplo, int n, int k,
        const double* alpha, const double* A, int lda,
        const double* x, int incx, const double* beta, double* y, int incy)

    # BLAS Level 3
    int cublasSgemm(
        Handle handle, Operation transa, Operation transb, int m,
        int n, int k, float* alpha, float* A, int lda, float* B,
        int ldb, float* beta, float* C, int ldc)
    int cublasDgemm(
        Handle handle, Operation transa, Operation transb, int m, int n,
int k, double* alpha, double* A, int lda, double* B, int ldb,
        double* beta, double* C, int ldc)
    # (The lines above close the cublasDgemm declaration.)
    # gemm variants.  The Batched forms take pointer-to-pointer operands
    # (Aarray/Barray/Carray) plus batchCount; the StridedBatched forms take
    # single pointers plus a long long stride per operand.
    int cublasCgemm(
        Handle handle, Operation transa, Operation transb, int m,
        int n, int k, cuComplex* alpha, cuComplex* A, int lda,
        cuComplex* B, int ldb, cuComplex* beta, cuComplex* C,
        int ldc)
    int cublasZgemm(
        Handle handle, Operation transa, Operation transb, int m,
        int n, int k, cuDoubleComplex* alpha, cuDoubleComplex* A,
        int lda, cuDoubleComplex* B, int ldb,
        cuDoubleComplex* beta, cuDoubleComplex* C, int ldc)
    int cublasSgemmBatched(
        Handle handle, Operation transa, Operation transb, int m,
        int n, int k, const float* alpha, const float** Aarray,
        int lda, const float** Barray, int ldb, const float* beta,
        float** Carray, int ldc, int batchCount)
    int cublasDgemmBatched(
        Handle handle, Operation transa, Operation transb, int m,
        int n, int k, const double* alpha, const double** Aarray,
        int lda, const double** Barray, int ldb, const double* beta,
        double** Carray, int ldc, int batchCount)
    int cublasCgemmBatched(
        Handle handle, Operation transa, Operation transb, int m,
        int n, int k, const cuComplex* alpha, const cuComplex** Aarray,
        int lda, const cuComplex** Barray, int ldb, const cuComplex* beta,
        cuComplex** Carray, int ldc, int batchCount)
    int cublasZgemmBatched(
        Handle handle, Operation transa, Operation transb, int m,
        int n, int k, const cuDoubleComplex* alpha,
        const cuDoubleComplex** Aarray, int lda,
        const cuDoubleComplex** Barray, int ldb,
        const cuDoubleComplex* beta, cuDoubleComplex** Carray, int ldc,
        int batchCount)
    int cublasSgemmStridedBatched(
        Handle handle, Operation transa, Operation transb,
        int m, int n, int k, const float* alpha,
        const float* A, int lda, long long strideA,
        const float* B, int ldb, long long strideB,
        const float* beta,
        float* C, int ldc, long long strideC, int batchCount)
    int cublasDgemmStridedBatched(
        Handle handle, Operation transa, Operation transb,
        int m, int n, int k, const double* alpha,
        const double* A, int lda, long long strideA, const
double* B, int ldb, long long strideB,
        const double* beta,
        double* C, int ldc, long long strideC, int batchCount)
    # (The lines above close the cublasDgemmStridedBatched declaration.)
    # Remaining gemmStridedBatched entries, then the trsm and syrk families.
    int cublasCgemmStridedBatched(
        Handle handle, Operation transa, Operation transb,
        int m, int n, int k, const cuComplex* alpha,
        const cuComplex* A, int lda, long long strideA,
        const cuComplex* B, int ldb, long long strideB,
        const cuComplex* beta,
        cuComplex* C, int ldc, long long strideC, int batchCount)
    int cublasZgemmStridedBatched(
        Handle handle, Operation transa, Operation transb,
        int m, int n, int k, const cuDoubleComplex* alpha,
        const cuDoubleComplex* A, int lda, long long strideA,
        const cuDoubleComplex* B, int ldb, long long strideB,
        const cuDoubleComplex* beta,
        cuDoubleComplex* C, int ldc, long long strideC, int batchCount)
    # NOTE(review): the SideMode parameter is spelled `size` in the four trsm
    # declarations while the Python-level wrappers call it `side`; looks like
    # a typo, harmless because the name is only a label here -- confirm.
    int cublasStrsm(
        Handle handle, SideMode size, FillMode uplo, Operation trans,
        DiagType diag, int m, int n, const float* alpha,
        const float* A, int lda, float* B, int ldb)
    int cublasDtrsm(
        Handle handle, SideMode size, FillMode uplo, Operation trans,
        DiagType diag, int m, int n, const double* alpha,
        const double* A, int lda, double* B, int ldb)
    int cublasCtrsm(
        Handle handle, SideMode size, FillMode uplo, Operation trans,
        DiagType diag, int m, int n, const cuComplex* alpha,
        const cuComplex* A, int lda, cuComplex* B, int ldb)
    int cublasZtrsm(
        Handle handle, SideMode size, FillMode uplo, Operation trans,
        DiagType diag, int m, int n, const cuDoubleComplex* alpha,
        const cuDoubleComplex* A, int lda, cuDoubleComplex* B, int ldb)
    int cublasSsyrk(
        Handle handle, FillMode uplo, Operation trans, int n, int k,
        float* alpha, float* A, int lda,
        float* beta, float* C, int ldc)
    int cublasDsyrk(
        Handle handle, FillMode uplo, Operation trans, int n, int k,
        double* alpha, double* A, int lda,
        double* beta, double* C, int ldc)
    int cublasCsyrk(
        Handle handle, FillMode uplo, Operation trans, int n, int k,
        cuComplex* alpha, cuComplex* A, int lda,
        cuComplex* beta, cuComplex* C, int ldc)
    int cublasZsyrk(
        Handle handle, FillMode uplo, Operation trans,
int n, int k, cuDoubleComplex* alpha, cuDoubleComplex* A, int lda,
        cuDoubleComplex* beta, cuDoubleComplex* C, int ldc)
    # (The lines above close the cublasZsyrk declaration.)

    # BLAS extension
    # geam and dgmm families, cublasSgemmEx (operands passed as void* with a
    # DataType tag each), and the first getrfBatched entries.
    int cublasSgeam(
        Handle handle, Operation transa, Operation transb, int m, int n,
        const float* alpha, const float* A, int lda,
        const float* beta, const float* B, int ldb,
        float* C, int ldc)
    int cublasDgeam(
        Handle handle, Operation transa, Operation transb, int m, int n,
        const double* alpha, const double* A, int lda,
        const double* beta, const double* B, int ldb,
        double* C, int ldc)
    int cublasCgeam(
        Handle handle, Operation transa, Operation transb, int m, int n,
        const cuComplex* alpha, const cuComplex* A, int lda,
        const cuComplex* beta, const cuComplex* B, int ldb,
        cuComplex* C, int ldc)
    int cublasZgeam(
        Handle handle, Operation transa, Operation transb, int m, int n,
        const cuDoubleComplex* alpha, const cuDoubleComplex* A, int lda,
        const cuDoubleComplex* beta, const cuDoubleComplex* B, int ldb,
        cuDoubleComplex* C, int ldc)
    int cublasSdgmm(
        Handle handle, SideMode mode, int m, int n, const float* A, int lda,
        const float* x, int incx, float* C, int ldc)
    int cublasDdgmm(
        Handle handle, SideMode mode, int m, int n, const double* A, int lda,
        const double* x, int incx, double* C, int ldc)
    int cublasCdgmm(
        Handle handle, SideMode mode, int m, int n, const cuComplex* A,
        int lda, const cuComplex* x, int incx, cuComplex* C, int ldc)
    int cublasZdgmm(
        Handle handle, SideMode mode, int m, int n, const cuDoubleComplex* A,
        int lda, const cuDoubleComplex* x, int incx, cuDoubleComplex* C,
        int ldc)
    int cublasSgemmEx(
        Handle handle, Operation transa,
        Operation transb, int m, int n, int k,
        const float *alpha, const void *A, DataType Atype,
        int lda, const void *B, DataType Btype, int ldb,
        const float *beta, void *C, DataType Ctype, int ldc)
    int cublasSgetrfBatched(
        Handle handle, int n, float **Aarray, int lda,
        int *PivotArray, int *infoArray, int batchSize)
    int cublasDgetrfBatched(
        Handle handle, int n, double **Aarray, int lda,
        int *PivotArray, int
*infoArray, int batchSize)
    # (The line above closes the cublasDgetrfBatched declaration.)
    # Batched getrf / getrs / getri declarations, then cublasGemmEx, which
    # takes its compute type as a DataType.
    int cublasCgetrfBatched(
        Handle handle, int n, cuComplex **Aarray, int lda,
        int *PivotArray, int *infoArray, int batchSize)
    int cublasZgetrfBatched(
        Handle handle, int n, cuDoubleComplex **Aarray, int lda,
        int *PivotArray, int *infoArray, int batchSize)

    int cublasSgetrsBatched(
        Handle handle, Operation trans, int n, int nrhs,
        const float **Aarray, int lda, const int *devIpiv,
        float **Barray, int ldb, int *info, int batchSize)
    int cublasDgetrsBatched(
        Handle handle, Operation trans, int n, int nrhs,
        const double **Aarray, int lda, const int *devIpiv,
        double **Barray, int ldb, int *info, int batchSize)
    int cublasCgetrsBatched(
        Handle handle, Operation trans, int n, int nrhs,
        const cuComplex **Aarray, int lda, const int *devIpiv,
        cuComplex **Barray, int ldb, int *info, int batchSize)
    int cublasZgetrsBatched(
        Handle handle, Operation trans, int n, int nrhs,
        const cuDoubleComplex **Aarray, int lda, const int *devIpiv,
        cuDoubleComplex **Barray, int ldb, int *info, int batchSize)

    int cublasSgetriBatched(
        Handle handle, int n, const float **Aarray, int lda,
        int *PivotArray, float *Carray[], int ldc, int *infoArray,
        int batchSize)
    int cublasDgetriBatched(
        Handle handle, int n, const double **Aarray, int lda,
        int *PivotArray, double *Carray[], int ldc, int *infoArray,
        int batchSize)
    int cublasCgetriBatched(
        Handle handle, int n, const cuComplex **Aarray, int lda,
        int *PivotArray, cuComplex *Carray[], int ldc, int *infoArray,
        int batchSize)
    int cublasZgetriBatched(
        Handle handle, int n, const cuDoubleComplex **Aarray, int lda,
        int *PivotArray, cuDoubleComplex *Carray[], int ldc, int *infoArray,
        int batchSize)
    int cublasGemmEx(
        Handle handle, Operation transa, Operation transb,
        int m, int n, int k,
        const void *alpha,
        const void *A, DataType Atype, int lda,
        const void *B, DataType Btype, int ldb,
        const void *beta,
        void *C, DataType Ctype, int ldc,
        DataType computetype, GemmAlgo algo)
    int cublasGemmEx_v11(
        Handle handle, Operation transa, Operation
transb,
        int m, int n, int k,
        const void *alpha,
        const void *A, DataType Atype, int lda,
        const void *B, DataType Btype, int ldb,
        const void *beta,
        void *C, DataType Ctype, int ldc,
        ComputeType computetype, GemmAlgo algo)
    # (The lines above close the cublasGemmEx_v11 declaration.)
    # The *_v11 declarations differ from their un-suffixed counterparts only
    # in taking ComputeType instead of DataType for `computetype`.
    int cublasGemmStridedBatchedEx(
        Handle handle, Operation transa, Operation transb,
        int m, int n, int k,
        const void *alpha,
        const void *A, DataType Atype, int lda, long long strideA,
        const void *B, DataType Btype, int ldb, long long strideB,
        const void *beta,
        void *C, DataType Ctype, int ldc, long long strideC,
        int batchCount, DataType computetype, GemmAlgo algo)
    int cublasGemmStridedBatchedEx_v11(
        Handle handle, Operation transa, Operation transb,
        int m, int n, int k,
        const void *alpha,
        const void *A, DataType Atype, int lda, long long strideA,
        const void *B, DataType Btype, int ldb, long long strideB,
        const void *beta,
        void *C, DataType Ctype, int ldc, long long strideC,
        int batchCount, ComputeType computetype, GemmAlgo algo)
    int cublasStpttr(
        Handle handle, FillMode uplo, int n, const float *AP, float *A,
        int lda)
    int cublasDtpttr(
        Handle handle, FillMode uplo, int n, const double *AP, double *A,
        int lda)
    int cublasStrttp(
        Handle handle, FillMode uplo, int n, const float *A, int lda,
        float *AP)
    int cublasDtrttp(
        Handle handle, FillMode uplo, int n, const double *A, int lda,
        double *AP)


###############################################################################
# Error handling
###############################################################################

# Integer status code -> symbolic name, used to build the CUBLASError message
# when not running in a HIP environment.  The keys are not contiguous.
cdef dict STATUS = {
    0: 'CUBLAS_STATUS_SUCCESS',
    1: 'CUBLAS_STATUS_NOT_INITIALIZED',
    3: 'CUBLAS_STATUS_ALLOC_FAILED',
    7: 'CUBLAS_STATUS_INVALID_VALUE',
    8: 'CUBLAS_STATUS_ARCH_MISMATCH',
    11: 'CUBLAS_STATUS_MAPPING_ERROR',
    13: 'CUBLAS_STATUS_EXECUTION_FAILED',
    14: 'CUBLAS_STATUS_INTERNAL_ERROR',
    15: 'CUBLAS_STATUS_NOT_SUPPORTED',
    16: 'CUBLAS_STATUS_LICENSE_ERROR',
}


# Same table for HIP environments; the numbering differs from STATUS.
cdef dict HIP_STATUS = {
    0: 'HIPBLAS_STATUS_SUCCESS',
    1: 'HIPBLAS_STATUS_NOT_INITIALIZED',
    2: 'HIPBLAS_STATUS_ALLOC_FAILED',
    3:
'HIPBLAS_STATUS_INVALID_VALUE',
    4: 'HIPBLAS_STATUS_MAPPING_ERROR',
    5: 'HIPBLAS_STATUS_EXECUTION_FAILED',
    6: 'HIPBLAS_STATUS_INTERNAL_ERROR',
    7: 'HIPBLAS_STATUS_NOT_SUPPORTED',
    8: 'HIPBLAS_STATUS_ARCH_MISMATCH',
    9: 'HIPBLAS_STATUS_HANDLE_IS_NULLPTR',
}


# Exception raised by check_status for any nonzero status.  The integer code
# is kept on `.status`; the message is the symbolic name from HIP_STATUS or
# STATUS, chosen by runtime._is_hip_environment.
class CUBLASError(RuntimeError):

    def __init__(self, status):
        self.status = status
        cdef str err
        # NOTE(review): plain dict indexing -- a status value missing from
        # the table raises KeyError here instead of producing a CUBLASError.
        if runtime._is_hip_environment:
            err = HIP_STATUS[status]
        else:
            err = STATUS[status]
        super(CUBLASError, self).__init__(err)

    # Rebuild from the status code alone, so pickling/copying goes through
    # __init__ and regenerates the message.
    def __reduce__(self):
        return (type(self), (self.status,))


# Raise CUBLASError when `status` is nonzero; otherwise do nothing.
@cython.profile(False)
cpdef inline check_status(int status):
    if status != 0:
        raise CUBLASError(status)


###############################################################################
# Context
###############################################################################

# NOTE(review): throughout the wrappers below, intptr_t / size_t arguments are
# handed to parameters declared as Handle or pointer types with no visible
# cast -- confirm the explicit casts were not lost from this copy.

# Create a handle with cublasCreate and return it as an intptr_t.
cpdef intptr_t create() except? 0:
    cdef Handle handle
    with nogil:
        status = cublasCreate(&handle)
    check_status(status)
    return handle


# Destroy a handle with cublasDestroy; raises CUBLASError on failure.
cpdef destroy(intptr_t handle):
    with nogil:
        status = cublasDestroy(handle)
    check_status(status)


# Return the value cublasGetVersion writes into `version`.
cpdef int getVersion(intptr_t handle) except? -1:
    cdef int version
    with nogil:
        status = cublasGetVersion(handle, &version)
    check_status(status)
    return version


# Return the pointer mode reported by cublasGetPointerMode.
cpdef int getPointerMode(intptr_t handle) except? -1:
    cdef PointerMode mode
    with nogil:
        status = cublasGetPointerMode(handle, &mode)
    check_status(status)
    return mode


# Forward `mode` to cublasSetPointerMode.
cpdef setPointerMode(intptr_t handle, int mode):
    with nogil:
        status = cublasSetPointerMode(handle, mode)
    check_status(status)


###############################################################################
# Stream
###############################################################################

# Bind `stream` to `handle` with cublasSetStream.  Outside HIP environments
# this first refuses (NotImplementedError) when the stream is capturing.
cpdef setStream(intptr_t handle, size_t stream):
    # TODO(leofang): It seems most of cuBLAS APIs support stream capture (as of
    # CUDA 11.5) under certain conditions, see
    # https://docs.nvidia.com/cuda/cublas/index.html#CUDA-graphs
    # Before we come up with a robust strategy to test the support conditions,
    # we disable this functionality.
    if not runtime._is_hip_environment and runtime.streamIsCapturing(stream):
        raise NotImplementedError(
            'calling cuBLAS API during stream capture is currently '
            'unsupported')

    with nogil:
        status = cublasSetStream(handle, stream)
    check_status(status)


# Return the stream reported by cublasGetStream as a size_t.
cpdef size_t getStream(intptr_t handle) except? 0:
    cdef Stream stream
    with nogil:
        status = cublasGetStream(handle, &stream)
    check_status(status)
    return stream


# Helper used by the BLAS wrappers: binds the handle to
# stream_module.get_current_stream_ptr() through setStream (so the
# stream-capture check above applies as well).
cdef _setStream(intptr_t handle):
    """Set current stream"""
    setStream(handle, stream_module.get_current_stream_ptr())


###############################################################################
# Math Mode
###############################################################################

# Forward `mode` to cublasSetMathMode.
cpdef setMathMode(intptr_t handle, int mode):
    with nogil:
        status = cublasSetMathMode(handle, mode)
    check_status(status)


# Return the math mode reported by cublasGetMathMode.
cpdef int getMathMode(intptr_t handle) except? -1:
    cdef Math mode
    with nogil:
        status = cublasGetMathMode(handle, &mode)
    check_status(status)
    return mode


###############################################################################
# BLAS Level 1
###############################################################################

# Each wrapper in this section follows the same steps: bind the handle to the
# current stream with _setStream, call the cuBLAS routine of the same name
# with the GIL released, then pass the returned status to check_status.
# The size_t parameters go where the C declarations expect pointers
# (presumably raw addresses -- confirm against callers).

cpdef isamax(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIsamax(
            handle, n, x, incx, result)
    check_status(status)


cpdef idamax(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIdamax(
            handle, n, x, incx, result)
    check_status(status)


cpdef icamax(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIcamax(
            handle, n, x, incx, result)
    check_status(status)


cpdef izamax(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIzamax(
            handle, n, x, incx, result)
    check_status(status)


cpdef isamin(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIsamin(
            handle, n, x, incx,
result)
    check_status(status)


# Each wrapper in this span follows the same steps: bind the handle to the
# current stream with _setStream, call the cuBLAS routine of the same name
# with the GIL released, then pass the returned status to check_status.
# The size_t parameters go where the C declarations expect pointers
# (presumably raw addresses -- confirm against callers).

cpdef idamin(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIdamin(
            handle, n, x, incx, result)
    check_status(status)


cpdef icamin(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIcamin(
            handle, n, x, incx, result)
    check_status(status)


cpdef izamin(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasIzamin(
            handle, n, x, incx, result)
    check_status(status)


cpdef sasum(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasSasum(
            handle, n, x, incx, result)
    check_status(status)


cpdef dasum(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasDasum(
            handle, n, x, incx, result)
    check_status(status)


cpdef scasum(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasScasum(
            handle, n, x, incx, result)
    check_status(status)


cpdef dzasum(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasDzasum(
            handle, n, x, incx, result)
    check_status(status)


cpdef saxpy(intptr_t handle, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasSaxpy(
            handle, n, alpha, x, incx, y, incy)
    check_status(status)


cpdef daxpy(intptr_t handle, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasDaxpy(
            handle, n, alpha, x, incx, y, incy)
    check_status(status)


cpdef caxpy(intptr_t handle, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasCaxpy(
            handle, n, alpha, x, incx, y, incy)
    check_status(status)


cpdef zaxpy(intptr_t handle, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasZaxpy(
            handle, n, alpha, x, incx, y, incy)
    check_status(status)


cpdef sdot(intptr_t handle, int n, size_t x, int incx, size_t y, int incy,
           size_t result):
    _setStream(handle)
    with nogil:
        status = cublasSdot(
            handle, n, x, incx, y, incy, result)
    check_status(status)


cpdef ddot(intptr_t handle, int n, size_t x, int incx, size_t y, int incy,
           size_t result):
    _setStream(handle)
    with nogil:
        status = cublasDdot(
            handle, n, x, incx, y, incy, result)
    check_status(status)


cpdef cdotu(intptr_t handle, int n, size_t x, int incx, size_t y, int incy,
            size_t result):
    _setStream(handle)
    with nogil:
        status = cublasCdotu(
            handle, n, x, incx, y, incy, result)
    check_status(status)


cpdef cdotc(intptr_t handle, int n, size_t x, int incx, size_t y, int incy,
            size_t result):
    _setStream(handle)
    with nogil:
        status = cublasCdotc(
            handle, n, x, incx, y, incy, result)
    check_status(status)


cpdef zdotu(intptr_t handle, int n, size_t x, int incx, size_t y, int incy,
            size_t result):
    _setStream(handle)
    with nogil:
        status = cublasZdotu(
            handle, n, x, incx, y, incy, result)
    check_status(status)


cpdef zdotc(intptr_t handle, int n, size_t x, int incx, size_t y, int incy,
            size_t result):
    # Call cublasZdotc on the current stream; a nonzero status is raised as
    # CUBLASError by check_status.
    #
    # Fix: bind the handle to the current stream first, exactly like every
    # sibling wrapper.  This call was missing, so zdotc alone would run on
    # whichever stream had last been set on the handle and also skipped the
    # stream-capture guard that setStream applies.
    _setStream(handle)
    with nogil:
        status = cublasZdotc(
            handle, n, x, incx, y, incy, result)
    check_status(status)


cpdef snrm2(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasSnrm2(handle, n, x, incx,
                             result)
    check_status(status)


cpdef dnrm2(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasDnrm2(handle, n, x, incx,
                             result)
    check_status(status)


cpdef scnrm2(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasScnrm2(handle, n, x, incx,
                              result)
    check_status(status)


cpdef dznrm2(intptr_t handle, int n, size_t x, int incx, size_t result):
    _setStream(handle)
    with nogil:
        status = cublasDznrm2(handle, n, x, incx,
                              result)
    check_status(status)


cpdef sscal(intptr_t handle, int n,
size_t alpha, size_t x, int incx):
    _setStream(handle)
    with nogil:
        status = cublasSscal(handle, n, alpha, x, incx)
    check_status(status)


# Each wrapper in this span follows the same steps: bind the handle to the
# current stream with _setStream, call the cuBLAS routine of the same name
# with the GIL released, then pass the returned status to check_status.
# The size_t parameters go where the C declarations expect pointers
# (presumably raw addresses -- confirm against callers).

cpdef dscal(intptr_t handle, int n, size_t alpha, size_t x, int incx):
    _setStream(handle)
    with nogil:
        status = cublasDscal(handle, n, alpha, x, incx)
    check_status(status)


cpdef cscal(intptr_t handle, int n, size_t alpha, size_t x, int incx):
    _setStream(handle)
    with nogil:
        status = cublasCscal(handle, n, alpha, x, incx)
    check_status(status)


cpdef csscal(intptr_t handle, int n, size_t alpha, size_t x, int incx):
    _setStream(handle)
    with nogil:
        status = cublasCsscal(handle, n, alpha, x, incx)
    check_status(status)


cpdef zscal(intptr_t handle, int n, size_t alpha, size_t x, int incx):
    _setStream(handle)
    with nogil:
        status = cublasZscal(handle, n, alpha, x, incx)
    check_status(status)


cpdef zdscal(intptr_t handle, int n, size_t alpha, size_t x, int incx):
    _setStream(handle)
    with nogil:
        status = cublasZdscal(handle, n, alpha, x, incx)
    check_status(status)


###############################################################################
# BLAS Level 2
###############################################################################

# `trans` and `uplo` arrive as plain ints and are passed to parameters
# declared as Operation / FillMode in the extern block.

cpdef sgemv(intptr_t handle, int trans, int m, int n, size_t alpha, size_t A,
            int lda, size_t x, int incx, size_t beta, size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasSgemv(
            handle, trans, m, n, alpha,
            A, lda, x, incx, beta, y, incy)
    check_status(status)


cpdef dgemv(intptr_t handle, int trans, int m, int n, size_t alpha, size_t A,
            int lda, size_t x, int incx, size_t beta, size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasDgemv(
            handle, trans, m, n, alpha,
            A, lda, x, incx, beta, y, incy)
    check_status(status)


cpdef cgemv(intptr_t handle, int trans, int m, int n, size_t alpha, size_t A,
            int lda, size_t x, int incx, size_t beta, size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasCgemv(
            handle, trans, m, n, alpha,
            A, lda, x, incx, beta, y, incy)
    check_status(status)


cpdef zgemv(intptr_t handle, int trans, int m, int n, size_t alpha, size_t A,
            int lda, size_t x, int incx, size_t beta, size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasZgemv(
            handle, trans, m, n, alpha,
            A, lda, x, incx, beta, y, incy)
    check_status(status)


cpdef sger(intptr_t handle, int m, int n, size_t alpha, size_t x, int incx,
           size_t y, int incy, size_t A, int lda):
    _setStream(handle)
    with nogil:
        status = cublasSger(
            handle, m, n, alpha, x, incx, y, incy,
            A, lda)
    check_status(status)


cpdef dger(intptr_t handle, int m, int n, size_t alpha, size_t x, int incx,
           size_t y, int incy, size_t A, int lda):
    _setStream(handle)
    with nogil:
        status = cublasDger(
            handle, m, n, alpha, x, incx, y, incy,
            A, lda)
    check_status(status)


cpdef cgeru(intptr_t handle, int m, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy, size_t A, int lda):
    _setStream(handle)
    with nogil:
        status = cublasCgeru(
            handle, m, n, alpha, x, incx, y, incy,
            A, lda)
    check_status(status)


cpdef cgerc(intptr_t handle, int m, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy, size_t A, int lda):
    _setStream(handle)
    with nogil:
        status = cublasCgerc(
            handle, m, n, alpha, x, incx, y, incy,
            A, lda)
    check_status(status)


cpdef zgeru(intptr_t handle, int m, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy, size_t A, int lda):
    _setStream(handle)
    with nogil:
        status = cublasZgeru(
            handle, m, n, alpha, x, incx, y, incy,
            A, lda)
    check_status(status)


cpdef zgerc(intptr_t handle, int m, int n, size_t alpha, size_t x, int incx,
            size_t y, int incy, size_t A, int lda):
    _setStream(handle)
    with nogil:
        status = cublasZgerc(
            handle, m, n, alpha, x, incx, y, incy,
            A, lda)
    check_status(status)


cpdef ssbmv(intptr_t handle, int uplo, int n, int k,
            size_t alpha, size_t A, int lda,
            size_t x, int incx, size_t beta, size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasSsbmv(
            handle, uplo, n, k,
            alpha, A, lda,
            x, incx, beta, y, incy)
    check_status(status)


cpdef dsbmv(intptr_t handle, int
uplo, int n, int k,
            size_t alpha, size_t A, int lda,
            size_t x, int incx, size_t beta, size_t y, int incy):
    _setStream(handle)
    with nogil:
        status = cublasDsbmv(
            handle, uplo, n, k,
            alpha, A, lda,
            x, incx, beta, y, incy)
    check_status(status)


###############################################################################
# BLAS Level 3
###############################################################################

# Each wrapper in this section follows the same steps: bind the handle to the
# current stream with _setStream, call the cuBLAS routine of the same name
# with the GIL released, then pass the returned status to check_status.
# The size_t parameters go where the C declarations expect pointers
# (presumably raw addresses -- confirm against callers); transa/transb are
# plain ints passed to Operation parameters.

cpdef sgemm(intptr_t handle, int transa, int transb,
            int m, int n, int k, size_t alpha, size_t A, int lda,
            size_t B, int ldb, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasSgemm(
            handle, transa, transb, m, n, k,
            alpha, A, lda, B, ldb, beta,
            C, ldc)
    check_status(status)


cpdef dgemm(intptr_t handle, int transa, int transb,
            int m, int n, int k, size_t alpha, size_t A, int lda,
            size_t B, int ldb, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasDgemm(
            handle, transa, transb, m, n, k,
            alpha, A, lda, B, ldb, beta,
            C, ldc)
    check_status(status)


cpdef cgemm(intptr_t handle, int transa, int transb,
            int m, int n, int k, size_t alpha, size_t A, int lda,
            size_t B, int ldb, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasCgemm(
            handle, transa, transb, m, n, k,
            alpha, A, lda, B, ldb,
            beta, C, ldc)
    check_status(status)


cpdef zgemm(intptr_t handle, int transa, int transb,
            int m, int n, int k, size_t alpha, size_t A, int lda,
            size_t B, int ldb, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasZgemm(
            handle, transa, transb, m, n, k,
            alpha, A, lda, B, ldb,
            beta, C, ldc)
    check_status(status)


# Batched variants: Aarray/Barray/Carray go to the pointer-to-pointer
# parameters of the C declarations, together with batchCount.

cpdef sgemmBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha, size_t Aarray, int lda, size_t Barray, int ldb,
        size_t beta, size_t Carray, int ldc, int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasSgemmBatched(
            handle, transa, transb, m, n, k,
            alpha, Aarray, lda,
            Barray, ldb, beta,
            Carray, ldc, batchCount)
    check_status(status)


cpdef dgemmBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha, size_t Aarray, int lda, size_t Barray, int ldb,
        size_t beta, size_t Carray, int ldc, int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasDgemmBatched(
            handle, transa, transb, m, n, k,
            alpha, Aarray, lda,
            Barray, ldb, beta,
            Carray, ldc, batchCount)
    check_status(status)


cpdef cgemmBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha, size_t Aarray, int lda, size_t Barray, int ldb,
        size_t beta, size_t Carray, int ldc, int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasCgemmBatched(
            handle, transa, transb, m, n, k,
            alpha, Aarray, lda,
            Barray, ldb, beta,
            Carray, ldc, batchCount)
    check_status(status)


cpdef zgemmBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha, size_t Aarray, int lda, size_t Barray, int ldb,
        size_t beta, size_t Carray, int ldc, int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasZgemmBatched(
            handle, transa, transb, m, n, k,
            alpha, Aarray, lda,
            Barray, ldb, beta,
            Carray, ldc, batchCount)
    check_status(status)


# Strided-batched variants: one pointer per operand plus a long long stride
# (strideA/strideB/strideC) and batchCount.

cpdef sgemmStridedBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha,
        size_t A, int lda, long long strideA,
        size_t B, int ldb, long long strideB,
        size_t beta,
        size_t C, int ldc, long long strideC,
        int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasSgemmStridedBatched(
            handle, transa, transb, m, n, k,
            alpha,
            A, lda, strideA,
            B, ldb, strideB,
            beta,
            C, ldc, strideC,
            batchCount)
    check_status(status)


cpdef dgemmStridedBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha,
        size_t A, int lda, long long strideA,
        size_t B, int ldb, long long strideB,
        size_t beta,
        size_t C, int ldc, long long strideC,
        int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasDgemmStridedBatched(
            handle, transa, transb, m, n, k,
            alpha,
            A, lda, strideA,
            B, ldb, strideB,
            beta,
            C, ldc,
strideC,
            batchCount)
    check_status(status)


# Each wrapper in this span follows the same steps: bind the handle to the
# current stream with _setStream, call the cuBLAS routine of the same name
# with the GIL released, then pass the returned status to check_status.
# The size_t parameters go where the C declarations expect pointers
# (presumably raw addresses -- confirm against callers).

cpdef cgemmStridedBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha,
        size_t A, int lda, long long strideA,
        size_t B, int ldb, long long strideB,
        size_t beta,
        size_t C, int ldc, long long strideC,
        int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasCgemmStridedBatched(
            handle, transa, transb, m, n, k,
            alpha,
            A, lda, strideA,
            B, ldb, strideB,
            beta,
            C, ldc, strideC,
            batchCount)
    check_status(status)


cpdef zgemmStridedBatched(
        intptr_t handle, int transa, int transb, int m, int n, int k,
        size_t alpha,
        size_t A, int lda, long long strideA,
        size_t B, int ldb, long long strideB,
        size_t beta,
        size_t C, int ldc, long long strideC,
        int batchCount):
    _setStream(handle)
    with nogil:
        status = cublasZgemmStridedBatched(
            handle, transa, transb, m, n, k,
            alpha,
            A, lda, strideA,
            B, ldb, strideB,
            beta,
            C, ldc, strideC,
            batchCount)
    check_status(status)


# trsm family: side/uplo/trans/diag are plain ints passed to the SideMode,
# FillMode, Operation and DiagType parameters of the C declarations.

cpdef strsm(
        intptr_t handle, int side, int uplo, int trans, int diag,
        int m, int n, size_t alpha, size_t Aarray, int lda,
        size_t Barray, int ldb):
    _setStream(handle)
    with nogil:
        status = cublasStrsm(
            handle, side, uplo, trans,
            diag, m, n, alpha,
            Aarray, lda, Barray, ldb)
    check_status(status)


cpdef dtrsm(
        intptr_t handle, int side, int uplo, int trans, int diag,
        int m, int n, size_t alpha, size_t Aarray, int lda,
        size_t Barray, int ldb):
    _setStream(handle)
    with nogil:
        status = cublasDtrsm(
            handle, side, uplo, trans,
            diag, m, n, alpha,
            Aarray, lda, Barray, ldb)
    check_status(status)


cpdef ctrsm(
        intptr_t handle, int side, int uplo, int trans, int diag,
        int m, int n, size_t alpha, size_t Aarray, int lda,
        size_t Barray, int ldb):
    _setStream(handle)
    with nogil:
        status = cublasCtrsm(
            handle, side, uplo, trans,
            diag, m, n, alpha,
            Aarray, lda, Barray, ldb)
    check_status(status)


cpdef ztrsm(
        intptr_t handle, int side, int uplo, int trans, int diag,
        int m, int n, size_t alpha, size_t Aarray, int lda,
        size_t Barray, int ldb):
    _setStream(handle)
    with nogil:
        status = cublasZtrsm(
            handle, side, uplo, trans,
            diag, m, n, alpha,
            Aarray, lda, Barray, ldb)
    check_status(status)


cpdef ssyrk(intptr_t handle, int uplo, int trans, int n, int k,
            size_t alpha, size_t A, int lda, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasSsyrk(
            handle, uplo, trans, n, k,
            alpha, A, lda,
            beta, C, ldc)
    check_status(status)


cpdef dsyrk(intptr_t handle, int uplo, int trans, int n, int k,
            size_t alpha, size_t A, int lda, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasDsyrk(
            handle, uplo, trans, n, k,
            alpha, A, lda,
            beta, C, ldc)
    check_status(status)


cpdef csyrk(intptr_t handle, int uplo, int trans, int n, int k,
            size_t alpha, size_t A, int lda, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasCsyrk(
            handle, uplo, trans, n, k,
            alpha, A, lda,
            beta, C, ldc)
    check_status(status)


cpdef zsyrk(intptr_t handle, int uplo, int trans, int n, int k,
            size_t alpha, size_t A, int lda, size_t beta, size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasZsyrk(
            handle, uplo, trans, n, k,
            alpha, A, lda,
            beta, C, ldc)
    check_status(status)


###############################################################################
# BLAS extension
###############################################################################

cpdef sgeam(intptr_t handle, int transa, int transb, int m, int n,
            size_t alpha, size_t A, int lda, size_t beta, size_t B, int ldb,
            size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasSgeam(
            handle, transa, transb, m, n,
            alpha, A, lda,
            beta, B, ldb,
            C, ldc)
    check_status(status)


cpdef dgeam(intptr_t handle, int transa, int transb, int m, int n,
            size_t alpha, size_t A, int lda, size_t beta, size_t B, int ldb,
            size_t C, int ldc):
    _setStream(handle)
    with nogil:
        status = cublasDgeam(
            handle, transa, transb, m, n,
            alpha, A, lda,
            beta, B, ldb,
            C, ldc)
    check_status(status)


cpdef cgeam(intptr_t handle, int transa, int transb, int m, int n,
            size_t alpha, size_t A,
int lda, size_t beta, size_t B, int ldb, size_t C, int ldc): _setStream(handle) with nogil: status = cublasCgeam( handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc) check_status(status) cpdef zgeam(intptr_t handle, int transa, int transb, int m, int n, size_t alpha, size_t A, int lda, size_t beta, size_t B, int ldb, size_t C, int ldc): _setStream(handle) with nogil: status = cublasZgeam( handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc) check_status(status) cpdef sdgmm(intptr_t handle, int mode, int m, int n, size_t A, int lda, size_t x, int incx, size_t C, int ldc): _setStream(handle) with nogil: status = cublasSdgmm( handle, mode, m, n, A, lda, x, incx, C, ldc) check_status(status) cpdef ddgmm(intptr_t handle, int mode, int m, int n, size_t A, int lda, size_t x, int incx, size_t C, int ldc): _setStream(handle) with nogil: status = cublasDdgmm( handle, mode, m, n, A, lda, x, incx, C, ldc) check_status(status) cpdef cdgmm(intptr_t handle, int mode, int m, int n, size_t A, int lda, size_t x, int incx, size_t C, int ldc): _setStream(handle) with nogil: status = cublasCdgmm( handle, mode, m, n, A, lda, x, incx, C, ldc) check_status(status) cpdef zdgmm(intptr_t handle, int mode, int m, int n, size_t A, int lda, size_t x, int incx, size_t C, int ldc): _setStream(handle) with nogil: status = cublasZdgmm( handle, mode, m, n, A, lda, x, incx, C, ldc) check_status(status) cpdef sgemmEx( intptr_t handle, int transa, int transb, int m, int n, int k, size_t alpha, size_t A, int Atype, int lda, size_t B, int Btype, int ldb, size_t beta, size_t C, int Ctype, int ldc): _setStream(handle) with nogil: status = cublasSgemmEx( handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc) check_status(status) cpdef sgetrfBatched(intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasSgetrfBatched( handle, n, Aarray, lda, PivotArray, 
infoArray, batchSize) check_status(status) cpdef dgetrfBatched(intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasDgetrfBatched( handle, n, Aarray, lda, PivotArray, infoArray, batchSize) check_status(status) cpdef cgetrfBatched(intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasCgetrfBatched( handle, n, Aarray, lda, PivotArray, infoArray, batchSize) check_status(status) cpdef zgetrfBatched(intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasZgetrfBatched( handle, n, Aarray, lda, PivotArray, infoArray, batchSize) check_status(status) cpdef int sgetrsBatched(intptr_t handle, int trans, int n, int nrhs, size_t Aarray, int lda, size_t devIpiv, size_t Barray, int ldb, size_t info, int batchSize): _setStream(handle) with nogil: status = cublasSgetrsBatched( handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize) check_status(status) cpdef int dgetrsBatched(intptr_t handle, int trans, int n, int nrhs, size_t Aarray, int lda, size_t devIpiv, size_t Barray, int ldb, size_t info, int batchSize): _setStream(handle) with nogil: status = cublasDgetrsBatched( handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize) check_status(status) cpdef int cgetrsBatched(intptr_t handle, int trans, int n, int nrhs, size_t Aarray, int lda, size_t devIpiv, size_t Barray, int ldb, size_t info, int batchSize): _setStream(handle) with nogil: status = cublasCgetrsBatched( handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize) check_status(status) cpdef int zgetrsBatched(intptr_t handle, int trans, int n, int nrhs, size_t Aarray, int lda, size_t devIpiv, size_t Barray, int ldb, size_t info, int batchSize): _setStream(handle) with nogil: status = 
cublasZgetrsBatched( handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize) check_status(status) cpdef sgetriBatched( intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t Carray, int ldc, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasSgetriBatched( handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize) check_status(status) cpdef dgetriBatched( intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t Carray, int ldc, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasDgetriBatched( handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize) check_status(status) cpdef cgetriBatched( intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t Carray, int ldc, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasCgetriBatched( handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize) check_status(status) cpdef zgetriBatched( intptr_t handle, int n, size_t Aarray, int lda, size_t PivotArray, size_t Carray, int ldc, size_t infoArray, int batchSize): _setStream(handle) with nogil: status = cublasZgetriBatched( handle, n, Aarray, lda, PivotArray, Carray, ldc, infoArray, batchSize) check_status(status) cpdef gemmEx( intptr_t handle, int transa, int transb, int m, int n, int k, size_t alpha, size_t A, int Atype, int lda, size_t B, int Btype, int ldb, size_t beta, size_t C, int Ctype, int ldc, int computeType, int algo): _setStream(handle) with nogil: if computeType >= CUBLAS_COMPUTE_16F: status = cublasGemmEx_v11( handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc, computeType, algo) else: status = cublasGemmEx( handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc, computeType, algo) check_status(status) cpdef gemmStridedBatchedEx( intptr_t handle, int transa, int transb, int m, int n, int k, size_t 
alpha, size_t A, int Atype, int lda, long long strideA, size_t B, int Btype, int ldb, long long strideB, size_t beta, size_t C, int Ctype, int ldc, long long strideC, int batchCount, int computeType, int algo): _setStream(handle) with nogil: if computeType >= CUBLAS_COMPUTE_16F: status = cublasGemmStridedBatchedEx_v11( handle, transa, transb, m, n, k, alpha, A, Atype, lda, strideA, B, Btype, ldb, strideB, beta, C, Ctype, ldc, strideC, batchCount, computeType, algo) else: status = cublasGemmStridedBatchedEx( handle, transa, transb, m, n, k, alpha, A, Atype, lda, strideA, B, Btype, ldb, strideB, beta, C, Ctype, ldc, strideC, batchCount, computeType, algo) check_status(status) cpdef stpttr(intptr_t handle, int uplo, int n, size_t AP, size_t A, int lda): _setStream(handle) with nogil: status = cublasStpttr(handle, uplo, n, AP, A, lda) check_status(status) cpdef dtpttr(intptr_t handle, int uplo, int n, size_t AP, size_t A, int lda): _setStream(handle) with nogil: status = cublasDtpttr(handle, uplo, n, AP, A, lda) check_status(status) cpdef strttp(intptr_t handle, int uplo, int n, size_t A, int lda, size_t AP): _setStream(handle) with nogil: status = cublasStrttp(handle, uplo, n, A, lda, AP) check_status(status) cpdef dtrttp(intptr_t handle, int uplo, int n, size_t A, int lda, size_t AP): _setStream(handle) with nogil: status = cublasDtrttp(handle, uplo, n, A, lda, AP) check_status(status)