Commit 9bc9ca91 authored by Pan Zezhong's avatar Pan Zezhong
Browse files

issue/42: 创建infiniop handle不再传入device_id

parent 643fdd2b
...@@ -11,7 +11,7 @@ typedef struct InfiniopHandle { ...@@ -11,7 +11,7 @@ typedef struct InfiniopHandle {
typedef InfiniopHandle *infiniopHandle_t; typedef InfiniopHandle *infiniopHandle_t;
__C __export infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, infiniDevice_t device, int device_id); __C __export infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, infiniDevice_t device);
__C __export infiniopStatus_t infiniopDestroyHandle(infiniopHandle_t handle); __C __export infiniopStatus_t infiniopDestroyHandle(infiniopHandle_t handle);
......
#include "common_ascend.h" #include "common_ascend.h"
infiniopStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr, infiniopStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr) {
int device_id) { int device_id = 0;
uint32_t device_count; auto ret = aclrtGetDevice(&device_id);
aclrtGetDeviceCount(&device_count);
if (device_id >= static_cast<int>(device_count)) {
return INFINIOP_STATUS_BAD_DEVICE;
}
auto ret = aclrtSetDevice(device_id);
CHECK_RET(ret == ACL_SUCCESS, CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret)); LOG_ERROR("aclrtGetDevice failed. ERROR: %d\n", ret));
*handle_ptr = new InfiniopAscendHandle{INFINI_DEVICE_ASCEND, device_id}; *handle_ptr = new InfiniopAscendHandle{INFINI_DEVICE_ASCEND, device_id};
......
...@@ -7,8 +7,7 @@ ...@@ -7,8 +7,7 @@
struct InfiniopAscendHandle; struct InfiniopAscendHandle;
typedef struct InfiniopAscendHandle *infiniopAscendHandle_t; typedef struct InfiniopAscendHandle *infiniopAscendHandle_t;
infiniopStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr, infiniopStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr);
int device_id);
infiniopStatus_t destroyAscendHandle(infiniopAscendHandle_t handle_ptr); infiniopStatus_t destroyAscendHandle(infiniopAscendHandle_t handle_ptr);
......
...@@ -28,6 +28,13 @@ extern "C" { ...@@ -28,6 +28,13 @@ extern "C" {
printf(message, ##__VA_ARGS__); \ printf(message, ##__VA_ARGS__); \
} while (0) } while (0)
#define LOG_ERROR(message, ...) \
do { \
printf(message, ##__VA_ARGS__); \
return INFINIOP_STATUS_INTERNAL_ERROR; \
} while (0)
#ifdef __cplusplus #ifdef __cplusplus
}; };
#endif #endif
......
...@@ -2,18 +2,13 @@ ...@@ -2,18 +2,13 @@
#include "common_bang.h" #include "common_bang.h"
#include <memory> #include <memory>
infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr, infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr) {
int device_id) { int device_id = 0;
unsigned int device_count; if (cnrtGetDevice(&device_id) != cnrtSuccess) {
cnrtGetDeviceCount(&device_count);
if (device_id >= static_cast<int>(device_count)) {
return INFINIOP_STATUS_BAD_DEVICE; return INFINIOP_STATUS_BAD_DEVICE;
} }
auto pool = std::make_shared<Pool<cnnlHandle_t>>(); auto pool = std::make_shared<Pool<cnnlHandle_t>>();
if (cnrtSetDevice(device_id) != cnrtSuccess) {
return INFINIOP_STATUS_BAD_DEVICE;
}
cnnlHandle_t handle; cnnlHandle_t handle;
cnnlCreate(&handle); cnnlCreate(&handle);
pool->push(std::move(handle)); pool->push(std::move(handle));
......
...@@ -6,8 +6,7 @@ ...@@ -6,8 +6,7 @@
struct InfiniopBangHandle; struct InfiniopBangHandle;
typedef struct InfiniopBangHandle *infiniopBangHandle_t; typedef struct InfiniopBangHandle *infiniopBangHandle_t;
infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr, infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr);
int device_id);
infiniopStatus_t destroyBangHandle(infiniopBangHandle_t handle); infiniopStatus_t destroyBangHandle(infiniopBangHandle_t handle);
#endif #endif
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
struct InfiniopBangHandle { struct InfiniopBangHandle {
infiniDevice_t device; infiniDevice_t device;
int device_id; int device_id;
std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handles; std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handle_pool;
}; };
inline cnnlDataType_t cnnlDataTypeConvert(infiniDtype_t dataType) { inline cnnlDataType_t cnnlDataTypeConvert(infiniDtype_t dataType) {
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include <iostream> #include <iostream>
#define checkCudaErrorWithCode(call, errorCode) \ #define CHECK_CUDA_OR_RETURN(call, errorCode) \
do { \ do { \
if (auto status = call; status != cudaSuccess) { \ if (auto status = call; status != cudaSuccess) { \
std::cerr << "CUDA error: " << cudaGetErrorString(status) \ std::cerr << "CUDA error: " << cudaGetErrorString(status) \
...@@ -17,9 +17,9 @@ ...@@ -17,9 +17,9 @@
} \ } \
} while (0) } while (0)
#define checkCudaError(call) checkCudaErrorWithCode(call, INFINIOP_STATUS_BAD_DEVICE) #define CHECK_CUDA(call) CHECK_CUDA_OR_RETURN(call, INFINIOP_STATUS_INTERNAL_ERROR)
#define checkCudnnError(call) \ #define CHECK_CUDNN(call) \
do { \ do { \
if (auto status = call; status != CUDNN_STATUS_SUCCESS) { \ if (auto status = call; status != CUDNN_STATUS_SUCCESS) { \
std::cerr << "CUDNN error: " << cudnnGetErrorString(status) \ std::cerr << "CUDNN error: " << cudnnGetErrorString(status) \
...@@ -29,46 +29,44 @@ ...@@ -29,46 +29,44 @@
} \ } \
} while (0) } while (0)
#include "infinicore.h"
#include <cudnn.h>
#include <cublas_v2.h>
#include <memory>
#include "../pool.h" #include "../pool.h"
#include "cuda_handle.h" #include "cuda_handle.h"
#include "infinicore.h"
#include <cublas_v2.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <cudnn.h>
#include <memory>
struct InfiniopCudaHandle { struct InfiniopCudaHandle {
infiniDevice_t device; infiniDevice_t device;
int device_id; int device_id;
std::shared_ptr<Pool<cublasHandle_t>> cublas_handles_t; std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool;
std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handles_t; std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool;
cudaDeviceProp prop; cudaDeviceProp prop;
int compute_capability_major; int compute_capability_major;
int compute_capability_minor; int compute_capability_minor;
}; };
template<typename T> template<typename T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handles_t, int device_id, cudaStream_t stream, T const &f) { void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool, int device_id, cudaStream_t stream, T const &f) {
auto handle = cublas_handles_t->pop(); auto handle = cublas_handle_pool->pop();
if (!handle) { if (!handle) {
cudaSetDevice(device_id);
cublasCreate(&(*handle)); cublasCreate(&(*handle));
} }
cublasSetStream(*handle, (cudaStream_t) stream); cublasSetStream(*handle, (cudaStream_t) stream);
f(*handle); f(*handle);
cublas_handles_t->push(std::move(*handle)); cublas_handle_pool->push(std::move(*handle));
} }
template<typename T> template<typename T>
cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handles_t, int device_id, cudaStream_t stream, T const &f) { cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, int device_id, cudaStream_t stream, T const &f) {
auto handle = cudnn_handles_t->pop(); auto handle = cudnn_handle_pool->pop();
if (!handle) { if (!handle) {
cudaSetDevice(device_id);
cudnnCreate(&(*handle)); cudnnCreate(&(*handle));
} }
cudnnSetStream(*handle, stream); cudnnSetStream(*handle, stream);
cudnnStatus_t status = f(*handle); cudnnStatus_t status = f(*handle);
cudnn_handles_t->push(std::move(*handle)); cudnn_handle_pool->push(std::move(*handle));
return status; return status;
} }
...@@ -120,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim, ...@@ -120,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
return res; return res;
} }
#endif // __INFINIOP_COMMON_CUDA_H__ #endif// __INFINIOP_COMMON_CUDA_H__
#include "./common_cuda.cuh" #include "./common_cuda.cuh"
infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_id, infiniDevice_t cuda_device_type) { infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, infiniDevice_t cuda_device_type) {
// Check if device_id is valid
int device_count;
cudaGetDeviceCount(&device_count);
if (device_id >= device_count) {
return INFINIOP_STATUS_BAD_DEVICE;
}
// Create a new cublas handle pool // Create a new cublas handle pool
int device_id = 0;
CHECK_CUDA(cudaGetDevice(&device_id));
auto pool = std::make_shared<Pool<cublasHandle_t>>(); auto pool = std::make_shared<Pool<cublasHandle_t>>();
if (cudaSetDevice(device_id) != cudaSuccess) {
return INFINIOP_STATUS_BAD_DEVICE;
}
cublasHandle_t handle; cublasHandle_t handle;
cublasCreate(&handle); cublasCreate(&handle);
pool->push(std::move(handle)); pool->push(std::move(handle));
...@@ -20,7 +12,7 @@ infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_i ...@@ -20,7 +12,7 @@ infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_i
// create a cudnn handle pool // create a cudnn handle pool
auto cudnn_pool = std::make_shared<Pool<cudnnHandle_t>>(); auto cudnn_pool = std::make_shared<Pool<cudnnHandle_t>>();
cudnnHandle_t cudnn_handle; cudnnHandle_t cudnn_handle;
checkCudnnError(cudnnCreate(&cudnn_handle)); CHECK_CUDNN(cudnnCreate(&cudnn_handle));
cudnn_pool->push(std::move(cudnn_handle)); cudnn_pool->push(std::move(cudnn_handle));
// set CUDA device property // set CUDA device property
...@@ -47,8 +39,8 @@ infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_i ...@@ -47,8 +39,8 @@ infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_i
} }
infiniopStatus_t destroyCudaHandle(infiniopCudaHandle_t handle_ptr) { infiniopStatus_t destroyCudaHandle(infiniopCudaHandle_t handle_ptr) {
handle_ptr->cublas_handles_t = nullptr; handle_ptr->cublas_handle_pool = nullptr;
handle_ptr->cudnn_handles_t = nullptr; handle_ptr->cudnn_handle_pool = nullptr;
delete handle_ptr; delete handle_ptr;
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
struct InfiniopCudaHandle; struct InfiniopCudaHandle;
typedef struct InfiniopCudaHandle *infiniopCudaHandle_t; typedef struct InfiniopCudaHandle *infiniopCudaHandle_t;
infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_id, infiniDevice_t cuda_device_type); infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, infiniDevice_t cuda_device_type);
infiniopStatus_t destroyCudaHandle(infiniopCudaHandle_t handle_ptr); infiniopStatus_t destroyCudaHandle(infiniopCudaHandle_t handle_ptr);
......
...@@ -13,14 +13,10 @@ ...@@ -13,14 +13,10 @@
#endif #endif
__C infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, __C infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr,
infiniDevice_t device, infiniDevice_t device) {
int device_id) {
if (handle_ptr == nullptr) { if (handle_ptr == nullptr) {
return INFINIOP_STATUS_NULL_POINTER; return INFINIOP_STATUS_NULL_POINTER;
} }
if (device_id < 0) {
return INFINIOP_STATUS_BAD_DEVICE;
}
switch (device) { switch (device) {
#ifdef ENABLE_CPU_API #ifdef ENABLE_CPU_API
...@@ -29,19 +25,17 @@ __C infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, ...@@ -29,19 +25,17 @@ __C infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr,
#endif #endif
#ifdef ENABLE_CUDA_API #ifdef ENABLE_CUDA_API
case INFINI_DEVICE_NVIDIA: { case INFINI_DEVICE_NVIDIA: {
return createCudaHandle((infiniopCudaHandle_t *)handle_ptr, device_id, return createCudaHandle((infiniopCudaHandle_t *)handle_ptr, device);
device);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
case INFINI_DEVICE_CAMBRICON: { case INFINI_DEVICE_CAMBRICON: {
return createBangHandle((infiniopBangHandle_t *)handle_ptr, device_id); return createBangHandle((infiniopBangHandle_t *)handle_ptr);
} }
#endif #endif
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
case INFINI_DEVICE_ASCEND: { case INFINI_DEVICE_ASCEND: {
return createAscendHandle((infiniopAscendHandle_t *)handle_ptr, return createAscendHandle((infiniopAscendHandle_t *)handle_ptr);
device_id);
} }
#endif #endif
} }
......
...@@ -31,7 +31,7 @@ infiniopStatus_t bangCreateMatmulDescriptor( ...@@ -31,7 +31,7 @@ infiniopStatus_t bangCreateMatmulDescriptor(
cnnlSetMatMulDescAttr(opDesc, CNNL_MATMUL_USE_STRIDE, &use_stride, cnnlSetMatMulDescAttr(opDesc, CNNL_MATMUL_USE_STRIDE, &use_stride,
sizeof(int32_t)); sizeof(int32_t));
int count = 0; int count = 0;
use_cnnl(handle->cnnl_handles, [&](cnnlHandle_t _handle) { use_cnnl(handle->cnnl_handle_pool, [&](cnnlHandle_t _handle) {
cnnlGetBatchMatMulAlgoHeuristic(_handle, opDesc, aDesc, bDesc, cDesc, cnnlGetBatchMatMulAlgoHeuristic(_handle, opDesc, aDesc, bDesc, cDesc,
NULL, 1, &algoResult, &count); NULL, 1, &algoResult, &count);
}); });
...@@ -42,7 +42,7 @@ infiniopStatus_t bangCreateMatmulDescriptor( ...@@ -42,7 +42,7 @@ infiniopStatus_t bangCreateMatmulDescriptor(
handle->device_id, handle->device_id,
info, info,
c_desc->dtype, c_desc->dtype,
handle->cnnl_handles, handle->cnnl_handle_pool,
aDesc, aDesc,
bDesc, bDesc,
cDesc, cDesc,
...@@ -61,7 +61,7 @@ infiniopStatus_t bangGetMatmulWorkspaceSize(infiniopMatmulBangDescriptor_t desc, ...@@ -61,7 +61,7 @@ infiniopStatus_t bangGetMatmulWorkspaceSize(infiniopMatmulBangDescriptor_t desc,
infiniopStatus_t infiniopStatus_t
bangDestroyMatmulDescriptor(infiniopMatmulBangDescriptor_t desc) { bangDestroyMatmulDescriptor(infiniopMatmulBangDescriptor_t desc) {
desc->cnnl_handles = nullptr; desc->cnnl_handle_pool = nullptr;
cnnlDestroyTensorDescriptor(desc->aDesc); cnnlDestroyTensorDescriptor(desc->aDesc);
cnnlDestroyTensorDescriptor(desc->bDesc); cnnlDestroyTensorDescriptor(desc->bDesc);
cnnlDestroyTensorDescriptor(desc->cDesc); cnnlDestroyTensorDescriptor(desc->cDesc);
...@@ -80,7 +80,7 @@ void bangMatmulCnnl(infiniopMatmulBangDescriptor_t desc, void *workspace, void * ...@@ -80,7 +80,7 @@ void bangMatmulCnnl(infiniopMatmulBangDescriptor_t desc, void *workspace, void *
std::swap(a, b); std::swap(a, b);
} }
use_cnnl(desc->cnnl_handles, (cnrtQueue_t)stream, [&](cnnlHandle_t handle) { use_cnnl(desc->cnnl_handle_pool, (cnrtQueue_t)stream, [&](cnnlHandle_t handle) {
cnnlBatchMatMulBCast_v2(handle, desc->opDesc, desc->algo, &alpha, cnnlBatchMatMulBCast_v2(handle, desc->opDesc, desc->algo, &alpha,
desc->aDesc, a, desc->bDesc, b, &beta, desc->aDesc, a, desc->bDesc, b, &beta,
desc->cDesc, c, workspace, desc->cDesc, c, workspace,
......
...@@ -9,7 +9,7 @@ struct InfiniopMatmulBangDescriptor { ...@@ -9,7 +9,7 @@ struct InfiniopMatmulBangDescriptor {
int device_id; int device_id;
MatmulInfo info; MatmulInfo info;
infiniDtype_t dtype; infiniDtype_t dtype;
std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handles; std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handle_pool;
cnnlTensorDescriptor_t aDesc; cnnlTensorDescriptor_t aDesc;
cnnlTensorDescriptor_t bDesc; cnnlTensorDescriptor_t bDesc;
cnnlTensorDescriptor_t cDesc; cnnlTensorDescriptor_t cDesc;
......
...@@ -23,7 +23,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle, ...@@ -23,7 +23,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
dtype, dtype,
handle->device_id, handle->device_id,
info, info,
handle->cublas_handles_t}; handle->cublas_handle_pool};
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
...@@ -33,7 +33,7 @@ infiniopStatus_t cudaGetMatmulWorkspaceSize(infiniopMatmulCudaDescriptor_t desc, ...@@ -33,7 +33,7 @@ infiniopStatus_t cudaGetMatmulWorkspaceSize(infiniopMatmulCudaDescriptor_t desc,
} }
infiniopStatus_t cudaDestroyMatmulDescriptor(infiniopMatmulCudaDescriptor_t desc) { infiniopStatus_t cudaDestroyMatmulDescriptor(infiniopMatmulCudaDescriptor_t desc) {
desc->cublas_handles_t = nullptr; desc->cublas_handle_pool = nullptr;
delete desc; delete desc;
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
...@@ -11,7 +11,7 @@ typedef struct InfiniopMatmulCudaDescriptor { ...@@ -11,7 +11,7 @@ typedef struct InfiniopMatmulCudaDescriptor {
infiniDtype_t dtype; infiniDtype_t dtype;
int device_id; int device_id;
MatmulInfo info; MatmulInfo info;
std::shared_ptr<Pool<cublasHandle_t>> cublas_handles_t; std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool;
} InfiniopMatmulCudaDescriptor; } InfiniopMatmulCudaDescriptor;
#endif// __INFINIOP_MATMUL_CUDA_H__ #endif// __INFINIOP_MATMUL_CUDA_H__
...@@ -26,7 +26,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c, ...@@ -26,7 +26,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T; auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T; auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
use_cublas(desc->cublas_handles_t, desc->device_id, (cudaStream_t) stream, use_cublas(desc->cublas_handle_pool, desc->device_id, (cudaStream_t) stream,
[&](cublasHandle_t handle) { cublasGemmStridedBatchedEx( [&](cublasHandle_t handle) { cublasGemmStridedBatchedEx(
handle, handle,
op_a, op_a,
......
...@@ -378,7 +378,9 @@ def get_test_devices(args): ...@@ -378,7 +378,9 @@ def get_test_devices(args):
import torch_mlu import torch_mlu
devices_to_test.append(InfiniDeviceEnum.CAMBRICON) devices_to_test.append(InfiniDeviceEnum.CAMBRICON)
if args.ascend: if args.ascend:
import torch
import torch_npu import torch_npu
torch.npu.set_device(0) # Ascend NPU needs explicit device initialization
devices_to_test.append(InfiniDeviceEnum.ASCEND) devices_to_test.append(InfiniDeviceEnum.ASCEND)
if not devices_to_test: if not devices_to_test:
devices_to_test = [InfiniDeviceEnum.CPU] devices_to_test = [InfiniDeviceEnum.CPU]
......
...@@ -112,7 +112,7 @@ def test( ...@@ -112,7 +112,7 @@ def test(
def lib_matmul(): def lib_matmul():
check_error(lib.infiniopMatmul( check_error(lib.infiniopMatmul(
descriptor, descriptor,
workspace.data_ptr() if workspace else None, workspace.data_ptr() if workspace is not None else None,
workspace_size.value, workspace_size.value,
c_tensor.data, c_tensor.data,
a_tensor.data, a_tensor.data,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment