Commit 9bc9ca91 authored by Pan Zezhong's avatar Pan Zezhong
Browse files

issue/42: 创建infiniop handle不再传入device_id (creating an infiniop handle no longer takes an explicit device_id)

parent 643fdd2b
......@@ -11,7 +11,7 @@ typedef struct InfiniopHandle {
typedef InfiniopHandle *infiniopHandle_t;
// Creates an infiniop handle for `device`. Since issue/42 the device index is
// taken from the runtime's current device instead of an explicit parameter.
// (Diff residue removed: the pre-change overload taking `int device_id`.)
__C __export infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, infiniDevice_t device);
__C __export infiniopStatus_t infiniopDestroyHandle(infiniopHandle_t handle);
......
#include "common_ascend.h"
// NOTE(review): this span is diff residue — the scrape dropped the +/- markers,
// so the pre-change and post-change implementations appear back to back.
// Pre-change version: caller supplied device_id, validated against the device
// count, then aclrtSetDevice was called.
infiniopStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr,
int device_id) {
uint32_t device_count;
aclrtGetDeviceCount(&device_count);
if (device_id >= static_cast<int>(device_count)) {
return INFINIOP_STATUS_BAD_DEVICE;
}
auto ret = aclrtSetDevice(device_id);
// Post-change version: queries the runtime's current device via aclrtGetDevice
// instead of receiving device_id from the caller (issue/42).
infiniopStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr) {
int device_id = 0;
auto ret = aclrtGetDevice(&device_id);
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret)); // pre-change failure path
LOG_ERROR("aclrtGetDevice failed. ERROR: %d\n", ret)); // post-change: LOG_ERROR also returns INTERNAL_ERROR
// Allocate the handle recording the device kind and the resolved device index.
// (Function continues past this hunk; remainder not visible here.)
*handle_ptr = new InfiniopAscendHandle{INFINI_DEVICE_ASCEND, device_id};
......
......@@ -7,8 +7,7 @@
struct InfiniopAscendHandle;
typedef struct InfiniopAscendHandle *infiniopAscendHandle_t;
// Creates an Ascend handle bound to the runtime's current device (issue/42:
// device_id is no longer passed by the caller). Diff residue removed: the
// pre-change overload taking `int device_id`.
infiniopStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr);
infiniopStatus_t destroyAscendHandle(infiniopAscendHandle_t handle_ptr);
......
......@@ -28,6 +28,13 @@ extern "C" {
printf(message, ##__VA_ARGS__); \
} while (0)
// Prints `message` (printf-style) and then RETURNS from the enclosing function
// with INFINIOP_STATUS_INTERNAL_ERROR. Because of the hidden `return`, this
// macro may only be used inside functions returning infiniopStatus_t.
#define LOG_ERROR(message, ...) \
do { \
printf(message, ##__VA_ARGS__); \
return INFINIOP_STATUS_INTERNAL_ERROR; \
} while (0)
#ifdef __cplusplus
};
#endif
......
......@@ -2,18 +2,13 @@
#include "common_bang.h"
#include <memory>
// NOTE(review): diff residue — pre-change and post-change implementations are
// interleaved below because the scrape dropped the +/- markers.
// Pre-change version: caller supplied device_id, validated against the count.
infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr,
int device_id) {
unsigned int device_count;
cnrtGetDeviceCount(&device_count);
if (device_id >= static_cast<int>(device_count)) {
// Post-change version: asks the CNRT runtime for the current device instead
// of receiving device_id from the caller (issue/42).
infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr) {
int device_id = 0;
if (cnrtGetDevice(&device_id) != cnrtSuccess) {
return INFINIOP_STATUS_BAD_DEVICE;
}
// Create the shared pool and seed it with one cnnl handle.
auto pool = std::make_shared<Pool<cnnlHandle_t>>();
// Pre-change residue: explicit cnrtSetDevice on the caller-chosen device.
if (cnrtSetDevice(device_id) != cnrtSuccess) {
return INFINIOP_STATUS_BAD_DEVICE;
}
cnnlHandle_t handle;
cnnlCreate(&handle);
// (Function continues past this hunk; remainder not visible here.)
pool->push(std::move(handle));
......
......@@ -6,8 +6,7 @@
struct InfiniopBangHandle;
typedef struct InfiniopBangHandle *infiniopBangHandle_t;
// Creates a Cambricon BANG handle bound to the runtime's current device
// (issue/42: device_id is no longer passed by the caller). Diff residue
// removed: the pre-change overload taking `int device_id`.
infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr);
infiniopStatus_t destroyBangHandle(infiniopBangHandle_t handle);
#endif
......@@ -17,7 +17,7 @@
// Per-device state for the Cambricon BANG backend: the device identity plus a
// shared pool of cnnl handles. Diff residue removed: the pre-rename member
// `cnnl_handles` duplicated the renamed `cnnl_handle_pool`.
struct InfiniopBangHandle {
    infiniDevice_t device;
    int device_id;
    std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handle_pool;
};
inline cnnlDataType_t cnnlDataTypeConvert(infiniDtype_t dataType) {
......
......@@ -7,91 +7,89 @@
#include <iostream>
// Checks a CUDA runtime call; on failure prints the error with its source
// location to stderr and RETURNS `errorCode` from the enclosing function.
// Diff residue removed: the pre-rename `checkCudaErrorWithCode` macro was left
// unterminated (its trailing backslash would swallow the next #define).
#define CHECK_CUDA_OR_RETURN(call, errorCode) \
    do { \
        if (auto status = call; status != cudaSuccess) { \
            std::cerr << "CUDA error: " << cudaGetErrorString(status) \
                      << " in file " << __FILE__ << ", function " << __func__ \
                      << ", line " << __LINE__ << std::endl; \
            return errorCode; \
        } \
    } while (0)
// Common case: fail with INFINIOP_STATUS_INTERNAL_ERROR.
#define CHECK_CUDA(call) CHECK_CUDA_OR_RETURN(call, INFINIOP_STATUS_INTERNAL_ERROR)
// Checks a cuDNN call; on failure prints the error with its source location to
// stderr and RETURNS INFINIOP_STATUS_INTERNAL_ERROR from the enclosing
// function. Diff residue removed: the pre-rename `checkCudnnError` macro was
// left unterminated by the scrape.
#define CHECK_CUDNN(call) \
    do { \
        if (auto status = call; status != CUDNN_STATUS_SUCCESS) { \
            std::cerr << "CUDNN error: " << cudnnGetErrorString(status) \
                      << " in file " << __FILE__ << ", function " << __func__ \
                      << ", line " << __LINE__ << std::endl; \
            return INFINIOP_STATUS_INTERNAL_ERROR; \
        } \
    } while (0)
#include "infinicore.h"
#include <cudnn.h>
#include <cublas_v2.h>
#include <memory>
#include "../pool.h"
#include "cuda_handle.h"
#include "infinicore.h"
#include <cublas_v2.h>
#include <cuda_fp16.h>
#include <cudnn.h>
#include <memory>
// Per-device state for the CUDA backend: handle pools shared across operator
// descriptors plus cached device properties. Diff residue removed: the
// pre-rename members `cublas_handles_t`/`cudnn_handles_t` duplicated the
// renamed `*_handle_pool` members.
struct InfiniopCudaHandle {
    infiniDevice_t device;
    int device_id;
    std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool;
    std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool;
    cudaDeviceProp prop;
    int compute_capability_major;
    int compute_capability_minor;
};
template<typename T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handles_t, int device_id, cudaStream_t stream, T const &f) {
auto handle = cublas_handles_t->pop();
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool, int device_id, cudaStream_t stream, T const &f) {
auto handle = cublas_handle_pool->pop();
if (!handle) {
cudaSetDevice(device_id);
cublasCreate(&(*handle));
}
cublasSetStream(*handle, (cudaStream_t) stream);
f(*handle);
cublas_handles_t->push(std::move(*handle));
cublas_handle_pool->push(std::move(*handle));
}
// Borrows a cuDNN handle from the pool, binds it to `stream`, invokes `f`,
// returns the handle to the pool, and propagates f's cudnnStatus_t. If the
// pool is empty, a new handle is created on `device_id` first.
// NOTE(review): assumes Pool::pop() yields an optional-like object whose
// payload may be created in place when disengaged — verify against Pool.
// Diff residue removed: the pre-rename signature taking `cudnn_handles_t`.
template<typename T>
cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, int device_id, cudaStream_t stream, T const &f) {
    auto handle = cudnn_handle_pool->pop();
    if (!handle) {
        cudaSetDevice(device_id);
        cudnnCreate(&(*handle));
    }
    cudnnSetStream(*handle, stream);
    cudnnStatus_t status = f(*handle);
    cudnn_handle_pool->push(std::move(*handle));
    return status;
}
// Maps an infiniop dtype to the corresponding cuDNN data type. Unknown dtypes
// fall back to CUDNN_DATA_FLOAT. Diff residue removed: the scrape left the
// whole case list twice (pre-change unindented + post-change reindented).
inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
    switch (dt) {
        case INFINI_DTYPE_F16:
            return CUDNN_DATA_HALF;
        case INFINI_DTYPE_F32:
            return CUDNN_DATA_FLOAT;
        case INFINI_DTYPE_F64:
            return CUDNN_DATA_DOUBLE;
        case INFINI_DTYPE_BF16:
            return CUDNN_DATA_BFLOAT16;
        case INFINI_DTYPE_I8:
            return CUDNN_DATA_INT8;
        case INFINI_DTYPE_I32:
            return CUDNN_DATA_INT32;
        case INFINI_DTYPE_I64:
            return CUDNN_DATA_INT64;
        case INFINI_DTYPE_U8:
            return CUDNN_DATA_UINT8;
        default:
            return CUDNN_DATA_FLOAT;
    }
}
......@@ -120,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
return res;
}
#endif // __INFINIOP_COMMON_CUDA_H__
#endif// __INFINIOP_COMMON_CUDA_H__
#include "./common_cuda.cuh"
// NOTE(review): diff residue — pre-change and post-change implementations are
// interleaved below because the scrape dropped the +/- markers.
// Pre-change version: caller supplied device_id, validated against the count.
infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_id, infiniDevice_t cuda_device_type) {
// Check if device_id is valid
int device_count;
cudaGetDeviceCount(&device_count);
if (device_id >= device_count) {
return INFINIOP_STATUS_BAD_DEVICE;
}
// Post-change version: asks the CUDA runtime for the current device instead
// of receiving device_id from the caller (issue/42).
infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, infiniDevice_t cuda_device_type) {
// Create a new cublas handle pool
int device_id = 0;
CHECK_CUDA(cudaGetDevice(&device_id));
auto pool = std::make_shared<Pool<cublasHandle_t>>();
// Pre-change residue: explicit cudaSetDevice on the caller-chosen device.
if (cudaSetDevice(device_id) != cudaSuccess) {
return INFINIOP_STATUS_BAD_DEVICE;
}
// Seed the pool with one cuBLAS handle.
// (Function continues past this hunk; remainder not visible here.)
cublasHandle_t handle;
cublasCreate(&handle);
pool->push(std::move(handle));
......@@ -20,7 +12,7 @@ infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_i
// create a cudnn handle pool
auto cudnn_pool = std::make_shared<Pool<cudnnHandle_t>>();
cudnnHandle_t cudnn_handle;
checkCudnnError(cudnnCreate(&cudnn_handle));
CHECK_CUDNN(cudnnCreate(&cudnn_handle));
cudnn_pool->push(std::move(cudnn_handle));
// set CUDA device property
......@@ -47,8 +39,8 @@ infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, int device_i
}
// Releases the handle's pool references and frees the handle struct itself.
// Diff residue removed: the pre-rename assignments to `cublas_handles_t` /
// `cudnn_handles_t` duplicated the renamed members. The closing brace was cut
// by the diff hunk boundary and is restored here.
infiniopStatus_t destroyCudaHandle(infiniopCudaHandle_t handle_ptr) {
    handle_ptr->cublas_handle_pool = nullptr;
    handle_ptr->cudnn_handle_pool = nullptr;
    delete handle_ptr;
    return INFINIOP_STATUS_SUCCESS;
}
......
......@@ -6,7 +6,7 @@
struct InfiniopCudaHandle;
typedef struct InfiniopCudaHandle *infiniopCudaHandle_t;
// Creates a CUDA handle bound to the runtime's current device (issue/42:
// device_id is no longer passed by the caller). Diff residue removed: the
// pre-change overload taking `int device_id`.
infiniopStatus_t createCudaHandle(infiniopCudaHandle_t *handle_ptr, infiniDevice_t cuda_device_type);
infiniopStatus_t destroyCudaHandle(infiniopCudaHandle_t handle_ptr);
......
......@@ -13,14 +13,10 @@
#endif
// Dispatches handle creation to the per-backend factory for `device`.
// NOTE(review): diff residue — pre-change and post-change lines are
// interleaved below because the scrape dropped the +/- markers.
__C infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr,
infiniDevice_t device,
int device_id) { // pre-change signature (took device_id)
infiniDevice_t device) { // post-change signature (issue/42)
if (handle_ptr == nullptr) {
return INFINIOP_STATUS_NULL_POINTER;
}
// Pre-change residue: device_id range check, removed with the parameter.
if (device_id < 0) {
return INFINIOP_STATUS_BAD_DEVICE;
}
switch (device) {
#ifdef ENABLE_CPU_API
// (Diff hunk separator from the scraped page; CPU case elided by the diff.)
......@@ -29,19 +25,17 @@ __C infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr,
#endif
#ifdef ENABLE_CUDA_API
case INFINI_DEVICE_NVIDIA: {
// Pre-change call (passed device_id) followed by the post-change call.
return createCudaHandle((infiniopCudaHandle_t *)handle_ptr, device_id,
device);
return createCudaHandle((infiniopCudaHandle_t *)handle_ptr, device);
}
#endif
#ifdef ENABLE_CAMBRICON_API
case INFINI_DEVICE_CAMBRICON: {
// Pre-change call followed by the post-change call.
return createBangHandle((infiniopBangHandle_t *)handle_ptr, device_id);
return createBangHandle((infiniopBangHandle_t *)handle_ptr);
}
#endif
#ifdef ENABLE_ASCEND_API
case INFINI_DEVICE_ASCEND: {
// Pre-change call followed by the post-change call.
return createAscendHandle((infiniopAscendHandle_t *)handle_ptr,
device_id);
return createAscendHandle((infiniopAscendHandle_t *)handle_ptr);
}
#endif
}
......
......@@ -31,7 +31,7 @@ infiniopStatus_t bangCreateMatmulDescriptor(
cnnlSetMatMulDescAttr(opDesc, CNNL_MATMUL_USE_STRIDE, &use_stride,
sizeof(int32_t));
int count = 0;
use_cnnl(handle->cnnl_handles, [&](cnnlHandle_t _handle) {
use_cnnl(handle->cnnl_handle_pool, [&](cnnlHandle_t _handle) {
cnnlGetBatchMatMulAlgoHeuristic(_handle, opDesc, aDesc, bDesc, cDesc,
NULL, 1, &algoResult, &count);
});
......@@ -42,7 +42,7 @@ infiniopStatus_t bangCreateMatmulDescriptor(
handle->device_id,
info,
c_desc->dtype,
handle->cnnl_handles,
handle->cnnl_handle_pool,
aDesc,
bDesc,
cDesc,
......@@ -61,7 +61,7 @@ infiniopStatus_t bangGetMatmulWorkspaceSize(infiniopMatmulBangDescriptor_t desc,
infiniopStatus_t
bangDestroyMatmulDescriptor(infiniopMatmulBangDescriptor_t desc) {
desc->cnnl_handles = nullptr;
desc->cnnl_handle_pool = nullptr;
cnnlDestroyTensorDescriptor(desc->aDesc);
cnnlDestroyTensorDescriptor(desc->bDesc);
cnnlDestroyTensorDescriptor(desc->cDesc);
......@@ -80,7 +80,7 @@ void bangMatmulCnnl(infiniopMatmulBangDescriptor_t desc, void *workspace, void *
std::swap(a, b);
}
use_cnnl(desc->cnnl_handles, (cnrtQueue_t)stream, [&](cnnlHandle_t handle) {
use_cnnl(desc->cnnl_handle_pool, (cnrtQueue_t)stream, [&](cnnlHandle_t handle) {
cnnlBatchMatMulBCast_v2(handle, desc->opDesc, desc->algo, &alpha,
desc->aDesc, a, desc->bDesc, b, &beta,
desc->cDesc, c, workspace,
......
......@@ -9,7 +9,7 @@ struct InfiniopMatmulBangDescriptor {
int device_id;
MatmulInfo info;
infiniDtype_t dtype;
std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handles;
std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handle_pool;
cnnlTensorDescriptor_t aDesc;
cnnlTensorDescriptor_t bDesc;
cnnlTensorDescriptor_t cDesc;
......
......@@ -23,7 +23,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
dtype,
handle->device_id,
info,
handle->cublas_handles_t};
handle->cublas_handle_pool};
return INFINIOP_STATUS_SUCCESS;
}
......@@ -33,7 +33,7 @@ infiniopStatus_t cudaGetMatmulWorkspaceSize(infiniopMatmulCudaDescriptor_t desc,
}
// Releases the descriptor's cuBLAS pool reference and frees the descriptor.
// Diff residue removed: the pre-rename assignment to `cublas_handles_t`
// duplicated the renamed `cublas_handle_pool` member.
infiniopStatus_t cudaDestroyMatmulDescriptor(infiniopMatmulCudaDescriptor_t desc) {
    desc->cublas_handle_pool = nullptr;
    delete desc;
    return INFINIOP_STATUS_SUCCESS;
}
......@@ -11,7 +11,7 @@ typedef struct InfiniopMatmulCudaDescriptor {
infiniDtype_t dtype;
int device_id;
MatmulInfo info;
// Renamed from `cublas_handles_t`; diff residue (the old member) removed so
// the struct no longer declares two pool members.
std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool;
} InfiniopMatmulCudaDescriptor;
#endif// __INFINIOP_MATMUL_CUDA_H__
......@@ -26,7 +26,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
use_cublas(desc->cublas_handles_t, desc->device_id, (cudaStream_t) stream,
use_cublas(desc->cublas_handle_pool, desc->device_id, (cudaStream_t) stream,
[&](cublasHandle_t handle) { cublasGemmStridedBatchedEx(
handle,
op_a,
......
......@@ -378,7 +378,9 @@ def get_test_devices(args):
import torch_mlu
devices_to_test.append(InfiniDeviceEnum.CAMBRICON)
if args.ascend:
import torch
import torch_npu
torch.npu.set_device(0) # Ascend NPU needs explicit device initialization
devices_to_test.append(InfiniDeviceEnum.ASCEND)
if not devices_to_test:
devices_to_test = [InfiniDeviceEnum.CPU]
......
......@@ -112,7 +112,7 @@ def test(
def lib_matmul():
check_error(lib.infiniopMatmul(
descriptor,
workspace.data_ptr() if workspace else None,
workspace.data_ptr() if workspace is not None else None,
workspace_size.value,
c_tensor.data,
a_tensor.data,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment