Unverified Commit fd0242ed authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #101 from PanZezhong1725/issue/89/bang

Issue/89/bang Refactor Handle, Runtime, and Matmul Implementation for Bang
parents 92ad2426 39b09a9e
#include "../pool.h" #include "../../tensor.h"
#include "common_bang.h" #include "common_bang.h"
#include "infiniop/tensor_descriptor.h"
#include <memory> #include <memory>
#include <vector>
infiniStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr) { namespace device::bang {
int device_id = 0;
if (cnrtGetDevice(&device_id) != cnrtSuccess) { Handle::Handle(infiniDevice_t device, int device_id)
return INFINI_STATUS_DEVICE_NOT_INITIALIZED; : InfiniopHandle{device, device_id},
_internal(std::make_shared<Handle::Internal>()) {}
auto Handle::internal() const -> const std::shared_ptr<Internal> & {
return _internal;
}
infiniStatus_t Handle::Internal::useCnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const {
auto handle = cnnl_handles.pop();
if (!handle) {
CHECK_BANG(cnnlCreate(&(*handle)));
} }
CHECK_BANG(cnnlSetQueue(*handle, queue));
CHECK_STATUS(f(*handle));
cnnl_handles.push(std::move(*handle));
return INFINI_STATUS_SUCCESS;
}
auto pool = std::make_shared<Pool<cnnlHandle_t>>(); cnnlDataType_t getCnnlDtype(infiniDtype_t dt) {
cnnlHandle_t handle; switch (dt) {
cnnlCreate(&handle); case INFINI_DTYPE_F32:
pool->push(std::move(handle)); return CNNL_DTYPE_FLOAT;
case INFINI_DTYPE_F64:
return CNNL_DTYPE_DOUBLE;
case INFINI_DTYPE_F16:
return CNNL_DTYPE_HALF;
case INFINI_DTYPE_I8:
return CNNL_DTYPE_INT8;
case INFINI_DTYPE_I32:
return CNNL_DTYPE_INT32;
case INFINI_DTYPE_U8:
return CNNL_DTYPE_UINT8;
case INFINI_DTYPE_BF16:
return CNNL_DTYPE_BFLOAT16;
case INFINI_DTYPE_I64:
return CNNL_DTYPE_INT64;
default:
return CNNL_DTYPE_INVALID;
}
}
*handle_ptr = new InfiniopBangHandle{INFINI_DEVICE_CAMBRICON, device_id, infiniStatus_t setCnnlTensor(cnnlTensorDescriptor_t desc,
std::move(pool)}; const InfiniopTensorDescriptor *layout) {
std::vector<int> dims(layout->ndim());
for (size_t i = 0; i < layout->ndim(); i++) {
dims[i] = static_cast<int>(layout->shape()[i]);
}
CHECK_BANG(cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_ARRAY,
getCnnlDtype(layout->dtype()), dims.size(),
dims.data()));
return INFINI_STATUS_SUCCESS;
}
infiniStatus_t setCnnlTensorEx(cnnlTensorDescriptor_t desc,
const InfiniopTensorDescriptor *layout) {
std::vector<int> dim_size(layout->ndim()), dim_stride(layout->ndim());
for (size_t i = 0; i < layout->ndim(); i++) {
dim_size[i] = static_cast<int>(layout->shape()[i]);
dim_stride[i] = static_cast<int>(layout->strides()[i]);
}
CHECK_BANG(cnnlSetTensorDescriptorEx(
desc, CNNL_LAYOUT_ARRAY, getCnnlDtype(layout->dtype()),
dim_size.size(), dim_size.data(), dim_stride.data()));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
infiniStatus_t destroyBangHandle(infiniopBangHandle_t handle) { namespace cambricon {
delete handle;
Handle::Handle(int device_id)
: bang::Handle(INFINI_DEVICE_CAMBRICON, device_id) {}
infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) {
*handle_ptr = new Handle(device_id);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace cambricon
} // namespace device::bang
#ifndef BANG_HANDLE_H #ifndef __INFINIOP_BANG_HANDLE_H__
#define BANG_HANDLE_H #define __INFINIOP_BANG_HANDLE_H__
#include "../../handle.h" #include "../../handle.h"
#include <memory>
struct InfiniopBangHandle; namespace device::bang {
typedef struct InfiniopBangHandle *infiniopBangHandle_t;
infiniStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr); struct Handle : public InfiniopHandle {
infiniStatus_t destroyBangHandle(infiniopBangHandle_t handle); class Internal;
auto internal() const -> const std::shared_ptr<Internal> &;
#endif protected:
Handle(infiniDevice_t device, int device_id);
private:
std::shared_ptr<Internal> _internal;
};
namespace cambricon {
class Handle : public bang::Handle {
Handle(int device_id);
public:
static infiniStatus_t create(InfiniopHandle **handle_ptr, int device_id);
};
} // namespace cambricon
} // namespace device::bang
#endif // __INFINIOP_BANG_HANDLE_H__
...@@ -6,89 +6,32 @@ ...@@ -6,89 +6,32 @@
#include "bang_handle.h" #include "bang_handle.h"
#include "cnnl.h" #include "cnnl.h"
#include "cnrt.h" #include "cnrt.h"
#include "infiniop/tensor_descriptor.h" #include <functional>
#include <memory>
#include <vector>
// the maximum NRAM memory is 1024 * 768 #define CHECK_BANG(API) CHECK_INTERNAL(API, CNNL_STATUS_SUCCESS)
#define NRAM_MAX_SIZE (1024 * 256)
#define GDRAM_MAX_SIZE (1024 * 1024 * 1024) namespace device::bang {
struct InfiniopBangHandle { class Handle::Internal {
infiniDevice_t device; Pool<cnnlHandle_t> cnnl_handles;
int device_id;
std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handle_pool;
};
inline cnnlDataType_t cnnlDataTypeConvert(infiniDtype_t dataType) { template <typename T>
switch (dataType) { using Fn = std::function<infiniStatus_t(T)>;
case INFINI_DTYPE_F32:
return CNNL_DTYPE_FLOAT;
case INFINI_DTYPE_F64:
return CNNL_DTYPE_DOUBLE;
case INFINI_DTYPE_F16:
return CNNL_DTYPE_HALF;
case INFINI_DTYPE_I8:
return CNNL_DTYPE_INT8;
case INFINI_DTYPE_I32:
return CNNL_DTYPE_INT32;
case INFINI_DTYPE_U8:
return CNNL_DTYPE_UINT8;
case INFINI_DTYPE_BF16:
return CNNL_DTYPE_BFLOAT16;
case INFINI_DTYPE_I64:
return CNNL_DTYPE_INT64;
default:
return CNNL_DTYPE_INVALID;
}
}
template <typename T> public:
void use_cnnl(std::shared_ptr<Pool<cnnlHandle_t>> &pool, cnrtQueue_t queue, infiniStatus_t useCnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const;
T const &f) { };
auto handle = pool->pop();
if (!handle) {
cnnlCreate(&(*handle));
}
cnnlSetQueue(*handle, (cnrtQueue_t)queue);
f(*handle);
pool->push(std::move(*handle));
}
template <typename T> cnnlDataType_t getCnnlDtype(infiniDtype_t dt);
void use_cnnl(std::shared_ptr<Pool<cnnlHandle_t>> &pool, T const &f) {
auto handle = pool->pop();
if (!handle) {
cnnlCreate(&(*handle));
}
f(*handle);
pool->push(std::move(*handle));
}
// set cnnl tensor descriptor without strides11 // set cnnl tensor descriptor without strides
inline void setCnnlTensor(cnnlTensorDescriptor_t desc, infiniStatus_t setCnnlTensor(cnnlTensorDescriptor_t desc,
const infiniopTensorDescriptor_t layout) { const InfiniopTensorDescriptor *layout);
std::vector<int> dims(layout->ndim);
for (size_t i = 0; i < layout->ndim; i++) {
dims[i] = static_cast<int>(layout->shape[i]);
}
cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_ARRAY,
cnnlDataTypeConvert(layout->dtype), dims.size(),
dims.data());
}
// set cnnl tensor descriptor with strides // set cnnl tensor descriptor with strides
inline void setCnnlTensorEx(cnnlTensorDescriptor_t desc, infiniStatus_t setCnnlTensorEx(cnnlTensorDescriptor_t desc,
const infiniopTensorDescriptor_t layout) { const InfiniopTensorDescriptor *layout);
std::vector<int> dim_size(layout->ndim), dim_stride(layout->ndim);
for (size_t i = 0; i < layout->ndim; i++) { } // namespace device::bang
dim_size[i] = static_cast<int>(layout->shape[i]);
dim_stride[i] = static_cast<int>(layout->strides[i]);
}
cnnlSetTensorDescriptorEx(
desc, CNNL_LAYOUT_ARRAY, cnnlDataTypeConvert(layout->dtype),
dim_size.size(), dim_size.data(), dim_stride.data());
}
#endif // __COMMON_BANG_H__ #endif // __COMMON_BANG_H__
...@@ -39,9 +39,7 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { ...@@ -39,9 +39,7 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
CREATE(INFINI_DEVICE_NVIDIA, cuda::nvidia); CREATE(INFINI_DEVICE_NVIDIA, cuda::nvidia);
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
case INFINI_DEVICE_CAMBRICON: { CREATE(INFINI_DEVICE_CAMBRICON, bang::cambricon);
return createBangHandle((infiniopBangHandle_t *)handle_ptr);
}
#endif #endif
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
case INFINI_DEVICE_ASCEND: { case INFINI_DEVICE_ASCEND: {
...@@ -76,9 +74,7 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) { ...@@ -76,9 +74,7 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) {
DELETE(INFINI_DEVICE_NVIDIA, cuda::nvidia); DELETE(INFINI_DEVICE_NVIDIA, cuda::nvidia);
#endif #endif
#ifdef ENABLE_CAMBRICON_API #ifdef ENABLE_CAMBRICON_API
case INFINI_DEVICE_CAMBRICON: { DELETE(INFINI_DEVICE_CAMBRICON, bang::cambricon);
return destroyBangHandle((infiniopBangHandle_t)handle);
}
#endif #endif
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
case INFINI_DEVICE_ASCEND: { case INFINI_DEVICE_ASCEND: {
......
#include "matmul_bang.h" #include "matmul_bang.h"
#include "../../../devices/bang/bang_handle.h"
#include "../../../devices/bang/common_bang.h" #include "../../../devices/bang/common_bang.h"
#include <cnnl_extra.h> #include <cnnl_extra.h>
...@@ -10,7 +9,7 @@ struct Descriptor::Opaque { ...@@ -10,7 +9,7 @@ struct Descriptor::Opaque {
cnnlMatMulAlgo_t algo; cnnlMatMulAlgo_t algo;
cnnlMatMulHeuristicResult_t algoResult; cnnlMatMulHeuristicResult_t algoResult;
cnnlTensorDescriptor_t a, b, c; cnnlTensorDescriptor_t a, b, c;
std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handle_pool; std::shared_ptr<device::bang::Handle::Internal> internal;
~Opaque() { ~Opaque() {
cnnlDestroyTensorDescriptor(a); cnnlDestroyTensorDescriptor(a);
...@@ -22,7 +21,7 @@ struct Descriptor::Opaque { ...@@ -22,7 +21,7 @@ struct Descriptor::Opaque {
} }
}; };
static void setMatrixTensorEx( static infiniStatus_t setMatrixTensorEx(
cnnlTensorDescriptor_t desc, cnnlTensorDescriptor_t desc,
const BlasMatrix &matrix, infiniDtype_t dtype, const BlasMatrix &matrix, infiniDtype_t dtype,
bool trans = false) { bool trans = false) {
...@@ -38,20 +37,21 @@ static void setMatrixTensorEx( ...@@ -38,20 +37,21 @@ static void setMatrixTensorEx(
case 3: { case 3: {
std::vector<int> dim_size = {batch, rows, cols}; std::vector<int> dim_size = {batch, rows, cols};
std::vector<int> dim_stride = {stride, row_stride, col_stride}; std::vector<int> dim_stride = {stride, row_stride, col_stride};
cnnlSetTensorDescriptorEx( CHECK_BANG(cnnlSetTensorDescriptorEx(
desc, CNNL_LAYOUT_ARRAY, desc, CNNL_LAYOUT_ARRAY,
cnnlDataTypeConvert(dtype), dim_size.size(), device::bang::getCnnlDtype(dtype), dim_size.size(),
dim_size.data(), dim_stride.data()); dim_size.data(), dim_stride.data()));
} break; } break;
case 2: { case 2: {
std::vector<int> dim_size = {rows, cols}; std::vector<int> dim_size = {rows, cols};
std::vector<int> dim_stride = {row_stride, col_stride}; std::vector<int> dim_stride = {row_stride, col_stride};
cnnlSetTensorDescriptorEx( CHECK_BANG(cnnlSetTensorDescriptorEx(
desc, CNNL_LAYOUT_ARRAY, desc, CNNL_LAYOUT_ARRAY,
cnnlDataTypeConvert(dtype), dim_size.size(), device::bang::getCnnlDtype(dtype), dim_size.size(),
dim_size.data(), dim_stride.data()); dim_size.data(), dim_stride.data()));
} break; } break;
} }
return INFINI_STATUS_SUCCESS;
} }
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -64,8 +64,8 @@ infiniStatus_t Descriptor::create( ...@@ -64,8 +64,8 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t c_desc, infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc, infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) { infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<infiniopBangHandle_t>(handle_); auto handle = reinterpret_cast<device::bang::cambricon::Handle *>(handle_);
auto dtype = c_desc->dtype; auto dtype = c_desc->dtype();
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) { if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINI_STATUS_BAD_TENSOR_DTYPE; return INFINI_STATUS_BAD_TENSOR_DTYPE;
...@@ -78,48 +78,47 @@ infiniStatus_t Descriptor::create( ...@@ -78,48 +78,47 @@ infiniStatus_t Descriptor::create(
} }
cnnlTensorDescriptor_t a, b, c; cnnlTensorDescriptor_t a, b, c;
cnnlCreateTensorDescriptor(&a); CHECK_BANG(cnnlCreateTensorDescriptor(&a));
cnnlCreateTensorDescriptor(&b); CHECK_BANG(cnnlCreateTensorDescriptor(&b));
cnnlCreateTensorDescriptor(&c); CHECK_BANG(cnnlCreateTensorDescriptor(&c));
setMatrixTensorEx(a, info.a_matrix, a_desc->dtype); CHECK_STATUS(setMatrixTensorEx(a, info.a_matrix, a_desc->dtype()));
setMatrixTensorEx(b, info.b_matrix, b_desc->dtype); CHECK_STATUS(setMatrixTensorEx(b, info.b_matrix, b_desc->dtype()));
setMatrixTensorEx(c, info.c_matrix, c_desc->dtype); CHECK_STATUS(setMatrixTensorEx(c, info.c_matrix, c_desc->dtype()));
cnnlMatMulDescriptor_t op; cnnlMatMulDescriptor_t op;
cnnlMatMulAlgo_t algo; cnnlMatMulAlgo_t algo;
cnnlMatMulHeuristicResult_t algoResult; cnnlMatMulHeuristicResult_t algoResult;
cnnlMatMulDescCreate(&op); CHECK_BANG(cnnlMatMulDescCreate(&op));
cnnlMatMulAlgoCreate(&algo); CHECK_BANG(cnnlMatMulAlgoCreate(&algo));
cnnlCreateMatMulHeuristicResult(&algoResult); CHECK_BANG(cnnlCreateMatMulHeuristicResult(&algoResult));
int32_t use_stride = true; int32_t use_stride = true;
cnnlSetMatMulDescAttr( CHECK_BANG(cnnlSetMatMulDescAttr(
op, op,
CNNL_MATMUL_USE_STRIDE, CNNL_MATMUL_USE_STRIDE,
&use_stride, &use_stride,
sizeof(int32_t)); sizeof(int32_t)));
int count = 0; int count = 0;
use_cnnl(handle->cnnl_handle_pool,
[&](cnnlHandle_t _handle) { CHECK_STATUS(
cnnlGetBatchMatMulAlgoHeuristic( handle->internal()->useCnnl(
_handle, (cnrtQueue_t) nullptr,
op, a, b, c, [&](cnnlHandle_t _handle) {
NULL, 1, &algoResult, &count); CHECK_BANG(
}); cnnlGetBatchMatMulAlgoHeuristic(
_handle,
op, a, b, c,
NULL, 1, &algoResult, &count));
return INFINI_STATUS_SUCCESS;
}));
size_t workspace_size; size_t workspace_size;
cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size); CHECK_BANG(cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size));
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
dtype, info, workspace_size, dtype, info, workspace_size,
new Opaque{ new Opaque{
op, op, algo, algoResult, a, b, c, handle->internal()},
algo,
algoResult,
a,
b,
c,
handle->cnnl_handle_pool},
handle->device, handle->device_id); handle->device, handle->device_id);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
...@@ -137,21 +136,22 @@ infiniStatus_t Descriptor::calculate( ...@@ -137,21 +136,22 @@ infiniStatus_t Descriptor::calculate(
if (_info.is_transed) { if (_info.is_transed) {
std::swap(a, b); std::swap(a, b);
} }
use_cnnl(_opaque->cnnl_handle_pool, CHECK_STATUS(_opaque->internal->useCnnl(
(cnrtQueue_t)stream, (cnrtQueue_t)stream,
[&](cnnlHandle_t handle) { [&](cnnlHandle_t handle) {
cnnlBatchMatMulBCast_v2( CHECK_BANG(cnnlBatchMatMulBCast_v2(
handle, handle,
_opaque->op, _opaque->op,
_opaque->algo, _opaque->algo,
&alpha, &alpha,
_opaque->a, a, _opaque->a, a,
_opaque->b, b, _opaque->b, b,
&beta, &beta,
_opaque->c, c, _opaque->c, c,
workspace, workspace,
workspace_size); workspace_size));
}); return INFINI_STATUS_SUCCESS;
}));
cnrtQueueSync((cnrtQueue_t)stream); cnrtQueueSync((cnrtQueue_t)stream);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
......
#include "infinirt_bang.h"
#include "../../utils.h"
#include "cnrt.h"
#define CHECK_BANGRT(RT_API) CHECK_INTERNAL(RT_API, cnrtSuccess)
namespace infinirt::bang {
// Report the number of Cambricon MLU devices visible to the runtime.
infiniStatus_t getDeviceCount(int *count) {
CHECK_BANGRT(cnrtGetDeviceCount(count));
return INFINI_STATUS_SUCCESS;
}
// Bind the calling thread to the MLU device with index `device_id`.
infiniStatus_t setDevice(int device_id) {
CHECK_BANGRT(cnrtSetDevice(device_id));
return INFINI_STATUS_SUCCESS;
}
// Block until all outstanding work on the current device has finished.
infiniStatus_t deviceSynchronize() {
CHECK_BANGRT(cnrtSyncDevice());
return INFINI_STATUS_SUCCESS;
}
// Create a new execution stream (backed by a cnrt queue) and return it
// through `stream_ptr`. On any cnrt failure the error is propagated by
// CHECK_BANGRT and `*stream_ptr` is left untouched.
infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) {
    cnrtQueue_t queue;
    // Fix: the original called cnrtQueueCreate(&stream), taking the address
    // of the out-parameter (an infinirtStream_t*), not of the local queue.
    // The queue handle must be created into `queue` and then published.
    CHECK_BANGRT(cnrtQueueCreate(&queue));
    *stream_ptr = queue;
    return INFINI_STATUS_SUCCESS;
}
// Destroy a stream previously created by streamCreate.
infiniStatus_t streamDestroy(infinirtStream_t stream) {
CHECK_BANGRT(cnrtQueueDestroy((cnrtQueue_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Block until every operation queued on `stream` has completed.
infiniStatus_t streamSynchronize(infinirtStream_t stream) {
CHECK_BANGRT(cnrtQueueSync((cnrtQueue_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Make `stream` wait until `event` (a cnrt notifier) has been reached.
// The trailing 0 is the flags argument of cnrtQueueWaitNotifier.
infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) {
CHECK_BANGRT(cnrtQueueWaitNotifier((cnrtNotifier_t)event, (cnrtQueue_t)stream, 0));
return INFINI_STATUS_SUCCESS;
}
// Create an event (backed by a cnrt notifier) and return it via `event_ptr`.
// On failure the error is propagated and `*event_ptr` is left untouched.
infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) {
cnrtNotifier_t notifier;
CHECK_BANGRT(cnrtNotifierCreate(&notifier));
*event_ptr = notifier;
return INFINI_STATUS_SUCCESS;
}
// Record `event` at the current position in `stream`'s work queue.
infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) {
CHECK_BANGRT(cnrtPlaceNotifier((cnrtNotifier_t)event, (cnrtQueue_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Non-blocking poll of `event`: sets *status_ptr to COMPLETE when the
// notifier has been reached, NOT_READY while the device is still busy,
// and propagates any other cnrt error through CHECK_BANGRT.
infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) {
    // Fix: the original queried `stream`, a name that is not in scope in
    // this function (and was cast to cnrtQueue_t). The object to query is
    // the `event` parameter, cast to the cnrt notifier type.
    auto status = cnrtQueryNotifier((cnrtNotifier_t)event);
    if (status == cnrtSuccess) {
        *status_ptr = INFINIRT_EVENT_COMPLETE;
    } else if (status == cnrtErrorBusy) {
        *status_ptr = INFINIRT_EVENT_NOT_READY;
    } else {
        CHECK_BANGRT(status);
    }
    return INFINI_STATUS_SUCCESS;
}
// Block the calling thread until `event` has been reached.
infiniStatus_t eventSynchronize(infinirtEvent_t event) {
CHECK_BANGRT(cnrtWaitNotifier((cnrtNotifier_t)event));
return INFINI_STATUS_SUCCESS;
}
// Release the notifier backing `event`.
infiniStatus_t eventDestroy(infinirtEvent_t event) {
CHECK_BANGRT(cnrtNotifierDestroy((cnrtNotifier_t)event));
return INFINI_STATUS_SUCCESS;
}
// Allocate `size` bytes of device (MLU) memory into `*p_ptr`.
infiniStatus_t mallocDevice(void **p_ptr, size_t size) {
CHECK_BANGRT(cnrtMalloc(p_ptr, size));
return INFINI_STATUS_SUCCESS;
}
// Allocate `size` bytes of host memory through the cnrt allocator.
infiniStatus_t mallocHost(void **p_ptr, size_t size) {
CHECK_BANGRT(cnrtHostMalloc(p_ptr, size));
return INFINI_STATUS_SUCCESS;
}
// Free memory obtained from mallocDevice.
infiniStatus_t freeDevice(void *ptr) {
CHECK_BANGRT(cnrtFree(ptr));
return INFINI_STATUS_SUCCESS;
}
// Free memory obtained from mallocHost.
infiniStatus_t freeHost(void *ptr) {
CHECK_BANGRT(cnrtFreeHost(ptr));
return INFINI_STATUS_SUCCESS;
}
// Translate an infinirt copy direction into the corresponding cnrt
// transfer direction. Any unrecognized kind maps to cnrtMemcpyNoDirection.
cnrtMemTransDir_t toBangMemcpyKind(infinirtMemcpyKind_t kind) {
    if (kind == INFINIRT_MEMCPY_H2D) {
        return cnrtMemcpyHostToDev;
    }
    if (kind == INFINIRT_MEMCPY_D2H) {
        return cnrtMemcpyDevToHost;
    }
    if (kind == INFINIRT_MEMCPY_D2D) {
        return cnrtMemcpyDevToDev;
    }
    if (kind == INFINIRT_MEMCPY_H2H) {
        return cnrtMemcpyHostToHost;
    }
    return cnrtMemcpyNoDirection;
}
// Copy `size` bytes from `src` to `dst`; the cnrt transfer direction is
// derived from `kind` (blocking cnrtMemcpy — presumed synchronous per the
// cnrt API naming; confirm against cnrt docs).
infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) {
CHECK_BANGRT(cnrtMemcpy(dst, src, size, toBangMemcpyKind(kind)));
return INFINI_STATUS_SUCCESS;
}
// Asynchronous copy of `size` bytes enqueued on `stream`.
infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) {
CHECK_BANGRT(cnrtMemcpyAsync_V2(dst, src, size, (cnrtQueue_t)stream, toBangMemcpyKind(kind)));
return INFINI_STATUS_SUCCESS;
}
// Does not support async malloc. Use blocking-style malloc instead
// (`stream` is intentionally ignored).
infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) {
CHECK_BANGRT(cnrtMalloc(p_ptr, size));
return INFINI_STATUS_SUCCESS;
}
// Does not support async free. Use blocking-style free instead
// (`stream` is intentionally ignored).
infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
CHECK_BANGRT(cnrtFree(ptr));
return INFINI_STATUS_SUCCESS;
}
} // namespace infinirt::bang
#ifndef __INFINIRT_BANG_H__
#define __INFINIRT_BANG_H__
#include "../infinirt_impl.h"
// Public surface of the Cambricon BANG runtime backend: with
// ENABLE_BANG_API defined the real device API declarations are emitted,
// otherwise no-op stubs take their place (both macros presumably come
// from infinirt_impl.h — confirm).
namespace infinirt::bang {
#ifdef ENABLE_BANG_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::bang
#endif // __INFINIRT_BANG_H__
#include "infinirt.h" #include "infinirt.h"
#include "../utils.h" #include "../utils.h"
#include "ascend/infinirt_ascend.h" #include "ascend/infinirt_ascend.h"
#include "bang/infinirt_bang.h"
#include "cpu/infinirt_cpu.h" #include "cpu/infinirt_cpu.h"
#include "cuda/infinirt_cuda.cuh" #include "cuda/infinirt_cuda.cuh"
...@@ -51,6 +52,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ ...@@ -51,6 +52,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
case INFINI_DEVICE_NVIDIA: \ case INFINI_DEVICE_NVIDIA: \
_status = infinirt::cuda::API PARAMS; \ _status = infinirt::cuda::API PARAMS; \
break; \ break; \
case INFINI_DEVICE_CAMBRICON: \
_status = infinirt::bang::API PARAMS; \
break; \
case INFINI_DEVICE_ASCEND: \ case INFINI_DEVICE_ASCEND: \
_status = infinirt::ascend::API PARAMS; \ _status = infinirt::ascend::API PARAMS; \
break; \ break; \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment