Commit 978da5ec authored by Zimin Li's avatar Zimin Li
Browse files

issue/89 format code, change use_cnnl to useCnnl, change async malloc and free...

issue/89 format code, change use_cnnl to useCnnl, change async malloc and free to use blocking versions, etc.
parent e14dd2af
#ifndef __INFINIOP_BANG_INTERNAL_H__ #ifndef __INFINIOP_BANG_INTERNAL_H__
#define __INFINIOP_BANG_INTERNAL_H__ #define __INFINIOP_BANG_INTERNAL_H__
#include "../../../utils.h"
#include "../pool.h" #include "../pool.h"
#include "bang_handle.h" #include "bang_handle.h"
#include "cnnl.h" #include "cnnl.h"
#include "cnrt.h" #include "cnrt.h"
#include <functional> #include <functional>
#include "../../../utils.h"
namespace device::bang { namespace device::bang {
...@@ -17,7 +17,7 @@ class Handle::Internal { ...@@ -17,7 +17,7 @@ class Handle::Internal {
using Fn = std::function<infiniStatus_t(T)>; using Fn = std::function<infiniStatus_t(T)>;
public: public:
infiniStatus_t use_cnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const; infiniStatus_t useCnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const;
}; };
cnnlDataType_t getCnnlDtype(infiniDtype_t dt); cnnlDataType_t getCnnlDtype(infiniDtype_t dt);
......
#include "../../tensor.h"
#include "../pool.h" #include "../pool.h"
#include "_internal.h"
#include "cnnl.h" #include "cnnl.h"
#include "common_bang.h"
#include "infiniop/tensor_descriptor.h" #include "infiniop/tensor_descriptor.h"
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "../../tensor.h"
#include "_internal.h"
#include "common_bang.h"
namespace device::bang { namespace device::bang {
...@@ -17,7 +17,7 @@ auto Handle::internal() const -> const std::shared_ptr<Internal> & { ...@@ -17,7 +17,7 @@ auto Handle::internal() const -> const std::shared_ptr<Internal> & {
return _internal; return _internal;
} }
infiniStatus_t Handle::Internal::use_cnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const { infiniStatus_t Handle::Internal::useCnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const {
auto handle = cnnl_handles.pop(); auto handle = cnnl_handles.pop();
if (!handle) { if (!handle) {
cnnlCreate(&(*handle)); cnnlCreate(&(*handle));
...@@ -30,43 +30,43 @@ infiniStatus_t Handle::Internal::use_cnnl(cnrtQueue_t queue, const Fn<cnnlHandle ...@@ -30,43 +30,43 @@ infiniStatus_t Handle::Internal::use_cnnl(cnrtQueue_t queue, const Fn<cnnlHandle
// Translate an infiniop dtype enum into the matching CNNL dtype enum.
// Any dtype not listed here is reported as CNNL_DTYPE_INVALID so the
// caller can reject unsupported tensors.
cnnlDataType_t getCnnlDtype(infiniDtype_t dt) {
    if (dt == INFINI_DTYPE_F32) {
        return CNNL_DTYPE_FLOAT;
    }
    if (dt == INFINI_DTYPE_F64) {
        return CNNL_DTYPE_DOUBLE;
    }
    if (dt == INFINI_DTYPE_F16) {
        return CNNL_DTYPE_HALF;
    }
    if (dt == INFINI_DTYPE_BF16) {
        return CNNL_DTYPE_BFLOAT16;
    }
    if (dt == INFINI_DTYPE_I8) {
        return CNNL_DTYPE_INT8;
    }
    if (dt == INFINI_DTYPE_I32) {
        return CNNL_DTYPE_INT32;
    }
    if (dt == INFINI_DTYPE_I64) {
        return CNNL_DTYPE_INT64;
    }
    if (dt == INFINI_DTYPE_U8) {
        return CNNL_DTYPE_UINT8;
    }
    return CNNL_DTYPE_INVALID;
}
// Configure a CNNL tensor descriptor from an infiniop tensor layout,
// using shape only (CNNL_LAYOUT_ARRAY, no explicit strides — CNNL
// assumes a contiguous row-major layout).
// Returns INFINI_STATUS_SUCCESS, or propagates the CNNL error via
// CHECK_BANG if cnnlSetTensorDescriptor fails.
inline infiniStatus_t setCnnlTensor(cnnlTensorDescriptor_t desc,
                                    const InfiniopTensorDescriptor *layout) {
    // CNNL takes `int` dims, infiniop shapes are size_t — convert per axis.
    std::vector<int> dims(layout->ndim());
    for (size_t i = 0; i < layout->ndim(); i++) {
        dims[i] = static_cast<int>(layout->shape()[i]);
    }
    CHECK_BANG(cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_ARRAY,
                                       getCnnlDtype(layout->dtype()),
                                       // explicit cast avoids size_t -> int
                                       // narrowing in the dim-count argument
                                       static_cast<int>(dims.size()),
                                       dims.data()));
    return INFINI_STATUS_SUCCESS;
}
// set cnnl tensor descriptor with strides // set cnnl tensor descriptor with strides
inline infiniStatus_t setCnnlTensorEx(cnnlTensorDescriptor_t desc, inline infiniStatus_t setCnnlTensorEx(cnnlTensorDescriptor_t desc,
const InfiniopTensorDescriptor* layout) { const InfiniopTensorDescriptor *layout) {
std::vector<int> dim_size(layout->ndim()), dim_stride(layout->ndim()); std::vector<int> dim_size(layout->ndim()), dim_stride(layout->ndim());
for (size_t i = 0; i < layout->ndim(); i++) { for (size_t i = 0; i < layout->ndim(); i++) {
dim_size[i] = static_cast<int>(layout->shape()[i]); dim_size[i] = static_cast<int>(layout->shape()[i]);
......
#include "matmul_bang.h" #include "matmul_bang.h"
#include "../../../devices/bang/_internal.h"
#include "../../../devices/bang/bang_handle.h" #include "../../../devices/bang/bang_handle.h"
#include "../../../devices/bang/common_bang.h" #include "../../../devices/bang/common_bang.h"
#include "../../../devices/bang/_internal.h"
#include <cnnl_extra.h> #include <cnnl_extra.h>
namespace op::matmul::bang { namespace op::matmul::bang {
...@@ -101,16 +101,15 @@ infiniStatus_t Descriptor::create( ...@@ -101,16 +101,15 @@ infiniStatus_t Descriptor::create(
sizeof(int32_t)); sizeof(int32_t));
int count = 0; int count = 0;
CHECK_STATUS(handle->internal()->use_cnnl((cnrtQueue_t)nullptr, CHECK_STATUS(handle->internal()->useCnnl((cnrtQueue_t) nullptr,
[&](cnnlHandle_t _handle) { [&](cnnlHandle_t _handle) {
CHECK_BANG( CHECK_BANG(
cnnlGetBatchMatMulAlgoHeuristic( cnnlGetBatchMatMulAlgoHeuristic(
_handle, _handle,
op, a, b, c, op, a, b, c,
NULL, 1, &algoResult, &count) NULL, 1, &algoResult, &count));
); return INFINI_STATUS_SUCCESS;
return INFINI_STATUS_SUCCESS; }));
}));
size_t workspace_size; size_t workspace_size;
CHECK_BANG(cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size)); CHECK_BANG(cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size));
...@@ -118,8 +117,7 @@ infiniStatus_t Descriptor::create( ...@@ -118,8 +117,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
dtype, info, workspace_size, dtype, info, workspace_size,
new Opaque{ new Opaque{
op, algo, algoResult, a, b, c, handle->internal() op, algo, algoResult, a, b, c, handle->internal()},
},
handle->device, handle->device_id); handle->device, handle->device_id);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
...@@ -137,7 +135,7 @@ infiniStatus_t Descriptor::calculate( ...@@ -137,7 +135,7 @@ infiniStatus_t Descriptor::calculate(
if (_info.is_transed) { if (_info.is_transed) {
std::swap(a, b); std::swap(a, b);
} }
CHECK_STATUS(_opaque->internal->use_cnnl( CHECK_STATUS(_opaque->internal->useCnnl(
(cnrtQueue_t)stream, (cnrtQueue_t)stream,
[&](cnnlHandle_t handle) { [&](cnnlHandle_t handle) {
CHECK_BANG(cnnlBatchMatMulBCast_v2( CHECK_BANG(cnnlBatchMatMulBCast_v2(
...@@ -151,7 +149,7 @@ infiniStatus_t Descriptor::calculate( ...@@ -151,7 +149,7 @@ infiniStatus_t Descriptor::calculate(
_opaque->c, c, _opaque->c, c,
workspace, workspace,
workspace_size)); workspace_size));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
})); }));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
......
#include "../../utils.h"
#include "infinirt_bang.h" #include "infinirt_bang.h"
#include "../../utils.h"
#include "cnrt.h" #include "cnrt.h"
#define CHECK_BANGRT(RT_API) CHECK_INTERNAL(RT_API, cnrtSuccess) #define CHECK_BANGRT(RT_API) CHECK_INTERNAL(RT_API, cnrtSuccess)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment