Commit 978da5ec authored by Zimin Li's avatar Zimin Li
Browse files

issue/89 format code, change use_cnnl to useCnnl, change async malloc and free...

issue/89 format code, change use_cnnl to useCnnl, change async malloc and free to use blocking versions, etc.
parent e14dd2af
#ifndef __INFINIOP_BANG_INTERNAL_H__
#define __INFINIOP_BANG_INTERNAL_H__
#include "../../../utils.h"
#include "../pool.h"
#include "bang_handle.h"
#include "cnnl.h"
#include "cnrt.h"
#include <functional>
#include "../../../utils.h"
namespace device::bang {
......@@ -17,7 +17,7 @@ class Handle::Internal {
using Fn = std::function<infiniStatus_t(T)>;
public:
infiniStatus_t use_cnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const;
infiniStatus_t useCnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const;
};
cnnlDataType_t getCnnlDtype(infiniDtype_t dt);
......
#include "../../tensor.h"
#include "../pool.h"
#include "_internal.h"
#include "cnnl.h"
#include "common_bang.h"
#include "infiniop/tensor_descriptor.h"
#include <memory>
#include <vector>
#include "../../tensor.h"
#include "_internal.h"
#include "common_bang.h"
namespace device::bang {
......@@ -17,7 +17,7 @@ auto Handle::internal() const -> const std::shared_ptr<Internal> & {
return _internal;
}
infiniStatus_t Handle::Internal::use_cnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const {
infiniStatus_t Handle::Internal::useCnnl(cnrtQueue_t queue, const Fn<cnnlHandle_t> &f) const {
auto handle = cnnl_handles.pop();
if (!handle) {
cnnlCreate(&(*handle));
......@@ -30,43 +30,43 @@ infiniStatus_t Handle::Internal::use_cnnl(cnrtQueue_t queue, const Fn<cnnlHandle
// Map an infiniop dtype to the corresponding cnnl dtype.
// Returns CNNL_DTYPE_INVALID for any dtype cnnl has no equivalent for
// (callers are expected to treat that as an unsupported-dtype error).
cnnlDataType_t getCnnlDtype(infiniDtype_t dt) {
    switch (dt) {
    case INFINI_DTYPE_F32:
        return CNNL_DTYPE_FLOAT;
    case INFINI_DTYPE_F64:
        return CNNL_DTYPE_DOUBLE;
    case INFINI_DTYPE_F16:
        return CNNL_DTYPE_HALF;
    case INFINI_DTYPE_I8:
        return CNNL_DTYPE_INT8;
    case INFINI_DTYPE_I32:
        return CNNL_DTYPE_INT32;
    case INFINI_DTYPE_U8:
        return CNNL_DTYPE_UINT8;
    case INFINI_DTYPE_BF16:
        return CNNL_DTYPE_BFLOAT16;
    case INFINI_DTYPE_I64:
        return CNNL_DTYPE_INT64;
    default:
        return CNNL_DTYPE_INVALID;
    }
}
// Set a cnnl tensor descriptor from an infiniop tensor layout, without strides
// (CNNL_LAYOUT_ARRAY — presumably assumes contiguous data; use setCnnlTensorEx
// when strides must be honored).
// Returns INFINI_STATUS_SUCCESS, or propagates the error from CHECK_BANG if
// cnnlSetTensorDescriptor fails.
inline infiniStatus_t setCnnlTensor(cnnlTensorDescriptor_t desc,
                                    const InfiniopTensorDescriptor *layout) {
    std::vector<int> dims(layout->ndim());
    for (size_t i = 0; i < layout->ndim(); i++) {
        // cnnl takes int dims; narrow each extent explicitly.
        dims[i] = static_cast<int>(layout->shape()[i]);
    }
    CHECK_BANG(cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_ARRAY,
                                       getCnnlDtype(layout->dtype()),
                                       static_cast<int>(dims.size()),
                                       dims.data()));
    return INFINI_STATUS_SUCCESS;
}
// set cnnl tensor descriptor with strides
inline infiniStatus_t setCnnlTensorEx(cnnlTensorDescriptor_t desc,
const InfiniopTensorDescriptor* layout) {
const InfiniopTensorDescriptor *layout) {
std::vector<int> dim_size(layout->ndim()), dim_stride(layout->ndim());
for (size_t i = 0; i < layout->ndim(); i++) {
dim_size[i] = static_cast<int>(layout->shape()[i]);
......
#include "matmul_bang.h"
#include "../../../devices/bang/_internal.h"
#include "../../../devices/bang/bang_handle.h"
#include "../../../devices/bang/common_bang.h"
#include "../../../devices/bang/_internal.h"
#include <cnnl_extra.h>
namespace op::matmul::bang {
......@@ -101,16 +101,15 @@ infiniStatus_t Descriptor::create(
sizeof(int32_t));
int count = 0;
CHECK_STATUS(handle->internal()->use_cnnl((cnrtQueue_t)nullptr,
[&](cnnlHandle_t _handle) {
CHECK_BANG(
cnnlGetBatchMatMulAlgoHeuristic(
_handle,
op, a, b, c,
NULL, 1, &algoResult, &count)
);
return INFINI_STATUS_SUCCESS;
}));
CHECK_STATUS(handle->internal()->useCnnl((cnrtQueue_t) nullptr,
[&](cnnlHandle_t _handle) {
CHECK_BANG(
cnnlGetBatchMatMulAlgoHeuristic(
_handle,
op, a, b, c,
NULL, 1, &algoResult, &count));
return INFINI_STATUS_SUCCESS;
}));
size_t workspace_size;
CHECK_BANG(cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size));
......@@ -118,8 +117,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor(
dtype, info, workspace_size,
new Opaque{
op, algo, algoResult, a, b, c, handle->internal()
},
op, algo, algoResult, a, b, c, handle->internal()},
handle->device, handle->device_id);
return INFINI_STATUS_SUCCESS;
}
......@@ -137,7 +135,7 @@ infiniStatus_t Descriptor::calculate(
if (_info.is_transed) {
std::swap(a, b);
}
CHECK_STATUS(_opaque->internal->use_cnnl(
CHECK_STATUS(_opaque->internal->useCnnl(
(cnrtQueue_t)stream,
[&](cnnlHandle_t handle) {
CHECK_BANG(cnnlBatchMatMulBCast_v2(
......@@ -151,7 +149,7 @@ infiniStatus_t Descriptor::calculate(
_opaque->c, c,
workspace,
workspace_size));
return INFINI_STATUS_SUCCESS;
return INFINI_STATUS_SUCCESS;
}));
return INFINI_STATUS_SUCCESS;
......
#include "../../utils.h"
#include "infinirt_bang.h"
#include "../../utils.h"
#include "cnrt.h"
#define CHECK_BANGRT(RT_API) CHECK_INTERNAL(RT_API, cnrtSuccess)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment