Commit 52536c5a authored by YdrMaster's avatar YdrMaster
Browse files

issue/63/style: 整理代码,优化风格


Signed-off-by: default avatarYdrMaster <ydrml@hotmail.com>
parent f23aa206
#include "matmul_ascend.h"
#include "../../../devices/ascend/ascend_handle.h"
#include "../../../devices/ascend/tensor_aclnn.h"
#include "../../utils.h"
#include <acl/acl_base.h>
......@@ -10,16 +11,16 @@ namespace matmul::ascend {
struct Descriptor::Opaque {
    // Recorded aclnn executor, re-created by each calculate() call via
    // aclnnGemmGetWorkspaceSize; mutable so the pointer can be refreshed
    // through a const Descriptor.
    mutable aclOpExecutor *executor;
    // aclnn tensor descriptors for the output (c) and the inputs (a, b);
    // owned by this struct and released in the destructor.
    aclnnTensorDescriptor_t c, a, b;
    // cubeMathType
    // see doc:
    // https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha002/apiref/appdevgapi/context/aclnnBatchMatMul.md
    int8_t mt;

    ~Opaque() {
        delete c;
        delete a;
        delete b;
        aclDestroyAclOpExecutor(executor);
    }
};
......@@ -29,76 +30,77 @@ Descriptor::~Descriptor() {
}
infiniopStatus_t Descriptor::create(
infiniopAscendHandle_t handle,
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
infiniDtype_t dtype = c_desc->dtype;
auto handle = reinterpret_cast<infiniopAscendHandle_t>(handle_);
auto dtype = c_desc->dtype;
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINIOP_STATUS_BAD_TENSOR_DTYPE;
}
infiniopStatus_t status;
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
auto cDesc = new aclnnTensorDescriptor(),
aDesc = new aclnnTensorDescriptor(),
bDesc = new aclnnTensorDescriptor();
auto c = new aclnnTensorDescriptor(),
a = new aclnnTensorDescriptor(),
b = new aclnnTensorDescriptor();
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched
// operation
CHECK_STATUS(cDesc->setDescriptor(
CHECK_STATUS(c->setDescriptor(
toAclDataType(c_desc->dtype),
{static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(info.c_matrix.cols)},
{info.c_matrix.row_stride, info.c_matrix.col_stride}),
{static_cast<int64_t>(_info.c_matrix.rows),
static_cast<int64_t>(_info.c_matrix.cols)},
{_info.c_matrix.row_stride, _info.c_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(aDesc->setDescriptor(
CHECK_STATUS(a->setDescriptor(
toAclDataType(a_desc->dtype),
{static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(info.a_matrix.cols)},
{info.a_matrix.row_stride, info.a_matrix.col_stride}),
{static_cast<int64_t>(_info.a_matrix.rows),
static_cast<int64_t>(_info.a_matrix.cols)},
{_info.a_matrix.row_stride, _info.a_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(bDesc->setDescriptor(
CHECK_STATUS(b->setDescriptor(
toAclDataType(b_desc->dtype),
{static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(info.b_matrix.cols)},
{info.b_matrix.row_stride, info.b_matrix.col_stride}),
{static_cast<int64_t>(_info.b_matrix.rows),
static_cast<int64_t>(_info.b_matrix.cols)},
{_info.b_matrix.row_stride, _info.b_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(cDesc->createTensor(), INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(aDesc->createTensor(), INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(bDesc->createTensor(), INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(c->createTensor(), INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(a->createTensor(), INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(b->createTensor(), INFINIOP_STATUS_SUCCESS);
auto tc = cDesc->t,
ta = aDesc->t,
tb = bDesc->t;
auto tc = c->t,
ta = a->t,
tb = b->t;
aclOpExecutor *executor;
size_t workspaceSize;
size_t workspace_size;
// aclnnGemm support C = alpha * A @ B + beta * C
// see
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha003/apiref/aolapi/context/aclnnGemm.md
// use alpha = 0.5, beta = 0.5 temporarily
int8_t mt = 1;
auto ret = aclnnGemmGetWorkspaceSize(ta, tb, tc, .5, .5, 0, 0, tc, mt, &workspaceSize, &executor);
auto ret = aclnnGemmGetWorkspaceSize(ta, tb, tc, .5, .5, 0, 0, tc, mt, &workspace_size, &executor);
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemmGetWorkspaceSize failed. ERROR: %d\n", ret);
return INFINIOP_STATUS_INTERNAL_ERROR);
aclSetAclOpExecutorRepeatable(executor);
*desc_ptr = new Descriptor(
dtype, info, workspaceSize,
dtype, _info, workspace_size,
new Opaque{
executor,
cDesc,
aDesc,
bDesc,
c,
a,
b,
mt,
},
handle->device, handle->device_id);
......@@ -115,28 +117,29 @@ infiniopStatus_t Descriptor::calculate(
float alpha,
void *stream) const {
auto tc = _opaque->cDesc->t,
ta = _opaque->aDesc->t,
tb = _opaque->bDesc->t;
auto tc = _opaque->c->t,
ta = _opaque->a->t,
tb = _opaque->b->t;
size_t workspaceSize;
size_t workspace_size;
auto ret = aclnnGemmGetWorkspaceSize(
ta, tb, tc, alpha, beta, 0, 0, tc, _opaque->mt,
&workspaceSize, &(_opaque->executor));
&workspace_size, &(_opaque->executor));
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemmGetWorkspaceSize failed. ERROR: %d\n", ret);
return INFINIOP_STATUS_INTERNAL_ERROR);
if (workspaceSize_ < workspaceSize) {
if (workspaceSize_ < workspace_size) {
return INFINIOP_STATUS_INSUFFICIENT_WORKSPACE;
}
aclSetAclOpExecutorRepeatable(_opaque->executor);
for (size_t i = 0; i < info.batch; ++i) {
AclSetTensorAddr(_opaque->executor, 0, ta, ((char *)a) + i * info.a_matrix.stride * infiniSizeof(dtype));
AclSetTensorAddr(_opaque->executor, 1, tb, ((char *)b) + i * info.b_matrix.stride * infiniSizeof(dtype));
AclSetTensorAddr(_opaque->executor, 2, tc, ((char *)c) + i * info.c_matrix.stride * infiniSizeof(dtype));
AclSetTensorAddr(_opaque->executor, 3, tc, ((char *)c) + i * info.c_matrix.stride * infiniSizeof(dtype));
ret = aclnnGemm(workspace, workspaceSize, _opaque->executor, stream);
auto unit = infiniSizeof(_dtype);
for (size_t i = 0; i < _info.batch; ++i) {
AclSetTensorAddr(_opaque->executor, 0, ta, ((char *)a) + i * _info.a_matrix.stride * unit);
AclSetTensorAddr(_opaque->executor, 1, tb, ((char *)b) + i * _info.b_matrix.stride * unit);
AclSetTensorAddr(_opaque->executor, 2, tc, ((char *)c) + i * _info.c_matrix.stride * unit);
AclSetTensorAddr(_opaque->executor, 3, tc, ((char *)c) + i * _info.c_matrix.stride * unit);
ret = aclnnGemm(workspace, workspace_size, _opaque->executor, stream);
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret);
return INFINIOP_STATUS_INTERNAL_ERROR);
......
#ifndef __MATMUL_ASCEND_H__
#define __MATMUL_ASCEND_H__

#include "../../../devices/ascend/ascend_handle.h"
#include "../matmul.h"

// Declare the Ascend matmul Descriptor. The DESCRIPTOR macro now takes only
// the backend namespace; create() receives the generic infiniopHandle_t and
// casts it internally.
DESCRIPTOR(ascend)

#endif // __MATMUL_ASCEND_H__
#include "matmul_bang.h"
#include "../../../devices/bang/bang_handle.h"
#include "../../../devices/bang/common_bang.h"
#include "../../utils.h"
#include <cnnl_extra.h>
......@@ -6,17 +7,17 @@
namespace matmul::bang {
struct Descriptor::Opaque {
    // cnnl matmul operator descriptor, chosen algorithm and the heuristic
    // result it was picked from; all owned by this struct.
    cnnlMatMulDescriptor_t op;
    cnnlMatMulAlgo_t algo;
    cnnlMatMulHeuristicResult_t algoResult;
    // cnnl tensor descriptors for the inputs (a, b) and the output (c).
    cnnlTensorDescriptor_t a, b, c;
    // Pool of cnnl handles shared with the device handle.
    std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handle_pool;

    ~Opaque() {
        cnnlDestroyTensorDescriptor(a);
        cnnlDestroyTensorDescriptor(b);
        cnnlDestroyTensorDescriptor(c);
        cnnlMatMulDescDestroy(op);
        cnnlMatMulAlgoDestroy(algo);
        cnnlDestroyMatMulHeuristicResult(algoResult);
    }
......@@ -59,41 +60,42 @@ Descriptor::~Descriptor() {
}
infiniopStatus_t Descriptor::create(
infiniopBangHandle_t handle,
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
infiniDtype_t dtype = c_desc->dtype;
auto handle = reinterpret_cast<infiniopBangHandle_t>(handle_);
auto dtype = c_desc->dtype;
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINIOP_STATUS_BAD_TENSOR_DTYPE;
}
infiniopStatus_t status;
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
cnnlTensorDescriptor_t aDesc, bDesc, cDesc;
cnnlCreateTensorDescriptor(&aDesc);
cnnlCreateTensorDescriptor(&bDesc);
cnnlCreateTensorDescriptor(&cDesc);
cnnlTensorDescriptor_t a, b, c;
cnnlCreateTensorDescriptor(&a);
cnnlCreateTensorDescriptor(&b);
cnnlCreateTensorDescriptor(&c);
setMatrixTensorEx(aDesc, info.a_matrix, a_desc->dtype);
setMatrixTensorEx(bDesc, info.b_matrix, b_desc->dtype);
setMatrixTensorEx(cDesc, info.c_matrix, c_desc->dtype);
setMatrixTensorEx(a, _info.a_matrix, a_desc->dtype);
setMatrixTensorEx(b, _info.b_matrix, b_desc->dtype);
setMatrixTensorEx(c, _info.c_matrix, c_desc->dtype);
cnnlMatMulDescriptor_t opDesc;
cnnlMatMulDescriptor_t op;
cnnlMatMulAlgo_t algo;
cnnlMatMulHeuristicResult_t algoResult;
cnnlMatMulDescCreate(&opDesc);
cnnlMatMulDescCreate(&op);
cnnlMatMulAlgoCreate(&algo);
cnnlCreateMatMulHeuristicResult(&algoResult);
int32_t use_stride = true;
cnnlSetMatMulDescAttr(
opDesc,
op,
CNNL_MATMUL_USE_STRIDE,
&use_stride,
sizeof(int32_t));
......@@ -102,7 +104,7 @@ infiniopStatus_t Descriptor::create(
[&](cnnlHandle_t _handle) {
cnnlGetBatchMatMulAlgoHeuristic(
_handle,
opDesc, aDesc, bDesc, cDesc,
op, a, b, c,
NULL, 1, &algoResult, &count);
});
......@@ -110,14 +112,14 @@ infiniopStatus_t Descriptor::create(
cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size);
*desc_ptr = new Descriptor(
dtype, info, workspace_size,
dtype, _info, workspace_size,
new Opaque{
opDesc,
op,
algo,
algoResult,
aDesc,
bDesc,
cDesc,
a,
b,
c,
handle->cnnl_handle_pool},
handle->device, handle->device_id);
return INFINIOP_STATUS_SUCCESS;
......@@ -133,7 +135,7 @@ infiniopStatus_t Descriptor::calculate(
float alpha,
void *stream) const {
if (info.is_transed) {
if (_info.is_transed) {
std::swap(a, b);
}
use_cnnl(_opaque->cnnl_handle_pool,
......@@ -141,13 +143,13 @@ infiniopStatus_t Descriptor::calculate(
[&](cnnlHandle_t handle) {
cnnlBatchMatMulBCast_v2(
handle,
_opaque->opDesc,
_opaque->op,
_opaque->algo,
&alpha,
_opaque->aDesc, a,
_opaque->bDesc, b,
_opaque->a, a,
_opaque->b, b,
&beta,
_opaque->cDesc, c,
_opaque->c, c,
workspace,
workspace_size);
});
......
#ifndef __MATMUL_BANG_H__
#define __MATMUL_BANG_H__

#include "../../../devices/bang/bang_handle.h"
#include "../matmul.h"

// Declare the Cambricon BANG matmul Descriptor. The DESCRIPTOR macro now
// takes only the backend namespace; create() receives the generic
// infiniopHandle_t and casts it internally.
DESCRIPTOR(bang)

#endif // __MATMUL_BANG_H__
#include "./matmul_cpu.h"
#include "../../../devices/cpu/common_cpu.h"
#include "../../../devices/cpu/cpu_handle.h"
#include <iostream>
namespace matmul::cpu {
......@@ -7,25 +8,26 @@ namespace matmul::cpu {
Descriptor::~Descriptor() = default;
infiniopStatus_t Descriptor::create(
infiniopCpuHandle_t handle,
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
infiniDtype_t dtype = c_desc->dtype;
auto handle = reinterpret_cast<infiniopCpuHandle_t>(handle_);
auto dtype = c_desc->dtype;
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINIOP_STATUS_BAD_TENSOR_DTYPE;
}
infiniopStatus_t status;
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
*desc_ptr = new Descriptor(
dtype, info, 0,
dtype, _info, 0,
nullptr,
handle->device, handle->device_id);
return INFINIOP_STATUS_SUCCESS;
......@@ -33,26 +35,24 @@ infiniopStatus_t Descriptor::create(
template <typename Tdata>
void calculate(
Descriptor const *desc,
MatmulInfo const &_info,
void *c,
float beta,
void const *a,
void const *b,
float alpha) {
auto info = desc->info;
if (info.is_transed) {
if (_info.is_transed) {
std::swap(a, b);
}
for (size_t i = 0; i < info.batch; ++i) {
for (size_t m_ = 0; m_ < info.m; ++m_) {
for (size_t n_ = 0; n_ < info.n; ++n_) {
auto c_ = reinterpret_cast<Tdata *>(c) + i * info.c_matrix.stride + m_ * info.c_matrix.row_stride + n_ * info.c_matrix.col_stride;
for (size_t i = 0; i < _info.batch; ++i) {
for (size_t m_ = 0; m_ < _info.m; ++m_) {
for (size_t n_ = 0; n_ < _info.n; ++n_) {
auto c_ = reinterpret_cast<Tdata *>(c) + i * _info.c_matrix.stride + m_ * _info.c_matrix.row_stride + n_ * _info.c_matrix.col_stride;
float sum = 0;
for (size_t k_ = 0; k_ < info.k; ++k_) {
auto a_ = reinterpret_cast<Tdata const *>(a) + i * info.a_matrix.stride + m_ * info.a_matrix.row_stride + k_ * info.a_matrix.col_stride;
auto b_ = reinterpret_cast<Tdata const *>(b) + i * info.b_matrix.stride + n_ * info.b_matrix.col_stride + k_ * info.b_matrix.row_stride;
for (size_t k_ = 0; k_ < _info.k; ++k_) {
auto a_ = reinterpret_cast<Tdata const *>(a) + i * _info.a_matrix.stride + m_ * _info.a_matrix.row_stride + k_ * _info.a_matrix.col_stride;
auto b_ = reinterpret_cast<Tdata const *>(b) + i * _info.b_matrix.stride + n_ * _info.b_matrix.col_stride + k_ * _info.b_matrix.row_stride;
if constexpr (std::is_same<Tdata, uint16_t>::value) {
sum += f16_to_f32(*a_) * f16_to_f32(*b_);
} else {
......@@ -83,13 +83,13 @@ infiniopStatus_t Descriptor::calculate(
float alpha,
void *stream) const {
switch (dtype) {
switch (_dtype) {
case INFINI_DTYPE_F16:
cpu::calculate<uint16_t>(this, c, beta, a, b, alpha);
cpu::calculate<uint16_t>(_info, c, beta, a, b, alpha);
return INFINIOP_STATUS_SUCCESS;
case INFINI_DTYPE_F32:
cpu::calculate<float>(this, c, beta, a, b, alpha);
cpu::calculate<float>(_info, c, beta, a, b, alpha);
return INFINIOP_STATUS_SUCCESS;
default:
......
#ifndef __MATMUL_CPU_H__
#define __MATMUL_CPU_H__

#include "../../../devices/cpu/cpu_handle.h"
#include "../matmul.h"

// Declare the CPU matmul Descriptor. The DESCRIPTOR macro now takes only the
// backend namespace; create() receives the generic infiniopHandle_t and
// casts it internally.
DESCRIPTOR(cpu)

#endif // __MATMUL_CPU_H__
#include "../../../devices/cuda/common_cuda.cuh"
#include "../../utils.h"
#include "matmul_cuda.cuh"
......@@ -12,25 +13,26 @@ Descriptor::~Descriptor() {
}
infiniopStatus_t Descriptor::create(
infiniopCudaHandle_t handle,
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
infiniDtype_t dtype = c_desc->dtype;
auto handle = reinterpret_cast<infiniopCudaHandle_t>(handle_);
auto dtype = c_desc->dtype;
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINIOP_STATUS_BAD_TENSOR_DTYPE;
}
infiniopStatus_t status;
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
*desc_ptr = new Descriptor(
dtype, info, 0,
dtype, _info, 0,
new Opaque{handle->cublas_handle_pool},
handle->device, handle->device_id);
return INFINIOP_STATUS_SUCCESS;
......@@ -38,7 +40,7 @@ infiniopStatus_t Descriptor::create(
template <typename Tdata>
infiniopStatus_t calculate(
MatmulInfo const &info,
MatmulInfo const &_info,
std::shared_ptr<Pool<cublasHandle_t>> &cublas_handle_pool,
void *c,
float beta,
......@@ -47,7 +49,7 @@ infiniopStatus_t calculate(
float alpha,
cudaStream_t stream) {
if (info.is_transed) {
if (_info.is_transed) {
std::swap(a, b);
}
......@@ -65,8 +67,8 @@ infiniopStatus_t calculate(
#endif
}
auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_a = _info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = _info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
use_cublas(cublas_handle_pool,
stream,
......@@ -75,24 +77,24 @@ infiniopStatus_t calculate(
handle,
op_a,
op_b,
static_cast<int>(info.m),
static_cast<int>(info.n),
static_cast<int>(info.k),
static_cast<int>(_info.m),
static_cast<int>(_info.n),
static_cast<int>(_info.k),
&alpha,
a,
a_type,
static_cast<int>(info.a_matrix.ld()),
info.a_matrix.stride,
static_cast<int>(_info.a_matrix.ld()),
_info.a_matrix.stride,
b,
b_type,
static_cast<int>(info.b_matrix.ld()),
info.b_matrix.stride,
static_cast<int>(_info.b_matrix.ld()),
_info.b_matrix.stride,
&beta,
c,
c_type,
static_cast<int>(info.c_matrix.ld()),
info.c_matrix.stride,
static_cast<int>(info.batch),
static_cast<int>(_info.c_matrix.ld()),
_info.c_matrix.stride,
static_cast<int>(_info.batch),
compute_type,
CUBLAS_GEMM_DEFAULT_TENSOR_OP);
});
......@@ -109,13 +111,13 @@ infiniopStatus_t Descriptor::calculate(
float alpha,
void *stream) const {
switch (dtype) {
switch (_dtype) {
case INFINI_DTYPE_F16:
cuda::calculate<uint16_t>(info, _opaque->cublas_handle_pool, c, beta, a, b, alpha, (cudaStream_t)stream);
cuda::calculate<uint16_t>(_info, _opaque->cublas_handle_pool, c, beta, a, b, alpha, (cudaStream_t)stream);
return INFINIOP_STATUS_SUCCESS;
case INFINI_DTYPE_F32:
cuda::calculate<float>(info, _opaque->cublas_handle_pool, c, beta, a, b, alpha, (cudaStream_t)stream);
cuda::calculate<float>(_info, _opaque->cublas_handle_pool, c, beta, a, b, alpha, (cudaStream_t)stream);
return INFINIOP_STATUS_SUCCESS;
default:
......
#ifndef __MATMUL_CUDA_CUH__
#define __MATMUL_CUDA_CUH__

#include "../../../devices/cuda/cuda_handle.h"
#include "../matmul.h"

// Declare the CUDA matmul Descriptor. The DESCRIPTOR macro now takes only
// the backend namespace; create() receives the generic infiniopHandle_t and
// casts it internally.
DESCRIPTOR(cuda)

#endif // __MATMUL_CUDA_CUH__
......@@ -2,37 +2,38 @@
#define __MATMUL_H__
#include "blas.h"
#include "infiniop/handle.h"
#include "infiniop/operator.h"
#define DESCRIPTOR(NAMESPACE, HANDLE) \
#define DESCRIPTOR(NAMESPACE) \
\
namespace matmul::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
infiniDtype_t _dtype; \
MatmulInfo _info; \
\
Descriptor( \
infiniDtype_t dtype_, \
MatmulInfo info_, \
infiniDtype_t dtype, \
MatmulInfo info, \
size_t workspace_size_, \
Opaque *opaque, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
dtype(dtype_), \
info(info_), \
_dtype(dtype), \
_info(info), \
workspace_size(workspace_size_) {} \
\
public: \
infiniDtype_t dtype; \
MatmulInfo info; \
size_t workspace_size; \
\
~Descriptor(); \
\
static infiniopStatus_t create( \
HANDLE handle, \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t c_desc, \
infiniopTensorDescriptor_t a_desc, \
......
......@@ -20,10 +20,10 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
#define CREATE(CASE, HANDLE, NAMESPACE) \
#define CREATE(CASE, NAMESPACE) \
case CASE: \
return matmul::NAMESPACE::Descriptor::create( \
reinterpret_cast<HANDLE>(handle), \
handle, \
reinterpret_cast<matmul::NAMESPACE::Descriptor **>(desc_ptr), \
c_desc, \
a_desc, \
......@@ -32,16 +32,16 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(
switch (handle->device) {
#ifdef ENABLE_CPU_API
CREATE(INFINI_DEVICE_CPU, infiniopCpuHandle_t, cpu);
CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_CUDA_API
CREATE(INFINI_DEVICE_NVIDIA, infiniopCudaHandle_t, cuda);
CREATE(INFINI_DEVICE_NVIDIA, cuda);
#endif
#ifdef ENABLE_CAMBRICON_API
CREATE(INFINI_DEVICE_CAMBRICON, infiniopBangHandle_t, bang);
CREATE(INFINI_DEVICE_CAMBRICON, bang);
#endif
#ifdef ENABLE_ASCEND_API
CREATE(INFINI_DEVICE_ASCEND, infiniopAscendHandle_t, ascend);
CREATE(INFINI_DEVICE_ASCEND, ascend);
#endif
default:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment