Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
52536c5a
Commit
52536c5a
authored
Feb 20, 2025
by
YdrMaster
Browse files
issue/63/style: 整理代码,优化风格
Signed-off-by:
YdrMaster
<
ydrml@hotmail.com
>
parent
f23aa206
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
141 additions
and
137 deletions
+141
-137
src/infiniop/ops/matmul/ascend/matmul_ascend.cc
src/infiniop/ops/matmul/ascend/matmul_ascend.cc
+49
-46
src/infiniop/ops/matmul/ascend/matmul_ascend.h
src/infiniop/ops/matmul/ascend/matmul_ascend.h
+1
-2
src/infiniop/ops/matmul/bang/matmul_bang.cc
src/infiniop/ops/matmul/bang/matmul_bang.cc
+32
-30
src/infiniop/ops/matmul/bang/matmul_bang.h
src/infiniop/ops/matmul/bang/matmul_bang.h
+1
-2
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
+18
-18
src/infiniop/ops/matmul/cpu/matmul_cpu.h
src/infiniop/ops/matmul/cpu/matmul_cpu.h
+1
-2
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
+23
-21
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
+1
-2
src/infiniop/ops/matmul/matmul.h
src/infiniop/ops/matmul/matmul.h
+9
-8
src/infiniop/ops/matmul/operator.cc
src/infiniop/ops/matmul/operator.cc
+6
-6
No files found.
src/infiniop/ops/matmul/ascend/matmul_ascend.cc
View file @
52536c5a
#include "matmul_ascend.h"
#include "matmul_ascend.h"
#include "../../../devices/ascend/ascend_handle.h"
#include "../../../devices/ascend/tensor_aclnn.h"
#include "../../../devices/ascend/tensor_aclnn.h"
#include "../../utils.h"
#include "../../utils.h"
#include <acl/acl_base.h>
#include <acl/acl_base.h>
...
@@ -10,16 +11,16 @@ namespace matmul::ascend {
...
@@ -10,16 +11,16 @@ namespace matmul::ascend {
struct
Descriptor
::
Opaque
{
struct
Descriptor
::
Opaque
{
mutable
aclOpExecutor
*
executor
;
mutable
aclOpExecutor
*
executor
;
aclnnTensorDescriptor_t
c
Desc
,
aDesc
,
bDesc
;
aclnnTensorDescriptor_t
c
,
a
,
b
;
// cubeMathType
// cubeMathType
// see doc:
// see doc:
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha002/apiref/appdevgapi/context/aclnnBatchMatMul.md
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha002/apiref/appdevgapi/context/aclnnBatchMatMul.md
int8_t
mt
;
int8_t
mt
;
~
Opaque
()
{
~
Opaque
()
{
delete
cDes
c
;
delete
c
;
delete
a
Desc
;
delete
a
;
delete
b
Desc
;
delete
b
;
aclDestroyAclOpExecutor
(
executor
);
aclDestroyAclOpExecutor
(
executor
);
}
}
};
};
...
@@ -29,76 +30,77 @@ Descriptor::~Descriptor() {
...
@@ -29,76 +30,77 @@ Descriptor::~Descriptor() {
}
}
infiniopStatus_t
Descriptor
::
create
(
infiniopStatus_t
Descriptor
::
create
(
infiniop
Ascend
Handle_t
handle
,
infiniopHandle_t
handle
_
,
Descriptor
**
desc_ptr
,
Descriptor
**
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
infiniopTensorDescriptor_t
b_desc
)
{
infiniDtype_t
dtype
=
c_desc
->
dtype
;
auto
handle
=
reinterpret_cast
<
infiniopAscendHandle_t
>
(
handle_
);
auto
dtype
=
c_desc
->
dtype
;
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
}
}
infiniopStatus_t
status
;
infiniopStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
ROW_MAJOR
);
auto
_
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
ROW_MAJOR
);
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
return
status
;
return
status
;
}
}
auto
c
Desc
=
new
aclnnTensorDescriptor
(),
auto
c
=
new
aclnnTensorDescriptor
(),
a
Desc
=
new
aclnnTensorDescriptor
(),
a
=
new
aclnnTensorDescriptor
(),
b
Desc
=
new
aclnnTensorDescriptor
();
b
=
new
aclnnTensorDescriptor
();
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched
// operation
// operation
CHECK_STATUS
(
c
Desc
->
setDescriptor
(
CHECK_STATUS
(
c
->
setDescriptor
(
toAclDataType
(
c_desc
->
dtype
),
toAclDataType
(
c_desc
->
dtype
),
{
static_cast
<
int64_t
>
(
info
.
c_matrix
.
rows
),
{
static_cast
<
int64_t
>
(
_
info
.
c_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
.
c_matrix
.
cols
)},
static_cast
<
int64_t
>
(
_
info
.
c_matrix
.
cols
)},
{
info
.
c_matrix
.
row_stride
,
info
.
c_matrix
.
col_stride
}),
{
_
info
.
c_matrix
.
row_stride
,
_
info
.
c_matrix
.
col_stride
}),
INFINIOP_STATUS_SUCCESS
);
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
a
Desc
->
setDescriptor
(
CHECK_STATUS
(
a
->
setDescriptor
(
toAclDataType
(
a_desc
->
dtype
),
toAclDataType
(
a_desc
->
dtype
),
{
static_cast
<
int64_t
>
(
info
.
a_matrix
.
rows
),
{
static_cast
<
int64_t
>
(
_
info
.
a_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
.
a_matrix
.
cols
)},
static_cast
<
int64_t
>
(
_
info
.
a_matrix
.
cols
)},
{
info
.
a_matrix
.
row_stride
,
info
.
a_matrix
.
col_stride
}),
{
_
info
.
a_matrix
.
row_stride
,
_
info
.
a_matrix
.
col_stride
}),
INFINIOP_STATUS_SUCCESS
);
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
b
Desc
->
setDescriptor
(
CHECK_STATUS
(
b
->
setDescriptor
(
toAclDataType
(
b_desc
->
dtype
),
toAclDataType
(
b_desc
->
dtype
),
{
static_cast
<
int64_t
>
(
info
.
b_matrix
.
rows
),
{
static_cast
<
int64_t
>
(
_
info
.
b_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
.
b_matrix
.
cols
)},
static_cast
<
int64_t
>
(
_
info
.
b_matrix
.
cols
)},
{
info
.
b_matrix
.
row_stride
,
info
.
b_matrix
.
col_stride
}),
{
_
info
.
b_matrix
.
row_stride
,
_
info
.
b_matrix
.
col_stride
}),
INFINIOP_STATUS_SUCCESS
);
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
c
Desc
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
c
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
a
Desc
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
a
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
b
Desc
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
b
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
auto
tc
=
cDes
c
->
t
,
auto
tc
=
c
->
t
,
ta
=
a
Desc
->
t
,
ta
=
a
->
t
,
tb
=
b
Desc
->
t
;
tb
=
b
->
t
;
aclOpExecutor
*
executor
;
aclOpExecutor
*
executor
;
size_t
workspace
S
ize
;
size_t
workspace
_s
ize
;
// aclnnGemm support C = alpha * A @ B + beta * C
// aclnnGemm support C = alpha * A @ B + beta * C
// see
// see
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha003/apiref/aolapi/context/aclnnGemm.md
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha003/apiref/aolapi/context/aclnnGemm.md
// use alpha = 0.5, beta = 0.5 temporarily
// use alpha = 0.5, beta = 0.5 temporarily
int8_t
mt
=
1
;
int8_t
mt
=
1
;
auto
ret
=
aclnnGemmGetWorkspaceSize
(
ta
,
tb
,
tc
,
.5
,
.5
,
0
,
0
,
tc
,
mt
,
&
workspace
S
ize
,
&
executor
);
auto
ret
=
aclnnGemmGetWorkspaceSize
(
ta
,
tb
,
tc
,
.5
,
.5
,
0
,
0
,
tc
,
mt
,
&
workspace
_s
ize
,
&
executor
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclnnGemmGetWorkspaceSize failed. ERROR: %d
\n
"
,
ret
);
LOG_PRINT
(
"aclnnGemmGetWorkspaceSize failed. ERROR: %d
\n
"
,
ret
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
aclSetAclOpExecutorRepeatable
(
executor
);
aclSetAclOpExecutorRepeatable
(
executor
);
*
desc_ptr
=
new
Descriptor
(
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
workspace
S
ize
,
dtype
,
_
info
,
workspace
_s
ize
,
new
Opaque
{
new
Opaque
{
executor
,
executor
,
cDes
c
,
c
,
a
Desc
,
a
,
b
Desc
,
b
,
mt
,
mt
,
},
},
handle
->
device
,
handle
->
device_id
);
handle
->
device
,
handle
->
device_id
);
...
@@ -115,28 +117,29 @@ infiniopStatus_t Descriptor::calculate(
...
@@ -115,28 +117,29 @@ infiniopStatus_t Descriptor::calculate(
float
alpha
,
float
alpha
,
void
*
stream
)
const
{
void
*
stream
)
const
{
auto
tc
=
_opaque
->
c
Desc
->
t
,
auto
tc
=
_opaque
->
c
->
t
,
ta
=
_opaque
->
a
Desc
->
t
,
ta
=
_opaque
->
a
->
t
,
tb
=
_opaque
->
b
Desc
->
t
;
tb
=
_opaque
->
b
->
t
;
size_t
workspace
S
ize
;
size_t
workspace
_s
ize
;
auto
ret
=
aclnnGemmGetWorkspaceSize
(
auto
ret
=
aclnnGemmGetWorkspaceSize
(
ta
,
tb
,
tc
,
alpha
,
beta
,
0
,
0
,
tc
,
_opaque
->
mt
,
ta
,
tb
,
tc
,
alpha
,
beta
,
0
,
0
,
tc
,
_opaque
->
mt
,
&
workspace
S
ize
,
&
(
_opaque
->
executor
));
&
workspace
_s
ize
,
&
(
_opaque
->
executor
));
CHECK_RET
(
ret
==
ACL_SUCCESS
,
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclnnGemmGetWorkspaceSize failed. ERROR: %d
\n
"
,
ret
);
LOG_PRINT
(
"aclnnGemmGetWorkspaceSize failed. ERROR: %d
\n
"
,
ret
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
if
(
workspaceSize_
<
workspace
S
ize
)
{
if
(
workspaceSize_
<
workspace
_s
ize
)
{
return
INFINIOP_STATUS_INSUFFICIENT_WORKSPACE
;
return
INFINIOP_STATUS_INSUFFICIENT_WORKSPACE
;
}
}
aclSetAclOpExecutorRepeatable
(
_opaque
->
executor
);
aclSetAclOpExecutorRepeatable
(
_opaque
->
executor
);
for
(
size_t
i
=
0
;
i
<
info
.
batch
;
++
i
)
{
auto
unit
=
infiniSizeof
(
_dtype
);
AclSetTensorAddr
(
_opaque
->
executor
,
0
,
ta
,
((
char
*
)
a
)
+
i
*
info
.
a_matrix
.
stride
*
infiniSizeof
(
dtype
));
for
(
size_t
i
=
0
;
i
<
_info
.
batch
;
++
i
)
{
AclSetTensorAddr
(
_opaque
->
executor
,
1
,
tb
,
((
char
*
)
b
)
+
i
*
info
.
b_matrix
.
stride
*
infiniSizeof
(
dtype
));
AclSetTensorAddr
(
_opaque
->
executor
,
0
,
ta
,
((
char
*
)
a
)
+
i
*
_info
.
a_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
_opaque
->
executor
,
2
,
tc
,
((
char
*
)
c
)
+
i
*
info
.
c_matrix
.
stride
*
infiniSizeof
(
dtype
));
AclSetTensorAddr
(
_opaque
->
executor
,
1
,
tb
,
((
char
*
)
b
)
+
i
*
_info
.
b_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
_opaque
->
executor
,
3
,
tc
,
((
char
*
)
c
)
+
i
*
info
.
c_matrix
.
stride
*
infiniSizeof
(
dtype
));
AclSetTensorAddr
(
_opaque
->
executor
,
2
,
tc
,
((
char
*
)
c
)
+
i
*
_info
.
c_matrix
.
stride
*
unit
);
ret
=
aclnnGemm
(
workspace
,
workspaceSize
,
_opaque
->
executor
,
stream
);
AclSetTensorAddr
(
_opaque
->
executor
,
3
,
tc
,
((
char
*
)
c
)
+
i
*
_info
.
c_matrix
.
stride
*
unit
);
ret
=
aclnnGemm
(
workspace
,
workspace_size
,
_opaque
->
executor
,
stream
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclnnGemm failed. ERROR: %d
\n
"
,
ret
);
LOG_PRINT
(
"aclnnGemm failed. ERROR: %d
\n
"
,
ret
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
...
...
src/infiniop/ops/matmul/ascend/matmul_ascend.h
View file @
52536c5a
#ifndef __MATMUL_ASCEND_H__
#ifndef __MATMUL_ASCEND_H__
#define __MATMUL_ASCEND_H__
#define __MATMUL_ASCEND_H__
#include "../../../devices/ascend/ascend_handle.h"
#include "../matmul.h"
#include "../matmul.h"
DESCRIPTOR
(
ascend
,
infiniopAscendHandle_t
)
DESCRIPTOR
(
ascend
)
#endif // __MATMUL_ASCEND_H__
#endif // __MATMUL_ASCEND_H__
src/infiniop/ops/matmul/bang/matmul_bang.cc
View file @
52536c5a
#
include
"matmul_bang.h"
#
include
"matmul_bang.h"
#include "../../../devices/bang/bang_handle.h"
#include "../../../devices/bang/common_bang.h"
#include "../../../devices/bang/common_bang.h"
#include "../../utils.h"
#include "../../utils.h"
#include <cnnl_extra.h>
#include <cnnl_extra.h>
...
@@ -6,17 +7,17 @@
...
@@ -6,17 +7,17 @@
namespace
matmul
::
bang
{
namespace
matmul
::
bang
{
struct
Descriptor
::
Opaque
{
struct
Descriptor
::
Opaque
{
cnnlMatMulDescriptor_t
op
Desc
;
cnnlMatMulDescriptor_t
op
;
cnnlMatMulAlgo_t
algo
;
cnnlMatMulAlgo_t
algo
;
cnnlMatMulHeuristicResult_t
algoResult
;
cnnlMatMulHeuristicResult_t
algoResult
;
cnnlTensorDescriptor_t
a
Desc
,
bDesc
,
cDes
c
;
cnnlTensorDescriptor_t
a
,
b
,
c
;
std
::
shared_ptr
<
Pool
<
cnnlHandle_t
>>
cnnl_handle_pool
;
std
::
shared_ptr
<
Pool
<
cnnlHandle_t
>>
cnnl_handle_pool
;
~
Opaque
()
{
~
Opaque
()
{
cnnlDestroyTensorDescriptor
(
a
Desc
);
cnnlDestroyTensorDescriptor
(
a
);
cnnlDestroyTensorDescriptor
(
b
Desc
);
cnnlDestroyTensorDescriptor
(
b
);
cnnlDestroyTensorDescriptor
(
c
Desc
);
cnnlDestroyTensorDescriptor
(
c
);
cnnlMatMulDescDestroy
(
op
Desc
);
cnnlMatMulDescDestroy
(
op
);
cnnlMatMulAlgoDestroy
(
algo
);
cnnlMatMulAlgoDestroy
(
algo
);
cnnlDestroyMatMulHeuristicResult
(
algoResult
);
cnnlDestroyMatMulHeuristicResult
(
algoResult
);
}
}
...
@@ -59,41 +60,42 @@ Descriptor::~Descriptor() {
...
@@ -59,41 +60,42 @@ Descriptor::~Descriptor() {
}
}
infiniopStatus_t
Descriptor
::
create
(
infiniopStatus_t
Descriptor
::
create
(
infiniop
Bang
Handle_t
handle
,
infiniopHandle_t
handle
_
,
Descriptor
**
desc_ptr
,
Descriptor
**
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
infiniopTensorDescriptor_t
b_desc
)
{
infiniDtype_t
dtype
=
c_desc
->
dtype
;
auto
handle
=
reinterpret_cast
<
infiniopBangHandle_t
>
(
handle_
);
auto
dtype
=
c_desc
->
dtype
;
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
}
}
infiniopStatus_t
status
;
infiniopStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
ROW_MAJOR
);
auto
_
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
ROW_MAJOR
);
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
return
status
;
return
status
;
}
}
cnnlTensorDescriptor_t
a
Desc
,
bDesc
,
cDes
c
;
cnnlTensorDescriptor_t
a
,
b
,
c
;
cnnlCreateTensorDescriptor
(
&
a
Desc
);
cnnlCreateTensorDescriptor
(
&
a
);
cnnlCreateTensorDescriptor
(
&
b
Desc
);
cnnlCreateTensorDescriptor
(
&
b
);
cnnlCreateTensorDescriptor
(
&
c
Desc
);
cnnlCreateTensorDescriptor
(
&
c
);
setMatrixTensorEx
(
a
Desc
,
info
.
a_matrix
,
a_desc
->
dtype
);
setMatrixTensorEx
(
a
,
_
info
.
a_matrix
,
a_desc
->
dtype
);
setMatrixTensorEx
(
b
Desc
,
info
.
b_matrix
,
b_desc
->
dtype
);
setMatrixTensorEx
(
b
,
_
info
.
b_matrix
,
b_desc
->
dtype
);
setMatrixTensorEx
(
c
Desc
,
info
.
c_matrix
,
c_desc
->
dtype
);
setMatrixTensorEx
(
c
,
_
info
.
c_matrix
,
c_desc
->
dtype
);
cnnlMatMulDescriptor_t
op
Desc
;
cnnlMatMulDescriptor_t
op
;
cnnlMatMulAlgo_t
algo
;
cnnlMatMulAlgo_t
algo
;
cnnlMatMulHeuristicResult_t
algoResult
;
cnnlMatMulHeuristicResult_t
algoResult
;
cnnlMatMulDescCreate
(
&
op
Desc
);
cnnlMatMulDescCreate
(
&
op
);
cnnlMatMulAlgoCreate
(
&
algo
);
cnnlMatMulAlgoCreate
(
&
algo
);
cnnlCreateMatMulHeuristicResult
(
&
algoResult
);
cnnlCreateMatMulHeuristicResult
(
&
algoResult
);
int32_t
use_stride
=
true
;
int32_t
use_stride
=
true
;
cnnlSetMatMulDescAttr
(
cnnlSetMatMulDescAttr
(
op
Desc
,
op
,
CNNL_MATMUL_USE_STRIDE
,
CNNL_MATMUL_USE_STRIDE
,
&
use_stride
,
&
use_stride
,
sizeof
(
int32_t
));
sizeof
(
int32_t
));
...
@@ -102,7 +104,7 @@ infiniopStatus_t Descriptor::create(
...
@@ -102,7 +104,7 @@ infiniopStatus_t Descriptor::create(
[
&
](
cnnlHandle_t
_handle
)
{
[
&
](
cnnlHandle_t
_handle
)
{
cnnlGetBatchMatMulAlgoHeuristic
(
cnnlGetBatchMatMulAlgoHeuristic
(
_handle
,
_handle
,
op
Desc
,
aDesc
,
bDesc
,
cDes
c
,
op
,
a
,
b
,
c
,
NULL
,
1
,
&
algoResult
,
&
count
);
NULL
,
1
,
&
algoResult
,
&
count
);
});
});
...
@@ -110,14 +112,14 @@ infiniopStatus_t Descriptor::create(
...
@@ -110,14 +112,14 @@ infiniopStatus_t Descriptor::create(
cnnlGetBatchMatMulHeuristicResult
(
algoResult
,
algo
,
&
workspace_size
);
cnnlGetBatchMatMulHeuristicResult
(
algoResult
,
algo
,
&
workspace_size
);
*
desc_ptr
=
new
Descriptor
(
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
workspace_size
,
dtype
,
_
info
,
workspace_size
,
new
Opaque
{
new
Opaque
{
op
Desc
,
op
,
algo
,
algo
,
algoResult
,
algoResult
,
a
Desc
,
a
,
b
Desc
,
b
,
cDes
c
,
c
,
handle
->
cnnl_handle_pool
},
handle
->
cnnl_handle_pool
},
handle
->
device
,
handle
->
device_id
);
handle
->
device
,
handle
->
device_id
);
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
...
@@ -133,7 +135,7 @@ infiniopStatus_t Descriptor::calculate(
...
@@ -133,7 +135,7 @@ infiniopStatus_t Descriptor::calculate(
float
alpha
,
float
alpha
,
void
*
stream
)
const
{
void
*
stream
)
const
{
if
(
info
.
is_transed
)
{
if
(
_
info
.
is_transed
)
{
std
::
swap
(
a
,
b
);
std
::
swap
(
a
,
b
);
}
}
use_cnnl
(
_opaque
->
cnnl_handle_pool
,
use_cnnl
(
_opaque
->
cnnl_handle_pool
,
...
@@ -141,13 +143,13 @@ infiniopStatus_t Descriptor::calculate(
...
@@ -141,13 +143,13 @@ infiniopStatus_t Descriptor::calculate(
[
&
](
cnnlHandle_t
handle
)
{
[
&
](
cnnlHandle_t
handle
)
{
cnnlBatchMatMulBCast_v2
(
cnnlBatchMatMulBCast_v2
(
handle
,
handle
,
_opaque
->
op
Desc
,
_opaque
->
op
,
_opaque
->
algo
,
_opaque
->
algo
,
&
alpha
,
&
alpha
,
_opaque
->
a
Desc
,
a
,
_opaque
->
a
,
a
,
_opaque
->
b
Desc
,
b
,
_opaque
->
b
,
b
,
&
beta
,
&
beta
,
_opaque
->
c
Desc
,
c
,
_opaque
->
c
,
c
,
workspace
,
workspace
,
workspace_size
);
workspace_size
);
});
});
...
...
src/infiniop/ops/matmul/bang/matmul_bang.h
View file @
52536c5a
#ifndef __MATMUL_BANG_H__
#ifndef __MATMUL_BANG_H__
#define __MATMUL_BANG_H__
#define __MATMUL_BANG_H__
#include "../../../devices/bang/bang_handle.h"
#include "../matmul.h"
#include "../matmul.h"
DESCRIPTOR
(
bang
,
infiniopBangHandle_t
)
DESCRIPTOR
(
bang
)
#endif // __MATMUL_BANG_H__
#endif // __MATMUL_BANG_H__
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
View file @
52536c5a
#include "./matmul_cpu.h"
#include "./matmul_cpu.h"
#include "../../../devices/cpu/common_cpu.h"
#include "../../../devices/cpu/common_cpu.h"
#include "../../../devices/cpu/cpu_handle.h"
#include <iostream>
#include <iostream>
namespace
matmul
::
cpu
{
namespace
matmul
::
cpu
{
...
@@ -7,25 +8,26 @@ namespace matmul::cpu {
...
@@ -7,25 +8,26 @@ namespace matmul::cpu {
Descriptor
::~
Descriptor
()
=
default
;
Descriptor
::~
Descriptor
()
=
default
;
infiniopStatus_t
Descriptor
::
create
(
infiniopStatus_t
Descriptor
::
create
(
infiniop
Cpu
Handle_t
handle
,
infiniopHandle_t
handle
_
,
Descriptor
**
desc_ptr
,
Descriptor
**
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
infiniopTensorDescriptor_t
b_desc
)
{
infiniDtype_t
dtype
=
c_desc
->
dtype
;
auto
handle
=
reinterpret_cast
<
infiniopCpuHandle_t
>
(
handle_
);
auto
dtype
=
c_desc
->
dtype
;
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
}
}
infiniopStatus_t
status
;
infiniopStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
COL_MAJOR
);
auto
_
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
COL_MAJOR
);
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
return
status
;
return
status
;
}
}
*
desc_ptr
=
new
Descriptor
(
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
0
,
dtype
,
_
info
,
0
,
nullptr
,
nullptr
,
handle
->
device
,
handle
->
device_id
);
handle
->
device
,
handle
->
device_id
);
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
...
@@ -33,26 +35,24 @@ infiniopStatus_t Descriptor::create(
...
@@ -33,26 +35,24 @@ infiniopStatus_t Descriptor::create(
template
<
typename
Tdata
>
template
<
typename
Tdata
>
void
calculate
(
void
calculate
(
Descriptor
const
*
desc
,
MatmulInfo
const
&
_info
,
void
*
c
,
void
*
c
,
float
beta
,
float
beta
,
void
const
*
a
,
void
const
*
a
,
void
const
*
b
,
void
const
*
b
,
float
alpha
)
{
float
alpha
)
{
auto
info
=
desc
->
info
;
if
(
_info
.
is_transed
)
{
if
(
info
.
is_transed
)
{
std
::
swap
(
a
,
b
);
std
::
swap
(
a
,
b
);
}
}
for
(
size_t
i
=
0
;
i
<
info
.
batch
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
_
info
.
batch
;
++
i
)
{
for
(
size_t
m_
=
0
;
m_
<
info
.
m
;
++
m_
)
{
for
(
size_t
m_
=
0
;
m_
<
_
info
.
m
;
++
m_
)
{
for
(
size_t
n_
=
0
;
n_
<
info
.
n
;
++
n_
)
{
for
(
size_t
n_
=
0
;
n_
<
_
info
.
n
;
++
n_
)
{
auto
c_
=
reinterpret_cast
<
Tdata
*>
(
c
)
+
i
*
info
.
c_matrix
.
stride
+
m_
*
info
.
c_matrix
.
row_stride
+
n_
*
info
.
c_matrix
.
col_stride
;
auto
c_
=
reinterpret_cast
<
Tdata
*>
(
c
)
+
i
*
_
info
.
c_matrix
.
stride
+
m_
*
_
info
.
c_matrix
.
row_stride
+
n_
*
_
info
.
c_matrix
.
col_stride
;
float
sum
=
0
;
float
sum
=
0
;
for
(
size_t
k_
=
0
;
k_
<
info
.
k
;
++
k_
)
{
for
(
size_t
k_
=
0
;
k_
<
_
info
.
k
;
++
k_
)
{
auto
a_
=
reinterpret_cast
<
Tdata
const
*>
(
a
)
+
i
*
info
.
a_matrix
.
stride
+
m_
*
info
.
a_matrix
.
row_stride
+
k_
*
info
.
a_matrix
.
col_stride
;
auto
a_
=
reinterpret_cast
<
Tdata
const
*>
(
a
)
+
i
*
_
info
.
a_matrix
.
stride
+
m_
*
_
info
.
a_matrix
.
row_stride
+
k_
*
_
info
.
a_matrix
.
col_stride
;
auto
b_
=
reinterpret_cast
<
Tdata
const
*>
(
b
)
+
i
*
info
.
b_matrix
.
stride
+
n_
*
info
.
b_matrix
.
col_stride
+
k_
*
info
.
b_matrix
.
row_stride
;
auto
b_
=
reinterpret_cast
<
Tdata
const
*>
(
b
)
+
i
*
_
info
.
b_matrix
.
stride
+
n_
*
_
info
.
b_matrix
.
col_stride
+
k_
*
_
info
.
b_matrix
.
row_stride
;
if
constexpr
(
std
::
is_same
<
Tdata
,
uint16_t
>::
value
)
{
if
constexpr
(
std
::
is_same
<
Tdata
,
uint16_t
>::
value
)
{
sum
+=
f16_to_f32
(
*
a_
)
*
f16_to_f32
(
*
b_
);
sum
+=
f16_to_f32
(
*
a_
)
*
f16_to_f32
(
*
b_
);
}
else
{
}
else
{
...
@@ -83,13 +83,13 @@ infiniopStatus_t Descriptor::calculate(
...
@@ -83,13 +83,13 @@ infiniopStatus_t Descriptor::calculate(
float
alpha
,
float
alpha
,
void
*
stream
)
const
{
void
*
stream
)
const
{
switch
(
dtype
)
{
switch
(
_
dtype
)
{
case
INFINI_DTYPE_F16
:
case
INFINI_DTYPE_F16
:
cpu
::
calculate
<
uint16_t
>
(
this
,
c
,
beta
,
a
,
b
,
alpha
);
cpu
::
calculate
<
uint16_t
>
(
_info
,
c
,
beta
,
a
,
b
,
alpha
);
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
case
INFINI_DTYPE_F32
:
case
INFINI_DTYPE_F32
:
cpu
::
calculate
<
float
>
(
this
,
c
,
beta
,
a
,
b
,
alpha
);
cpu
::
calculate
<
float
>
(
_info
,
c
,
beta
,
a
,
b
,
alpha
);
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
default:
default:
...
...
src/infiniop/ops/matmul/cpu/matmul_cpu.h
View file @
52536c5a
#ifndef __MATMUL_CPU_H__
#ifndef __MATMUL_CPU_H__
#define __MATMUL_CPU_H__
#define __MATMUL_CPU_H__
#include "../../../devices/cpu/cpu_handle.h"
#include "../matmul.h"
#include "../matmul.h"
DESCRIPTOR
(
cpu
,
infiniopCpuHandle_t
)
DESCRIPTOR
(
cpu
)
#endif // __MATMUL_CPU_H__
#endif // __MATMUL_CPU_H__
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
View file @
52536c5a
#include "../../../devices/cuda/common_cuda.cuh"
#include "../../utils.h"
#include "../../utils.h"
#include "matmul_cuda.cuh"
#include "matmul_cuda.cuh"
...
@@ -12,25 +13,26 @@ Descriptor::~Descriptor() {
...
@@ -12,25 +13,26 @@ Descriptor::~Descriptor() {
}
}
infiniopStatus_t
Descriptor
::
create
(
infiniopStatus_t
Descriptor
::
create
(
infiniop
Cuda
Handle_t
handle
,
infiniopHandle_t
handle
_
,
Descriptor
**
desc_ptr
,
Descriptor
**
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
infiniopTensorDescriptor_t
b_desc
)
{
infiniDtype_t
dtype
=
c_desc
->
dtype
;
auto
handle
=
reinterpret_cast
<
infiniopCudaHandle_t
>
(
handle_
);
auto
dtype
=
c_desc
->
dtype
;
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
if
(
dtype
!=
INFINI_DTYPE_F16
&&
dtype
!=
INFINI_DTYPE_F32
)
{
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
return
INFINIOP_STATUS_BAD_TENSOR_DTYPE
;
}
}
infiniopStatus_t
status
;
infiniopStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
COL_MAJOR
);
auto
_
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
COL_MAJOR
);
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
return
status
;
return
status
;
}
}
*
desc_ptr
=
new
Descriptor
(
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
0
,
dtype
,
_
info
,
0
,
new
Opaque
{
handle
->
cublas_handle_pool
},
new
Opaque
{
handle
->
cublas_handle_pool
},
handle
->
device
,
handle
->
device_id
);
handle
->
device
,
handle
->
device_id
);
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
...
@@ -38,7 +40,7 @@ infiniopStatus_t Descriptor::create(
...
@@ -38,7 +40,7 @@ infiniopStatus_t Descriptor::create(
template
<
typename
Tdata
>
template
<
typename
Tdata
>
infiniopStatus_t
calculate
(
infiniopStatus_t
calculate
(
MatmulInfo
const
&
info
,
MatmulInfo
const
&
_
info
,
std
::
shared_ptr
<
Pool
<
cublasHandle_t
>>
&
cublas_handle_pool
,
std
::
shared_ptr
<
Pool
<
cublasHandle_t
>>
&
cublas_handle_pool
,
void
*
c
,
void
*
c
,
float
beta
,
float
beta
,
...
@@ -47,7 +49,7 @@ infiniopStatus_t calculate(
...
@@ -47,7 +49,7 @@ infiniopStatus_t calculate(
float
alpha
,
float
alpha
,
cudaStream_t
stream
)
{
cudaStream_t
stream
)
{
if
(
info
.
is_transed
)
{
if
(
_
info
.
is_transed
)
{
std
::
swap
(
a
,
b
);
std
::
swap
(
a
,
b
);
}
}
...
@@ -65,8 +67,8 @@ infiniopStatus_t calculate(
...
@@ -65,8 +67,8 @@ infiniopStatus_t calculate(
#endif
#endif
}
}
auto
op_a
=
info
.
a_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
auto
op_a
=
_
info
.
a_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
auto
op_b
=
info
.
b_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
auto
op_b
=
_
info
.
b_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
use_cublas
(
cublas_handle_pool
,
use_cublas
(
cublas_handle_pool
,
stream
,
stream
,
...
@@ -75,24 +77,24 @@ infiniopStatus_t calculate(
...
@@ -75,24 +77,24 @@ infiniopStatus_t calculate(
handle
,
handle
,
op_a
,
op_a
,
op_b
,
op_b
,
static_cast
<
int
>
(
info
.
m
),
static_cast
<
int
>
(
_
info
.
m
),
static_cast
<
int
>
(
info
.
n
),
static_cast
<
int
>
(
_
info
.
n
),
static_cast
<
int
>
(
info
.
k
),
static_cast
<
int
>
(
_
info
.
k
),
&
alpha
,
&
alpha
,
a
,
a
,
a_type
,
a_type
,
static_cast
<
int
>
(
info
.
a_matrix
.
ld
()),
static_cast
<
int
>
(
_
info
.
a_matrix
.
ld
()),
info
.
a_matrix
.
stride
,
_
info
.
a_matrix
.
stride
,
b
,
b
,
b_type
,
b_type
,
static_cast
<
int
>
(
info
.
b_matrix
.
ld
()),
static_cast
<
int
>
(
_
info
.
b_matrix
.
ld
()),
info
.
b_matrix
.
stride
,
_
info
.
b_matrix
.
stride
,
&
beta
,
&
beta
,
c
,
c
,
c_type
,
c_type
,
static_cast
<
int
>
(
info
.
c_matrix
.
ld
()),
static_cast
<
int
>
(
_
info
.
c_matrix
.
ld
()),
info
.
c_matrix
.
stride
,
_
info
.
c_matrix
.
stride
,
static_cast
<
int
>
(
info
.
batch
),
static_cast
<
int
>
(
_
info
.
batch
),
compute_type
,
compute_type
,
CUBLAS_GEMM_DEFAULT_TENSOR_OP
);
CUBLAS_GEMM_DEFAULT_TENSOR_OP
);
});
});
...
@@ -109,13 +111,13 @@ infiniopStatus_t Descriptor::calculate(
...
@@ -109,13 +111,13 @@ infiniopStatus_t Descriptor::calculate(
float
alpha
,
float
alpha
,
void
*
stream
)
const
{
void
*
stream
)
const
{
switch
(
dtype
)
{
switch
(
_
dtype
)
{
case
INFINI_DTYPE_F16
:
case
INFINI_DTYPE_F16
:
cuda
::
calculate
<
uint16_t
>
(
info
,
_opaque
->
cublas_handle_pool
,
c
,
beta
,
a
,
b
,
alpha
,
(
cudaStream_t
)
stream
);
cuda
::
calculate
<
uint16_t
>
(
_
info
,
_opaque
->
cublas_handle_pool
,
c
,
beta
,
a
,
b
,
alpha
,
(
cudaStream_t
)
stream
);
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
case
INFINI_DTYPE_F32
:
case
INFINI_DTYPE_F32
:
cuda
::
calculate
<
float
>
(
info
,
_opaque
->
cublas_handle_pool
,
c
,
beta
,
a
,
b
,
alpha
,
(
cudaStream_t
)
stream
);
cuda
::
calculate
<
float
>
(
_
info
,
_opaque
->
cublas_handle_pool
,
c
,
beta
,
a
,
b
,
alpha
,
(
cudaStream_t
)
stream
);
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
default:
default:
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
View file @
52536c5a
#ifndef __MATMUL_CUDA_CUH__
#ifndef __MATMUL_CUDA_CUH__
#define __MATMUL_CUDA_CUH__
#define __MATMUL_CUDA_CUH__
#include "../../../devices/cuda/cuda_handle.h"
#include "../matmul.h"
#include "../matmul.h"
DESCRIPTOR
(
cuda
,
infiniopCudaHandle_t
)
DESCRIPTOR
(
cuda
)
#endif // __MATMUL_CUDA_CUH__
#endif // __MATMUL_CUDA_CUH__
src/infiniop/ops/matmul/matmul.h
View file @
52536c5a
...
@@ -2,37 +2,38 @@
...
@@ -2,37 +2,38 @@
#define __MATMUL_H__
#define __MATMUL_H__
#include "blas.h"
#include "blas.h"
#include "infiniop/handle.h"
#include "infiniop/operator.h"
#include "infiniop/operator.h"
#define DESCRIPTOR(NAMESPACE
, HANDLE)
\
#define DESCRIPTOR(NAMESPACE
)
\
\
\
namespace matmul::NAMESPACE { \
namespace matmul::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
struct Opaque; \
Opaque *_opaque; \
Opaque *_opaque; \
infiniDtype_t _dtype; \
MatmulInfo _info; \
\
\
Descriptor( \
Descriptor( \
infiniDtype_t dtype
_
, \
infiniDtype_t dtype,
\
MatmulInfo info
_
, \
MatmulInfo info,
\
size_t workspace_size_, \
size_t workspace_size_, \
Opaque *opaque, \
Opaque *opaque, \
infiniDevice_t device_type, \
infiniDevice_t device_type, \
int device_id) \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
: InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
_opaque(opaque), \
dtype(dtype
_
), \
_
dtype(dtype), \
info(info
_
), \
_
info(info), \
workspace_size(workspace_size_) {} \
workspace_size(workspace_size_) {} \
\
\
public: \
public: \
infiniDtype_t dtype; \
MatmulInfo info; \
size_t workspace_size; \
size_t workspace_size; \
\
\
~Descriptor(); \
~Descriptor(); \
\
\
static infiniopStatus_t create( \
static infiniopStatus_t create( \
HANDLE handle,
\
infiniopHandle_t handle,
\
Descriptor **desc_ptr, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t c_desc, \
infiniopTensorDescriptor_t c_desc, \
infiniopTensorDescriptor_t a_desc, \
infiniopTensorDescriptor_t a_desc, \
...
...
src/infiniop/ops/matmul/operator.cc
View file @
52536c5a
...
@@ -20,10 +20,10 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(
...
@@ -20,10 +20,10 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
infiniopTensorDescriptor_t
b_desc
)
{
#define CREATE(CASE,
HANDLE,
NAMESPACE) \
#define CREATE(CASE, NAMESPACE)
\
case CASE: \
case CASE: \
return matmul::NAMESPACE::Descriptor::create( \
return matmul::NAMESPACE::Descriptor::create( \
reinterpret_cast<HANDLE>(handle),
\
handle,
\
reinterpret_cast<matmul::NAMESPACE::Descriptor **>(desc_ptr), \
reinterpret_cast<matmul::NAMESPACE::Descriptor **>(desc_ptr), \
c_desc, \
c_desc, \
a_desc, \
a_desc, \
...
@@ -32,16 +32,16 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(
...
@@ -32,16 +32,16 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(
switch
(
handle
->
device
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU_API
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
infiniopCpuHandle_t
,
cpu
);
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#endif
#ifdef ENABLE_CUDA_API
#ifdef ENABLE_CUDA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
infiniopCudaHandle_t
,
cuda
);
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#endif
#ifdef ENABLE_CAMBRICON_API
#ifdef ENABLE_CAMBRICON_API
CREATE
(
INFINI_DEVICE_CAMBRICON
,
infiniopBangHandle_t
,
bang
);
CREATE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
#endif
#endif
#ifdef ENABLE_ASCEND_API
#ifdef ENABLE_ASCEND_API
CREATE
(
INFINI_DEVICE_ASCEND
,
infiniopAscendHandle_t
,
ascend
);
CREATE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
#endif
default:
default:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment