Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
58c0de0c
"nndet/io/patching.py" did not exist on "a28153dd958a51464a61bbc5500a87665ce1850b"
Commit
58c0de0c
authored
Feb 12, 2025
by
Pan Zezhong
Browse files
feat: ascend matmul
parent
46da1a27
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
256 additions
and
190 deletions
+256
-190
src/infiniop/devices/ascend/CMakeLists.txt
src/infiniop/devices/ascend/CMakeLists.txt
+1
-1
src/infiniop/devices/ascend/ascend_handle.cc
src/infiniop/devices/ascend/ascend_handle.cc
+7
-7
src/infiniop/devices/ascend/ascend_handle.h
src/infiniop/devices/ascend/ascend_handle.h
+9
-17
src/infiniop/devices/ascend/common_ascend.cc
src/infiniop/devices/ascend/common_ascend.cc
+18
-18
src/infiniop/devices/ascend/common_ascend.h
src/infiniop/devices/ascend/common_ascend.h
+11
-5
src/infiniop/devices/ascend/tensor_aclnn.cc
src/infiniop/devices/ascend/tensor_aclnn.cc
+9
-9
src/infiniop/devices/ascend/tensor_aclnn.h
src/infiniop/devices/ascend/tensor_aclnn.h
+1
-3
src/infiniop/devices/handle.cc
src/infiniop/devices/handle.cc
+4
-4
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc
+73
-56
src/infiniop/ops/matmul/ascend/matmul_aclnn.h
src/infiniop/ops/matmul/ascend/matmul_aclnn.h
+5
-32
src/infiniop/ops/matmul/ascend/matmul_aclnn_api.h
src/infiniop/ops/matmul/ascend/matmul_aclnn_api.h
+30
-0
src/infiniop/ops/matmul/blas.h
src/infiniop/ops/matmul/blas.h
+1
-1
src/infiniop/ops/matmul/operator.cc
src/infiniop/ops/matmul/operator.cc
+23
-28
test/infiniop/libinfiniop/liboperators.py
test/infiniop/libinfiniop/liboperators.py
+0
-1
test/infiniop/matmul.py
test/infiniop/matmul.py
+7
-7
xmake.lua
xmake.lua
+2
-1
xmake/ascend.lua
xmake/ascend.lua
+55
-0
No files found.
src/infiniop/devices/ascend/CMakeLists.txt
View file @
58c0de0c
...
...
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16.0)
# project information
project
(
Ascend_C
)
set
(
SOC_VERSION
"Ascend910B3"
CACHE STRING
"system on chip type"
)
set
(
ASCEND_CANN_PACKAGE_PATH
"/usr/local/Ascend/ascend-toolkit/latest"
CACHE PATH
"ASCEND CANN package installation directory"
)
set
(
ASCEND_CANN_PACKAGE_PATH
$ENV{ASCEND_HOME}
CACHE PATH
"ASCEND CANN package installation directory"
)
set
(
RUN_MODE
"npu"
CACHE STRING
"run mode: npu"
)
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE STRING
"Build type Release/Debug (default Debug)"
FORCE
)
set
(
CMAKE_INSTALL_PREFIX
"
${
CMAKE_CURRENT_LIST_DIR
}
/out"
CACHE STRING
"path for install()"
FORCE
)
...
...
src/infiniop/devices/ascend/ascend_handle.cc
View file @
58c0de0c
#include "
ascend_handle
.h"
#include "
common_ascend
.h"
infiniopStatus_t
createAscendHandle
(
AscendHandle_t
*
handle_ptr
,
int
device_id
)
{
infiniopStatus_t
createAscendHandle
(
infiniop
AscendHandle_t
*
handle_ptr
,
int
device_id
)
{
uint32_t
device_count
;
aclrtGetDeviceCount
(
&
device_count
);
if
(
device_id
>=
static_cast
<
int
>
(
device_count
))
{
return
STATUS_BAD_DEVICE
;
return
INFINIOP_
STATUS_BAD_DEVICE
;
}
auto
ret
=
aclrtSetDevice
(
device_id
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclrtSetDevice failed. ERROR: %d
\n
"
,
ret
));
*
handle_ptr
=
new
AscendContext
{
DevAscendNpu
,
device_id
};
*
handle_ptr
=
new
InfiniopAscendHandle
{
INFINI_DEVICE_ASCEND
,
device_id
};
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
infiniopStatus_t
deleteAscendHandle
(
AscendHandle_t
handle_ptr
)
{
infiniopStatus_t
deleteAscendHandle
(
infiniop
AscendHandle_t
handle_ptr
)
{
delete
handle_ptr
;
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
src/infiniop/devices/ascend/ascend_handle.h
View file @
58c0de0c
#ifndef ASCEND_HANDLE_H
#define ASCEND_HANDLE_H
#ifndef
__INFINIOP_
ASCEND_HANDLE_H
__
#define
__INFINIOP_
ASCEND_HANDLE_H
__
#include "common_ascend.h"
#include "device.h"
#include "status.h"
#include <acl/acl.h>
#include <acl/acl_base.h>
#include <acl/acl_rt.h>
#include <aclnn/acl_meta.h>
#include <memory>
struct
AscendContext
{
Device
device
;
int
device_id
;
};
typedef
struct
AscendContext
*
AscendHandle_t
;
#include "infinicore.h"
#include "infiniop/handle.h"
infiniopStatus_t
createAscendHandle
(
AscendHandle_t
*
handle_ptr
,
int
device_id
);
struct
InfiniopAscendHandle
;
typedef
struct
InfiniopAscendHandle
*
infiniopAscendHandle_t
;
infiniopStatus_t
deleteAscendHandle
(
AscendHandle_t
handle_ptr
);
infiniopStatus_t
createAscendHandle
(
infiniopAscendHandle_t
*
handle_ptr
,
int
device_id
);
infiniopStatus_t
deleteAscendHandle
(
infiniopAscendHandle_t
handle_ptr
);
#endif
src/infiniop/devices/ascend/common_ascend.cc
View file @
58c0de0c
...
...
@@ -8,16 +8,16 @@ int64_t numElements(const int64_t *shape, int64_t num) {
return
numEle
;
}
infiniopStatus_t
mallocWorkspace
(
void
**
workspaceAddr
,
uint64
_t
workspaceSize
)
{
infiniopStatus_t
mallocWorkspace
(
void
**
workspaceAddr
,
size
_t
workspaceSize
)
{
*
workspaceAddr
=
nullptr
;
if
(
workspaceSize
>
0
)
{
auto
ret
=
aclrtMalloc
(
workspaceAddr
,
workspaceSize
,
ACL_MEM_MALLOC_HUGE_FIRST
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclrtMalloc failed. ERROR: %d
\n
"
,
ret
);
return
STATUS_EXECUTION_FAILED
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
}
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
infiniopStatus_t
freeWorkspace
(
void
*
workspaceAddr
)
{
...
...
@@ -25,35 +25,35 @@ infiniopStatus_t freeWorkspace(void *workspaceAddr) {
auto
ret
=
aclrtFree
(
workspaceAddr
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclrtFree failed, ERROR: %d
\n
"
,
ret
);
return
STATUS_EXECUTION_FAILED
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
}
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
aclDataType
toAclDataType
(
DT
dt
)
{
if
(
dt
==
I8
)
aclDataType
toAclDataType
(
infiniDtype_t
dt
)
{
if
(
dt
==
INFINI_DTYPE_
I8
)
return
aclDataType
::
ACL_INT8
;
else
if
(
dt
==
I16
)
else
if
(
dt
==
INFINI_DTYPE_
I16
)
return
aclDataType
::
ACL_INT16
;
else
if
(
dt
==
I32
)
else
if
(
dt
==
INFINI_DTYPE_
I32
)
return
aclDataType
::
ACL_INT32
;
else
if
(
dt
==
I64
)
else
if
(
dt
==
INFINI_DTYPE_
I64
)
return
aclDataType
::
ACL_INT64
;
else
if
(
dt
==
U8
)
else
if
(
dt
==
INFINI_DTYPE_
U8
)
return
aclDataType
::
ACL_UINT8
;
else
if
(
dt
==
U16
)
else
if
(
dt
==
INFINI_DTYPE_
U16
)
return
aclDataType
::
ACL_UINT16
;
else
if
(
dt
==
U32
)
else
if
(
dt
==
INFINI_DTYPE_
U32
)
return
aclDataType
::
ACL_UINT32
;
else
if
(
dt
==
U64
)
else
if
(
dt
==
INFINI_DTYPE_
U64
)
return
aclDataType
::
ACL_UINT64
;
else
if
(
dt
==
F16
)
else
if
(
dt
==
INFINI_DTYPE_
F16
)
return
aclDataType
::
ACL_FLOAT16
;
else
if
(
dt
==
BF16
)
else
if
(
dt
==
INFINI_DTYPE_
BF16
)
return
aclDataType
::
ACL_BF16
;
else
if
(
dt
==
F32
)
else
if
(
dt
==
INFINI_DTYPE_
F32
)
return
aclDataType
::
ACL_FLOAT
;
else
if
(
dt
==
F64
)
else
if
(
dt
==
INFINI_DTYPE_
F64
)
return
aclDataType
::
ACL_DOUBLE
;
else
return
aclDataType
::
ACL_DT_UNDEFINED
;
...
...
src/infiniop/devices/ascend/common_ascend.h
View file @
58c0de0c
#ifndef __COMMON_ASCEND_H__
#define __COMMON_ASCEND_H__
#ifndef __
INFINIOP_
COMMON_ASCEND_H__
#define __
INFINIOP_
COMMON_ASCEND_H__
#include "
operators
.h"
#include "
ascend_handle
.h"
#include <acl/acl.h>
#include <acl/acl_base.h>
#include <acl/acl_rt.h>
#include <aclnn/acl_meta.h>
#include <cstdio>
#include <functional>
#include <inttypes.h>
...
...
@@ -31,11 +32,16 @@ extern "C" {
};
#endif
struct
InfiniopAscendHandle
{
infiniDevice_t
device
;
int
device_id
;
};
int64_t
numElements
(
const
int64_t
*
shape
,
int64_t
num
);
const
char
*
dataTypeToString
(
aclDataType
dtype
);
const
char
*
formatToString
(
aclFormat
format
);
infiniopStatus_t
mallocWorkspace
(
void
**
workspaceAddr
,
uint64
_t
workspaceSize
);
infiniopStatus_t
mallocWorkspace
(
void
**
workspaceAddr
,
size
_t
workspaceSize
);
infiniopStatus_t
freeWorkspace
(
void
*
workspaceAddr
);
aclDataType
toAclDataType
(
DT
dt
);
aclDataType
toAclDataType
(
infiniDtype_t
dt
);
#endif
src/infiniop/devices/ascend/tensor_aclnn.cc
View file @
58c0de0c
...
...
@@ -4,7 +4,7 @@
infiniopStatus_t
aclnnTensorDescriptor
::
setDescriptor
(
aclDataType
dtype
,
const
std
::
vector
<
int64_t
>
&
shape
,
const
std
::
vector
<
int64_t
>
&
strides
)
{
if
(
shape
.
size
()
!=
strides
.
size
())
{
return
STATUS_BAD_
PARAM
;
return
INFINIOP_
STATUS_BAD_
TENSOR_STRIDES
;
}
this
->
ndim
=
shape
.
size
();
this
->
shape
=
std
::
vector
<
int64_t
>
(
shape
);
...
...
@@ -16,9 +16,9 @@ infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const s
aclFormat
format
=
aclFormat
::
ACL_FORMAT_ND
;
this
->
format
=
format
;
CHECK_STATUS
(
this
->
inferStorageShape
(),
STATUS_SUCCESS
);
CHECK_STATUS
(
this
->
inferStorageShape
(),
INFINIOP_
STATUS_SUCCESS
);
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
...
...
@@ -30,7 +30,7 @@ infiniopStatus_t aclnnTensorDescriptor::inferStorageShape() {
this
->
storageNdim
=
1
;
this
->
storageShape
=
std
::
vector
<
int64_t
>
({
this
->
shape
[
max_stride_index
]
*
this
->
strides
[
max_stride_index
]});
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
/// @brief Set aclnnTensorDescriptor from infiniopTensorDescriptor
...
...
@@ -45,7 +45,7 @@ infiniopStatus_t aclnnTensorDescriptor::fromInfiniOpTensorDescriptor(infiniopTen
shape
[
i
]
=
static_cast
<
int64_t
>
(
y
->
shape
[
i
]);
strides
[
i
]
=
y
->
strides
[
i
];
}
return
setDescriptor
(
toAclDataType
(
y
->
dt
),
shape
,
strides
);
return
setDescriptor
(
toAclDataType
(
y
->
dt
ype
),
shape
,
strides
);
}
/// @brief Wrapper of aclCreateTensor. Create aclTensor.
...
...
@@ -56,7 +56,7 @@ infiniopStatus_t aclnnTensorDescriptor::fromInfiniOpTensorDescriptor(infiniopTen
/// @return
infiniopStatus_t
aclnnTensorDescriptor
::
createTensor
(
void
*
data
)
{
if
(
this
->
t
)
{
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
this
->
t
=
aclCreateTensor
(
this
->
shape
.
data
(),
this
->
ndim
,
...
...
@@ -67,17 +67,17 @@ infiniopStatus_t aclnnTensorDescriptor::createTensor(void *data) {
this
->
storageShape
.
data
(),
this
->
storageNdim
,
data
);
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
infiniopStatus_t
aclnnTensorDescriptor
::
destroyTensor
()
{
auto
ret
=
aclDestroyTensor
(
this
->
t
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclDesctroyTensor failed, ERROR: %d
\n
"
,
ret
);
return
STATUS_EXECUTION_FAILED
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
t
=
nullptr
;
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
aclnnTensorDescriptor
::~
aclnnTensorDescriptor
()
{
...
...
src/infiniop/devices/ascend/tensor_aclnn.h
View file @
58c0de0c
...
...
@@ -2,9 +2,7 @@
#define __ACLNN_TENSOR__
#include "./common_ascend.h"
#include "operators.h"
#include "tensor.h"
#include "tensor/tensor_descriptor.h"
#include "infiniop/operator.h"
#include <acl/acl.h>
#include <acl/acl_base.h>
#include <aclnn/acl_meta.h>
...
...
src/infiniop/devices/handle.cc
View file @
58c0de0c
...
...
@@ -8,7 +8,7 @@
#ifdef ENABLE_CAMBRICON_MLU
#include "./bang/bang_handle.h"
#endif
#ifdef ENABLE_ASCEND_
NPU
#ifdef ENABLE_ASCEND_
API
#include "./ascend/ascend_handle.h"
#endif
...
...
@@ -37,7 +37,7 @@ __C infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, infiniDe
}
#endif
#ifdef ENABLE_ASCEND_API
case
DevAscendNpu
:
{
case
INFINI_DEVICE_ASCEND
:
{
return
createAscendHandle
((
infiniopAscendHandle_t
*
)
handle_ptr
,
device_id
);
}
#endif
...
...
@@ -64,8 +64,8 @@ __C infiniopStatus_t infiniopDestroyHandle(infiniopHandle_t handle) {
return
STATUS_SUCCESS
;
}
#endif
#ifdef ENABLE_ASCEND_
NPU
case
DevAscendNpu
:
{
#ifdef ENABLE_ASCEND_
API
case
INFINI_DEVICE_ASCEND
:
{
return
deleteAscendHandle
((
infiniopAscendHandle_t
)
handle
);
}
#endif
...
...
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc
View file @
58c0de0c
#include "matmul_aclnn.h"
MatmulAclnnDescriptor
::
MatmulAclnnDescriptor
(
Device
_device
)
{
MatmulAclnnDescriptor
::
MatmulAclnnDescriptor
(
infini
Device
_t
_device
)
{
device
=
_device
;
device_id
=
0
;
executor
=
nullptr
;
...
...
@@ -8,35 +8,29 @@ MatmulAclnnDescriptor::MatmulAclnnDescriptor(Device _device) {
cDesc
=
new
aclnnTensorDescriptor
();
aDesc
=
new
aclnnTensorDescriptor
();
bDesc
=
new
aclnnTensorDescriptor
();
alpha
=
1.0
;
beta
=
0
;
mt
=
1
;
workspaceSize
=
0
;
}
infiniopStatus_t
aclnnCreateMatmulDescriptor
(
AscendHandle_t
handle
,
infiniopStatus_t
aclnnCreateMatmulDescriptor
(
infiniop
AscendHandle_t
handle
,
MatmulAclnnDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
float
alpha
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
,
float
beta
,
int8_t
mt
)
{
DT
dtype
=
c_desc
->
dt
;
if
(
dtype
!=
F16
&&
dtype
!=
F32
)
{
return
STATUS_BAD_TENSOR_DTYPE
;
infiniDtype_t
dtype
=
c_desc
->
dt
ype
;
if
(
dtype
!=
INFINI_DTYPE_
F16
&&
dtype
!=
INFINI_DTYPE_
F32
)
{
return
INFINIOP_
STATUS_BAD_TENSOR_DTYPE
;
}
*
desc_ptr
=
new
MatmulAclnnDescriptor
(
handle
->
device
);
(
*
desc_ptr
)
->
device_id
=
handle
->
device_id
;
(
*
desc_ptr
)
->
dtype
=
dtype
;
(
*
desc_ptr
)
->
mt
=
mt
;
(
*
desc_ptr
)
->
alpha
=
alpha
;
(
*
desc_ptr
)
->
beta
=
beta
;
infiniopStatus_t
*
status
=
new
infiniopStatus_t
{
STATUS_EXECUTION_FAILED
};
auto
info
=
new
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
status
,
false
);
if
(
*
status
!=
STATUS_SUCCESS
)
{
return
*
status
;
infiniopStatus_t
status
;
auto
info
=
new
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
false
);
if
(
status
!=
INFINIOP_STATUS_SUCCESS
)
{
return
status
;
}
(
*
desc_ptr
)
->
info
=
info
;
...
...
@@ -44,15 +38,30 @@ infiniopStatus_t aclnnCreateMatmulDescriptor(AscendHandle_t handle,
auto
&
aDesc
=
(
*
desc_ptr
)
->
aDesc
;
auto
&
bDesc
=
(
*
desc_ptr
)
->
bDesc
;
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched operation
CHECK_STATUS
(
cDesc
->
setDescriptor
(
toAclDataType
(
c_desc
->
dt
),
{
info
->
c_matrix
.
rows
,
info
->
c_matrix
.
cols
},
{
info
->
c_matrix
.
row_stride
,
info
->
c_matrix
.
col_stride
}),
STATUS_SUCCESS
);
CHECK_STATUS
(
aDesc
->
setDescriptor
(
toAclDataType
(
a_desc
->
dt
),
{
info
->
a_matrix
.
rows
,
info
->
a_matrix
.
cols
},
{
info
->
a_matrix
.
row_stride
,
info
->
a_matrix
.
col_stride
}),
STATUS_SUCCESS
);
CHECK_STATUS
(
bDesc
->
setDescriptor
(
toAclDataType
(
b_desc
->
dt
),
{
info
->
b_matrix
.
rows
,
info
->
b_matrix
.
cols
},
{
info
->
b_matrix
.
row_stride
,
info
->
b_matrix
.
col_stride
}),
STATUS_SUCCESS
);
CHECK_STATUS
(
cDesc
->
createTensor
(),
STATUS_SUCCESS
);
CHECK_STATUS
(
aDesc
->
createTensor
(),
STATUS_SUCCESS
);
CHECK_STATUS
(
bDesc
->
createTensor
(),
STATUS_SUCCESS
);
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched
// operation
CHECK_STATUS
(
cDesc
->
setDescriptor
(
toAclDataType
(
c_desc
->
dtype
),
{
static_cast
<
int64_t
>
(
info
->
c_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
->
c_matrix
.
cols
)},
{
info
->
c_matrix
.
row_stride
,
info
->
c_matrix
.
col_stride
}),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
aDesc
->
setDescriptor
(
toAclDataType
(
a_desc
->
dtype
),
{
static_cast
<
int64_t
>
(
info
->
a_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
->
a_matrix
.
cols
)},
{
info
->
a_matrix
.
row_stride
,
info
->
a_matrix
.
col_stride
}),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
bDesc
->
setDescriptor
(
toAclDataType
(
b_desc
->
dtype
),
{
static_cast
<
int64_t
>
(
info
->
b_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
->
b_matrix
.
cols
)},
{
info
->
b_matrix
.
row_stride
,
info
->
b_matrix
.
col_stride
}),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
cDesc
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
aDesc
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
CHECK_STATUS
(
bDesc
->
createTensor
(),
INFINIOP_STATUS_SUCCESS
);
auto
&
workspaceSize
=
(
*
desc_ptr
)
->
workspaceSize
;
auto
&
executor
=
(
*
desc_ptr
)
->
executor
;
...
...
@@ -63,33 +72,31 @@ infiniopStatus_t aclnnCreateMatmulDescriptor(AscendHandle_t handle,
aclnnStatus
ret
;
int64_t
transA
=
0
;
int64_t
transB
=
0
;
// aclnnGemm support C = alpha * A @ B + beta * C
// see https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha003/apiref/aolapi/context/aclnnGemm.md
ret
=
aclnnGemmGetWorkspaceSize
(
ta
,
tb
,
tc
,
(
*
desc_ptr
)
->
alpha
,
(
*
desc_ptr
)
->
beta
,
transA
,
transB
,
tc
,
// see
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha003/apiref/aolapi/context/aclnnGemm.md
// use alpha = 0.5, beta = 0.5 temporarily
ret
=
aclnnGemmGetWorkspaceSize
(
ta
,
tb
,
tc
,
0.5
f
,
0.5
f
,
transA
,
transB
,
tc
,
(
*
desc_ptr
)
->
mt
,
&
workspaceSize
,
&
executor
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclnnGemmGetWorkspaceSize failed. ERROR: %d
\n
"
,
ret
);
return
STATUS_EXECUTION_FAILED
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
aclSetAclOpExecutorRepeatable
(
executor
);
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
infiniopStatus_t
aclnnGetMatmulWorkspaceSize
(
MatmulAclnnDescriptor_t
desc
,
uint64
_t
*
size
)
{
size
_t
*
size
)
{
*
size
=
desc
->
workspaceSize
;
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
infiniopStatus_t
aclnnMatmul
(
MatmulAclnnDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
infiniopStatus_t
aclnnMatmul
(
MatmulAclnnDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
float
beta
,
void
*
stream
)
{
auto
&
cDesc
=
desc
->
cDesc
;
auto
&
aDesc
=
desc
->
aDesc
;
...
...
@@ -101,30 +108,40 @@ infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc,
auto
batch
=
desc
->
info
->
batch
;
auto
&
executor
=
desc
->
executor
;
auto
&
workspaceSize
=
desc
->
workspaceSize
;
// Set the running device to the one recorded in the descriptor
aclrtSetDevice
(
desc
->
device_id
);
for
(
int
i
=
0
;
i
<
batch
;
i
++
)
{
AclSetTensorAddr
(
executor
,
0
,
ta
,
(
char
*
)
(
a
)
+
i
*
desc
->
info
->
a_matrix
.
stride
*
desc
->
dtype
.
size
);
AclSetTensorAddr
(
executor
,
1
,
tb
,
(
char
*
)
(
b
)
+
i
*
desc
->
info
->
b_matrix
.
stride
*
desc
->
dtype
.
size
);
AclSetTensorAddr
(
executor
,
2
,
tc
,
(
char
*
)
(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
desc
->
dtype
.
size
);
AclSetTensorAddr
(
executor
,
3
,
tc
,
(
char
*
)
(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
desc
->
dtype
.
size
);
aclnnStatus
ret
=
aclnnGemm
(
workspace
,
workspaceSize
,
executor
,
stream
);
size_t
workspaceSize
;
aclnnStatus
ret
;
ret
=
aclnnGemmGetWorkspaceSize
(
ta
,
tb
,
tc
,
alpha
,
beta
,
0
,
0
,
tc
,
desc
->
mt
,
&
workspaceSize
,
&
(
desc
->
executor
));
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclnnGemmGetWorkspaceSize failed. ERROR: %d
\n
"
,
ret
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
if
(
workspace_size
<
workspaceSize
)
{
return
INFINIOP_STATUS_INSUFFICIENT_WORKSPACE
;
}
aclSetAclOpExecutorRepeatable
(
desc
->
executor
);
for
(
size_t
i
=
0
;
i
<
batch
;
i
++
)
{
AclSetTensorAddr
(
desc
->
executor
,
0
,
ta
,
(
char
*
)(
a
)
+
i
*
desc
->
info
->
a_matrix
.
stride
*
infini_sizeof
(
desc
->
dtype
));
AclSetTensorAddr
(
desc
->
executor
,
1
,
tb
,
(
char
*
)(
b
)
+
i
*
desc
->
info
->
b_matrix
.
stride
*
infini_sizeof
(
desc
->
dtype
));
AclSetTensorAddr
(
desc
->
executor
,
2
,
tc
,
(
char
*
)(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
infini_sizeof
(
desc
->
dtype
));
AclSetTensorAddr
(
desc
->
executor
,
3
,
tc
,
(
char
*
)(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
infini_sizeof
(
desc
->
dtype
));
ret
=
aclnnGemm
(
workspace
,
workspaceSize
,
desc
->
executor
,
stream
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclnnGemm failed. ERROR: %d
\n
"
,
ret
);
return
STATUS_EXECUTION_FAILED
);
return
INFINIOP_STATUS_INTERNAL_ERROR
);
}
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
infiniopStatus_t
aclnnDestroyMatmulDescriptor
(
MatmulAclnnDescriptor_t
desc
)
{
delete
desc
->
cDesc
;
delete
desc
->
bDesc
;
...
...
@@ -133,5 +150,5 @@ infiniopStatus_t aclnnDestroyMatmulDescriptor(MatmulAclnnDescriptor_t desc) {
aclDestroyAclOpExecutor
(
desc
->
executor
);
delete
desc
;
return
STATUS_SUCCESS
;
return
INFINIOP_
STATUS_SUCCESS
;
}
src/infiniop/ops/matmul/ascend/matmul_aclnn.h
View file @
58c0de0c
#ifndef __ACLNN_MATMUL_H__
#define __ACLNN_MATMUL_H__
#include "../../../devices/ascend/ascend_handle.h"
#include "../../../devices/ascend/tensor_aclnn.h"
#include "../../utils.h"
#include "../blas.h"
#include "operators.h"
#include <acl/acl_base.h>
#include <aclnn/acl_meta.h>
#include <aclnnop/level2/aclnn_gemm.h>
#include <aclnnop/aclnn_matmul.h>
#include "matmul_aclnn_api.h"
struct
MatmulAclnnDescriptor
{
Device
device
;
infini
Device
_t
device
;
int
device_id
;
aclOpExecutor
*
executor
;
MatmulInfo
*
info
;
DT
dtype
;
infiniDtype_t
dtype
;
aclnnTensorDescriptor_t
cDesc
,
aDesc
,
bDesc
;
// cubeMathType
// see doc: https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha002/apiref/appdevgapi/context/aclnnBatchMatMul.md
float
alpha
;
float
beta
;
int8_t
mt
;
uint64
_t
workspaceSize
;
size
_t
workspaceSize
;
MatmulAclnnDescriptor
(
Device
_device
);
MatmulAclnnDescriptor
(
infini
Device
_t
_device
);
};
typedef
struct
MatmulAclnnDescriptor
*
MatmulAclnnDescriptor_t
;
infiniopStatus_t
aclnnCreateMatmulDescriptor
(
AscendHandle_t
handle
,
MatmulAclnnDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
float
alpha
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
,
float
beta
,
int8_t
cubeMathType
);
infiniopStatus_t
aclnnGetMatmulWorkspaceSize
(
MatmulAclnnDescriptor_t
desc
,
uint64_t
*
size
);
infiniopStatus_t
aclnnMatmul
(
MatmulAclnnDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
c
,
const
void
*
a
,
const
void
*
b
,
void
*
stream
);
infiniopStatus_t
aclnnDestroyMatmulDescriptor
(
MatmulAclnnDescriptor_t
desc
);
#endif
src/infiniop/ops/matmul/ascend/matmul_aclnn_api.h
0 → 100644
View file @
58c0de0c
#ifndef __INFINIOP_MATMUL_ACLNN_API_H__
#define __INFINIOP_MATMUL_ACLNN_API_H__

#include "../../../devices/ascend/ascend_handle.h"
#include "infiniop/operator.h"

// Public entry points of the Ascend (aclnn) matmul backend.
//
// The descriptor type is only forward-declared here so that callers can hold
// a MatmulAclnnDescriptor_t without pulling in the aclnn headers; its layout
// lives in matmul_aclnn.h.
struct MatmulAclnnDescriptor;
typedef struct MatmulAclnnDescriptor *MatmulAclnnDescriptor_t;

// Creates a matmul descriptor for the tensors described by c_desc, a_desc and
// b_desc on the device owned by `handle`, and stores it in *desc_ptr.
// `cubeMathType` is the aclnn cube math type selector
// (NOTE(review): presumably forwarded unchanged to aclnnGemm -- confirm).
infiniopStatus_t aclnnCreateMatmulDescriptor(infiniopAscendHandle_t handle,
                                             MatmulAclnnDescriptor_t *desc_ptr,
                                             infiniopTensorDescriptor_t c_desc,
                                             infiniopTensorDescriptor_t a_desc,
                                             infiniopTensorDescriptor_t b_desc,
                                             int8_t cubeMathType);

// Writes the workspace size recorded in `desc` to *size.
infiniopStatus_t aclnnGetMatmulWorkspaceSize(MatmulAclnnDescriptor_t desc,
                                             size_t *size);

// Runs the matmul described by `desc` on `stream`, reading `a` and `b` and
// writing `c`, scaled by `alpha` and `beta`. `workspace` / `workspace_size`
// describe the caller-provided scratch buffer.
infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc,
                             void *workspace,
                             size_t workspace_size,
                             void *c,
                             const void *a,
                             const void *b,
                             float alpha,
                             float beta,
                             void *stream);

// Releases `desc` and the resources it owns.
infiniopStatus_t aclnnDestroyMatmulDescriptor(MatmulAclnnDescriptor_t desc);

#endif // __INFINIOP_MATMUL_ACLNN_API_H__
src/infiniop/ops/matmul/blas.h
View file @
58c0de0c
...
...
@@ -47,7 +47,7 @@ typedef struct BlasMatrix {
*
status
=
INFINIOP_STATUS_SUCCESS
;
}
bool
match_batch
(
in
t
batch
)
const
{
bool
match_batch
(
size_
t
batch
)
const
{
return
this
->
batch
==
batch
||
this
->
batch
==
1
;
}
...
...
src/infiniop/ops/matmul/operator.cc
View file @
58c0de0c
...
...
@@ -10,8 +10,8 @@
#ifdef ENABLE_CAMBRICON_MLU
#include "bang/matmul_cnnl.h"
#endif
#ifdef ENABLE_ASCEND_
NPU
#include "ascend/matmul_aclnn.h"
#ifdef ENABLE_ASCEND_
API
#include "ascend/matmul_aclnn
_api
.h"
#endif
__C
infiniopStatus_t
infiniopCreateMatmulDescriptor
(
infiniopHandle_t
handle
,
...
...
@@ -34,21 +34,19 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(infiniopHandle_t handle,
return
bangCreateMatmulDescriptor
((
BangHandle_t
)
handle
,
(
MatmulBangDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnCreateMatmulDescriptor
((
AscendHandle_t
)
handle
,
(
MatmulAclnnDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
,
1
);
#ifdef ENABLE_ASCEND_API
case
INFINI_DEVICE_ASCEND
:
{
return
aclnnCreateMatmulDescriptor
(
(
infiniopAscendHandle_t
)
handle
,
(
MatmulAclnnDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
,
1
);
}
#endif
}
return
INFINIOP_STATUS_BAD_DEVICE
;
}
__C
infiniopStatus_t
infiniopGetMatmulWorkspaceSize
(
infiniopMatmulDescriptor_t
desc
,
uint64_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetMatmulWorkspaceSize
(
infiniopMatmulDescriptor_t
desc
,
size_t
*
size
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU_API
case
INFINI_DEVICE_CPU
:
...
...
@@ -65,8 +63,8 @@ __C infiniopStatus_t infiniopGetMatmulWorkspaceSize(infiniopMatmulDescriptor_t d
return
bangGetMatmulWorkspaceSize
((
MatmulBangDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_ASCEND_
NPU
case
DevAscendNpu
:
{
#ifdef ENABLE_ASCEND_
API
case
INFINI_DEVICE_ASCEND
:
{
return
aclnnGetMatmulWorkspaceSize
((
MatmulAclnnDescriptor_t
)
desc
,
size
);
}
...
...
@@ -75,7 +73,10 @@ __C infiniopStatus_t infiniopGetMatmulWorkspaceSize(infiniopMatmulDescriptor_t d
return
INFINIOP_STATUS_BAD_DEVICE
;
}
__C
infiniopStatus_t
infiniopMatmul
(
infiniopMatmulDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
float
beta
,
void
*
stream
)
{
__C
infiniopStatus_t
infiniopMatmul
(
infiniopMatmulDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
float
beta
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU_API
case
INFINI_DEVICE_CPU
:
...
...
@@ -87,20 +88,14 @@ __C infiniopStatus_t infiniopMatmul(infiniopMatmulDescriptor_t desc, void *works
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangMatmul
((
MatmulBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
c
,
alpha
,
a
,
b
,
beta
,
stream
);
return
bangMatmul
((
MatmulBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
c
,
a
,
b
,
alpha
,
beta
,
stream
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
return
aclnnMatmul
((
MatmulAclnnDescriptor_t
)
desc
,
workspace
,
workspace_size
,
c
,
alpha
,
a
,
b
,
beta
,
stream
);
#ifdef ENABLE_ASCEND_API
case
INFINI_DEVICE_ASCEND
:
return
aclnnMatmul
((
MatmulAclnnDescriptor_t
)
desc
,
workspace
,
workspace_size
,
c
,
a
,
b
,
alpha
,
beta
,
stream
);
#endif
}
return
INFINIOP_STATUS_BAD_DEVICE
;
...
...
@@ -123,8 +118,8 @@ __C infiniopStatus_t infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t
return
bangDestroyMatmulDescriptor
((
MatmulBangDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_ASCEND_
NPU
case
DevAscendNpu
:
{
#ifdef ENABLE_ASCEND_
API
case
INFINI_DEVICE_ASCEND
:
{
return
aclnnDestroyMatmulDescriptor
((
MatmulAclnnDescriptor_t
)
desc
);
}
#endif
...
...
test/infiniop/libinfiniop/liboperators.py
View file @
58c0de0c
...
...
@@ -63,7 +63,6 @@ def open_lib():
assert
(
library_path
is
not
None
),
f
"Cannot find infiniop.dll or libinfiniop.so. Check if INFINI_ROOT is set correctly."
ctypes
.
CDLL
(
r
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\cudnn64_9.dll"
)
lib
=
ctypes
.
CDLL
(
library_path
)
lib
.
infiniopCreateTensorDescriptor
.
argtypes
=
[
POINTER
(
infiniopTensorDescriptor_t
),
...
...
test/infiniop/matmul.py
View file @
58c0de0c
...
...
@@ -194,7 +194,7 @@ def test_cpu(lib, test_cases):
destroy_handle
(
lib
,
handle
)
def
test_
cud
a
(
lib
,
test_cases
):
def
test_
nvidi
a
(
lib
,
test_cases
):
device
=
InfiniDeviceEnum
.
NVIDIA
handle
=
create_handle
(
lib
,
device
)
...
...
@@ -227,7 +227,7 @@ def test_cuda(lib, test_cases):
destroy_handle
(
lib
,
handle
)
def
test_
bang
(
lib
,
test_cases
):
def
test_
cambricon
(
lib
,
test_cases
):
import
torch_mlu
device
=
InfiniDeviceEnum
.
CAMBRICON
handle
=
create_handle
(
lib
,
device
)
...
...
@@ -348,12 +348,12 @@ if __name__ == "__main__":
PROFILE
=
True
if
args
.
cpu
:
test_cpu
(
lib
,
test_cases
)
if
args
.
cud
a
:
test_
cud
a
(
lib
,
test_cases
)
if
args
.
bang
:
test_
bang
(
lib
,
test_cases
)
if
args
.
nvidi
a
:
test_
nvidi
a
(
lib
,
test_cases
)
if
args
.
cambricon
:
test_
cambricon
(
lib
,
test_cases
)
if
args
.
ascend
:
test_ascend
(
lib
,
test_cases
)
if
not
(
args
.
cpu
or
args
.
cud
a
or
args
.
bang
or
args
.
ascend
):
if
not
(
args
.
cpu
or
args
.
nvidi
a
or
args
.
cambricon
or
args
.
ascend
):
test_cpu
(
lib
,
test_cases
)
print
(
"
\033
[92mTest passed!
\033
[0m"
)
xmake.lua
View file @
58c0de0c
...
...
@@ -61,6 +61,7 @@ option_end()
if
has_config
(
"ascend-npu"
)
then
add_defines
(
"ENABLE_ASCEND_API"
)
includes
(
"xmake/ascend.lua"
)
end
-- 沐曦
...
...
@@ -126,7 +127,7 @@ target("infiniop")
add_deps
(
"cambricon-mlu"
)
end
if
has_config
(
"ascend-npu"
)
then
add_deps
(
"ascend
-npu
"
)
add_deps
(
"
infiniop-
ascend"
)
end
if
has_config
(
"metax-gpu"
)
then
add_deps
(
"metax-gpu"
)
...
...
xmake/ascend.lua
0 → 100644
View file @
58c0de0c
-- xmake configuration for the Ascend (CANN / aclnn) backend of infiniop.
-- Everything in this section applies at file (root) scope.

add_defines("ENABLE_ASCEND_API")

-- Root of the CANN toolkit installation. It is read from the ASCEND_HOME
-- environment variable; when that variable is unset or empty we fall back to
-- the toolkit's default install location instead of aborting below with an
-- opaque "attempt to concatenate a nil value" error.
local ASCEND_HOME = os.getenv("ASCEND_HOME")
if ASCEND_HOME == nil or ASCEND_HOME == "" then
    ASCEND_HOME = "/usr/local/Ascend/ascend-toolkit/latest"
end

-- Add include dirs
add_includedirs(ASCEND_HOME .. "/include")
add_includedirs(ASCEND_HOME .. "/include/aclnn")

-- Toolkit runtime libraries (link order is kept as originally written).
add_linkdirs(ASCEND_HOME .. "/lib64")
add_links("libascendcl.so")
add_links("libnnopbase.so")
add_links("libopapi.so")
add_links("libruntime.so")

-- Driver HAL library, located relative to the toolkit root.
add_linkdirs(ASCEND_HOME .. "/../../driver/lib64/driver")
add_links("libascend_hal.so")

-- Output directory <project>/build/<plat>/<arch>/<mode>; the "ascend-kernels"
-- rule copies libascend_kernels.a into it.
local builddir = string.format(
    "%s/build/%s/%s/%s",
    os.projectdir(),
    get_config("plat"),
    get_config("arch"),
    get_config("mode")
)
-- Rule "ascend-kernels": drives the Makefile that lives in
-- src/infiniop/devices/ascend and manages the libascend_kernels.a archive
-- inside the xmake build directory.
rule("ascend-kernels")

    -- Before linking: run `make` in the Ascend device directory, then copy
    -- build/lib/libascend_kernels.a from there into the build directory
    -- (presumably the archive is what `make` produces -- confirm against the
    -- Makefile).
    before_link(function ()
        local kernels_dir = path.join(os.projectdir(), "src/infiniop/devices/ascend")
        os.cd(kernels_dir)
        os.exec("make")
        os.cp(
            "$(projectdir)/src/infiniop/devices/ascend/build/lib/libascend_kernels.a",
            builddir .. "/"
        )
        -- Return to the project root so later steps see the expected cwd.
        os.cd(os.projectdir())
    end)

    -- After `xmake clean`: run `make clean` in the same directory and delete
    -- the copied archive from the build directory.
    after_clean(function ()
        local kernels_dir = path.join(os.projectdir(), "src/infiniop/devices/ascend")
        os.cd(kernels_dir)
        os.exec("make clean")
        os.cd(os.projectdir())
        os.rm(builddir .. "/libascend_kernels.a")
    end)

rule_end()
-- Static library holding the Ascend device layer and the Ascend
-- implementations of the infiniop operators.
target("infiniop-ascend")
    -- Other configs
    set_kind("static")
    set_languages("cxx17")

    -- Install step is overridden with an empty callback, so this target
    -- installs nothing by itself.
    on_install(function () end)

    -- Add files: device sources first, then every operator's ascend/ sources.
    add_files("$(projectdir)/src/infiniop/devices/ascend/*.cc")
    add_files("$(projectdir)/src/infiniop/ops/*/ascend/*.cc")

    add_cxflags("-lstdc++ -Wall -Werror -fPIC")

    -- Add operator
    -- TODO: add it back after ascend-kernels is fixed
    -- add_rules("ascend-kernels")
    -- add_links(builddir.."/libascend_kernels.a")
target_end()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment