Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
527086e9
Commit
527086e9
authored
Mar 27, 2025
by
zhangyunze
Browse files
fix: 依据rmsnorm昇腾算子官方更新,删除cast操作
parent
d307db53
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
43 deletions
+13
-43
src/infiniop/devices/ascend/common_ascend.cc
src/infiniop/devices/ascend/common_ascend.cc
+1
-1
src/infiniop/devices/ascend/common_ascend.h
src/infiniop/devices/ascend/common_ascend.h
+1
-1
src/infiniop/ops/rms_norm/ascend/rms_norm_aclnn.cc
src/infiniop/ops/rms_norm/ascend/rms_norm_aclnn.cc
+11
-41
No files found.
src/infiniop/devices/ascend/common_ascend.cc
View file @
527086e9
...
...
@@ -8,7 +8,7 @@ std::vector<int64_t> inferStorageShape(std::vector<int64_t> shape, std::vector<i
return
storageShape
;
}
size_t
aclnnTensorDescriptor
::
size
()
const
{
size_t
aclnnTensorDescriptor
::
numel
()
const
{
return
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
(
size_t
)
1
,
std
::
multiplies
<
size_t
>
());
}
...
...
src/infiniop/devices/ascend/common_ascend.h
View file @
527086e9
...
...
@@ -37,7 +37,7 @@ struct aclnnTensorDescriptor {
aclnnTensorDescriptor
(
aclDataType
dtype
,
const
std
::
vector
<
int64_t
>
&
shape
,
const
std
::
vector
<
int64_t
>
&
strides
,
void
*
data
=
nullptr
);
aclnnTensorDescriptor
(
infiniopTensorDescriptor_t
y_desc
,
void
*
data
=
nullptr
);
~
aclnnTensorDescriptor
();
size_t
size
()
const
;
size_t
numel
()
const
;
std
::
string
toString
();
};
...
...
src/infiniop/ops/rms_norm/ascend/rms_norm_aclnn.cc
View file @
527086e9
#include "rms_norm_aclnn.h"
#include "../../../devices/ascend/common_ascend.h"
#include <aclnnop/aclnn_cast.h>
#include <aclnnop/aclnn_rms_norm.h>
namespace
op
::
rms_norm
::
ascend
{
struct
Descriptor
::
Opaque
{
mutable
aclOpExecutor
*
executor
;
mutable
aclOpExecutor
*
castExecutor
;
aclnnTensorDescriptor_t
y
;
aclnnTensorDescriptor_t
x
;
aclnnTensorDescriptor_t
w
;
aclnnTensorDescriptor_t
rstd
;
aclnnTensorDescriptor_t
cast
;
size_t
workspaceSize
;
size_t
castWorkspaceSize
;
~
Opaque
()
{
delete
y
;
delete
x
;
delete
w
;
delete
rstd
;
delete
cast
;
aclDestroyAclOpExecutor
(
executor
);
aclDestroyAclOpExecutor
(
castExecutor
);
}
};
...
...
@@ -42,17 +36,15 @@ infiniStatus_t Descriptor::create(
auto
handle_ascend
=
reinterpret_cast
<
device
::
ascend
::
Handle
*>
(
handle
);
CHECK_STATUS
(
createRMSNormInfo
(
&
info
,
y_desc
,
x_desc
,
w_desc
,
epsilon
));
size_t
workspace_size
,
cast_workspace_size
=
0
;
size_t
workspace_size
=
0
;
aclOpExecutor
*
executor
=
nullptr
;
aclOpExecutor
*
castExecutor
=
nullptr
;
aclnnTensorDescriptor_t
y
=
nullptr
;
aclnnTensorDescriptor_t
x
=
nullptr
;
aclnnTensorDescriptor_t
w
=
nullptr
;
aclnnTensorDescriptor_t
rstd
=
nullptr
;
aclnnTensorDescriptor_t
cast
=
nullptr
;
std
::
vector
<
int64_t
>
slice_shape
=
{
1
,
static_cast
<
int64_t
>
((
info
.
shape
)[
1
])};
auto
slice_stride
=
std
::
vector
<
int64_t
>
(
2
,
1
);
std
::
vector
<
int64_t
>
slice_shape
=
{
static_cast
<
int64_t
>
((
info
.
shape
)[
1
])};
auto
slice_stride
=
std
::
vector
<
int64_t
>
(
1
,
1
);
y
=
new
aclnnTensorDescriptor
(
toAclDataType
(
info
.
atype
),
slice_shape
,
slice_stride
);
x
=
new
aclnnTensorDescriptor
(
toAclDataType
(
info
.
atype
),
slice_shape
,
slice_stride
);
w
=
new
aclnnTensorDescriptor
(
w_desc
);
...
...
@@ -64,28 +56,17 @@ infiniStatus_t Descriptor::create(
// Set rstdDesc
// See: https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha002/apiref/appdevgapi/context/aclnnRmsNorm.md
// rstdTensor cannot set nullptr in aclnn
auto
rstd_shape
=
std
::
vector
<
int64_t
>
(
2
,
1
);
auto
rstd_strides
=
std
::
vector
<
int64_t
>
(
2
,
1
);
auto
rstd_shape
=
std
::
vector
<
int64_t
>
(
1
,
1
);
auto
rstd_strides
=
std
::
vector
<
int64_t
>
(
1
,
1
);
rstd
=
new
aclnnTensorDescriptor
(
toAclDataType
(
INFINI_DTYPE_F32
),
rstd_shape
,
rstd_strides
);
aclTensor
*
trstd
=
rstd
->
tensor
;
if
(
w
->
dataType
!=
x
->
dataType
)
{
cast
=
new
aclnnTensorDescriptor
(
x
->
dataType
,
w
->
shape
,
w
->
strides
);
}
// Get WorkspaceSize and set executor
CHECK_ACL
(
aclnnRmsNormGetWorkspaceSize
(
tx
,
cast
==
nullptr
?
tw
:
cast
->
tensor
,
static_cast
<
double
>
(
epsilon
),
ty
,
trstd
,
&
workspace_size
,
&
executor
));
CHECK_ACL
(
aclnnRmsNormGetWorkspaceSize
(
tx
,
tw
,
static_cast
<
double
>
(
epsilon
),
ty
,
trstd
,
&
workspace_size
,
&
executor
));
aclSetAclOpExecutorRepeatable
(
executor
);
if
(
cast
)
{
aclTensor
*
tc
=
cast
->
tensor
;
CHECK_ACL
(
aclnnCastGetWorkspaceSize
(
tw
,
cast
->
dataType
,
tc
,
&
cast_workspace_size
,
&
castExecutor
));
aclSetAclOpExecutorRepeatable
(
castExecutor
);
}
size_t
allWorkspaceSize
=
workspace_size
+
cast_workspace_size
+
rstd
->
size
()
*
aclDataTypeSize
(
rstd
->
dataType
);
allWorkspaceSize
=
allWorkspaceSize
+
(
cast
==
nullptr
?
0
:
cast
->
size
()
*
aclDataTypeSize
(
cast
->
dataType
));
*
desc_ptr
=
new
Descriptor
(
new
Opaque
{
executor
,
castExecutor
,
y
,
x
,
w
,
rstd
,
cast
,
workspace_size
,
cast_workspace_size
},
info
,
allWorkspaceSize
,
handle_ascend
->
device
,
handle_ascend
->
device_id
);
size_t
allWorkspaceSize
=
workspace_size
+
rstd
->
numel
()
*
aclDataTypeSize
(
rstd
->
dataType
);
*
desc_ptr
=
new
Descriptor
(
new
Opaque
{
executor
,
y
,
x
,
w
,
rstd
,
workspace_size
},
info
,
allWorkspaceSize
,
handle_ascend
->
device
,
handle_ascend
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -101,24 +82,13 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size, voi
auto
trstd
=
_opaque
->
rstd
->
tensor
;
void
*
rstdPtr
=
(
void
*
)((
uint8_t
*
)
workspace
+
_opaque
->
workspaceSize
);
void
*
castPtr
=
nullptr
;
if
(
_opaque
->
cast
)
{
auto
tcast
=
_opaque
->
cast
->
tensor
;
castPtr
=
(
void
*
)((
float
*
)
rstdPtr
+
_opaque
->
rstd
->
size
());
AclSetTensorAddr
(
_opaque
->
castExecutor
,
0
,
tw
,
(
void
*
)
w
);
AclSetTensorAddr
(
_opaque
->
castExecutor
,
1
,
tcast
,
castPtr
);
CHECK_ACL
(
aclnnCast
(
nullptr
,
_opaque
->
castWorkspaceSize
,
_opaque
->
castExecutor
,
stream
));
}
auto
unit
=
infiniSizeOf
(
_info
.
atype
);
AclSetTensorAddr
(
_opaque
->
executor
,
1
,
tw
,
(
void
*
)
w
);
AclSetTensorAddr
(
_opaque
->
executor
,
3
,
trstd
,
rstdPtr
);
for
(
size_t
i
=
0
;
i
<
(
_info
.
shape
)[
0
];
++
i
)
{
AclSetTensorAddr
(
_opaque
->
executor
,
0
,
tx
,
((
char
*
)
x
)
+
i
*
(
_info
.
x_strides
)[
0
]
*
unit
);
if
(
_opaque
->
cast
)
{
AclSetTensorAddr
(
_opaque
->
executor
,
1
,
_opaque
->
cast
->
tensor
,
castPtr
);
}
else
{
AclSetTensorAddr
(
_opaque
->
executor
,
1
,
tw
,
(
void
*
)
w
);
}
AclSetTensorAddr
(
_opaque
->
executor
,
2
,
ty
,
((
char
*
)
y
)
+
i
*
(
_info
.
y_strides
)[
0
]
*
unit
);
AclSetTensorAddr
(
_opaque
->
executor
,
3
,
trstd
,
rstdPtr
);
CHECK_ACL
(
aclnnRmsNorm
(
workspace
,
_opaque
->
workspaceSize
,
_opaque
->
executor
,
stream
));
}
return
INFINI_STATUS_SUCCESS
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment