Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
11e7df93
Unverified
Commit
11e7df93
authored
Apr 25, 2025
by
PanZezhong1725
Committed by
GitHub
Apr 25, 2025
Browse files
Merge pull request #199 from InfiniTensor/fix-ascend-executor
修复昇腾调用aclSetAclOpExecutorRepeatable的潜在危险
parents
17415721
66e7dc56
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
52 additions
and
39 deletions
+52
-39
src/infiniop/ops/causal_softmax/ascend/causal_softmax_aclnn.cc
...nfiniop/ops/causal_softmax/ascend/causal_softmax_aclnn.cc
+24
-17
src/infiniop/ops/gemm/ascend/gemm_ascend.cc
src/infiniop/ops/gemm/ascend/gemm_ascend.cc
+15
-13
src/infiniop/ops/rms_norm/ascend/rms_norm_aclnn.cc
src/infiniop/ops/rms_norm/ascend/rms_norm_aclnn.cc
+13
-9
No files found.
src/infiniop/ops/causal_softmax/ascend/causal_softmax_aclnn.cc
View file @
11e7df93
...
...
@@ -6,22 +6,21 @@
namespace
op
::
causal_softmax
::
ascend
{
struct
Descriptor
::
Opaque
{
mutable
aclOpExecutor
*
executor
;
mutable
aclOpExecutor
*
mask_executor
;
aclnnTensorDescriptor_t
x
;
aclnnTensorDescriptor_t
mask
;
aclnnTensorDescriptor_t
y
;
aclnnTensorDescriptor_t
value
;
void
*
mask_addr
;
size_t
workspacesize_softmax
;
size_t
workspacesize_mask
;
void
*
value_addr
;
~
Opaque
()
{
delete
x
;
delete
mask
;
delete
y
;
delete
value
;
acl
DestroyAclOpExecutor
(
executo
r
);
acl
DestroyAclOpExecutor
(
mask_executo
r
);
acl
rtFree
(
mask_add
r
);
acl
rtFree
(
value_add
r
);
}
};
...
...
@@ -64,13 +63,13 @@ infiniStatus_t Descriptor::create(
auto
size
=
aclDataTypeSize
(
aclDataType
::
ACL_FLOAT16
);
CHECK_ACL
(
aclrtMalloc
(
&
value_addr
,
size
,
ACL_MEM_MALLOC_HUGE_FIRST
));
CHECK_ACL
(
aclrtMemcpy
(
value_addr
,
size
,
&
mask_value
,
size
,
ACL_MEMCPY_HOST_TO_DEVICE
));
value
=
new
aclnnTensorDescriptor
(
aclDataType
::
ACL_FLOAT16
,
{},
{}
,
value_addr
);
value
=
new
aclnnTensorDescriptor
(
aclDataType
::
ACL_FLOAT16
,
{},
{});
}
else
{
uint32_t
mask_value
=
0xff800000
;
auto
size
=
aclDataTypeSize
(
aclDataType
::
ACL_FLOAT
);
CHECK_ACL
(
aclrtMalloc
(
&
value_addr
,
size
,
ACL_MEM_MALLOC_HUGE_FIRST
));
CHECK_ACL
(
aclrtMemcpy
(
value_addr
,
size
,
&
mask_value
,
size
,
ACL_MEMCPY_HOST_TO_DEVICE
));
value
=
new
aclnnTensorDescriptor
(
aclDataType
::
ACL_FLOAT
,
{},
{}
,
value_addr
);
value
=
new
aclnnTensorDescriptor
(
aclDataType
::
ACL_FLOAT
,
{},
{});
}
// Fill Mask Tensor
...
...
@@ -93,17 +92,19 @@ infiniStatus_t Descriptor::create(
aclTensor
*
tvalue
=
value
->
tensor
;
CHECK_ACL
(
aclnnInplaceMaskedFillTensorGetWorkspaceSize
(
tx
,
tmask
,
tvalue
,
&
workspacesize_mask
,
&
mask_executor
));
aclSetAclOpExecutorRepeatable
(
mask_executor
);
int64_t
dim
=
2
;
CHECK_ACL
(
aclnnSoftmaxGetWorkspaceSize
(
tx
,
dim
,
ty
,
&
workspacesize_softmax
,
&
executor
));
aclSetAclOpExecutorRepeatable
(
executor
);
// Create the descriptor
size_t
all_workspacesize
=
workspacesize_softmax
+
workspacesize_mask
;
*
desc_ptr
=
new
Descriptor
(
new
Opaque
{
executor
,
mask_executor
,
x
,
mask
,
y
,
mask_addr
,
workspacesize_softmax
,
workspacesize_mask
},
*
desc_ptr
=
new
Descriptor
(
new
Opaque
{
x
,
mask
,
y
,
value
,
mask_addr
,
value_addr
},
std
::
move
(
info
),
all_workspacesize
,
handle_ascend
->
device
,
handle_ascend
->
device_id
);
// Delete useless executor
aclDestroyAclOpExecutor
(
executor
);
aclDestroyAclOpExecutor
(
mask_executor
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -114,18 +115,24 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size, voi
auto
tx
=
_opaque
->
x
->
tensor
;
auto
ty
=
_opaque
->
y
->
tensor
;
auto
tmask
=
_opaque
->
mask
->
tensor
;
auto
executor
=
_opaque
->
executor
;
auto
mask_executor
=
_opaque
->
mask_executor
;
auto
mask_addr
=
_opaque
->
mask_addr
;
auto
tvalue
=
_opaque
->
value
->
tensor
;
aclOpExecutor
*
executor
=
nullptr
;
aclOpExecutor
*
mask_executor
=
nullptr
;
size_t
workspacesize_softmax
=
0
;
size_t
workspacesize_mask
=
0
;
int64_t
dim
=
2
;
AclSetTensorAddr
(
mask_executor
,
0
,
tx
,
(
void
*
)
x
);
AclSetTensorAddr
(
mask_executor
,
1
,
tmask
,
mask_addr
);
CHECK_ACL
(
aclnnInplaceMaskedFillTensor
(
workspace
,
_opaque
->
workspacesize_mask
,
mask_executor
,
stream
));
AclSetTensorAddr
(
mask_executor
,
1
,
tmask
,
_opaque
->
mask_addr
);
AclSetTensorAddr
(
mask_executor
,
2
,
tvalue
,
_opaque
->
value_addr
);
CHECK_ACL
(
aclnnInplaceMaskedFillTensorGetWorkspaceSize
(
tx
,
tmask
,
tvalue
,
&
workspacesize_mask
,
&
mask_executor
));
CHECK_ACL
(
aclnnInplaceMaskedFillTensor
(
workspace
,
workspacesize_mask
,
mask_executor
,
stream
));
CHECK_ACL
(
aclrtSynchronizeStream
(
stream
));
AclSetTensorAddr
(
executor
,
0
,
tx
,
(
void
*
)
x
);
AclSetTensorAddr
(
executor
,
1
,
ty
,
y
);
CHECK_ACL
(
aclnnSoftmax
(
workspace
,
_opaque
->
workspacesize_softmax
,
executor
,
stream
));
CHECK_ACL
(
aclnnSoftmaxGetWorkspaceSize
(
tx
,
dim
,
ty
,
&
workspacesize_softmax
,
&
executor
));
CHECK_ACL
(
aclnnSoftmax
(
workspace
,
workspacesize_softmax
,
executor
,
stream
));
return
INFINI_STATUS_SUCCESS
;
}
...
...
src/infiniop/ops/gemm/ascend/gemm_ascend.cc
View file @
11e7df93
...
...
@@ -6,7 +6,6 @@
namespace
op
::
gemm
::
ascend
{
struct
Descriptor
::
Opaque
{
mutable
aclOpExecutor
*
executor
;
aclnnTensorDescriptor_t
c
,
a
,
b
;
// cubeMathType
// see doc:
...
...
@@ -17,7 +16,6 @@ struct Descriptor::Opaque {
delete
c
;
delete
a
;
delete
b
;
aclDestroyAclOpExecutor
(
executor
);
}
};
...
...
@@ -56,8 +54,8 @@ infiniStatus_t Descriptor::create(
ta
=
a
->
tensor
,
tb
=
b
->
tensor
;
aclOpExecutor
*
executor
;
size_t
workspace_size
;
aclOpExecutor
*
executor
=
nullptr
;
size_t
workspace_size
=
0
;
// aclnnGemm support C = alpha * A @ B + beta * C
// see
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha003/apiref/aolapi/context/aclnnGemm.md
...
...
@@ -69,13 +67,15 @@ infiniStatus_t Descriptor::create(
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
workspace_size
,
new
Opaque
{
executor
,
c
,
a
,
b
,
mt
,
},
handle
->
device
,
handle
->
device_id
);
aclDestroyAclOpExecutor
(
executor
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -93,22 +93,24 @@ infiniStatus_t Descriptor::calculate(
ta
=
_opaque
->
a
->
tensor
,
tb
=
_opaque
->
b
->
tensor
;
size_t
workspace_size
;
size_t
workspace_size
=
0
;
aclOpExecutor
*
executor
=
nullptr
;
CHECK_ACL
(
aclnnGemmGetWorkspaceSize
(
ta
,
tb
,
tc
,
alpha
,
beta
,
0
,
0
,
tc
,
_opaque
->
mt
,
&
workspace_size
,
&
(
_opaque
->
executor
))
)
;
&
workspace_size
,
&
executor
));
if
(
workspaceSize_
<
workspace_size
)
{
return
INFINI_STATUS_INSUFFICIENT_WORKSPACE
;
}
aclSetAclOpExecutorRepeatable
(
_opaque
->
executor
);
CHECK_ACL
(
aclSetAclOpExecutorRepeatable
(
executor
)
)
;
auto
unit
=
infiniSizeOf
(
_dtype
);
for
(
size_t
i
=
0
;
i
<
_info
.
batch
;
++
i
)
{
AclSetTensorAddr
(
_opaque
->
executor
,
0
,
ta
,
((
char
*
)
a
)
+
i
*
_info
.
a_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
_opaque
->
executor
,
1
,
tb
,
((
char
*
)
b
)
+
i
*
_info
.
b_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
_opaque
->
executor
,
2
,
tc
,
((
char
*
)
c
)
+
i
*
_info
.
c_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
_opaque
->
executor
,
3
,
tc
,
((
char
*
)
c
)
+
i
*
_info
.
c_matrix
.
stride
*
unit
);
CHECK_ACL
(
aclnnGemm
(
workspace
,
workspace_size
,
_opaque
->
executor
,
stream
));
AclSetTensorAddr
(
executor
,
0
,
ta
,
((
char
*
)
a
)
+
i
*
_info
.
a_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
executor
,
1
,
tb
,
((
char
*
)
b
)
+
i
*
_info
.
b_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
executor
,
2
,
tc
,
((
char
*
)
c
)
+
i
*
_info
.
c_matrix
.
stride
*
unit
);
AclSetTensorAddr
(
executor
,
3
,
tc
,
((
char
*
)
c
)
+
i
*
_info
.
c_matrix
.
stride
*
unit
);
CHECK_ACL
(
aclnnGemm
(
workspace
,
workspace_size
,
executor
,
stream
));
}
return
INFINI_STATUS_SUCCESS
;
...
...
src/infiniop/ops/rms_norm/ascend/rms_norm_aclnn.cc
View file @
11e7df93
...
...
@@ -5,7 +5,6 @@
namespace
op
::
rms_norm
::
ascend
{
struct
Descriptor
::
Opaque
{
mutable
aclOpExecutor
*
executor
;
aclnnTensorDescriptor_t
y
;
aclnnTensorDescriptor_t
x
;
aclnnTensorDescriptor_t
w
;
...
...
@@ -17,7 +16,6 @@ struct Descriptor::Opaque {
delete
x
;
delete
w
;
delete
rstd
;
aclDestroyAclOpExecutor
(
executor
);
}
};
...
...
@@ -64,16 +62,17 @@ infiniStatus_t Descriptor::create(
// Get WorkspaceSize and set executor
CHECK_ACL
(
aclnnRmsNormGetWorkspaceSize
(
tx
,
tw
,
static_cast
<
double
>
(
epsilon
),
ty
,
trstd
,
&
workspace_size
,
&
executor
));
aclSetAclOpExecutorRepeatable
(
executor
);
auto
handle_ascend
=
reinterpret_cast
<
device
::
ascend
::
Handle
*>
(
handle
);
size_t
all_workspace_size
=
workspace_size
+
rstd
->
numel
()
*
aclDataTypeSize
(
rstd
->
dataType
);
*
desc_ptr
=
new
Descriptor
(
new
Opaque
{
executor
,
y
,
x
,
w
,
rstd
,
workspace_size
},
new
Opaque
{
y
,
x
,
w
,
rstd
,
workspace_size
},
std
::
move
(
info
),
all_workspace_size
,
handle_ascend
->
device
,
handle_ascend
->
device_id
);
aclDestroyAclOpExecutor
(
executor
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -89,16 +88,21 @@ infiniStatus_t Descriptor::calculate(
auto
tx
=
_opaque
->
x
->
tensor
;
auto
ty
=
_opaque
->
y
->
tensor
;
auto
trstd
=
_opaque
->
rstd
->
tensor
;
size_t
workspace_size_
=
0
;
aclOpExecutor
*
executor
=
nullptr
;
CHECK_ACL
(
aclnnRmsNormGetWorkspaceSize
(
tx
,
tw
,
static_cast
<
double
>
(
_info
.
epsilon
),
ty
,
trstd
,
&
workspace_size_
,
&
executor
));
CHECK_ACL
(
aclSetAclOpExecutorRepeatable
(
executor
));
void
*
rstdPtr
=
(
void
*
)((
uint8_t
*
)
workspace
+
_opaque
->
workspaceSize
);
auto
unit
=
infiniSizeOf
(
_info
.
atype
);
AclSetTensorAddr
(
_opaque
->
executor
,
1
,
tw
,
(
void
*
)
w
);
AclSetTensorAddr
(
_opaque
->
executor
,
3
,
trstd
,
rstdPtr
);
AclSetTensorAddr
(
executor
,
1
,
tw
,
(
void
*
)
w
);
AclSetTensorAddr
(
executor
,
3
,
trstd
,
rstdPtr
);
for
(
size_t
i
=
0
;
i
<
(
_info
.
shape
)[
0
];
++
i
)
{
AclSetTensorAddr
(
_opaque
->
executor
,
0
,
tx
,
((
char
*
)
x
)
+
i
*
(
_info
.
x_strides
)[
0
]
*
unit
);
AclSetTensorAddr
(
_opaque
->
executor
,
2
,
ty
,
((
char
*
)
y
)
+
i
*
(
_info
.
y_strides
)[
0
]
*
unit
);
CHECK_ACL
(
aclnnRmsNorm
(
workspace
,
_opaque
->
workspaceSize
,
_opaque
->
executor
,
stream
));
AclSetTensorAddr
(
executor
,
0
,
tx
,
((
char
*
)
x
)
+
i
*
(
_info
.
x_strides
)[
0
]
*
unit
);
AclSetTensorAddr
(
executor
,
2
,
ty
,
((
char
*
)
y
)
+
i
*
(
_info
.
y_strides
)[
0
]
*
unit
);
CHECK_ACL
(
aclnnRmsNorm
(
workspace
,
_opaque
->
workspaceSize
,
executor
,
stream
));
}
return
INFINI_STATUS_SUCCESS
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment