Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
d52ec016
"docs/vscode:/vscode.git/clone" did not exist on "f2dff4d4f6d44f3f399a3ab031b43ae4e3ef7ab9"
Unverified
Commit
d52ec016
authored
Aug 18, 2023
by
cloudhan
Committed by
GitHub
Aug 18, 2023
Browse files
Use asynchronous version of hipMemset (#850)
parent
32fe996d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
19 additions
and
10 deletions
+19
-10
include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...e_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+3
-2
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp
...tion/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp
+4
-2
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp
...sor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp
+5
-2
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
...u/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
+4
-2
include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp
...mpl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp
+3
-2
No files found.
include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
View file @
d52ec016
...
@@ -532,11 +532,12 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
...
@@ -532,11 +532,12 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
float
ave_time
=
0
;
float
ave_time
=
0
;
const
auto
Run
=
[
&
](
const
auto
&
kernel
)
{
const
auto
Run
=
[
&
](
const
auto
&
kernel
)
{
hipGetErrorString
(
hipMemset
(
hipGetErrorString
(
hipMemset
Async
(
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
0
,
0
,
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
.
GetElementSpaceSize
()
*
arg
.
c_grid_desc_mblock_mperblock_nblock_nperblock_
.
GetElementSpaceSize
()
*
sizeof
(
CDataType
)));
sizeof
(
CDataType
),
stream_config
.
stream_id_
));
ave_time
=
ave_time
=
launch_and_time_kernel
(
stream_config
,
launch_and_time_kernel
(
stream_config
,
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp
View file @
d52ec016
...
@@ -158,8 +158,10 @@ struct DeviceGemmXdlSplitKCShuffle : public DeviceGemmSplitK<ALayout,
...
@@ -158,8 +158,10 @@ struct DeviceGemmXdlSplitKCShuffle : public DeviceGemmSplitK<ALayout,
const
auto
Run
=
[
&
](
const
auto
&
kernel
)
{
const
auto
Run
=
[
&
](
const
auto
&
kernel
)
{
if
(
kbatch
>
1
)
if
(
kbatch
>
1
)
hipGetErrorString
(
hipGetErrorString
(
hipMemsetAsync
(
karg
.
p_c_grid
,
hipMemset
(
karg
.
p_c_grid
,
0
,
karg
.
M
*
karg
.
N
*
sizeof
(
CDataType
)));
0
,
karg
.
M
*
karg
.
N
*
sizeof
(
CDataType
),
stream_config
.
stream_id_
));
ave_time
=
launch_and_time_kernel
(
ave_time
=
launch_and_time_kernel
(
stream_config
,
kernel
,
dim3
(
gdx
,
gdy
,
gdz
),
dim3
(
BlockSize
),
0
,
karg
,
b2c_map
);
stream_config
,
kernel
,
dim3
(
gdx
,
gdy
,
gdz
),
dim3
(
BlockSize
),
0
,
karg
,
b2c_map
);
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp
View file @
d52ec016
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -147,7 +147,10 @@ struct DeviceGemmXdlStreamK : public DeviceGemmStreamK<ALayout,
...
@@ -147,7 +147,10 @@ struct DeviceGemmXdlStreamK : public DeviceGemmStreamK<ALayout,
if
constexpr
(
GridwiseGemm
::
Block2CTileMap
::
ReductionStrategy
==
if
constexpr
(
GridwiseGemm
::
Block2CTileMap
::
ReductionStrategy
==
StreamKReductionStrategy
::
Atomic
)
StreamKReductionStrategy
::
Atomic
)
{
{
hipGetErrorString
(
hipMemset
(
karg
.
p_c_grid
,
0
,
karg
.
M
*
karg
.
N
*
sizeof
(
CDataType
)));
hipGetErrorString
(
hipMemsetAsync
(
karg
.
p_c_grid
,
0
,
karg
.
M
*
karg
.
N
*
sizeof
(
CDataType
),
stream_config
.
stream_id_
));
ave_time
=
launch_and_time_kernel
(
stream_config
,
ave_time
=
launch_and_time_kernel
(
stream_config
,
kernel
,
kernel
,
grid_dims
,
grid_dims
,
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
View file @
d52ec016
...
@@ -421,8 +421,10 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
...
@@ -421,8 +421,10 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
for
(
const
auto
&
trans_arg
:
arg
.
gemm_kernel_args_
)
for
(
const
auto
&
trans_arg
:
arg
.
gemm_kernel_args_
)
{
{
const
auto
&
karg
=
trans_arg
.
karg_
;
const
auto
&
karg
=
trans_arg
.
karg_
;
hip_check_error
(
hip_check_error
(
hipMemsetAsync
(
karg
.
p_c_grid
,
hipMemset
(
karg
.
p_c_grid
,
0
,
karg
.
M
*
karg
.
N
*
sizeof
(
EDataType
)));
0
,
karg
.
M
*
karg
.
N
*
sizeof
(
EDataType
),
stream_config
.
stream_id_
));
}
}
}
}
...
...
include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp
View file @
d52ec016
...
@@ -886,11 +886,12 @@ struct DeviceSplitKContractionMultipleD_Xdl_CShuffle
...
@@ -886,11 +886,12 @@ struct DeviceSplitKContractionMultipleD_Xdl_CShuffle
typename
GridwiseGemmAtomicAdd
::
DefaultBlock2ETileMap
,
typename
GridwiseGemmAtomicAdd
::
DefaultBlock2ETileMap
,
has_main_loop
>
;
has_main_loop
>
;
hipGetErrorString
(
hipMemset
(
hipGetErrorString
(
hipMemset
Async
(
arg
.
p_e_grid_
,
arg
.
p_e_grid_
,
0
,
0
,
arg
.
e_grid_desc_mblock_mperblock_nblock_nperblock_
.
GetElementSpaceSize
()
*
arg
.
e_grid_desc_mblock_mperblock_nblock_nperblock_
.
GetElementSpaceSize
()
*
sizeof
(
EDataType
)));
sizeof
(
EDataType
),
stream_config
.
stream_id_
));
return
launch_and_time_kernel
(
stream_config
,
return
launch_and_time_kernel
(
stream_config
,
kernel
,
kernel
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment