Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4d18cd84
"sgl-kernel/git@developer.sourcefind.cn:change/sglang.git" did not exist on "719b29f218a09642193c4bda2a7ffa32829d5604"
Commit
4d18cd84
authored
Jul 04, 2023
by
danyao12
Browse files
adjust block_sync_lds to solve read-write conflicts
parent
6cc7d0de
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
4 additions
and
6 deletions
+4
-6
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_b2t_v1.hpp
...id/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_b2t_v1.hpp
+1
-2
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_b2t_v2.hpp
...id/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_b2t_v2.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_t2b_v1.hpp
...id/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_t2b_v1.hpp
+1
-2
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_t2b_v2.hpp
...id/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_t2b_v2.hpp
+1
-1
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_b2t_v1.hpp
View file @
4d18cd84
...
...
@@ -2114,6 +2114,7 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
sgrad_slice_idx
[
I3
],
sgrad_slice_idx
[
I3
]
+
Gemm2Params
::
ABlockSliceLengths_M0_K0_M1_K1
::
At
(
I3
));
block_sync_lds
();
// sync before write
if
(
gemm2_a_copy_subgroup
.
IsBelong
(
mwave_range
,
nwave_range
))
{
qgrad_gemm_tile_sgrad_thread_copy_vgpr_to_lds
.
Run
(
...
...
@@ -2125,8 +2126,6 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
gemm2_a_block_buf
);
}
// block_sync_lds(); // sync before write
qgrad_gemm_tile_k_blockwise_copy
.
Run
(
Gemm2
::
b_block_desc_n0_n1_n2_k0_k1_k2_k3
,
k_block_buf
,
Gemm2
::
b_thread_desc_n0_n1_n2_k0_k1_k2_k3
,
...
...
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_b2t_v2.hpp
View file @
4d18cd84
...
...
@@ -2044,6 +2044,7 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V2
sgrad_slice_idx
[
I3
],
sgrad_slice_idx
[
I3
]
+
Gemm2Params
::
ABlockSliceLengths_M0_K0_M1_K1
::
At
(
I3
));
block_sync_lds
();
// sync before write
if
(
gemm2_a_copy_subgroup
.
IsBelong
(
mwave_range
,
nwave_range
))
{
qgrad_gemm_tile_sgrad_thread_copy_vgpr_to_lds
.
Run
(
...
...
@@ -2060,7 +2061,6 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V2
qgrad_gemm_tile_k_blockwise_copy
.
MoveSrcSliceWindow
(
k_grid_desc_n0_k_n1
,
Gemm2
::
b_block_slice_copy_step
);
block_sync_lds
();
// sync before write
qgrad_gemm_tile_k_blockwise_copy
.
RunWrite
(
Gemm2
::
b_block_desc_k0_n_k1
,
gemm2_b_block_buf
);
...
...
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_t2b_v1.hpp
View file @
4d18cd84
...
...
@@ -2191,6 +2191,7 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
sgrad_slice_idx
[
I3
],
sgrad_slice_idx
[
I3
]
+
Gemm2Params
::
ABlockSliceLengths_M0_K0_M1_K1
::
At
(
I3
));
block_sync_lds
();
// sync before write
if
(
gemm2_a_copy_subgroup
.
IsBelong
(
mwave_range
,
nwave_range
))
{
qgrad_gemm_tile_sgrad_thread_copy_vgpr_to_lds
.
Run
(
...
...
@@ -2202,8 +2203,6 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V1
gemm2_a_block_buf
);
}
// block_sync_lds(); // sync before write
qgrad_gemm_tile_k_blockwise_copy
.
Run
(
Gemm2
::
b_block_desc_n0_n1_n2_k0_k1_k2_k3
,
k_block_buf
,
Gemm2
::
b_thread_desc_n0_n1_n2_k0_k1_k2_k3
,
...
...
include/ck/tensor_operation/gpu/grid/gridwise_batched_mha_bwd_xdl_cshuffle_qloop_t2b_v2.hpp
View file @
4d18cd84
...
...
@@ -2142,6 +2142,7 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V2
sgrad_slice_idx
[
I3
],
sgrad_slice_idx
[
I3
]
+
Gemm2Params
::
ABlockSliceLengths_M0_K0_M1_K1
::
At
(
I3
));
block_sync_lds
();
// sync before write
if
(
gemm2_a_copy_subgroup
.
IsBelong
(
mwave_range
,
nwave_range
))
{
qgrad_gemm_tile_sgrad_thread_copy_vgpr_to_lds
.
Run
(
...
...
@@ -2158,7 +2159,6 @@ struct GridwiseBatchedMultiheadAttentionBackward_Xdl_CShuffle_V2
qgrad_gemm_tile_k_blockwise_copy
.
MoveSrcSliceWindow
(
k_grid_desc_n0_k_n1
,
Gemm2
::
b_block_slice_copy_step
);
block_sync_lds
();
// sync before write
qgrad_gemm_tile_k_blockwise_copy
.
RunWrite
(
Gemm2
::
b_block_desc_k0_n_k1
,
gemm2_b_block_buf
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment