Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
6c7b832e
Commit
6c7b832e
authored
Nov 30, 2023
by
aska-0096
Browse files
workaround, still keep relative order of ds_read/write
parent
c6a03cde
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
5 deletions
+18
-5
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v4.hpp
...operation/gpu/block/blockwise_gemm_pipeline_xdlops_v4.hpp
+18
-5
No files found.
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v4.hpp
View file @
6c7b832e
...
...
@@ -317,15 +317,28 @@ struct BlockwiseGemmXdlops_pipeline_v4
constexpr
auto
num_issue
=
num_buffer_load_inst
;
static_for
<
0
,
num_issue
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
num_issue
/
2
,
1
>
{}([
&
](
auto
i
)
{
ignore
=
i
;
static_for
<
0
,
2
*
num_ds_read_inst
/
num_issue
,
1
>
{}([
&
](
auto
ir
)
{
ignore
=
ir
;
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x100
,
1
,
0
);
// DS read
});
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x100
,
num_ds_read_inst
/
num_buffer_load_inst
,
0
);
// DS read
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x200
,
num_ds_write_inst
/
num_buffer_load_inst
,
0
);
// DS write
__builtin_amdgcn_sched_group_barrier
(
0x020
,
1
,
0
);
// VMEM read
__builtin_amdgcn_sched_group_barrier
(
0x008
,
num_mfma_inst
/
num_buffer_load_inst
-
2
*
num_ds_read_inst
/
num_issue
-
1
,
0
);
// MFMA
});
static_for
<
0
,
num_issue
/
2
,
1
>
{}([
&
](
auto
i
)
{
ignore
=
i
;
static_for
<
0
,
2
*
num_ds_write_inst
/
num_issue
,
1
>
{}([
&
](
auto
iw
)
{
ignore
=
iw
;
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x200
,
1
,
0
);
// DS write
});
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x020
,
1
,
0
);
// VMEM read
__builtin_amdgcn_sched_group_barrier
(
0x008
,
num_mfma_inst
/
num_buffer_load_inst
-
3
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x008
,
num_mfma_inst
/
num_buffer_load_inst
-
2
*
num_ds_write_inst
/
num_issue
-
1
,
0
);
// MFMA
});
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment