Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
3935554a
Commit
3935554a
authored
Dec 12, 2024
by
coderfeli
Browse files
rm logs
parent
da59d3b2
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
22 deletions
+13
-22
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp
...or_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp
+12
-12
include/ck_tile/core/config.hpp
include/ck_tile/core/config.hpp
+0
-3
include/ck_tile/core/container/array.hpp
include/ck_tile/core/container/array.hpp
+1
-7
No files found.
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp
View file @
3935554a
...
...
@@ -374,29 +374,29 @@ struct BlockwiseGemmXdlops_pipeline_v4
{
// schedule
constexpr
auto
num_ds_read_inst
=
HotLoopInstList
::
A_LDS_Read_Inst_Num
+
HotLoopInstList
::
B_LDS_Read_Inst_Num
;
//16
HotLoopInstList
::
A_LDS_Read_Inst_Num
+
HotLoopInstList
::
B_LDS_Read_Inst_Num
;
constexpr
auto
num_ds_write_inst
=
HotLoopInstList
::
A_LDS_Write_Inst_Num
+
HotLoopInstList
::
B_LDS_Write_Inst_Num
;
//8
HotLoopInstList
::
A_LDS_Write_Inst_Num
+
HotLoopInstList
::
B_LDS_Write_Inst_Num
;
;
constexpr
auto
num_buffer_load_inst
=
HotLoopInstList
::
A_Buffer_Load_Inst_Num
+
HotLoopInstList
::
B_Buffer_Load_Inst_Num
;
//8
HotLoopInstList
::
A_Buffer_Load_Inst_Num
+
HotLoopInstList
::
B_Buffer_Load_Inst_Num
;
;
constexpr
auto
num_mfma_inst
=
HotLoopInstList
::
C_MFMA_Inst_Num
;
//64
constexpr
auto
num_mfma_inst
=
HotLoopInstList
::
C_MFMA_Inst_Num
;
constexpr
auto
num_issue
=
num_buffer_load_inst
;
// 8
constexpr
auto
num_issue
=
num_buffer_load_inst
;
static_for
<
0
,
num_issue
,
1
>
{}([
&
](
auto
i
)
{
ignore
=
i
;
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
: 1
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x100
,
num_ds_read_inst
/
num_buffer_load_inst
,
0
);
// DS read
: 2
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
: 1
0x100
,
num_ds_read_inst
/
num_buffer_load_inst
,
0
);
// DS read
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x200
,
num_ds_write_inst
/
num_buffer_load_inst
,
0
);
// DS write
: 1
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
: 1
__builtin_amdgcn_sched_group_barrier
(
0x020
,
1
,
0
);
// VMEM read
:1
0x200
,
num_ds_write_inst
/
num_buffer_load_inst
,
0
);
// DS write
__builtin_amdgcn_sched_group_barrier
(
0x008
,
1
,
0
);
// MFMA
__builtin_amdgcn_sched_group_barrier
(
0x020
,
1
,
0
);
// VMEM read
__builtin_amdgcn_sched_group_barrier
(
0x008
,
num_mfma_inst
/
num_buffer_load_inst
-
3
,
0
);
// MFMA
: 5
0x008
,
num_mfma_inst
/
num_buffer_load_inst
-
3
,
0
);
// MFMA
});
}
...
...
include/ck_tile/core/config.hpp
View file @
3935554a
...
...
@@ -230,6 +230,3 @@
#ifndef CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
#define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID 1
#endif
template
<
typename
T
>
CK_TILE_HOST_DEVICE
void
printx
(
T
a
=
{})
{
a
.
print
();}
\ No newline at end of file
include/ck_tile/core/container/array.hpp
View file @
3935554a
...
...
@@ -52,13 +52,7 @@ struct array
data
[
i
]
=
vlast
;
}
}
CK_TILE_HOST_DEVICE
void
print
()
const
{
printf
(
"array{size: %d, data: "
,
size
());
for
(
index_t
i
=
0
;
i
<
size
();
i
++
)
{
printf
(
"%d,"
,
int
(
get
(
i
)));
}
}
template
<
typename
Y
,
typename
=
std
::
enable_if_t
<
std
::
is_convertible_v
<
Y
,
value_type
>
||
std
::
is_constructible_v
<
Y
,
value_type
>>>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment