Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
1d89463c
Commit
1d89463c
authored
Nov 28, 2024
by
letaoqin
Browse files
add gmm0 code
parent
7018dfb2
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
4 deletions
+26
-4
include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_general.hpp
...ops/fused_moe/pipeline/fused_moegemm_pipeline_general.hpp
+26
-4
No files found.
include/ck_tile/ops/fused_moe/pipeline/fused_moegemm_pipeline_general.hpp
View file @
1d89463c
...
@@ -98,12 +98,9 @@ struct FusedMoeGemmPipeline_General
...
@@ -98,12 +98,9 @@ struct FusedMoeGemmPipeline_General
index_t
hidden_size
,
index_t
hidden_size
,
index_t
intermediate_size
)
index_t
intermediate_size
)
{
{
ignore
=
g_window_
;
ignore
=
d_window_
;
ignore
=
d_window_
;
ignore
=
o_window_
;
ignore
=
o_window_
;
ignore
=
smem
;
ignore
=
hidden_size
;
ignore
=
hidden_size
;
ignore
=
intermediate_size
;
CK_TILE_LDS_ADDR
ADataType
*
smem_0
=
reinterpret_cast
<
CK_TILE_LDS_ADDR
ADataType
*>
(
smem
);
CK_TILE_LDS_ADDR
ADataType
*
smem_0
=
reinterpret_cast
<
CK_TILE_LDS_ADDR
ADataType
*>
(
smem
);
auto
a_lds_view
=
make_tensor_view
<
address_space_enum
::
lds
>
(
auto
a_lds_view
=
make_tensor_view
<
address_space_enum
::
lds
>
(
...
@@ -138,9 +135,34 @@ struct FusedMoeGemmPipeline_General
...
@@ -138,9 +135,34 @@ struct FusedMoeGemmPipeline_General
auto
g_dram_block
=
load_tile
(
g_global_to_dram_window
);
auto
g_dram_block
=
load_tile
(
g_global_to_dram_window
);
clear_tile
(
s_acc
);
// initialize C
clear_tile
(
s_acc
);
// initialize C
constexpr
index_t
kK0
=
BlockShape
::
Block_K0
;
const
index_t
k0_loops
=
ck_tile
::
integer_divide_ceil
(
intermediate_size
,
kK0
);
index_t
iCounter
=
k0_loops
-
1
;
//gemm 0
while
(
iCounter
>
0
)
{
block_sync_lds
();
gemm_0
(
s_acc
,
a_lds_win
,
g_dram_block
);
block_sync_lds
();
move_tile_window
(
a_global_to_dram_window
,
{
0
,
kK0
});
move_tile_window
(
g_global_to_dram_window
,
{
0
,
kK0
});
gemm_0
(
s_acc
,
a_lds_win
,
g_dram_block
);
a_dram_block
=
load_tile
(
a_global_to_dram_window
);
g_dram_block
=
load_tile
(
g_global_to_dram_window
);
store_tile
(
a_lds_win
,
a_dram_block
);
iCounter
--
;
}
// tail
{
block_sync_lds
();
gemm_0
(
s_acc
,
a_lds_win
,
g_dram_block
);
}
//move sacc to LDS
ignore
=
g_dram_block
;
ignore
=
g_dram_block
;
store_tile
(
o_window_
,
a_dram_block
);
store_tile
(
o_window_
,
a_dram_block
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment