Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
4b91d1ce
Commit
4b91d1ce
authored
Feb 17, 2025
by
coderfeli
Browse files
revert gemm2 swizz
parent
fcc2c867
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
20 deletions
+5
-20
include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm_scatter.hpp
...k/tensor_operation/gpu/grid/gridwise_moe_gemm_scatter.hpp
+5
-20
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_moe_gemm_scatter.hpp
View file @
4b91d1ce
...
...
@@ -197,11 +197,8 @@ struct GridwiseMoeGemmScatter
__host__
static
auto
CalculateGridSize
(
index_t
M
,
index_t
N
)
{
// return std::make_tuple(math::integer_divide_ceil(N, NPerBlock),
// math::integer_divide_ceil(M, MPerBlock),
// 1);
return
std
::
make_tuple
(
math
::
integer_divide_ceil
(
N
,
NPerBlock
)
*
math
::
integer_divide_ceil
(
M
,
MPerBlock
),
1
,
return
std
::
make_tuple
(
math
::
integer_divide_ceil
(
N
,
NPerBlock
),
math
::
integer_divide_ceil
(
M
,
MPerBlock
),
1
);
}
...
...
@@ -1152,22 +1149,10 @@ struct GridwiseMoeGemmScatter
MakeCGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
(
c_grid_desc_m_n
,
problem
.
MBlock
,
problem
.
NBlock
);
const
index_t
block_n_id
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
x
);
const
index_t
block_m_id
=
__builtin_amdgcn_readfirstlane
(
blockIdx
.
y
);
const
index_t
expert_id
=
__builtin_amdgcn_readfirstlane
(
p_sorted_expert_ids
[
block_m_id
]);
const
index_t
max_token_id
=
__builtin_amdgcn_readfirstlane
(
p_max_token_id
[
0
]);
const
index_t
expert_block_id
=
blockIdx
.
x
/
problem
.
NBlock
;
// const index_t b_block_id = blockIdx.x % problem.NBlock;
const
index_t
expert_id
=
__builtin_amdgcn_readfirstlane
(
p_sorted_expert_ids
[
expert_block_id
]);
const
index_t
es
=
__builtin_amdgcn_readfirstlane
(
p_max_token_id
[
expert_block_id
+
1
]);
const
index_t
expert_swizzle
=
es
>
0
?
es
:
1
;
//p_max_token_id[expert_id + 1];
const
index_t
expert_block_swizzle
=
expert_block_id
/
expert_swizzle
;
const
index_t
b_block_id_swizzle
=
blockIdx
.
x
%
(
problem
.
NBlock
*
expert_swizzle
);
const
index_t
block_n_id
=
__builtin_amdgcn_readfirstlane
(
b_block_id_swizzle
%
8
+
b_block_id_swizzle
/
(
8
*
expert_swizzle
)
*
8
);
const
index_t
block_m_id
=
__builtin_amdgcn_readfirstlane
(
expert_block_swizzle
*
expert_swizzle
+
b_block_id_swizzle
/
8
%
expert_swizzle
);
// const index_t block_n_id = __builtin_amdgcn_readfirstlane(blockIdx.x);
// const index_t block_m_id = __builtin_amdgcn_readfirstlane(blockIdx.y);
// const index_t expert_id = __builtin_amdgcn_readfirstlane(p_sorted_expert_ids[block_m_id]);
// const index_t max_token_id = __builtin_amdgcn_readfirstlane(p_max_token_id[0]);
const
index_t
token0
=
__builtin_amdgcn_readfirstlane
(
p_sorted_token_ids
[
block_m_id
*
MPerBlock
]
&
0xffffff
);
// constexpr auto M0 = ABlockTransferThreadClusterLengths_AK0_M_AK1{}.At(I1);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment