Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4bb61e35
Commit
4bb61e35
authored
Aug 31, 2023
by
Bartlomiej Wroblewski
Browse files
Review: Fix M, N per thread names
parent
231e3f8d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
8 deletions
+8
-8
include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp
include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp
+8
-8
No files found.
include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp
View file @
4bb61e35
...
@@ -132,22 +132,22 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2
...
@@ -132,22 +132,22 @@ struct BlockwiseGemmDpp_k0mk1_k0nk1_m0n0m1n1m2n2
__host__
__device__
static
constexpr
auto
GetCThreadDescriptor_M0_N0_M1_N1_M2_N2
()
__host__
__device__
static
constexpr
auto
GetCThreadDescriptor_M0_N0_M1_N1_M2_N2
()
{
{
constexpr
auto
c_m
0_m1_m2
_n_tblk_lens
=
dpp_gemm
.
GetCMNThreadBlkLengths
();
constexpr
auto
c_m_n_tblk_lens
=
dpp_gemm
.
GetCMNThreadBlkLengths
();
constexpr
auto
M
0
=
c_m
0_m1_m2
_n_tblk_lens
[
I0
];
constexpr
auto
M
=
c_m_n_tblk_lens
[
I0
];
constexpr
auto
N
=
c_m0_m1_m2
_n_tblk_lens
[
I1
];
constexpr
auto
N
=
c_m
_n_tblk_lens
[
I1
];
return
make_naive_tensor_descriptor_packed
(
return
make_naive_tensor_descriptor_packed
(
make_tuple
(
Number
<
MRepeat
>
{},
Number
<
NRepeat
>
{},
I1
,
I1
,
M
0
,
N
));
make_tuple
(
Number
<
MRepeat
>
{},
Number
<
NRepeat
>
{},
I1
,
I1
,
M
,
N
));
}
}
__host__
__device__
static
constexpr
auto
GetCThreadDescriptor_G_M0_N0_M1_N1_M2_N2
()
__host__
__device__
static
constexpr
auto
GetCThreadDescriptor_G_M0_N0_M1_N1_M2_N2
()
{
{
constexpr
auto
c_m
0_m1_m2
_n_tblk_lens
=
dpp_gemm
.
GetCMNThreadBlkLengths
();
constexpr
auto
c_m_n_tblk_lens
=
dpp_gemm
.
GetCMNThreadBlkLengths
();
constexpr
auto
M
0
=
c_m
0_m1_m2
_n_tblk_lens
[
I0
];
constexpr
auto
M
=
c_m_n_tblk_lens
[
I0
];
constexpr
auto
N
=
c_m0_m1_m2
_n_tblk_lens
[
I1
];
constexpr
auto
N
=
c_m
_n_tblk_lens
[
I1
];
return
make_naive_tensor_descriptor_packed
(
return
make_naive_tensor_descriptor_packed
(
make_tuple
(
I1
,
Number
<
MRepeat
>
{},
Number
<
NRepeat
>
{},
I1
,
I1
,
M
0
,
N
));
make_tuple
(
I1
,
Number
<
MRepeat
>
{},
Number
<
NRepeat
>
{},
I1
,
I1
,
M
,
N
));
}
}
__host__
__device__
static
constexpr
auto
GetCBlockDescriptor_M0_N0_M1_N1_M2_N2
()
__host__
__device__
static
constexpr
auto
GetCBlockDescriptor_M0_N0_M1_N1_M2_N2
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment