Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
673b30cf
Commit
673b30cf
authored
May 14, 2022
by
ltqin
Browse files
add K0PerBlock dim
parent
7d42a6d4
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
17 deletions
+17
-17
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_lds_v2r3.hpp
...operation/gpu/grid/gridwise_gemm_xdlops_skip_lds_v2r3.hpp
+17
-17
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_lds_v2r3.hpp
View file @
673b30cf
...
...
@@ -351,19 +351,19 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
}
__host__
__device__
static
constexpr
auto
MakeBGridDescriptor_K0_N0_N1_N2_N3_K1
(
const
BGridDesc_K0_N_K1
&
b_grid_desc_k0_n_k1
)
MakeBGridDescriptor_K0_
K0B_
N0_N1_N2_N3_K1
(
const
BGridDesc_K0_N_K1
&
b_grid_desc_k0_n_k1
)
{
const
auto
K0
=
b_grid_desc_k0_n_k1
.
GetLength
(
I0
);
const
auto
N
=
b_grid_desc_k0_n_k1
.
GetLength
(
I1
);
const
auto
b_griddesc_k0_nblockid_nrepeat_waves_nperxdlops_k1
=
transform_tensor_descriptor
(
b_grid_desc_k0_n_k1
,
make_tuple
(
make_
pass_through_transform
(
K0
),
make_tuple
(
make_
unmerge_transform
(
make_tuple
(
K0
/
K0PerBlock
,
K0PerBlock
)
),
make_unmerge_transform
(
make_tuple
(
N
/
(
NXdlPerWave
*
NWaves
*
NPerXDL
),
NXdlPerWave
,
NWaves
,
NPerXDL
)),
make_pass_through_transform
(
K1
)),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
,
2
,
3
,
4
>
{},
Sequence
<
5
>
{}));
make_tuple
(
Sequence
<
0
,
1
>
{},
Sequence
<
2
,
3
,
4
,
5
>
{},
Sequence
<
6
>
{}));
return
b_griddesc_k0_nblockid_nrepeat_waves_nperxdlops_k1
;
}
...
...
@@ -554,8 +554,9 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
ck
::
tensor_operation
::
element_wise
::
PassThrough
{});
// B matrix blockwise copy
constexpr
auto
b_thread_copy_desc_k0_n0_n1_n2_n3_k1
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
Number
<
KPerThread
>
{},
constexpr
auto
b_thread_copy_desc_k0_k0b_n0_n1_n2_n3_k1
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
I1
,
Number
<
KPerThread
>
{},
// KPerThread
I1
,
// NBlockId
Number
<
MXdlPerWave
>
{},
// repeat
I1
,
// waves
...
...
@@ -563,11 +564,11 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
Number
<
K1
>
{}));
ignore
=
StaticBuffer
<
AddressSpaceEnum
::
Vgpr
,
FloatAB
,
b_thread_copy_desc_k0_n0_n1_n2_n3_k1
.
GetElementSpaceSize
(),
b_thread_copy_desc_k0_
k0b_
n0_n1_n2_n3_k1
.
GetElementSpaceSize
(),
true
>
{};
auto
b_grid_desc_k0_n0_n1_n2_n3_k1
=
MakeBGridDescriptor_K0_N0_N1_N2_N3_K1
(
b_grid_desc_k0_n_k1
);
auto
b_grid_desc_k0_
k0b_
n0_n1_n2_n3_k1
=
MakeBGridDescriptor_K0_
K0B_
N0_N1_N2_N3_K1
(
b_grid_desc_k0_n_k1
);
const
auto
wave_id
=
GetWaveIdx
();
const
auto
wave_k_n_id
=
GetWaveKNIdx
(
wave_id
[
I2
]);
...
...
@@ -591,17 +592,16 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
ignore
=
ThreadwiseTensorSliceTransfer_v2
<
FloatAB
,
FloatAB
,
decltype
(
b_grid_desc_k0_n0_n1_n2_n3_k1
),
decltype
(
b_thread_copy_desc_k0_n0_n1_n2_n3_k1
),
Sequence
<
Number
<
KPerThread
>
{},
I1
,
Number
<
MXdlPerWave
>
{},
I1
,
I1
,
Number
<
K1
>
{}
>
,
Sequence
<
0
,
1
,
2
,
3
,
4
,
5
>
,
5
,
decltype
(
b_grid_desc_k0_
k0b_
n0_n1_n2_n3_k1
),
decltype
(
b_thread_copy_desc_k0_
k0b_
n0_n1_n2_n3_k1
),
Sequence
<
I1
,
Number
<
KPerThread
>
{},
I1
,
Number
<
MXdlPerWave
>
{},
I1
,
I1
,
Number
<
K1
>
{}
>
,
Sequence
<
0
,
1
,
2
,
3
,
4
,
5
,
6
>
,
6
,
1
,
BThreadTransferSrcResetCoordinateAfterRun
,
true
>
(
b_grid_desc_k0_n0_n1_n2_n3_k1
,
true
>
(
b_grid_desc_k0_k0b_n0_n1_n2_n3_k1
,
make_multi_index
(
wave_k_n_id
[
I0
],
n_
block_
data_idx_on_grid
,
0
,
wave_id
[
I1
],
wave_k_n_id
[
I1
],
0
));
0
,
wave_k_n_id
[
I0
],
block_
work_idx
[
I1
]
,
0
,
wave_id
[
I1
],
wave_k_n_id
[
I1
],
0
));
auto
b_blockwise_copy
=
BlockwiseTensorSliceTransfer_v4r1
<
BlockSize
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment