Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0e5848a4
Commit
0e5848a4
authored
May 21, 2021
by
Jing Zhang
Browse files
clean
parent
4fdee96b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
12 deletions
+15
-12
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
...include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
+15
-12
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops.hpp
View file @
0e5848a4
...
@@ -316,9 +316,16 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
...
@@ -316,9 +316,16 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
constexpr
index_t
NumBlks
=
CLayout
.
GetNumBlks
();
constexpr
index_t
NumBlks
=
CLayout
.
GetNumBlks
();
constexpr
index_t
NumXdlops
=
CLayout
.
GetNumXdlops
();
constexpr
index_t
NumXdlops
=
CLayout
.
GetNumXdlops
();
// constexpr auto c_mr_nr_nb_bk_thread_desc =
constexpr
auto
c_mr_nr_nx_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
// make_dynamic_naive_tensor_descriptor_packed_v2( make_tuple(Number<MRepeat>{},
make_tuple
(
Number
<
MRepeat
>
{},
Number
<
NRepeat
>
{},
Number
<
NumXdlops
>
{}));
// Number<NRepeat>{}, Number<NumBlks>{}, Number<BlkSize>{}));
constexpr
auto
c_blk_nb_bs_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
Number
<
NumBlks
>
{},
Number
<
BlkSize
>
{}));
StaticBuffer
<
AddressSpace
::
Vgpr
,
vector_type
<
float
,
c_blk_nb_bs_desc
.
GetElementSpaceSize
()
>
,
c_mr_nr_nx_desc
.
GetElementSpaceSize
()
>
c_thread_buf
;
// LDS allocation for A and B: be careful of alignment
// LDS allocation for A and B: be careful of alignment
constexpr
auto
a_block_space_size
=
constexpr
auto
a_block_space_size
=
...
@@ -339,11 +346,6 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
...
@@ -339,11 +346,6 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
// Sequence<MRepeat, MPerThread, NRepeat, NPerThread>>{}
// Sequence<MRepeat, MPerThread, NRepeat, NPerThread>>{}
//.Run(c_m0_m1_n0_n1_thread_desc, make_tuple(I0, I0, I0, I0), c_thread_buf, FloatAcc{0});
//.Run(c_m0_m1_n0_n1_thread_desc, make_tuple(I0, I0, I0, I0), c_thread_buf, FloatAcc{0});
StaticBuffer
<
AddressSpace
::
Vgpr
,
vector_type
<
float
,
NumBlks
*
BlkSize
>
,
MRepeat
*
NRepeat
*
NumXdlops
>
c_thread_buf
;
constexpr
auto
a_block_slice_copy_step
=
make_multi_index
(
KPerBlock
,
0
);
constexpr
auto
a_block_slice_copy_step
=
make_multi_index
(
KPerBlock
,
0
);
constexpr
auto
b_block_slice_copy_step
=
make_multi_index
(
KPerBlock
,
0
);
constexpr
auto
b_block_slice_copy_step
=
make_multi_index
(
KPerBlock
,
0
);
...
@@ -488,11 +490,12 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
...
@@ -488,11 +490,12 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
static_for
<
0
,
NRepeat
,
1
>
{}([
&
](
auto
nr_i
)
{
static_for
<
0
,
NRepeat
,
1
>
{}([
&
](
auto
nr_i
)
{
static_for
<
0
,
NumXdlops
,
1
>
{}([
&
](
auto
xdlops_i
)
{
static_for
<
0
,
NumXdlops
,
1
>
{}([
&
](
auto
xdlops_i
)
{
static_for
<
0
,
NumBlks
,
1
>
{}([
&
](
auto
blk_i
)
{
static_for
<
0
,
NumBlks
,
1
>
{}([
&
](
auto
blk_i
)
{
auto
c_blk
=
c_thread_buf
[
Number
<
c_mr_nr_nx_desc
.
CalculateOffset
(
make_tuple
(
mr_i
,
nr_i
,
xdlops_i
))
>
{}];
static_for
<
0
,
BlkSize
,
1
>
{}([
&
](
auto
j
)
{
static_for
<
0
,
BlkSize
,
1
>
{}([
&
](
auto
j
)
{
c_blk_buf_
(
j
)
=
c_blk_buf_
(
j
)
=
c_blk
.
template
AsType
<
float
>()[
Number
<
c_thread_buf
[
Number
<
(
mr_i
*
NRepeat
+
nr_i
)
*
NumXdlops
+
c_blk_nb_bs_desc
.
CalculateOffset
(
make_tuple
(
blk_i
,
j
))
>
{}];
xdlops_i
>
{}]
.
template
AsType
<
float
>()[
Number
<
blk_i
*
BlkSize
+
j
>
{}];
});
});
// calculate origin of thread output tensor on global memory
// calculate origin of thread output tensor on global memory
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment