Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
e32db0e9
Commit
e32db0e9
authored
May 29, 2023
by
carlushuang
Browse files
remove scratch by manually reset coordinate
parent
d51da77a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
5 deletions
+12
-5
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp
...ensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp
+12
-5
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp
View file @
e32db0e9
...
@@ -668,7 +668,9 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
...
@@ -668,7 +668,9 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
using
Accumulation
=
ck
::
detail
::
using
Accumulation
=
ck
::
detail
::
AccumulateWithNanCheck
<
false
/*PropagateNan*/
,
reduce
::
Add
,
FloatAcc
>
;
AccumulateWithNanCheck
<
false
/*PropagateNan*/
,
reduce
::
Add
,
FloatAcc
>
;
static_for
<
0
,
MReduceIters
,
1
>
{}([
&
](
auto
i_m_reduce
)
{
// static_for<0, MReduceIters, 1>{}([&](auto i_m_reduce) {
for
(
int
i_m
=
0
;
i_m
<
MReduceIters
;
i_m
++
)
{
static_for
<
0
,
NReduceIters
,
1
>
{}([
&
](
auto
i_n_reduce
)
{
static_for
<
0
,
NReduceIters
,
1
>
{}([
&
](
auto
i_n_reduce
)
{
acc_buf
.
Clear
();
acc_buf
.
Clear
();
for
(
auto
i
=
tile_acc_offset_start
;
i
<
tile_acc_offset_end
;
i
++
)
for
(
auto
i
=
tile_acc_offset_start
;
i
<
tile_acc_offset_end
;
i
++
)
...
@@ -721,14 +723,15 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
...
@@ -721,14 +723,15 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
}
}
}
}
});
});
if
constexpr
(
i_m_reduce
!=
MReduceIters
-
1
)
//
if constexpr(i_m_reduce != MReduceIters - 1)
{
{
acc_load
.
MoveSrcSliceWindow
(
c_partial_acc_block_m_n
,
acc_load
.
MoveSrcSliceWindow
(
c_partial_acc_block_m_n
,
partial_acc_load_step_m
);
partial_acc_load_step_m
);
acc_store
.
MoveDstSliceWindow
(
c_grid_desc_mblock_mperblock_nblock_nperblock
,
acc_store
.
MoveDstSliceWindow
(
c_grid_desc_mblock_mperblock_nblock_nperblock
,
partial_acc_store_step_m
);
partial_acc_store_step_m
);
}
}
});
}
//});
return
;
return
;
}
}
}
}
...
@@ -1004,8 +1007,8 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
...
@@ -1004,8 +1007,8 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
Sequence
<
0
,
1
,
2
,
3
>
,
// typename DimAccessOrder,
Sequence
<
0
,
1
,
2
,
3
>
,
// typename DimAccessOrder,
3
,
// index_t VectorDim,
3
,
// index_t VectorDim,
CBlockTransferScalarPerVector_NWaveNPerXDL
,
// index_t ScalarPerVector,
CBlockTransferScalarPerVector_NWaveNPerXDL
,
// index_t ScalarPerVector,
tru
e
,
// bool ThreadTransferSrcResetCoordinateAfterRun,
fals
e
,
// bool ThreadTransferSrcResetCoordinateAfterRun,
tru
e
>
// bool ThreadTransferDstResetCoordinateAfterRun
fals
e
>
// bool ThreadTransferDstResetCoordinateAfterRun
{
c_block_desc_mblock_mpershuffle_nblock_npershuffle
,
{
c_block_desc_mblock_mpershuffle_nblock_npershuffle
,
make_multi_index
(
0
,
0
,
0
,
0
),
make_multi_index
(
0
,
0
,
0
,
0
),
c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle
,
c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle
,
...
@@ -1062,6 +1065,10 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
...
@@ -1062,6 +1065,10 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_streamk
StreamKReductionStrategy
::
Reduction
)
StreamKReductionStrategy
::
Reduction
)
{
{
// constexpr offset
// constexpr offset
c_block_copy_lds_to_partial_acc
.
SetSrcSliceOrigin
(
c_block_desc_mblock_mpershuffle_nblock_npershuffle
,
make_tuple
(
0
,
0
,
0
,
0
));
c_block_copy_lds_to_partial_acc
.
SetDstSliceOrigin
(
c_block_copy_lds_to_partial_acc
.
SetDstSliceOrigin
(
c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle
,
c_block_desc_mshuffle_mpershuffle_nshuffle_npershuffle
,
make_tuple
(
mxdlperwave
.
value
,
0
,
nxdlperwave
.
value
,
0
));
make_tuple
(
mxdlperwave
.
value
,
0
,
nxdlperwave
.
value
,
0
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment