Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ad7d9460
"git@developer.sourcefind.cn:cnjsdfcy/simbricks.git" did not exist on "b29c8a88ac360200b110eeefa45a88b3bf3e175a"
Commit
ad7d9460
authored
Apr 15, 2021
by
Jing Zhang
Browse files
clean code
parent
0e221501
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
7 deletions
+7
-7
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
...nel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
+7
-7
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
View file @
ad7d9460
...
@@ -156,12 +156,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
...
@@ -156,12 +156,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
ABlockTransferSrcScalarPerVector
,
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K
>
{};
ABlockTransferDstScalarPerVector_K
>
{};
// register allocation for output
FloatAcc
p_c_thread
[
c_k_n_ho_wo_thread_desc
.
GetElementSpaceSize
()];
// zero out threadwise output
threadwise_matrix_set_zero_v3
(
c_k_n_ho_wo_thread_desc
,
p_c_thread
);
auto
c_thread_mtx_index
=
blockwise_gemm
.
GetBeginOfThreadMatrixC
(
get_thread_local_1d_id
());
auto
c_thread_mtx_index
=
blockwise_gemm
.
GetBeginOfThreadMatrixC
(
get_thread_local_1d_id
());
const
auto
k_thread_id
=
c_thread_mtx_index
.
k
;
const
auto
k_thread_id
=
c_thread_mtx_index
.
k
;
...
@@ -229,6 +223,12 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
...
@@ -229,6 +223,12 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
FloatAB
*
p_a_block
=
p_shared_block
;
FloatAB
*
p_a_block
=
p_shared_block
;
// register allocation for output
FloatAcc
p_c_thread
[
c_k_n_ho_wo_thread_desc
.
GetElementSpaceSize
()];
// zero out threadwise output
threadwise_matrix_set_zero_v3
(
c_k_n_ho_wo_thread_desc
,
p_c_thread
);
constexpr
auto
b_thread_slice_copy_step
=
make_multi_index
(
EPerBlock
,
0
,
0
,
0
);
constexpr
auto
b_thread_slice_copy_step
=
make_multi_index
(
EPerBlock
,
0
,
0
,
0
);
// hack to control index calculation when iterating over A and B matrix for threadwise copy
// hack to control index calculation when iterating over A and B matrix for threadwise copy
...
@@ -351,9 +351,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
...
@@ -351,9 +351,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
// output: register to global memory
// output: register to global memory
{
{
// hack to control index calculation when iterating over c_k_n_ho_wo_global tensor
constexpr
auto
c_k_n_ho_wo_global_tensor_iterator_hacks
=
CGlobalIteratorHacks
{};
constexpr
auto
c_k_n_ho_wo_global_tensor_iterator_hacks
=
CGlobalIteratorHacks
{};
static_assert
(
CThreadTransferDstScalarPerVector
==
16
&&
KPerBlock
==
16
,
""
);
const
index_t
k_block_data_on_global_vec
=
const
index_t
k_block_data_on_global_vec
=
k_block_work_id
*
(
KPerBlock
/
CThreadTransferDstScalarPerVector
);
k_block_work_id
*
(
KPerBlock
/
CThreadTransferDstScalarPerVector
);
const
index_t
KPerThreadVec
=
KPerThread
/
CThreadTransferDstScalarPerVector
;
const
index_t
KPerThreadVec
=
KPerThread
/
CThreadTransferDstScalarPerVector
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment