Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
d9240c6d
Commit
d9240c6d
authored
May 15, 2022
by
ltqin
Browse files
add read global data
parent
673b30cf
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
8 deletions
+23
-8
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_lds_v2r3.hpp
...operation/gpu/grid/gridwise_gemm_xdlops_skip_lds_v2r3.hpp
+23
-8
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_lds_v2r3.hpp
View file @
d9240c6d
...
...
@@ -562,10 +562,11 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
I1
,
// waves
I1
,
// NPerXdlops
Number
<
K1
>
{}));
ignore
=
StaticBuffer
<
AddressSpaceEnum
::
Vgpr
,
FloatAB
,
b_thread_copy_desc_k0_k0b_n0_n1_n2_n3_k1
.
GetElementSpaceSize
(),
true
>
{};
auto
b_thread_buf
=
StaticBuffer
<
AddressSpaceEnum
::
Vgpr
,
FloatAB
,
b_thread_copy_desc_k0_k0b_n0_n1_n2_n3_k1
.
GetElementSpaceSize
(),
true
>
{};
auto
b_grid_desc_k0_k0b_n0_n1_n2_n3_k1
=
MakeBGridDescriptor_K0_K0B_N0_N1_N2_N3_K1
(
b_grid_desc_k0_n_k1
);
...
...
@@ -589,7 +590,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
wave_k_n_id[I1]);
#endif
ignore
=
ThreadwiseTensorSliceTransfer_v2
<
auto
b_threadwise_transfer
=
ThreadwiseTensorSliceTransfer_v2
<
FloatAB
,
FloatAB
,
decltype
(
b_grid_desc_k0_k0b_n0_n1_n2_n3_k1
),
...
...
@@ -603,6 +604,23 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
make_multi_index
(
0
,
wave_k_n_id
[
I0
],
block_work_idx
[
I1
],
0
,
wave_id
[
I1
],
wave_k_n_id
[
I1
],
0
));
constexpr
auto
a_block_slice_copy_step
=
make_multi_index
(
K0PerBlock
,
0
,
0
);
constexpr
auto
b_block_slice_copy_step
=
make_multi_index
(
K0PerBlock
,
0
,
0
);
constexpr
auto
b_thread_slice_copy_step
=
make_multi_index
(
1
,
0
,
0
,
0
,
0
,
0
,
0
);
// preload data to register
{
// Read
b_threadwise_transfer
.
Run
(
b_grid_desc_k0_k0b_n0_n1_n2_n3_k1
,
b_grid_buf
,
b_thread_copy_desc_k0_k0b_n0_n1_n2_n3_k1
,
make_tuple
(
I0
,
I0
,
I0
,
I0
,
I0
,
I0
,
I0
),
b_thread_buf
);
// Move
b_threadwise_transfer
.
MoveSrcSliceWindow
(
b_grid_desc_k0_k0b_n0_n1_n2_n3_k1
,
b_thread_slice_copy_step
);
}
auto
b_blockwise_copy
=
BlockwiseTensorSliceTransfer_v4r1
<
BlockSize
,
BElementwiseOperation
,
...
...
@@ -666,9 +684,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_skip_lds_v2r3
static_cast
<
FloatAB
*>
(
p_shared
)
+
a_block_space_size_aligned
,
b_block_desc_k0_n_k1
.
GetElementSpaceSize
());
constexpr
auto
a_block_slice_copy_step
=
make_multi_index
(
K0PerBlock
,
0
,
0
);
constexpr
auto
b_block_slice_copy_step
=
make_multi_index
(
K0PerBlock
,
0
,
0
);
// gridwise GEMM pipeline
const
auto
gridwise_gemm_pipeline
=
GridwiseGemmPipeline_v1
<
remove_cvref_t
<
decltype
(
a_grid_desc_k0_m_k1
)
>
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment