Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
bc6513a2
Commit
bc6513a2
authored
Dec 15, 2021
by
Chao Liu
Browse files
ThreadwiseTensorSliceTransfer_v3r2 support pointwise op on both src and dst
parent
583aab02
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
11 deletions
+18
-11
composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_v2r6.hpp
...el/include/tensor_operation/gridwise_gemm_xdlops_v2r6.hpp
+4
-8
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v3r2.hpp
...ensor_operation/threadwise_tensor_slice_transfer_v3r2.hpp
+14
-3
No files found.
composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_v2r6.hpp
View file @
bc6513a2
...
...
@@ -507,18 +507,14 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r6
do
{
a_blockwise_copy
.
MoveSrcSliceWindow
(
a_grid_desc_k0_m_k1
,
a_block_slice_copy_step
);
b_blockwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc_k0_n_k1
,
b_block_slice_copy_step
);
a_blockwise_copy
.
MoveSrcSliceWindow
(
a_grid_desc_k0_m_k1
,
a_block_slice_copy_step
);
b_blockwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc_k0_n_k1
,
b_block_slice_copy_step
);
a_blockwise_copy
.
RunRead
(
a_grid_desc_k0_m_k1
,
a_grid_buf
);
a_blockwise_copy
.
RunRead
(
a_grid_desc_k0_m_k1
,
a_grid_buf
);
block_sync_lds
();
b_blockwise_copy
.
RunRead
(
b_grid_desc_k0_n_k1
,
b_grid_buf
);
b_blockwise_copy
.
RunRead
(
b_grid_desc_k0_n_k1
,
b_grid_buf
);
blockwise_gemm
.
Run
(
a_block_buf
,
b_block_buf
,
c_thread_buf
);
...
...
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_v3r2.hpp
View file @
bc6513a2
...
...
@@ -447,13 +447,24 @@ struct ThreadwiseTensorSliceTransfer_v3r2
const
bool
is_dst_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
dst_desc
,
dst_coord_
);
using
dst_vector_t
=
typename
vector_type_maker_t
<
DstData
,
DstScalarPerVector
>::
type
;
using
dst_vector_type
=
vector_type_maker_t
<
DstData
,
DstScalarPerVector
>
;
using
dst_vector_t
=
typename
dst_vector_type
::
type
;
// copy data from dst_thread_scratch_ to dst_buf
// copy data from dst_thread_scratch_ into dst_vector_container
auto
dst_vector_container
=
dst_vector_type
{
dst_thread_scratch_
.
template
GetAsType
<
dst_vector_t
>(
dst_data_idx_seq
)};
// apply DstElementwiseOperation on dst_vector_container
static_for
<
0
,
DstScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
dst_vector_container
.
template
AsType
<
DstData
>()(
i
)
=
dst_element_op_
(
dst_vector_container
.
template
AsType
<
DstData
>()[
i
]);
});
// copy data from dst_vector_container to dst_buf
dst_buf
.
template
Set
<
dst_vector_t
>(
dst_coord_
.
GetOffset
(),
is_dst_valid
,
dst_
thread_scratch_
.
template
Get
AsType
<
dst_vector_t
>(
dst_data_idx_seq
)
);
dst_
vector_container
.
template
AsType
<
dst_vector_t
>(
)[
I0
]
);
constexpr
auto
move_on_dim
=
[
&
]()
constexpr
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment