Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
b70bcd86
"...composable_kernel_rocm.git" did not exist on "e6bb1dd72df7f948289bf7420266aabc0d593c1c"
Commit
b70bcd86
authored
Oct 22, 2024
by
aska-0096
Browse files
bug fix, sanity checked
parent
69977fab
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
4 deletions
+16
-4
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
...operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
+16
-4
No files found.
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
View file @
b70bcd86
...
@@ -453,13 +453,25 @@ struct ThreadwiseTensorSliceTransfer_v1r4
...
@@ -453,13 +453,25 @@ struct ThreadwiseTensorSliceTransfer_v1r4
using
dst_vector_type
=
vector_type_maker_t
<
DstData
,
DstScalarPerVector
>
;
using
dst_vector_type
=
vector_type_maker_t
<
DstData
,
DstScalarPerVector
>
;
using
dst_vector_t
=
typename
dst_vector_type
::
type
;
using
dst_vector_t
=
typename
dst_vector_type
::
type
;
constexpr
auto
data_to_origin_disp_idx
=
constexpr
auto
dst_data_idx
=
[
&
]()
{
ordered_dst_access_idx
.
ReorderGivenOld2New
(
dst_dim_access_order
)
*
Index
ordered_idx
;
dst_scalar_per_access
;
static_for
<
0
,
nDim
,
1
>
{}([
&
](
auto
i
)
{
ordered_idx
(
i
)
=
forward_sweep
[
i
]
?
ordered_dst_access_idx
[
i
]
:
ordered_dst_access_lengths
[
i
]
-
1
-
ordered_dst_access_idx
[
i
];
});
return
container_reorder_given_old2new
(
ordered_idx
,
dst_dim_access_order
)
*
dst_scalar_per_access
;
}();
constexpr
auto
dst_data_idx_seq
=
generate_sequence_v2
(
[
&
](
auto
i
)
{
return
Number
<
dst_data_idx
[
i
]
>
{};
},
Number
<
dst_data_idx
.
Size
()
>
{});
// copy data from dst_thread_scratch_ into dst_vector_container
// copy data from dst_thread_scratch_ into dst_vector_container
auto
dst_vector
=
dst_vector_type
{
auto
dst_vector
=
dst_vector_type
{
dst_thread_scratch_
.
template
GetAsType
<
dst_vector_t
>(
data_to_origin_disp_idx
)};
dst_thread_scratch_
.
template
GetAsType
<
dst_vector_t
>(
dst_data_idx_seq
)};
const
bool
is_dst_valid
=
const
bool
is_dst_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
dst_desc
,
dst_coord_
);
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
dst_desc
,
dst_coord_
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment