Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f86e4436
"tests/vscode:/vscode.git/clone" did not exist on "4d39b7483d405474e913754c3a22903cca0d7fbf"
Commit
f86e4436
authored
Apr 21, 2023
by
Rostyslav Geyyer
Browse files
Merge elementwise op with type conversion
parent
845efff7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
18 deletions
+18
-18
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
+18
-18
No files found.
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
View file @
f86e4436
...
@@ -208,15 +208,6 @@ struct ThreadwiseTensorSliceTransfer_v3r1
...
@@ -208,15 +208,6 @@ struct ThreadwiseTensorSliceTransfer_v3r1
auto
src_vector_container
=
src_vector_type
{
auto
src_vector_container
=
src_vector_type
{
src_buf
.
template
Get
<
src_vector_t
>(
src_coord_
.
GetOffset
(),
is_src_valid
)};
src_buf
.
template
Get
<
src_vector_t
>(
src_coord_
.
GetOffset
(),
is_src_valid
)};
// apply SrcElementwiseOperation on src_vector_container
static_for
<
0
,
SrcScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
SrcData
src_v
;
src_element_op_
(
src_v
,
src_vector_container
.
template
AsType
<
SrcData
>()[
i
]);
src_vector_container
.
template
AsType
<
SrcData
>()(
i
)
=
src_v
;
});
// copy data from src_vector_container into src_thread_scratch_
// copy data from src_vector_container into src_thread_scratch_
src_thread_scratch_tuple_
(
thread_scratch_id
)
src_thread_scratch_tuple_
(
thread_scratch_id
)
.
template
SetAsType
<
src_vector_t
>(
.
template
SetAsType
<
src_vector_t
>(
...
@@ -346,16 +337,25 @@ struct ThreadwiseTensorSliceTransfer_v3r1
...
@@ -346,16 +337,25 @@ struct ThreadwiseTensorSliceTransfer_v3r1
src_vector_refs
,
dst_vector_refs
);
src_vector_refs
,
dst_vector_refs
);
});
});
}
}
static_ford
<
SliceLengths
>
{}([
&
](
auto
idx
)
{
static_ford
<
SliceLengths
>
{}([
&
](
auto
idx
)
{
// pick the right conversion method
// if elementwise op does conversion, use the op instead of type_convert
#if CK_EXPERIMENTAL_CONVERT_PRECISION
if
constexpr
(
is_same
<
SrcElementwiseOperation
,
using
UnaryConvert
=
ck
::
tensor_operation
::
element_wise
::
UnaryConvertPrecision
;
ck
::
tensor_operation
::
element_wise
::
UnaryConvert
>::
value
||
#else
is_same
<
SrcElementwiseOperation
,
using
UnaryConvert
=
ck
::
tensor_operation
::
element_wise
::
UnaryConvert
;
ck
::
tensor_operation
::
element_wise
::
UnaryConvertPrecision
>::
value
)
#endif
{
// convert from SrcData to DstData here
DstData
dst_v
;
UnaryConvert
{}(
dst_thread_scratch_
(
idx
),
src_element_op_
(
dst_v
,
src_thread_scratch_tuple_
[
thread_scratch_id
][
idx
]);
src_thread_scratch_tuple_
[
thread_scratch_id
][
idx
]);
dst_thread_scratch_
(
idx
)
=
dst_v
;
}
// else apply elementwise op and use type_convert for conversion
else
{
SrcData
src_v
;
src_element_op_
(
src_v
,
src_thread_scratch_tuple_
[
thread_scratch_id
][
idx
]);
dst_thread_scratch_
(
idx
)
=
type_convert
<
DstData
>
(
src_v
);
}
});
});
#endif
#endif
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment