Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
6720ef75
Commit
6720ef75
authored
Feb 24, 2022
by
Jianfeng yan
Browse files
minor changes
parent
758f6977
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
36 deletions
+6
-36
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_using_space_filling_curve.hpp
...dwise_tensor_slice_transfer_using_space_filling_curve.hpp
+6
-36
No files found.
composable_kernel/include/tensor_operation/threadwise_tensor_slice_transfer_using_space_filling_curve.hpp
View file @
6720ef75
...
@@ -8,36 +8,6 @@
...
@@ -8,36 +8,6 @@
namespace
ck
{
namespace
ck
{
// Do following things to avoid "alloca" in LLVM-IR, which would cause scratch memory
// and sometimes useless instructions:
// 1. Don't save a reference to tensor descriptor in class, pass in tensor descriptor as argument
// instead
// 2. Don't construct a new tensor coordinate every time it is used; update and reuse the same
// tensor coordinate instead
// 3. Don't use a pointer to VGPR buffer, use vector instead
// namespace detail {
// // TODO: How to fix this? It uses a struct instead of a lambda because a lambda
// // doesn't have a constructor
// template <index_t VectorDim, index_t ScalarPerVector>
// struct lambda_scalar_per_access
// {
// __host__ __device__ constexpr auto operator()(index_t i) const
// {
// return (i == VectorDim) ? ScalarPerVector : 1;
// }
// };
//
// template <index_t VectorDim>
// struct lambda_scalar_step_in_vector
// {
// __host__ __device__ constexpr auto operator()(index_t i) const
// {
// return (i == VectorDim) ? 1 : 0;
// }
// };
// } // namespace detail
// Assume:
// Assume:
// 1. src:
// 1. src:
// 1. SrcDesc is known at compile-time
// 1. SrcDesc is known at compile-time
...
@@ -122,7 +92,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3_using_space_filling_curve
...
@@ -122,7 +92,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3_using_space_filling_curve
remove_cv_t
<
decltype
(
dst_scalar_per_access
)
>>
;
remove_cv_t
<
decltype
(
dst_scalar_per_access
)
>>
;
// TODO: Use SpaceFillingCurve::ScalarsPerAccess instead of DstScalarPerVector?
// TODO: Use SpaceFillingCurve::ScalarsPerAccess instead of DstScalarPerVector?
static_assert
(
DstScalarPerVector
==
SpaceFillingCurve
::
ScalarPerVector
);
static_assert
(
DstScalarPerVector
==
SpaceFillingCurve
::
ScalarPerVector
,
"Wrong! "
);
typename
vector_type_maker
<
DstData
,
DstScalarPerVector
>::
type
dst_vector
;
typename
vector_type_maker
<
DstData
,
DstScalarPerVector
>::
type
dst_vector
;
using
dst_vector_t
=
typename
vector_type_maker
<
DstData
,
DstScalarPerVector
>::
type
::
type
;
using
dst_vector_t
=
typename
vector_type_maker
<
DstData
,
DstScalarPerVector
>::
type
::
type
;
...
@@ -130,15 +100,15 @@ struct ThreadwiseTensorSliceTransfer_v1r3_using_space_filling_curve
...
@@ -130,15 +100,15 @@ struct ThreadwiseTensorSliceTransfer_v1r3_using_space_filling_curve
static_for
<
0
,
num_accesses
,
1
>
{}([
&
](
auto
idx_1d
)
{
static_for
<
0
,
num_accesses
,
1
>
{}([
&
](
auto
idx_1d
)
{
//
constexpr auto idx_md = SpaceFillingCurve::GetIndex(idx_1d);
constexpr
auto
idx_md
=
SpaceFillingCurve
::
GetIndex
(
idx_1d
);
constexpr
auto
all_indices
=
SpaceFillingCurve
::
GetIndices
(
idx_1d
);
//
constexpr auto all_indices = SpaceFillingCurve::GetIndices(idx_1d);
// copy data from src_buf into dst_vector
// copy data from src_buf into dst_vector
static_for
<
0
,
DstScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
DstScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
// constexpr index_t src_offset = src_desc.CalculateOffset(
// src_slice_origin_idx + idx_md + i * dst_scalar_step_in_vector);
constexpr
index_t
src_offset
=
src_desc
.
CalculateOffset
(
constexpr
index_t
src_offset
=
src_desc
.
CalculateOffset
(
src_slice_origin_idx
+
all_indices
[
i
]);
src_slice_origin_idx
+
idx_md
+
i
*
dst_scalar_step_in_vector
);
// constexpr index_t src_offset = src_desc.CalculateOffset(
// src_slice_origin_idx + all_indices[i]);
SrcData
dst_v
;
SrcData
dst_v
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment