Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5f728f5d
"include/vscode:/vscode.git/clone" did not exist on "5b3bd032ad5123b9993f2eb4660cda86b417fb51"
Commit
5f728f5d
authored
May 12, 2021
by
Chao Liu
Browse files
clean up
parent
9409e882
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
45 additions
and
59 deletions
+45
-59
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
...or_operation/threadwise_dynamic_tensor_slice_transfer.hpp
+45
-59
No files found.
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
View file @
5f728f5d
...
...
@@ -70,7 +70,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
__device__
constexpr
ThreadwiseDynamicTensorSliceTransfer_v1r3
(
const
DstDesc
&
dst_desc
,
const
Index
&
dst_slice_origin_idx
)
:
dst_
slice_origin_
coord_
(
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_idx
))
:
dst_coord_
(
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_idx
))
{
static_assert
(
SrcDesc
::
IsKnownAtCompileTime
(),
"wrong! SrcDesc need to known at compile-time"
);
...
...
@@ -78,7 +78,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
__device__
void
SetDstSliceOrigin
(
const
DstDesc
&
dst_desc
,
const
Index
&
dst_slice_origin_idx
)
{
dst_
slice_origin_
coord_
=
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_idx
);
dst_coord_
=
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_idx
);
}
template
<
typename
SrcSliceOriginIdx
,
...
...
@@ -206,12 +206,12 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
type_convert
<
DstData
>
{}(
src_buf
[
Number
<
src_offset
>
{}]);
});
const
bool
is_dst_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
dst_desc
,
dst
_slice_origin
_coord_
);
const
bool
is_dst_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
dst_desc
,
dst_coord_
);
// copy data from dst_vector into dst_buf
dst_buf
.
template
Set
<
dst_vector_t
>(
dst_
slice_origin_
coord_
.
GetOffset
(),
dst_coord_
.
GetOffset
(),
is_dst_valid
,
dst_vector
.
template
AsType
<
dst_vector_t
>()[
Number
<
0
>
{}]);
...
...
@@ -237,15 +237,13 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
{
if
constexpr
(
forward_sweep
[
i
])
{
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_coord_
,
dst_forward_iterators
[
dim_access_order
[
i
]]);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_coord_
,
dst_forward_iterators
[
dim_access_order
[
i
]]);
}
else
{
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_coord_
,
dst_backward_iterators
[
dim_access_order
[
i
]]);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_coord_
,
dst_backward_iterators
[
dim_access_order
[
i
]]);
}
}
});
...
...
@@ -257,7 +255,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
const
auto
dst_reset_iterator
=
make_dynamic_tensor_coordinate_iterator
(
dst_desc
,
GetDstCoordinateResetStep
());
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_
slice_origin_
coord_
,
dst_reset_iterator
);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_coord_
,
dst_reset_iterator
);
}
}
...
...
@@ -354,11 +352,11 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
const
auto
adjusted_step
=
make_dynamic_tensor_coordinate_iterator
(
dst_desc
,
adjusted_step_idx
);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_
slice_origin_
coord_
,
adjusted_step
);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_coord_
,
adjusted_step
);
}
private:
DstCoord
dst_
slice_origin_
coord_
;
DstCoord
dst_coord_
;
};
// namespace ck
// Assume:
...
...
@@ -393,7 +391,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
__device__
constexpr
ThreadwiseDynamicTensorSliceTransfer_v2
(
const
SrcDesc
&
src_desc
,
const
Index
&
src_slice_origin_idx
)
:
src_
slice_origin_
coord_
(
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_idx
))
:
src_coord_
(
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_idx
))
{
static_assert
(
DstDesc
::
IsKnownAtCompileTime
(),
"wrong! SrcDesc need to known at compile-time"
);
...
...
@@ -401,7 +399,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
__device__
void
SetDstSliceOrigin
(
const
SrcDesc
&
src_desc
,
const
Index
&
src_slice_origin_idx
)
{
src_
slice_origin_
coord_
=
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_idx
);
src_coord_
=
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_idx
);
}
template
<
typename
SrcBuffer
,
...
...
@@ -518,13 +516,12 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
using
src_vector_t
=
typename
vector_type_maker
<
SrcData
,
SrcScalarPerVector
>::
type
::
type
;
const
bool
is_src_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
src_desc
,
src
_slice_origin
_coord_
);
const
bool
is_src_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
src_desc
,
src_coord_
);
// copy data from src_buf into src_vector
src_vector
.
template
AsType
<
src_vector_t
>()(
Number
<
0
>
{})
=
src_buf
.
template
Get
<
src_vector_t
>(
src_slice_origin_coord_
.
GetOffset
(),
is_src_valid
);
src_buf
.
template
Get
<
src_vector_t
>(
src_coord_
.
GetOffset
(),
is_src_valid
);
// copy data from src_vector into dst_buf
static_for
<
0
,
SrcScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
...
...
@@ -557,15 +554,13 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
{
if
constexpr
(
forward_sweep
[
i
])
{
move_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_coord_
,
src_forward_iterators
[
dim_access_order
[
i
]]);
move_dynamic_tensor_coordinate
(
src_desc
,
src_coord_
,
src_forward_iterators
[
dim_access_order
[
i
]]);
}
else
{
move_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_coord_
,
src_backward_iterators
[
dim_access_order
[
i
]]);
move_dynamic_tensor_coordinate
(
src_desc
,
src_coord_
,
src_backward_iterators
[
dim_access_order
[
i
]]);
}
}
});
...
...
@@ -577,7 +572,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
const
auto
src_reset_iterator
=
make_dynamic_tensor_coordinate_iterator
(
src_desc
,
GetSrcCoordinateResetStep
());
move_dynamic_tensor_coordinate
(
src_desc
,
src_
slice_origin_
coord_
,
src_reset_iterator
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_coord_
,
src_reset_iterator
);
}
}
...
...
@@ -674,11 +669,11 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
const
auto
adjusted_step
=
make_dynamic_tensor_coordinate_iterator
(
src_desc
,
adjusted_step_idx
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_
slice_origin_
coord_
,
adjusted_step
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_coord_
,
adjusted_step
);
}
private:
SrcCoord
src_
slice_origin_
coord_
;
SrcCoord
src_coord_
;
};
// namespace ck
// Assume:
...
...
@@ -721,8 +716,8 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
const
Index
&
src_slice_origin
,
const
DstDesc
&
dst_desc
,
const
Index
&
dst_slice_origin
)
:
src_
slice_origin_
coord_
(
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin
)),
dst_
slice_origin_
coord_
(
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin
))
:
src_coord_
(
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin
)),
dst_coord_
(
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin
))
{
// TODO: fix this
static_assert
(
is_same
<
SrcData
,
DstData
>::
value
,
...
...
@@ -731,12 +726,12 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
__device__
void
SetSrcSliceOrigin
(
const
SrcDesc
&
src_desc
,
const
Index
&
src_slice_origin_idx
)
{
src_
slice_origin_
coord_
=
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_idx
);
src_coord_
=
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_idx
);
}
__device__
void
SetDstSliceOrigin
(
const
DstDesc
&
dst_desc
,
const
Index
&
dst_slice_origin_idx
)
{
dst_
slice_origin_
coord_
=
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_idx
);
dst_coord_
=
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_idx
);
}
template
<
typename
SrcBuffer
,
typename
SrcIteratorHacks
>
...
...
@@ -840,13 +835,12 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
using
src_vector_t
=
typename
decltype
(
src_tmp_vector
)
::
type
;
const
bool
is_src_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
src_desc
,
src
_slice_origin
_coord_
);
const
bool
is_src_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
src_desc
,
src_coord_
);
// copy data from src_buf to src_tmp_vector
src_tmp_vector
.
template
AsType
<
src_vector_t
>()(
Number
<
0
>
{})
=
src_buf
.
template
Get
<
src_vector_t
>(
src_slice_origin_coord_
.
GetOffset
(),
is_src_valid
);
src_buf
.
template
Get
<
src_vector_t
>(
src_coord_
.
GetOffset
(),
is_src_valid
);
// copy data from src_tmp_vector to buffer_
static_for
<
0
,
SrcScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
...
...
@@ -880,16 +874,12 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
if
constexpr
(
forward_sweep
[
i
])
{
move_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_coord_
,
src_forward_iterators
[
src_dim_access_order
[
i
]]);
src_desc
,
src_coord_
,
src_forward_iterators
[
src_dim_access_order
[
i
]]);
}
else
{
move_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin_coord_
,
src_backward_iterators
[
src_dim_access_order
[
i
]]);
src_desc
,
src_coord_
,
src_backward_iterators
[
src_dim_access_order
[
i
]]);
}
}
});
...
...
@@ -901,7 +891,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
const
auto
src_reset_iterator
=
make_dynamic_tensor_coordinate_iterator
(
src_desc
,
GetSrcCoordinateResetStep
());
move_dynamic_tensor_coordinate
(
src_desc
,
src_
slice_origin_
coord_
,
src_reset_iterator
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_coord_
,
src_reset_iterator
);
}
}
...
...
@@ -1019,11 +1009,11 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
using
dst_vector_t
=
typename
decltype
(
dst_tmp_vector
)
::
type
;
// copy data from dst_tmp_vector to dst_buf
const
bool
is_dst_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
dst_desc
,
dst
_slice_origin
_coord_
);
const
bool
is_dst_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
dst_desc
,
dst_coord_
);
dst_buf
.
template
Set
<
dst_vector_t
>(
dst_
slice_origin_
coord_
.
GetOffset
(),
dst_coord_
.
GetOffset
(),
is_dst_valid
,
dst_tmp_vector
.
template
AsType
<
dst_vector_t
>()[
Number
<
0
>
{}]);
...
...
@@ -1051,16 +1041,12 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
if
constexpr
(
forward_sweep
[
i
])
{
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_coord_
,
dst_forward_iterators
[
dst_dim_access_order
[
i
]]);
dst_desc
,
dst_coord_
,
dst_forward_iterators
[
dst_dim_access_order
[
i
]]);
}
else
{
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin_coord_
,
dst_backward_iterators
[
dst_dim_access_order
[
i
]]);
dst_desc
,
dst_coord_
,
dst_backward_iterators
[
dst_dim_access_order
[
i
]]);
}
}
});
...
...
@@ -1072,7 +1058,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
const
auto
dst_reset_iterator
=
make_dynamic_tensor_coordinate_iterator
(
dst_desc
,
GetDstCoordinateResetStep
());
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_
slice_origin_
coord_
,
dst_reset_iterator
);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_coord_
,
dst_reset_iterator
);
}
}
...
...
@@ -1241,7 +1227,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
const
auto
adjusted_step
=
make_dynamic_tensor_coordinate_iterator
(
src_desc
,
adjusted_step_idx
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_
slice_origin_
coord_
,
adjusted_step
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_coord_
,
adjusted_step
);
}
// src_slice_origin_step_idx need to be known at compile-time, for performance reason
...
...
@@ -1260,7 +1246,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
const
auto
adjusted_step
=
make_dynamic_tensor_coordinate_iterator
(
src_desc
,
adjusted_step_idx
,
src_move_slice_window_iterator_hack
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_
slice_origin_
coord_
,
adjusted_step
);
move_dynamic_tensor_coordinate
(
src_desc
,
src_coord_
,
adjusted_step
);
}
// dst_slice_origin_step_idx need to be known at compile-time, for performance reason
__device__
void
MoveDstSliceWindow
(
const
DstDesc
&
dst_desc
,
...
...
@@ -1275,7 +1261,7 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
const
auto
adjusted_step
=
make_dynamic_tensor_coordinate_iterator
(
dst_desc
,
adjusted_step_idx
);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_
slice_origin_
coord_
,
adjusted_step
);
move_dynamic_tensor_coordinate
(
dst_desc
,
dst_coord_
,
adjusted_step
);
}
private:
...
...
@@ -1286,8 +1272,8 @@ struct ThreadwiseDynamicTensorSliceTransfer_v3
StaticBuffer
<
AddressSpace
::
Vgpr
,
SrcData
,
buffer_size_
>
buffer_
;
SrcCoord
src_
slice_origin_
coord_
;
DstCoord
dst_
slice_origin_
coord_
;
SrcCoord
src_coord_
;
DstCoord
dst_coord_
;
};
// Assume:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment