Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a70d9f63
Commit
a70d9f63
authored
Sep 08, 2022
by
Po-Yen, Chen
Browse files
Add more template parameters (vector width related)
parent
4eaa502b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
24 additions
and
8 deletions
+24
-8
include/ck/tensor_operation/gpu/device/device_permute.hpp
include/ck/tensor_operation/gpu/device/device_permute.hpp
+15
-3
include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp
include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp
+9
-5
No files found.
include/ck/tensor_operation/gpu/device/device_permute.hpp
View file @
a70d9f63
...
...
@@ -84,7 +84,11 @@ template <typename InDataType,
index_t
WPerBlock
,
index_t
InBlockLdsExtraW
,
typename
InBlockTransferThreadClusterLengths
,
typename
InBlockTransferThreadClusterArrangeOrder
>
typename
InBlockTransferThreadClusterArrangeOrder
,
index_t
SrcVectorDim
,
index_t
DstVectorDim
,
index_t
SrcScalarPerVector
,
index_t
DstScalarPerVector
>
struct
DevicePermute
:
detail
::
DevicePermuteBase
<
DevicePermute
<
InDataType
,
OutDataType
,
...
...
@@ -96,7 +100,11 @@ struct DevicePermute
WPerBlock
,
InBlockLdsExtraW
,
InBlockTransferThreadClusterLengths
,
InBlockTransferThreadClusterArrangeOrder
>>
InBlockTransferThreadClusterArrangeOrder
,
SrcVectorDim
,
DstVectorDim
,
SrcScalarPerVector
,
DstScalarPerVector
>>
{
static_assert
(
3
<=
NumDim
,
"Only accept at least 3D dimension tensor"
);
...
...
@@ -149,7 +157,11 @@ struct DevicePermute
WPerBlock
,
InBlockLdsExtraW
,
InBlockTransferThreadClusterLengths
,
InBlockTransferThreadClusterArrangeOrder
>
;
InBlockTransferThreadClusterArrangeOrder
,
SrcVectorDim
,
DstVectorDim
,
SrcScalarPerVector
,
DstScalarPerVector
>
;
struct
Argument
:
public
BaseArgument
{
...
...
include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp
View file @
a70d9f63
...
...
@@ -101,7 +101,11 @@ template <typename InGridDesc,
index_t
WPerBlock
,
index_t
InBlockLdsExtraW
,
typename
InBlockTransferThreadClusterLengths
,
typename
InBlockTransferThreadClusterArrangeOrder
>
typename
InBlockTransferThreadClusterArrangeOrder
,
index_t
SrcVectorDim
,
index_t
DstVectorDim
,
index_t
SrcScalarPerVector
,
index_t
DstScalarPerVector
>
struct
GridwisePermute
{
static_assert
(
InGridDesc
::
GetNumOfDimension
()
==
OutGridDesc
::
GetNumOfDimension
());
...
...
@@ -207,10 +211,10 @@ struct GridwisePermute
using
BlockSliceLengths
=
Sequence
<
1
,
HPerBlock
,
WPerBlock
>
;
using
InBlockTransferAccessOrder
=
Sequence
<
0
,
1
,
2
>
;
constexpr
index_t
SrcVectorDim
=
2
;
constexpr
index_t
DstVectorDim
=
1
;
constexpr
index_t
SrcScalarPerVector
=
1
;
constexpr
index_t
DstScalarPerVector
=
1
;
// constexpr index_t SrcVectorDim = 2;
// constexpr index_t DstVectorDim = 1;
// constexpr index_t SrcScalarPerVector = 1;
// constexpr index_t DstScalarPerVector = 1;
using
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment