Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ff6a04fd
Commit
ff6a04fd
authored
Sep 14, 2022
by
Po-Yen, Chen
Browse files
Use more meaningful names in permute element examples
parent
d53443d5
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
50 additions
and
38 deletions
+50
-38
example/37_permute/permute_1xHxW_fp16.cpp
example/37_permute/permute_1xHxW_fp16.cpp
+7
-7
example/37_permute/permute_NxHxW_fp16.cpp
example/37_permute/permute_NxHxW_fp16.cpp
+7
-7
example/37_permute/run_permute_element_example.inc
example/37_permute/run_permute_element_example.inc
+36
-24
No files found.
example/37_permute/permute_1xHxW_fp16.cpp
View file @
ff6a04fd
...
...
@@ -3,8 +3,8 @@
#include "common.hpp"
using
A
DataType
=
F16
;
using
B
DataType
=
F16
;
using
In
DataType
=
F16
;
using
Out
DataType
=
F16
;
// clang-format off
using
DevicePermuteInstance
=
ck
::
tensor_operation
::
device
::
DevicePermute
...
...
@@ -12,7 +12,7 @@ using DevicePermuteInstance = ck::tensor_operation::device::DevicePermute
// ######| Type| Type| Operation| | Size| Block| Block| Block| LdsExtraW| ThreadClusterLengths| ThreadClusterArrangeOrder| VectorDim| VectorDim| ScalarPerVector| ScalarPerVector|
// ######| | | | | | | | | | | | | | | |
// ######| | | | | | | | | | | | | | | |
<
A
DataType
,
B
DataType
,
PassThrough
,
3
,
256
,
1
,
32
,
32
,
3
,
S
<
1
,
32
,
8
>
,
S
<
0
,
1
,
2
>
,
2
,
1
,
2
,
1
>
;
<
In
DataType
,
Out
DataType
,
PassThrough
,
3
,
256
,
1
,
32
,
32
,
3
,
S
<
1
,
32
,
8
>
,
S
<
0
,
1
,
2
>
,
2
,
1
,
2
,
1
>
;
// clang-format on
#include "run_permute_element_example.inc"
...
...
example/37_permute/permute_NxHxW_fp16.cpp
View file @
ff6a04fd
...
...
@@ -3,8 +3,8 @@
#include "common.hpp"
using
A
DataType
=
F16
;
using
B
DataType
=
F16
;
using
In
DataType
=
F16
;
using
Out
DataType
=
F16
;
// clang-format off
using
DevicePermuteInstance
=
ck
::
tensor_operation
::
device
::
DevicePermute
...
...
@@ -12,7 +12,7 @@ using DevicePermuteInstance = ck::tensor_operation::device::DevicePermute
// ######| Type| Type| Operation| | Size| Block| Block| Block| LdsExtraW| ThreadClusterLengths| ThreadClusterArrangeOrder| VectorDim| VectorDim| ScalarPerVector| ScalarPerVector|
// ######| | | | | | | | | | | | | | | |
// ######| | | | | | | | | | | | | | | |
<
A
DataType
,
B
DataType
,
PassThrough
,
3
,
128
,
4
,
16
,
8
,
6
,
S
<
2
,
16
,
4
>
,
S
<
0
,
1
,
2
>
,
2
,
1
,
2
,
1
>
;
<
In
DataType
,
Out
DataType
,
PassThrough
,
3
,
128
,
4
,
16
,
8
,
6
,
S
<
2
,
16
,
4
>
,
S
<
0
,
1
,
2
>
,
2
,
1
,
2
,
1
>
;
// clang-format on
#include "run_permute_element_example.inc"
...
...
example/37_permute/run_permute_element_example.inc
View file @
ff6a04fd
...
...
@@ -7,37 +7,46 @@ bool run_permute_element(const Problem& problem)
{
using
std
::
begin
,
std
::
end
;
const
auto
&
shape
=
problem
.
shape
;
ck
::
remove_cvref_t
<
decltype
(
shape
)
>
transposed_shape
;
transpose_shape
(
problem
.
shape
,
problem
.
axes
,
begin
(
transposed_shape
));
const
auto
&
input_shape
=
problem
.
shape
;
const
auto
&
input_axes
=
problem
.
axes
;
Tensor
<
ADataTy
pe
>
a
(
shape
)
;
Tensor
<
BDataType
>
b
(
transposed
_shape
);
ck
::
remove_cvref_t
<
decltype
(
input_sha
pe
)
>
output_
shape
;
transpose_shape
(
input_shape
,
input_axes
,
begin
(
output
_shape
)
)
;
ck
::
utils
::
FillUniformDistribution
<
ADataType
>
{
-
1.
f
,
1.
f
}(
a
);
Tensor
<
InDataType
>
input_tensor
(
input_shape
);
Tensor
<
OutDataType
>
output_tensor
(
output_shape
);
DeviceMem
a_device_buf
(
a
.
GetElementSpaceSizeInBytes
());
DeviceMem
b_device_buf
(
b
.
GetElementSpaceSizeInBytes
());
ck
::
utils
::
FillUniformDistribution
<
InDataType
>
{
-
1.
f
,
1.
f
}(
input_tensor
);
DeviceMem
input_device_buf
(
input_tensor
.
GetElementSpaceSizeInBytes
());
DeviceMem
output_device_buf
(
output_tensor
.
GetElementSpaceSizeInBytes
());
using
std
::
data
;
a
_device_buf
.
ToDevice
(
data
(
a
));
input
_device_buf
.
ToDevice
(
data
(
input_tensor
));
std
::
array
<
ck
::
index_t
,
Problem
::
NumDim
>
a
_lengths
,
b
_lengths
;
std
::
array
<
ck
::
index_t
,
Problem
::
NumDim
>
a
_strides
,
b
_strides
;
std
::
array
<
ck
::
index_t
,
Problem
::
NumDim
>
input
_lengths
,
output
_lengths
;
std
::
array
<
ck
::
index_t
,
Problem
::
NumDim
>
input
_strides
,
output
_strides
;
const
void
*
input
=
a
_device_buf
.
GetDeviceBuffer
();
void
*
output
=
b
_device_buf
.
GetDeviceBuffer
();
const
void
*
input
_data
=
input
_device_buf
.
GetDeviceBuffer
();
void
*
output
_data
=
output
_device_buf
.
GetDeviceBuffer
();
std
::
copy
(
begin
(
shape
),
end
(
shape
),
begin
(
a_lengths
));
std
::
copy
(
begin
(
a
.
GetStrides
()),
end
(
a
.
GetStrides
()),
begin
(
a_strides
));
std
::
copy
(
begin
(
transposed_shape
),
end
(
transposed_shape
),
begin
(
b_lengths
));
std
::
copy
(
begin
(
b
.
GetStrides
()),
end
(
b
.
GetStrides
()),
begin
(
b_strides
));
std
::
copy
(
begin
(
input_shape
),
end
(
input_shape
),
begin
(
input_lengths
));
std
::
copy
(
begin
(
input_tensor
.
GetStrides
()),
end
(
input_tensor
.
GetStrides
()),
begin
(
input_strides
));
std
::
copy
(
begin
(
output_shape
),
end
(
output_shape
),
begin
(
output_lengths
));
std
::
copy
(
begin
(
output_tensor
.
GetStrides
()),
end
(
output_tensor
.
GetStrides
()),
begin
(
output_strides
));
static_assert
(
std
::
is_default_constructible_v
<
DevicePermuteInstance
>
);
auto
permute
=
DevicePermuteInstance
{};
auto
argument
=
permute
.
MakeArgument
(
a_lengths
,
a_strides
,
b_lengths
,
b_strides
,
input
,
output
,
PassThrough
{});
auto
argument
=
permute
.
MakeArgument
(
input_lengths
,
input_strides
,
output_lengths
,
output_strides
,
input_data
,
output_data
,
PassThrough
{});
if
(
!
permute
.
IsSupportedArgument
(
argument
))
{
...
...
@@ -51,16 +60,19 @@ bool run_permute_element(const Problem& problem)
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms"
<<
std
::
endl
;
b
_device_buf
.
FromDevice
(
data
(
b
));
output
_device_buf
.
FromDevice
(
data
(
output_tensor
));
Tensor
<
B
DataType
>
host_b
(
transposed
_shape
);
if
(
!
host_permute
(
a
,
problem
.
axes
,
PassThrough
{},
host
_b
))
Tensor
<
Out
DataType
>
output_tensor_host
(
output
_shape
);
if
(
!
host_permute
(
input_tensor
,
input_
axes
,
PassThrough
{},
output_tensor_
host
))
{
return
false
;
}
return
ck
::
utils
::
check_err
(
b
.
mData
,
host_b
.
mData
,
"Error: incorrect results in output tensor"
,
1
e
-
6
,
1
e
-
6
);
return
ck
::
utils
::
check_err
(
output_tensor
.
mData
,
output_tensor_host
.
mData
,
"Error: incorrect results in output tensor"
,
1
e
-
6
,
1
e
-
6
);
}
bool
run_permute_element_example
(
const
Problem
::
Shape
&
shape
,
const
Problem
::
Axes
&
axes
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment