Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5f50ed89
"vscode:/vscode.git/clone" did not exist on "b0b0c191b9654b5f89397696f10d48199d3e4dc7"
Commit
5f50ed89
authored
Sep 12, 2022
by
Po-Yen, Chen
Browse files
Add example demonstrating bundling of multiple elements in a tensor
parent
82cc8731
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
69 additions
and
3 deletions
+69
-3
example/36_permute/CMakeLists.txt
example/36_permute/CMakeLists.txt
+2
-0
example/36_permute/common.hpp
example/36_permute/common.hpp
+31
-0
example/36_permute/permute_HxWx4_fp16.cpp
example/36_permute/permute_HxWx4_fp16.cpp
+24
-0
example/36_permute/run_permute_example.inc
example/36_permute/run_permute_example.inc
+12
-3
No files found.
example/36_permute/CMakeLists.txt
View file @
5f50ed89
...
...
@@ -2,6 +2,8 @@ add_custom_target(example_permute)
# Register each permute example source through the project's
# add_example_executable helper (one target per source file).
add_example_executable(example_permute_1xHxW_fp32 permute_1xHxW_fp32.cpp)
add_example_executable(example_permute_NxHxW_fp32 permute_NxHxW_fp32.cpp)
add_example_executable(example_permute_HxWx4_fp16 permute_HxWx4_fp16.cpp)

# Make the umbrella example_permute custom target depend on every example above.
add_dependencies(example_permute
    example_permute_1xHxW_fp32
    example_permute_NxHxW_fp32
    example_permute_HxWx4_fp16)
example/36_permute/common.hpp
View file @
5f50ed89
...
...
@@ -23,6 +23,7 @@
// Short aliases for the floating-point element types used by the permute examples.
using F16 = ck::half_t; // half precision, as provided by the ck library
using F32 = float;      // single precision
using F64 = double;     // double precision
struct
ExecutionConfig
final
{
...
...
@@ -53,6 +54,36 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
namespace
detail
{
/// Type trait mapping a bundle type and a divisor to the type of one bundled element.
///
/// The primary template is declared but never defined: only the (Bundle, Divisor)
/// pairs that have a specialization expose a nested `type`; any other pair is an
/// incomplete type.
template <typename Bundle, std::size_t Divisor>
struct get_bundled;

/// A divisor of 1 means nothing is bundled, so the element type is the bundle type itself.
template <typename Unbundled>
struct get_bundled<Unbundled, 1>
{
    using type = Unbundled;
};
/// An F64 bundle split into 2 elements yields F32 elements.
template <>
struct get_bundled<F64, 2>
{
    using type = F32;
};

/// An F64 bundle split into 4 elements yields F16 elements.
template <>
struct get_bundled<F64, 4>
{
    using type = F16;
};

/// An F32 bundle split into 2 elements yields F16 elements.
template <>
struct get_bundled<F32, 2>
{
    using type = F16;
};
/// Convenience alias for `get_bundled<Bundle, Divisor>::type`.
/// Only usable for (Bundle, Divisor) pairs that have a `get_bundled` specialization.
template <typename Bundle, std::size_t Divisor>
using get_bundled_t = typename get_bundled<Bundle, Divisor>::type;
template
<
typename
T
,
typename
=
void
>
struct
is_iterator
:
std
::
false_type
{
...
...
example/36_permute/permute_HxWx4_fp16.cpp
0 → 100644
View file @
5f50ed89
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
// Input and output tensors both use F64 elements. Each F64 acts as a bundle:
// with NUM_ELEMS_IN_BUNDLE defined as 4 below, run_permute_example.inc fills
// the input through a pointer to detail::get_bundled_t<ADataType, 4>, i.e. F16.
using ADataType = F64;
using BDataType = F64;
// Device permute instantiation used by this example. The per-argument labels
// below are taken from the column header that accompanied the argument list.
// clang-format off
using DevicePermuteInstance = ck::tensor_operation::device::DevicePermute<
    ADataType,    // InData Type
    BDataType,    // OutData Type
    PassThrough,  // Elementwise Operation
    3,            // NumDim
    256,          // Block Size
    128,          // HPer Block
    128,          // WPer Block
    0,            // InBlock LdsExtraW
    S<1, 16, 16>, // InBlockTransfer ThreadClusterLengths
    S<0, 1, 2>,   // InBlockTransfer ThreadClusterArrangeOrder
    2,            // Src VectorDim
    1,            // Dst VectorDim
    1,            // Src ScalarPerVector
    1>;           // Dst ScalarPerVector
// clang-format on
#define NUM_ELEMS_IN_BUNDLE 4
#include "run_permute_example.inc"
// Entry point: forwards the command line to run_permute_example together with
// two brace-initialized arguments (presumably the default tensor lengths
// {1, 160, 80} and the axis permutation {0, 2, 1} -- confirm against
// run_permute_example's signature).
//
// The process exit code is 0 when run_permute_example reports success (a
// truthy result) and 1 otherwise.
int main(int argc, char* argv[])
{
    const bool failed = !run_permute_example(argc, argv, {1, 160, 80}, {0, 2, 1});
    return failed ? 1 : 0;
}
example/36_permute/run_permute_example.inc
View file @
5f50ed89
...
...
@@ -3,6 +3,10 @@
#pragma once
// Number of elements bundled into one tensor element. An example may define
// this before including this file; when it does not, fall back to 1.
#if !defined(NUM_ELEMS_IN_BUNDLE)
#define NUM_ELEMS_IN_BUNDLE 1
#endif
bool
run_permute
(
const
ExecutionConfig
&
config
,
const
Problem
&
problem
)
{
using
std
::
begin
,
std
::
end
;
...
...
@@ -14,12 +18,17 @@ bool run_permute(const ExecutionConfig& config, const Problem& problem)
Tensor
<
ADataType
>
a
(
shape
);
Tensor
<
BDataType
>
b
(
transposed_shape
);
std
::
iota
(
begin
(
a
.
mData
),
end
(
a
.
mData
),
1
);
using
std
::
data
,
std
::
size
;
{
auto
*
const
elems
=
reinterpret_cast
<
detail
::
get_bundled_t
<
ADataType
,
NUM_ELEMS_IN_BUNDLE
>*>
(
data
(
a
.
mData
));
std
::
iota
(
elems
,
elems
+
(
size
(
a
.
mData
)
*
NUM_ELEMS_IN_BUNDLE
),
1
);
}
DeviceMem
a_device_buf
(
sizeof
(
ADataType
)
*
a
.
mDesc
.
GetElementSpaceSize
());
DeviceMem
b_device_buf
(
sizeof
(
BDataType
)
*
b
.
mDesc
.
GetElementSpaceSize
());
a_device_buf
.
ToDevice
(
a
.
mData
.
data
(
));
a_device_buf
.
ToDevice
(
data
(
a
.
mData
));
std
::
array
<
ck
::
index_t
,
3
>
a_lengths
,
b_lengths
;
std
::
array
<
ck
::
index_t
,
3
>
a_strides
,
b_strides
;
...
...
@@ -55,7 +64,7 @@ bool run_permute(const ExecutionConfig& config, const Problem& problem)
Tensor
<
BDataType
>
host_b
(
transposed_shape
);
host_permute
(
a
,
problem
.
axes
,
PassThrough
{},
host_b
);
b_device_buf
.
FromDevice
(
b
.
mData
.
data
(
));
b_device_buf
.
FromDevice
(
data
(
b
.
mData
));
return
ck
::
utils
::
check_err
(
b
.
mData
,
host_b
.
mData
,
"Error: incorrect results in output tensor"
,
1
e
-
10
,
1
e
-
10
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment