Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
e9ecf8d1
Commit
e9ecf8d1
authored
Oct 14, 2023
by
Astha Rai
Browse files
fixed errors in client example
parent
11001fa3
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
306 additions
and
65 deletions
+306
-65
library/include/ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp
...tion_instance/gpu/transpose/device_transpose_instance.hpp
+205
-0
library/include/ck/library/tensor_operation_instance/gpu/transpose_3d.hpp
...ck/library/tensor_operation_instance/gpu/transpose_3d.hpp
+63
-65
library/src/tensor_operation_instance/gpu/transpose/CMakeLists.txt
...rc/tensor_operation_instance/gpu/transpose/CMakeLists.txt
+3
-0
library/src/tensor_operation_instance/gpu/transpose/device_transpose_instances_3d.cpp
..._instance/gpu/transpose/device_transpose_instances_3d.cpp
+35
-0
No files found.
library/include/ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp
0 → 100644
View file @
e9ecf8d1
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Short element-type aliases used by the instance lists below.
using F16 = ck::half_t;
using F32 = float;

// Shorthand for a compile-time integer sequence.
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;

// Candidate DeviceElementwise3dImpl configurations for half-precision transpose.
// Every entry uses a one-element input tuple and a one-element output tuple of F16.
// NOTE(review): the integer arguments and the two trailing sequences follow the
// parameter order of DeviceElementwise3dImpl (declared in
// device_elementwise_3d_impl.hpp, not visible here) -- presumably dimension counts,
// per-thread tile sizes and input/output vector widths; confirm against that header.
using device_transpose_f16_instances = std::tuple<
    // FOR 16, 32, 16, 32, 16 (presumably the problem shape these were tuned for -- TODO confirm)
    // clang-format off
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<8>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<1>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 8, 8, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 1, 1, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 1, 1, ck::Sequence<8>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 4, 4, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F16>, ck::Tuple<F16>, 2, 2, 1, 8, 4, 4, ck::Sequence<8>, ck::Sequence<8>>
    // clang-format on
    >;

// Candidate DeviceElementwise3dImpl configurations for single-precision transpose.
// Same layout as the F16 list, with F32 input/output tuples.
using device_transpose_f32_instances = std::tuple<
    // for 16, 8, 16, 32, 8 -> test with instances for fp16
    // clang-format off
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 4, ck::Sequence<1>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 4, ck::Sequence<8>, ck::Sequence<1>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 4, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 4, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<8>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<4>, ck::Sequence<8>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<4>, ck::Sequence<4>>,
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 8, 8, ck::Sequence<8>, ck::Sequence<4>>,
    // Last entry: no trailing comma, a template-argument-list may not end with one.
    DeviceElementwise3dImpl<ck::Tuple<F32>, ck::Tuple<F32>, 2, 2, 1, 4, 4, 8, ck::Sequence<8>, ck::Sequence<8>>
    // clang-format on
    >;

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/include/ck/library/tensor_operation_instance/gpu/transpose_3d.hpp
View file @
e9ecf8d1
...
...
@@ -15,8 +15,6 @@ namespace tensor_operation {
namespace
device
{
namespace
instance
{
using
namespace
ck
::
transpose_op
;
void
add_device_transpose_f16_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceElementwise3dImpl
<
F16
,
F16
,
NCDHW
,
3
>>>&
instances
);
...
...
library/src/tensor_operation_instance/gpu/transpose/CMakeLists.txt
0 → 100644
View file @
e9ecf8d1
# Declare the device_transpose_instance target with its single listed source file
# (add_instance_library is a project-defined helper, not a CMake built-in).
add_instance_library(device_transpose_instance
    device_transpose_instances_3d.cpp
)
library/src/tensor_operation_instance/gpu/transpose/device_transpose_instances_3d.cpp
0 → 100644
View file @
e9ecf8d1
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// NOTE(review): both functions below take a vector of
// DeviceElementwise3dImpl<T, T, NCDHW, 3>, whereas the instance lists in
// device_transpose_instance.hpp instantiate DeviceElementwise3dImpl with
// <ck::Tuple<T>, ck::Tuple<T>, 2, 2, 1, ...>. The signatures are kept as declared so
// existing callers still match; confirm the intended element type against
// transpose_3d.hpp and device_elementwise_3d_impl.hpp.

// Appends the half-precision transpose instances to `instances` when
// CK_ENABLE_FP16 is defined; otherwise leaves the vector untouched.
void add_device_transpose_f16_instances(
    std::vector<std::unique_ptr<DeviceElementwise3dImpl<F16, F16, NCDHW, 3>>>& instances)
{
#ifdef CK_ENABLE_FP16
    // device_transpose_f16_instances is a plain tuple alias, not an alias template,
    // so it is value-initialized without template arguments.
    add_device_operation_instances(instances, device_transpose_f16_instances{});
#else
    // Consume the parameter (presumably to avoid an unused-parameter warning) when
    // FP16 support is compiled out.
    ignore = instances;
#endif
}

// Appends the single-precision transpose instances to `instances` when
// CK_ENABLE_FP32 is defined; otherwise leaves the vector untouched.
void add_device_transpose_f32_instances(
    std::vector<std::unique_ptr<DeviceElementwise3dImpl<F32, F32, NCDHW, 3>>>& instances)
{
#ifdef CK_ENABLE_FP32
    // Same as above: the alias takes no template arguments.
    add_device_operation_instances(instances, device_transpose_f32_instances{});
#else
    // Consume the parameter (presumably to avoid an unused-parameter warning) when
    // FP32 support is compiled out.
    ignore = instances;
#endif
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment