gaoqiong / composable_kernel_ROCM — Commits

Commit b924e330, authored Oct 03, 2024 by Jun Liu

    Merge branch 'amd-develop' into amd-master

Parents: 72c9f129, 9c0811f3

Changes: 153 files in this commit. This page shows 20 changed files, with 459 additions and 10 deletions (+459 −10).
Changed files shown on this page:

- library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkyxc_ngkhw_f32_instance.cpp (+48 −0)
- library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt (+3 −1)
- library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f8_instance.cpp (+20 −0)
- library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_int8_instance.cpp (+20 −0)
- library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp (+3 −1)
- library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt (+22 −7)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/CMakeLists.txt (+12 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_bf16_instance.cpp (+25 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_f16_instance.cpp (+24 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_f32_instance.cpp (+24 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_f8_instance.cpp (+24 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_i8_instance.cpp (+24 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_bf16_instance.cpp (+34 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_f16_instance.cpp (+32 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_f32_instance.cpp (+32 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_f8_instance.cpp (+32 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_i8_instance.cpp (+32 −0)
- library/src/tensor_operation_instance/gpu/pool2d_fwd/pool2d_fwd_instance_common.hpp (+43 −0)
- library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt (+4 −0)
- library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp (+1 −1)
library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkyxc_ngkhw_f32_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_merged_groups_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
void add_device_grouped_conv2d_fwd_xdl_merged_groups_ngchw_gkyxc_ngkhw_f32_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<2,
                                                                NGCHW,
                                                                GKYXC,
                                                                Empty_Tuple,
                                                                NGKHW,
                                                                F32,
                                                                F32,
                                                                Empty_Tuple,
                                                                F32,
                                                                PassThrough,
                                                                PassThrough,
                                                                PassThrough>>>& instances)
{
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_merged_groups_f32_instances<2,
                                                                NGCHW,
                                                                GKYXC,
                                                                Empty_Tuple,
                                                                NGKHW,
                                                                ConvFwdDefault>{});
    add_device_operation_instances(
        instances,
        device_grouped_conv_fwd_xdl_merged_groups_f32_instances<2,
                                                                NGCHW,
                                                                GKYXC,
                                                                Empty_Tuple,
                                                                NGKHW,
                                                                ConvFwd3x3>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
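Editor's note: the file above (and every other instance file in this commit) follows CK's instance-registration pattern: a std::tuple of tuning-parameterized device operations is expanded into a vector of base-class pointers by add_device_operation_instances. The following is a minimal, self-contained sketch of that pattern using hypothetical stand-in types (BaseOp, TunedOp); it is not the real CK base class or helper.

// Minimal illustration of the instance-registration pattern (stand-in types,
// not CK's DeviceGroupedConvFwdMultipleABD / add_device_operation_instances).
#include <iostream>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

struct BaseOp // stand-in for a CK device-operation base class
{
    virtual ~BaseOp()                         = default;
    virtual std::string GetTypeString() const = 0;
};

template <int BlockSize> // stand-in for a tuning-parameterized device op
struct TunedOp : BaseOp
{
    std::string GetTypeString() const override
    {
        return "TunedOp<BlockSize=" + std::to_string(BlockSize) + ">";
    }
};

// Analogue of add_device_operation_instances: append one heap-allocated
// instance per element type of the tuple.
template <typename... Ops>
void add_device_operation_instances(std::vector<std::unique_ptr<BaseOp>>& instances,
                                    std::tuple<Ops...>)
{
    (instances.push_back(std::make_unique<Ops>()), ...);
}

int main()
{
    std::vector<std::unique_ptr<BaseOp>> instances;
    add_device_operation_instances(instances,
                                   std::tuple<TunedOp<64>, TunedOp<128>, TunedOp<256>>{});
    for(const auto& op : instances)
        std::cout << op->GetTypeString() << '\n'; // a real client would pick the fastest supported one
}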
library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt

 set(DEVICE_MAXPOOL_BWD_INSTANCES)
 list(APPEND DEVICE_MAXPOOL_BWD_INSTANCES device_max_pool_bwd_f16_instance.cpp
             device_max_pool_bwd_bf16_instance.cpp
-            device_max_pool_bwd_f32_instance.cpp)
+            device_max_pool_bwd_f32_instance.cpp
+            device_max_pool_bwd_f8_instance.cpp
+            device_max_pool_bwd_int8_instance.cpp)

 add_instance_library(device_max_pool_bwd_instance ${DEVICE_MAXPOOL_BWD_INSTANCES})
library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f8_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "max_pool_bwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_maxpool_bwd_f8_instances(
    std::vector<std::unique_ptr<DeviceMaxPoolBwd<F8, I32, F8>>>& instances)
{
    add_device_operation_instances(instances, device_maxpool_bwd_instances<F8, I32, F8>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_int8_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "max_pool_bwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void add_device_maxpool_bwd_int8_instances(
    std::vector<std::unique_ptr<DeviceMaxPoolBwd<I8, I32, I8>>>& instances)
{
    add_device_operation_instances(instances, device_maxpool_bwd_instances<I8, I32, I8>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once
 ...
@@ -17,6 +17,8 @@ namespace instance {
 using I32  = int32_t;
 using F16  = ck::half_t;
 using BF16 = ck::bhalf_t;
+using I8   = int8_t;
+using F8   = ck::f8_t;
 using F32  = float;

 template <typename DOutDataType, typename IndexDataType, typename DInDataType>
 ...
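Editor's note: the new f8/int8 files above specialize DeviceMaxPoolBwd<DOutDataType, IndexDataType, DInDataType>. As background on why the index type (I32 here) appears in that signature, here is a stand-alone CPU sketch of the max-pool-with-indices idea: the forward pass remembers the flat position of each window's maximum, and the backward pass scatters the output gradient to those positions. The 1D sizes and values are arbitrary illustration data, not CK code.

// Stand-alone illustration of max-pool backward via saved argmax indices
// (conceptual only; the CK DeviceMaxPoolBwd instances run this on the GPU).
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // Tiny 1D example: window = 2, stride = 2.
    std::vector<float> in = {1.f, 5.f, 3.f, 2.f, 7.f, 4.f};
    const int window = 2, stride = 2;
    const int out_len = static_cast<int>(in.size()) / stride;

    std::vector<float>   out(out_len);
    std::vector<int32_t> idx(out_len); // matches the I32 IndexDataType above

    // Forward: keep each window's max and remember where it came from.
    for(int o = 0; o < out_len; ++o)
    {
        int best = o * stride;
        for(int k = 1; k < window; ++k)
            if(in[o * stride + k] > in[best])
                best = o * stride + k;
        out[o] = in[best];
        idx[o] = best;
    }

    // Backward: scatter dout into din at the saved indices; everything else stays 0.
    std::vector<float> dout = {1.f, 1.f, 1.f};
    std::vector<float> din(in.size(), 0.f);
    for(int o = 0; o < out_len; ++o)
        din[idx[o]] += dout[o];

    for(float v : din)
        std::cout << v << ' '; // prints: 0 1 1 0 1 0
    std::cout << '\n';
}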
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt

 ...
@@ -27,23 +27,38 @@ rocm_install(FILES ${MHA_HEADERS} DESTINATION include/ck_tile/ops)
 # headers for building lib
 file(COPY ${MHA_HEADERS} DESTINATION ${FMHA_CPP_FOLDER})

+# Delete the blob file if it exists to avoid append of old content.
+if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
+    file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt)
+endif()
+
+set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
+
 # generate a list of kernels, but not actually emit files at config stage
+# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
+# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
 execute_process(
-  COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
+  COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
   --list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
+  --api ${FMHA_KNOWN_APIS}
+  --receipt 3
   RESULT_VARIABLE ret
 )
 if(ret AND NOT ret EQUAL 0)
     message(FATAL_ERROR "CK Tile MHA FAILED to genrate a list of kernels via Python.")
 else()
-    file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_FWD_GEN_BLOBS)
+    file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_GEN_BLOBS)
 endif()

-# actually generate the cpp files
+# actually generate the kernel content now
+# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
+# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
 add_custom_command(
-  OUTPUT ${FMHA_FWD_GEN_BLOBS}
-  COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
+  OUTPUT ${FMHA_GEN_BLOBS}
+  COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
   --output_dir ${FMHA_CPP_FOLDER}
+  --api ${FMHA_KNOWN_APIS}
+  --receipt 3
   COMMENT "Generating mha kernel (cpp) files now ..."
   VERBATIM
 )
 ...
@@ -52,12 +67,12 @@ add_custom_command(
 # have filename. Since, it was cauing the cmake
 # to throw "File name too long"
 set(device_files)
-foreach(filepath IN LISTS FMHA_FWD_GEN_BLOBS)
+foreach(filepath IN LISTS FMHA_GEN_BLOBS)
     get_filename_component(filename ${filepath} NAME)
     # Append the filename to the device_files list
     list(APPEND device_files ${filename})
 endforeach()

-add_custom_target(generate_cpp_files DEPENDS ${FMHA_FWD_GEN_BLOBS})
+add_custom_target(generate_cpp_files DEPENDS ${FMHA_GEN_BLOBS})

 add_instance_library(device_mha_instance ${device_files})
 ...
library/src/tensor_operation_instance/gpu/pool2d_fwd/CMakeLists.txt (new file, mode 100644)

set(DEVICE_POOL2D_FWD_INSTANCES)
list(APPEND DEVICE_POOL2D_FWD_INSTANCES device_avg_pool2d_fwd_nhwc_f16_instance.cpp
            device_max_pool2d_fwd_nhwc_f16_instance.cpp
            device_avg_pool2d_fwd_nhwc_f32_instance.cpp
            device_max_pool2d_fwd_nhwc_f32_instance.cpp
            device_avg_pool2d_fwd_nhwc_bf16_instance.cpp
            device_max_pool2d_fwd_nhwc_bf16_instance.cpp
            device_avg_pool2d_fwd_nhwc_i8_instance.cpp
            device_max_pool2d_fwd_nhwc_i8_instance.cpp
            device_avg_pool2d_fwd_nhwc_f8_instance.cpp
            device_max_pool2d_fwd_nhwc_f8_instance.cpp)

add_instance_library(device_pool2d_fwd_instance ${DEVICE_POOL2D_FWD_INSTANCES})
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_bf16_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;

void add_device_pool2d_fwd_nhwc_bf16_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, BF16, BF16, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<BF16, BF16, I32, F32, ReduceOpId, false>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_f16_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;

void add_device_pool2d_fwd_nhwc_f16_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F16, F16, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F16, F16, I32, F32, ReduceOpId, false>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_f32_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;

void add_device_pool2d_fwd_nhwc_f32_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F32, F32, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F32, F32, I32, F32, ReduceOpId, false>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_f8_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;

void add_device_pool2d_fwd_nhwc_f8_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F8, F8, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F8, F8, I32, F32, ReduceOpId, false>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_avg_pool2d_fwd_nhwc_i8_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;

void add_device_pool2d_fwd_nhwc_i8_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, I8, I8, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<I8, I8, I32, F32, ReduceOpId, false>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_bf16_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;

void add_device_pool2d_fwd_nhwc_bf16_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, BF16, BF16, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<BF16, BF16, I32, F32, ReduceOpId, false>{});
}

void add_device_pool2d_fwd_nhwc_index_bf16_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, BF16, BF16, I32, NHWC, NHWC, ReduceOpId, true>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<BF16, BF16, I32, F32, ReduceOpId, true>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_f16_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;

void add_device_pool2d_fwd_nhwc_f16_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F16, F16, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F16, F16, I32, F32, ReduceOpId, false>{});
}

void add_device_pool2d_fwd_nhwc_index_f16_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F16, F16, I32, NHWC, NHWC, ReduceOpId, true>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F16, F16, I32, F32, ReduceOpId, true>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_f32_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;

void add_device_pool2d_fwd_nhwc_f32_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F32, F32, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F32, F32, I32, F32, ReduceOpId, false>{});
}

void add_device_pool2d_fwd_nhwc_index_f32_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F32, F32, I32, NHWC, NHWC, ReduceOpId, true>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F32, F32, I32, F32, ReduceOpId, true>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_f8_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;

void add_device_pool2d_fwd_nhwc_f8_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F8, F8, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F8, F8, I32, F32, ReduceOpId, false>{});
}

void add_device_pool2d_fwd_nhwc_index_f8_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, F8, F8, I32, NHWC, NHWC, ReduceOpId, true>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<F8, F8, I32, F32, ReduceOpId, true>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/device_max_pool2d_fwd_nhwc_i8_instance.cpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "pool2d_fwd_instance_common.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

static constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;

void add_device_pool2d_fwd_nhwc_i8_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, I8, I8, I32, NHWC, NHWC, ReduceOpId, false>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<I8, I8, I32, F32, ReduceOpId, false>{});
}

void add_device_pool2d_fwd_nhwc_index_i8_instances(
    std::vector<std::unique_ptr<
        DevicePoolFwd<4, 2, I8, I8, I32, NHWC, NHWC, ReduceOpId, true>>>& instances)
{
    add_device_operation_instances(
        instances, device_pool2d_fwd_nhwc_instances<I8, I8, I32, F32, ReduceOpId, true>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/pool2d_fwd/pool2d_fwd_instance_common.hpp (new file, mode 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp"
#include "ck/utility/data_type.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

using I32  = int32_t;
using F32  = float;
using F16  = ck::half_t;
using BF16 = ck::bhalf_t;
using I8   = int8_t;
using F8   = ck::f8_t;

using NHWC = ck::tensor_layout::convolution::NHWC;

template <typename InDataType,
          typename OutDataType,
          typename IndexDataType,
          typename ComputeDataType,
          ReduceTensorOp ReduceOpId,
          bool OutputIndex>
using device_pool2d_fwd_nhwc_instances =
    // clang-format off
    std::tuple<
        DevicePool2dFwd_NHWC_NHWC<InDataType, OutDataType, IndexDataType, ComputeDataType, ReduceOpId, OutputIndex, 256, 256, 1, 1, 1, 1>,
        DevicePool2dFwd_NHWC_NHWC<InDataType, OutDataType, IndexDataType, ComputeDataType, ReduceOpId, OutputIndex, 256, 256, 1, 2, 1, 2>,
        DevicePool2dFwd_NHWC_NHWC<InDataType, OutDataType, IndexDataType, ComputeDataType, ReduceOpId, OutputIndex, 256, 256, 1, 4, 1, 4>
    // clang-format on
    >;

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
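Editor's note: every pool2d instance in this header, including the i8 and f8 ones, passes F32 as ComputeDataType, which (as I read the CK pooling kernels) is the type used for the intermediate window reduction before converting back to OutDataType. A stand-alone sketch of why a wider accumulator matters for average pooling over low-precision data follows; the values are illustrative only and this is not CK code.

// Stand-alone illustration of the role of a wider compute/accumulation type
// (here float standing in for ComputeDataType = F32) when averaging int8 data.
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // A 2x2 window of int8 values whose sum (310) does not fit in int8_t.
    std::vector<int8_t> window = {100, 90, 70, 50};

    int8_t narrow_acc = 0;
    float  wide_acc   = 0.f;
    for(int8_t v : window)
    {
        narrow_acc = static_cast<int8_t>(narrow_acc + v); // wraps; the partial sum is wrong
        wide_acc += static_cast<float>(v);                // safe in the wider compute type
    }

    std::cout << "int8 accumulation:  " << static_cast<int>(narrow_acc) / 4 << '\n'; // 13 (wrong)
    std::cout << "float accumulation: " << wide_acc / 4.f << '\n';                   // 77.5 (expected)
}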
library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt

set(DEVICE_POOL3D_FWD_INSTANCES)
list(APPEND DEVICE_POOL3D_FWD_INSTANCES device_avg_pool3d_fwd_ndhwc_f16_instance.cpp
            device_max_pool3d_fwd_ndhwc_f16_instance.cpp
            device_max_pool3d_fwd_ndhwc_f8_instance.cpp
            device_avg_pool3d_fwd_ndhwc_f8_instance.cpp
            device_max_pool3d_fwd_ndhwc_i8_instance.cpp
            device_avg_pool3d_fwd_ndhwc_i8_instance.cpp
            device_avg_pool3d_fwd_ndhwc_f32_instance.cpp
            device_max_pool3d_fwd_ndhwc_f32_instance.cpp
            device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp
...
library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

 #include "pool_fwd_instance_common.hpp"
 ...