Add pooling instance

d5754119 · rocking · a014aa09 · d5754119 · d5754119 · d5754119
Commit d5754119 authored May 11, 2023 by rocking
10 changed files
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/CMakeLists.txt
+add_instance_library(device_pooling_fwd_instance
+    device_avg_pooling2d_fwd_nhwc_f16_instance.cpp
+    device_avg_pooling2d_fwd_nhwc_f32_instance.cpp
+    device_avg_pooling3d_fwd_ndhwc_f16_instance.cpp
+    device_avg_pooling3d_fwd_ndhwc_f32_instance.cpp
+    device_max_pooling2d_fwd_nhwc_f16_instance.cpp
+    device_max_pooling2d_fwd_nhwc_f32_instance.cpp
+    device_max_pooling3d_fwd_ndhwc_f16_instance.cpp
+    device_max_pooling3d_fwd_ndhwc_f32_instance.cpp
+)
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling2d_fwd_nhwc_f16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling2d_fwd_nhwc_f16_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind used by the registration function in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::AVG;
+
+// FP16 2D average-pooling forward instances.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling2d_fwd_nhwc_instances tuple instantiated
+// with InDataType = F16, OutDataType = F16, AccDataType = F32 and the
+// index-output flag set to false.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_avg_pooling2d_fwd_nhwc_f16_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<4, 2, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling2d_fwd_nhwc_instances<F16, F16, F32, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling2d_fwd_nhwc_f32_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling2d_fwd_nhwc_f32_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind used by the registration function in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::AVG;
+
+// FP32 2D average-pooling forward instances.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling2d_fwd_nhwc_instances tuple instantiated
+// with InDataType = F32, OutDataType = F32, AccDataType = F32 and the
+// index-output flag set to false.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_avg_pooling2d_fwd_nhwc_f32_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<4, 2, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling2d_fwd_nhwc_instances<F32, F32, F32, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling3d_fwd_ndhwc_f16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling3d_fwd_ndhwc_f16_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind used by the registration function in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::AVG;
+
+// FP16 3D average-pooling forward instances.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling3d_fwd_ndhwc_instances tuple instantiated
+// with InDataType = F16, OutDataType = F16, AccDataType = F32 and the
+// index-output flag set to false.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_avg_pooling3d_fwd_ndhwc_f16_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<5, 3, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling3d_fwd_ndhwc_instances<F16, F16, F32, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling3d_fwd_ndhwc_f32_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_avg_pooling3d_fwd_ndhwc_f32_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind used by the registration function in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::AVG;
+
+// FP32 3D average-pooling forward instances.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling3d_fwd_ndhwc_instances tuple instantiated
+// with InDataType = F32, OutDataType = F32, AccDataType = F32 and the
+// index-output flag set to false.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_avg_pooling3d_fwd_ndhwc_f32_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<5, 3, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling3d_fwd_ndhwc_instances<F32, F32, F32, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling2d_fwd_nhwc_f16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling2d_fwd_nhwc_f16_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind shared by both registration functions in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::MAX;
+
+// FP16 2D max-pooling forward instances, index-output flag set to false.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling2d_fwd_nhwc_instances tuple instantiated
+// with InDataType = F16, OutDataType = F16 and AccDataType = F16.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling2d_fwd_nhwc_f16_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<4, 2, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling2d_fwd_nhwc_instances<F16, F16, F16, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+// FP16 2D max-pooling forward instances, index-output flag set to true.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling2d_fwd_nhwc_instances tuple instantiated
+// with InDataType = F16, OutDataType = F16 and AccDataType = F16.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling2d_fwd_nhwc_index_f16_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<4, 2, ReduceOpId, true>>>& instances)
+{
+    using PoolInstances = device_pooling2d_fwd_nhwc_instances<F16, F16, F16, ReduceOpId, true>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling2d_fwd_nhwc_f32_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling2d_fwd_nhwc_f32_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind shared by both registration functions in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::MAX;
+
+// FP32 2D max-pooling forward instances, index-output flag set to false.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling2d_fwd_nhwc_instances tuple instantiated
+// with InDataType = F32, OutDataType = F32 and AccDataType = F32.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling2d_fwd_nhwc_f32_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<4, 2, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling2d_fwd_nhwc_instances<F32, F32, F32, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+// FP32 2D max-pooling forward instances, index-output flag set to true.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling2d_fwd_nhwc_instances tuple instantiated
+// with InDataType = F32, OutDataType = F32 and AccDataType = F32.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling2d_fwd_nhwc_index_f32_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<4, 2, ReduceOpId, true>>>& instances)
+{
+    using PoolInstances = device_pooling2d_fwd_nhwc_instances<F32, F32, F32, ReduceOpId, true>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling3d_fwd_ndhwc_f16_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling3d_fwd_ndhwc_f16_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind shared by both registration functions in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::MAX;
+
+// FP16 3D max-pooling forward instances, index-output flag set to false.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling3d_fwd_ndhwc_instances tuple instantiated
+// with InDataType = F16, OutDataType = F16 and AccDataType = F16.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling3d_fwd_ndhwc_f16_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<5, 3, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling3d_fwd_ndhwc_instances<F16, F16, F16, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+// FP16 3D max-pooling forward instances, index-output flag set to true.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling3d_fwd_ndhwc_instances tuple instantiated
+// with InDataType = F16, OutDataType = F16 and AccDataType = F16.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling3d_fwd_ndhwc_index_f16_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<5, 3, ReduceOpId, true>>>& instances)
+{
+    using PoolInstances = device_pooling3d_fwd_ndhwc_instances<F16, F16, F16, ReduceOpId, true>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling3d_fwd_ndhwc_f32_instance.cpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/device_max_pooling3d_fwd_ndhwc_f32_instance.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#include "pooling_fwd_instance_common.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+// Reduction kind shared by both registration functions in this file.
+static constexpr ck::ReduceTensorOp ReduceOpId = ck::ReduceTensorOp::MAX;
+
+// FP32 3D max-pooling forward instances, index-output flag set to false.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling3d_fwd_ndhwc_instances tuple instantiated
+// with InDataType = F32, OutDataType = F32 and AccDataType = F32.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling3d_fwd_ndhwc_f32_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<5, 3, ReduceOpId, false>>>& instances)
+{
+    using PoolInstances = device_pooling3d_fwd_ndhwc_instances<F32, F32, F32, ReduceOpId, false>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+// FP32 3D max-pooling forward instances, index-output flag set to true.
+//
+// Forwards `instances` to add_device_operation_instances together with a
+// default-constructed device_pooling3d_fwd_ndhwc_instances tuple instantiated
+// with InDataType = F32, OutDataType = F32 and AccDataType = F32.
+// NOTE(review): presumably one device op per tuple element is appended to
+// `instances` -- confirm in add_device_operation_instance.hpp.
+void add_device_max_pooling3d_fwd_ndhwc_index_f32_instances(
+    std::vector<std::unique_ptr<DevicePoolFwd<5, 3, ReduceOpId, true>>>& instances)
+{
+    using PoolInstances = device_pooling3d_fwd_ndhwc_instances<F32, F32, F32, ReduceOpId, true>;
+
+    add_device_operation_instances(instances, PoolInstances{});
+}
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck
--- a/library/src/tensor_operation_instance/gpu/pooling_fwd/pooling_fwd_instance_common.hpp
+++ b/library/src/tensor_operation_instance/gpu/pooling_fwd/pooling_fwd_instance_common.hpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+
+#pragma once
+
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp"
+#include "ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp"
+#include "ck/utility/data_type.hpp"
+
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
+
+namespace ck {
+namespace tensor_operation {
+namespace device {
+namespace instance {
+
+using F16 = ck::half_t;
+using F32 = float;
+
+// Tuple type listing the 2D pooling forward device-op configurations that the
+// instance translation units hand to add_device_operation_instances.
+//
+// Every template parameter is forwarded unchanged to each tuple entry:
+//   InDataType, OutDataType, AccDataType - first three type arguments of the device op
+//   ReduceOpId                           - reduction kind (the instance files pass AVG or MAX)
+//   OutputIndex                          - boolean flag forwarded to the device op
+//                                          NOTE(review): presumably selects whether indices
+//                                          are also written -- confirm in the device op header
+//
+// The three entries differ only in the fourth and sixth trailing integer
+// arguments (1, 2, 4); the remaining ones are fixed at 256, 256, 1 and 1.
+// NOTE(review): the meaning of the six trailing integers is defined by
+// DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C -- confirm in
+// device_pool2d_fwd_nhwc_nhwc.hpp before adding or changing configurations.
+template <typename InDataType,
+          typename OutDataType,
+          typename AccDataType,
+          ReduceTensorOp ReduceOpId,
+          bool OutputIndex>
+// clang-format off
+using device_pooling2d_fwd_nhwc_instances = std::tuple<
+    DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C<InDataType, OutDataType, AccDataType, ReduceOpId, OutputIndex, 256, 256, 1, 1, 1, 1>,
+    DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C<InDataType, OutDataType, AccDataType, ReduceOpId, OutputIndex, 256, 256, 1, 2, 1, 2>,
+    DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C<InDataType, OutDataType, AccDataType, ReduceOpId, OutputIndex, 256, 256, 1, 4, 1, 4>
+    >;
+// clang-format on
+
+// Tuple type listing the 3D pooling forward device-op configurations that the
+// instance translation units hand to add_device_operation_instances.
+//
+// Every template parameter is forwarded unchanged to each tuple entry:
+//   InDataType, OutDataType, AccDataType - first three type arguments of the device op
+//   ReduceOpId                           - reduction kind (the instance files pass AVG or MAX)
+//   OutputIndex                          - boolean flag forwarded to the device op
+//                                          NOTE(review): presumably selects whether indices
+//                                          are also written -- confirm in the device op header
+//
+// The three entries differ only in the fourth and sixth trailing integer
+// arguments (1, 2, 4); the remaining ones are fixed at 256, 256, 1 and 1.
+// NOTE(review): the meaning of the six trailing integers is defined by
+// DevicePool3dFwd_Input_N_Di_Hi_Wi_C_Output_N_Do_Ho_Wo_C -- confirm in
+// device_pool3d_fwd_ndhwc_ndhwc.hpp before adding or changing configurations.
+template <typename InDataType,
+          typename OutDataType,
+          typename AccDataType,
+          ReduceTensorOp ReduceOpId,
+          bool OutputIndex>
+// clang-format off
+using device_pooling3d_fwd_ndhwc_instances = std::tuple<
+    DevicePool3dFwd_Input_N_Di_Hi_Wi_C_Output_N_Do_Ho_Wo_C<InDataType, OutDataType, AccDataType, ReduceOpId, OutputIndex, 256, 256, 1, 1, 1, 1>,
+    DevicePool3dFwd_Input_N_Di_Hi_Wi_C_Output_N_Do_Ho_Wo_C<InDataType, OutDataType, AccDataType, ReduceOpId, OutputIndex, 256, 256, 1, 2, 1, 2>,
+    DevicePool3dFwd_Input_N_Di_Hi_Wi_C_Output_N_Do_Ho_Wo_C<InDataType, OutDataType, AccDataType, ReduceOpId, OutputIndex, 256, 256, 1, 4, 1, 4>
+    >;
+// clang-format on
+
+} // namespace instance
+} // namespace device
+} // namespace tensor_operation
+} // namespace ck