// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

#include "ck/ck.hpp"
#include "ck/utility/reduction_enums.hpp"
#include "ck/utility/reduction_functions_accumulate.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp"

template <typename InDataType,
          typename OutDataType,
rocking's avatar
rocking committed
24
          typename ComputeDataType,
Qianfeng's avatar
Qianfeng committed
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
          typename IndexDataType,
          typename InLayout,
          typename OutLayout,
          ck::ReduceTensorOp ReduceOpId,
          bool PropagateNan,
          bool OutputIndex>
bool pool_test(bool do_verification,
               int init_method,
               bool time_kernel,
               ck::index_t N,
               ck::index_t C,
               ck::index_t Y,
               ck::index_t X,
               ck::index_t Hi,
               ck::index_t Wi,
               ck::index_t window_stride_h,
               ck::index_t window_stride_w,
rocking's avatar
rocking committed
42
43
               ck::index_t window_dilation_h,
               ck::index_t window_dilation_w,
Qianfeng's avatar
Qianfeng committed
44
45
46
47
               ck::index_t in_left_pad_h,
               ck::index_t in_left_pad_w,
               ck::index_t in_right_pad_h,
               ck::index_t in_right_pad_w)
48
{
Qianfeng's avatar
Qianfeng committed
49
    using DevicePoolFwdInstance =
rocking's avatar
rocking committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
        ck::tensor_operation::device::DevicePool2dFwd_NHWC_NHWC<InDataType,
                                                                OutDataType,
                                                                IndexDataType,
                                                                ComputeDataType,
                                                                ReduceOpId,
                                                                OutputIndex,
                                                                64, // BlockSize
                                                                64, // ReduceMThreadClusterSize
                                                                1,  // ReduceKThreadClusterSize
                                                                4,  // ReduceMThreadSliceSize
                                                                1,  // ReduceKThreadSliceSize
                                                                1>; // InSrcOutDstVectorSize

    const ck::index_t Ys = (Y - 1) * window_dilation_h + 1;
    const ck::index_t Xs = (X - 1) * window_dilation_w + 1;
    const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Ys) / window_stride_h + 1;
    const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - Xs) / window_stride_w + 1;
67

rocking's avatar
rocking committed
68
69
    const std::vector<ck::index_t> window_spatial_lengths{Y, X};
    const std::vector<ck::index_t> window_strides{window_stride_h, window_stride_w};
rocking's avatar
rocking committed
70
    const std::vector<ck::index_t> window_dilations{window_dilation_h, window_dilation_w};
rocking's avatar
rocking committed
71
72
    const std::vector<ck::index_t> input_left_pads{in_left_pad_h, in_left_pad_w};
    const std::vector<ck::index_t> input_right_pads{in_right_pad_h, in_right_pad_w};
73
74
75
76

    // tensor layout
    auto f_host_tensor_descriptor =
        [](std::size_t N_, std::size_t C_, std::size_t H, std::size_t W, auto layout) {
77
78
            using namespace ck::literals;

79
80
            if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCHW>::value)
            {
81
                return HostTensorDescriptor({N_, C_, H, W}, {C_ * H * W, H * W, W, 1_uz});
82
83
84
85
            }
            else if constexpr(ck::is_same<decltype(layout),
                                          ck::tensor_layout::convolution::NHWC>::value)
            {
86
                return HostTensorDescriptor({N_, C_, H, W}, {C_ * H * W, 1_uz, W * C_, C_});
87
88
89
90
91
            }
        };

    Tensor<InDataType> in_n_c_hi_wi(f_host_tensor_descriptor(N, C, Hi, Wi, InLayout{}));
    Tensor<OutDataType> out_n_c_ho_wo_host(f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
92
93
    Tensor<IndexDataType> out_indices_n_c_ho_wo_host(
        f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
94
    Tensor<OutDataType> out_n_c_ho_wo_device(f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
95
96
    Tensor<IndexDataType> out_indices_n_c_ho_wo_device(
        f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
97
98
99
100
101
102
103

    std::cout << "in_n_c_hi_wi: " << in_n_c_hi_wi.mDesc << std::endl;
    std::cout << "out_n_c_ho_wo: " << out_n_c_ho_wo_host.mDesc << std::endl;

    switch(init_method)
    {
    case 0: break;
104
105
106
    case 1: in_n_c_hi_wi.GenerateTensorValue(GeneratorTensor_1<InDataType>{1}); break;
    case 2: in_n_c_hi_wi.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5}); break;
    default: in_n_c_hi_wi.GenerateTensorValue(GeneratorTensor_3<InDataType>{-5.0, 5.0});
107
108
    }

109
110
111
    DeviceMem in_device_buf(sizeof(InDataType) * in_n_c_hi_wi.mDesc.GetElementSpaceSize());
    DeviceMem out_device_buf(sizeof(OutDataType) *
                             out_n_c_ho_wo_device.mDesc.GetElementSpaceSize());
112
    DeviceMem out_indices_device_buf(sizeof(IndexDataType) *
113
                                     out_indices_n_c_ho_wo_device.mDesc.GetElementSpaceSize());
114
115
116

    in_device_buf.ToDevice(in_n_c_hi_wi.mData.data());

117
118
119
120
121
122
    auto pool         = DevicePoolFwdInstance{};
    auto invoker_ptr  = pool.MakeInvokerPointer();
    auto argument_ptr = pool.MakeArgumentPointer(
        static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
        static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
        static_cast<IndexDataType*>(out_indices_device_buf.GetDeviceBuffer()),
rocking's avatar
rocking committed
123
124
125
126
127
128
        {N, C, Hi, Wi},
        {Y, X},
        {N, C, Ho, Wo},
        {C * Hi * Wi, 1, Wi * C, C},
        {C * Ho * Wo, 1, Wo * C, C},
        {C * Ho * Wo, 1, Wo * C, C},
129
        window_strides,
rocking's avatar
rocking committed
130
        window_dilations,
131
        input_left_pads,
rocking's avatar
rocking committed
132
133
        input_right_pads,
        {2, 3});
134
135
136
137
138
139
140

    if(!pool.IsSupportedArgument(argument_ptr.get()))
    {
        throw std::runtime_error("wrong! device_op with the specified compilation parameters does "
                                 "not support this problem");
    }

JD's avatar
JD committed
141
    float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
142
143
144
145
146
147
148
149
150
151

    std::size_t flop = std::size_t(2) * N * C * Ho * Wo * Y * X;

    std::size_t num_btype =
        sizeof(InDataType) * (N * C * Hi * Wi) + sizeof(OutDataType) * (N * C * Ho * Wo);

    float tflops = static_cast<float>(flop) / 1.E9 / ave_time;

    float gb_per_sec = num_btype / 1.E6 / ave_time;

rocking's avatar
rocking committed
152
153
    std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
              << " GB / s " << std::endl;
154

Anthony Chang's avatar
Anthony Chang committed
155
    bool pass = true;
156

157
158
    if(do_verification)
    {
rocking's avatar
rocking committed
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
        using ReferencePoolingFwdInstance =
            ck::tensor_operation::host::ReferencePoolingFwd<4,
                                                            2,
                                                            InDataType,
                                                            OutDataType,
                                                            ComputeDataType,
                                                            IndexDataType,
                                                            ReduceOpId,
                                                            PropagateNan,
                                                            OutputIndex>;

        auto ref_pooling          = ReferencePoolingFwdInstance{};
        auto ref_pooling_invoker  = ref_pooling.MakeInvoker();
        auto ref_pooling_argument = ref_pooling.MakeArgument(in_n_c_hi_wi,
                                                             out_n_c_ho_wo_host,
                                                             out_indices_n_c_ho_wo_host,
                                                             window_spatial_lengths,
                                                             window_strides,
rocking's avatar
rocking committed
177
                                                             window_dilations,
rocking's avatar
rocking committed
178
179
180
181
                                                             input_left_pads,
                                                             input_right_pads);

        ref_pooling_invoker.Run(ref_pooling_argument);
182
183
184

        out_device_buf.FromDevice(out_n_c_ho_wo_device.mData.data());

185
        pass = pass && ck::utils::check_err(out_n_c_ho_wo_device, out_n_c_ho_wo_host);
186

187
        if constexpr(OutputIndex)
188
189
190
        {
            out_indices_device_buf.FromDevice(out_indices_n_c_ho_wo_device.mData.data());

191
192
            pass = pass &&
                   ck::utils::check_err(out_indices_n_c_ho_wo_device, out_indices_n_c_ho_wo_host);
193
194
        };
    }
195

Qianfeng's avatar
Qianfeng committed
196
197
    return (pass);
};