// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

6
#include <iostream>
7

Chao Liu's avatar
Chao Liu committed
8
9
10
11
#include "ck/ck.hpp"
#include "ck/utility/reduction_enums.hpp"
#include "ck/utility/reduction_functions_accumulate.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
12
#include "ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_impl.hpp"
Chao Liu's avatar
Chao Liu committed
13
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
14

Chao Liu's avatar
Chao Liu committed
15
#include "ck/library/utility/check_err.hpp"
16
17
18
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
19
#include "ck/library/utility/literals.hpp"
rocking's avatar
rocking committed
20
#include "ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp"
21
22
23

template <typename InDataType,
          typename OutDataType,
rocking's avatar
rocking committed
24
          typename ComputeDataType,
Qianfeng's avatar
Qianfeng committed
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
          typename IndexDataType,
          typename InLayout,
          typename OutLayout,
          ck::ReduceTensorOp ReduceOpId,
          bool PropagateNan,
          bool OutputIndex>
bool pool_test(bool do_verification,
               int init_method,
               bool time_kernel,
               ck::index_t N,
               ck::index_t C,
               ck::index_t Y,
               ck::index_t X,
               ck::index_t Hi,
               ck::index_t Wi,
               ck::index_t window_stride_h,
               ck::index_t window_stride_w,
rocking's avatar
rocking committed
42
43
               ck::index_t window_dilation_h,
               ck::index_t window_dilation_w,
Qianfeng's avatar
Qianfeng committed
44
45
46
47
               ck::index_t in_left_pad_h,
               ck::index_t in_left_pad_w,
               ck::index_t in_right_pad_h,
               ck::index_t in_right_pad_w)
48
{
Qianfeng's avatar
Qianfeng committed
49
    using DevicePoolFwdInstance =
50
51
52
53
54
55
56
57
58
59
60
61
62
        ck::tensor_operation::device::DevicePool2dFwdImpl<InDataType,      // InDataType
                                                          OutDataType,     // OutDataType
                                                          IndexDataType,   // IndexDataType
                                                          ComputeDataType, // ComputeDataType
                                                          ReduceOpId,
                                                          OutputIndex,
                                                          64,     // BlockSize
                                                          64,     // ReduceMThreadClusterSize
                                                          1,      // ReduceKThreadClusterSize
                                                          4,      // ReduceMThreadSliceSize
                                                          1,      // ReduceKThreadSliceSize
                                                          1,      // InSrcOutDstVectorSize
                                                          false>; // IsFastestDimReduced
63
64
65
66

    const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
    const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;

rocking's avatar
rocking committed
67
68
    const std::vector<ck::index_t> window_spatial_lengths{Y, X};
    const std::vector<ck::index_t> window_strides{window_stride_h, window_stride_w};
rocking's avatar
rocking committed
69
    const std::vector<ck::index_t> window_dilations{window_dilation_h, window_dilation_w};
rocking's avatar
rocking committed
70
71
    const std::vector<ck::index_t> input_left_pads{in_left_pad_h, in_left_pad_w};
    const std::vector<ck::index_t> input_right_pads{in_right_pad_h, in_right_pad_w};
72
73
74
75

    // tensor layout
    auto f_host_tensor_descriptor =
        [](std::size_t N_, std::size_t C_, std::size_t H, std::size_t W, auto layout) {
76
77
            using namespace ck::literals;

78
79
            if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCHW>::value)
            {
80
                return HostTensorDescriptor({N_, C_, H, W}, {C_ * H * W, H * W, W, 1_uz});
81
82
83
84
            }
            else if constexpr(ck::is_same<decltype(layout),
                                          ck::tensor_layout::convolution::NHWC>::value)
            {
85
                return HostTensorDescriptor({N_, C_, H, W}, {C_ * H * W, 1_uz, W * C_, C_});
86
87
88
89
90
            }
        };

    Tensor<InDataType> in_n_c_hi_wi(f_host_tensor_descriptor(N, C, Hi, Wi, InLayout{}));
    Tensor<OutDataType> out_n_c_ho_wo_host(f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
91
92
    Tensor<IndexDataType> out_indices_n_c_ho_wo_host(
        f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
93
    Tensor<OutDataType> out_n_c_ho_wo_device(f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
94
95
    Tensor<IndexDataType> out_indices_n_c_ho_wo_device(
        f_host_tensor_descriptor(N, C, Ho, Wo, OutLayout{}));
96
97
98
99
100
101
102

    std::cout << "in_n_c_hi_wi: " << in_n_c_hi_wi.mDesc << std::endl;
    std::cout << "out_n_c_ho_wo: " << out_n_c_ho_wo_host.mDesc << std::endl;

    switch(init_method)
    {
    case 0: break;
103
104
105
    case 1: in_n_c_hi_wi.GenerateTensorValue(GeneratorTensor_1<InDataType>{1}); break;
    case 2: in_n_c_hi_wi.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5}); break;
    default: in_n_c_hi_wi.GenerateTensorValue(GeneratorTensor_3<InDataType>{-5.0, 5.0});
106
107
    }

108
109
110
    DeviceMem in_device_buf(sizeof(InDataType) * in_n_c_hi_wi.mDesc.GetElementSpaceSize());
    DeviceMem out_device_buf(sizeof(OutDataType) *
                             out_n_c_ho_wo_device.mDesc.GetElementSpaceSize());
111
    DeviceMem out_indices_device_buf(sizeof(IndexDataType) *
112
                                     out_indices_n_c_ho_wo_device.mDesc.GetElementSpaceSize());
113
114
115

    in_device_buf.ToDevice(in_n_c_hi_wi.mData.data());

116
117
118
119
120
121
    auto pool         = DevicePoolFwdInstance{};
    auto invoker_ptr  = pool.MakeInvokerPointer();
    auto argument_ptr = pool.MakeArgumentPointer(
        static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
        static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
        static_cast<IndexDataType*>(out_indices_device_buf.GetDeviceBuffer()),
rocking's avatar
rocking committed
122
123
124
125
126
127
        {N, C, Hi, Wi},
        {Y, X},
        {N, C, Ho, Wo},
        {C * Hi * Wi, 1, Wi * C, C},
        {C * Ho * Wo, 1, Wo * C, C},
        {C * Ho * Wo, 1, Wo * C, C},
128
        window_strides,
rocking's avatar
rocking committed
129
        window_dilations,
130
        input_left_pads,
rocking's avatar
rocking committed
131
132
        input_right_pads,
        {2, 3});
133
134
135
136
137
138
139

    if(!pool.IsSupportedArgument(argument_ptr.get()))
    {
        throw std::runtime_error("wrong! device_op with the specified compilation parameters does "
                                 "not support this problem");
    }

JD's avatar
JD committed
140
    float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
141
142
143
144
145
146
147
148
149
150
151
152
153

    std::size_t flop = std::size_t(2) * N * C * Ho * Wo * Y * X;

    std::size_t num_btype =
        sizeof(InDataType) * (N * C * Hi * Wi) + sizeof(OutDataType) * (N * C * Ho * Wo);

    float tflops = static_cast<float>(flop) / 1.E9 / ave_time;

    float gb_per_sec = num_btype / 1.E6 / ave_time;

    std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s"
              << std::endl;

Anthony Chang's avatar
Anthony Chang committed
154
    bool pass = true;
155

156
157
    if(do_verification)
    {
rocking's avatar
rocking committed
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
        using ReferencePoolingFwdInstance =
            ck::tensor_operation::host::ReferencePoolingFwd<4,
                                                            2,
                                                            InDataType,
                                                            OutDataType,
                                                            ComputeDataType,
                                                            IndexDataType,
                                                            ReduceOpId,
                                                            PropagateNan,
                                                            OutputIndex>;

        auto ref_pooling          = ReferencePoolingFwdInstance{};
        auto ref_pooling_invoker  = ref_pooling.MakeInvoker();
        auto ref_pooling_argument = ref_pooling.MakeArgument(in_n_c_hi_wi,
                                                             out_n_c_ho_wo_host,
                                                             out_indices_n_c_ho_wo_host,
                                                             window_spatial_lengths,
                                                             window_strides,
rocking's avatar
rocking committed
176
                                                             window_dilations,
rocking's avatar
rocking committed
177
178
179
180
                                                             input_left_pads,
                                                             input_right_pads);

        ref_pooling_invoker.Run(ref_pooling_argument);
181
182
183

        out_device_buf.FromDevice(out_n_c_ho_wo_device.mData.data());

184
        pass = pass && ck::utils::check_err(out_n_c_ho_wo_device, out_n_c_ho_wo_host);
185

186
        if constexpr(OutputIndex)
187
188
189
        {
            out_indices_device_buf.FromDevice(out_indices_n_c_ho_wo_device.mData.data());

190
191
            pass = pass &&
                   ck::utils::check_err(out_indices_n_c_ho_wo_device, out_indices_n_c_ho_wo_host);
192
193
        };
    }
194

Qianfeng's avatar
Qianfeng committed
195
196
    return (pass);
};