convnd_fwd_common.hpp 14.4 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
Chao Liu's avatar
Chao Liu committed
17
#include "ck/library/utility/convolution_parameter.hpp"
Chao Liu's avatar
clean  
Chao Liu committed
18
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
Chao Liu's avatar
Chao Liu committed
19
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
Chao Liu's avatar
Chao Liu committed
20

Chao Liu's avatar
Chao Liu committed
21
22
23
24
25
26
27
// Print the command-line usage of the convolution example to standard output.
//
// Declared inline because this function is defined in a header: without it, including the
// header from more than one translation unit produces multiple-definition link errors.
inline void print_helper_msg()
{
    std::cout << "arg1: verification (0=no, 1=yes)\n"
              << "arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n"
              << "arg3: time kernel (0=no, 1=yes)\n"
              << "arg4: N spatial dimensions (default 2)\n"
              << "Following arguments (depending on number of spatial dims):\n"
              << " G, N, K, C, \n"
              << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
              << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
              << " <strides>, (ie Sy, Sx for 2D)\n"
              << " <dilations>, (ie Dy, Dx for 2D)\n"
              << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
              << " <right padding>, (ie RightPy, RightPx for 2D)\n"
              << std::endl;
}

Chao Liu's avatar
clean  
Chao Liu committed
38
// Build a convolution problem description from command-line arguments.
//
// Starting at argv[arg_idx], the arguments are consumed in this order:
//   G, N, K, C,
//   num_dim_spatial filter lengths, num_dim_spatial input lengths,
//   num_dim_spatial strides, num_dim_spatial dilations,
//   num_dim_spatial left pads, num_dim_spatial right pads.
//
// num_dim_spatial: number of spatial dimensions; must not be negative.
// arg_idx:         index of the first argument to consume.
// argv:            argument vector; the check for missing arguments relies on it being
//                  null-terminated, as the argv handed to main() is.
//
// Returns the ConvParam constructed from the parsed values.
//
// Throws std::invalid_argument if num_dim_spatial is negative, if the argument list ends
// before all values were read, or (from std::stoi) if an argument is not an integer;
// std::stoi may also throw std::out_of_range.
//
// Declared inline because the function is defined in a header.
inline ck::utils::conv::ConvParam
parse_conv_params(int num_dim_spatial, int arg_idx, char* const argv[])
{
    if(num_dim_spatial < 0)
    {
        throw std::invalid_argument(
            "parse_conv_params: number of spatial dimensions must not be negative");
    }

    // Consume one argument. The null check stops at the argv terminator instead of handing a
    // null pointer to std::stoi, which would be undefined behaviour.
    const auto next_value = [&argv, &arg_idx]() -> ck::index_t {
        if(argv[arg_idx] == nullptr)
        {
            throw std::invalid_argument(
                "parse_conv_params: too few command-line arguments for this convolution");
        }

        return std::stoi(argv[arg_idx++]);
    };

    // Consume one value per spatial dimension.
    const auto next_spatial_values = [&next_value, num_dim_spatial]() {
        std::vector<ck::index_t> values(static_cast<std::size_t>(num_dim_spatial));

        for(auto& value : values)
        {
            value = next_value();
        }

        return values;
    };

    // The statements below must stay in this order: it defines the command-line layout.
    const ck::index_t G = next_value();
    const ck::index_t N = next_value();
    const ck::index_t K = next_value();
    const ck::index_t C = next_value();

    const std::vector<ck::index_t> filter_spatial_lengths = next_spatial_values();
    const std::vector<ck::index_t> input_spatial_lengths  = next_spatial_values();
    const std::vector<ck::index_t> conv_filter_strides    = next_spatial_values();
    const std::vector<ck::index_t> conv_filter_dilations  = next_spatial_values();
    const std::vector<ck::index_t> input_left_pads        = next_spatial_values();
    const std::vector<ck::index_t> input_right_pads       = next_spatial_values();

    return ck::utils::conv::ConvParam{num_dim_spatial,
                                      G,
                                      N,
                                      K,
                                      C,
                                      filter_spatial_lengths,
                                      input_spatial_lengths,
                                      conv_filter_strides,
                                      conv_filter_dilations,
                                      input_left_pads,
                                      input_right_pads};
}

Chao Liu's avatar
Chao Liu committed
95
// FIXME: current implementation only supports NCHW/NHWC layout
Chao Liu's avatar
Chao Liu committed
96
template <ck::index_t NDimSpatial,
Chao Liu's avatar
Chao Liu committed
97
98
99
          typename InLayout,
          typename WeiLayout,
          typename OutLayout,
Chao Liu's avatar
Chao Liu committed
100
101
102
103
104
105
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InElementOp,
          typename WeiElementOp,
          typename OutElementOp,
Chao Liu's avatar
Chao Liu committed
106
          typename DeviceConvNDFwdInstance>
Chao Liu's avatar
Chao Liu committed
107
108
109
int run_conv_fwd(bool do_verification,
                 int init_method,
                 bool time_kernel,
Chao Liu's avatar
clean  
Chao Liu committed
110
                 const ck::utils::conv::ConvParam& conv_param,
Chao Liu's avatar
Chao Liu committed
111
112
113
                 const InElementOp& in_element_op,
                 const WeiElementOp& wei_element_op,
                 const OutElementOp& out_element_op)
Chao Liu's avatar
Chao Liu committed
114
{
Chao Liu's avatar
add G  
Chao Liu committed
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#if 0
    const auto in_g_n_c_wis_desc  = ck::utils::conv::get_input_host_tensor_descriptor<InLayout>(conv_param);
    const auto wei_g_k_c_xs_desc = ck::utils::conv::get_weight_host_tensor_descriptor<WeiLayout>(conv_param);
    const auto out_g_n_k_wos_desc = ck::utils::conv::get_output_host_tensor_descriptor<OutLayout>(conv_param);
#else
    const auto in_g_n_wis_c_desc = HostTensorDescriptor(
        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
                                 static_cast<std::size_t>(conv_param.N_),
                                 static_cast<std::size_t>(conv_param.input_spatial_lengths_[0]),
                                 static_cast<std::size_t>(conv_param.input_spatial_lengths_[1]),
                                 static_cast<std::size_t>(conv_param.C_)});

    const auto wei_g_k_xs_c_desc = HostTensorDescriptor(
        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
                                 static_cast<std::size_t>(conv_param.K_),
                                 static_cast<std::size_t>(conv_param.filter_spatial_lengths_[0]),
                                 static_cast<std::size_t>(conv_param.filter_spatial_lengths_[1]),
                                 static_cast<std::size_t>(conv_param.C_)});

    const auto bias_g_n_wos_k_desc = HostTensorDescriptor(
        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
                                 static_cast<std::size_t>(conv_param.N_),
Chao Liu's avatar
Chao Liu committed
137
138
139
                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[0]),
                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[1]),
                                 static_cast<std::size_t>(conv_param.K_)},
Chao Liu's avatar
add G  
Chao Liu committed
140
141
142
143
144
145
146
147
        std::vector<std::size_t>{0, 0, 0, 0, 1});

    const auto out_g_n_wos_k_desc = HostTensorDescriptor(
        std::vector<std::size_t>{static_cast<std::size_t>(conv_param.G_),
                                 static_cast<std::size_t>(conv_param.N_),
                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[0]),
                                 static_cast<std::size_t>(conv_param.output_spatial_lengths_[1]),
                                 static_cast<std::size_t>(conv_param.K_)});
Chao Liu's avatar
Chao Liu committed
148

Chao Liu's avatar
add G  
Chao Liu committed
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
    // tensor descriptor in NCHW/KXYC/NKHW dimensional order
    const auto in_g_n_c_wis_desc = transpose_host_tensor_descriptor_given_new2old(
        in_g_n_wis_c_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
    const auto wei_g_k_c_xs_desc = transpose_host_tensor_descriptor_given_new2old(
        wei_g_k_xs_c_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
    const auto bias_g_n_k_wos_desc = transpose_host_tensor_descriptor_given_new2old(
        bias_g_n_wos_k_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
    const auto out_g_n_k_wos_desc = transpose_host_tensor_descriptor_given_new2old(
        out_g_n_wos_k_desc, std::vector<ck::index_t>{0, 1, 4, 2, 3});
#endif

    Tensor<InDataType> in(in_g_n_c_wis_desc);
    Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
    Tensor<OutDataType> bias(bias_g_n_k_wos_desc);
    Tensor<OutDataType> out_host(out_g_n_k_wos_desc);
    Tensor<OutDataType> out_device(out_g_n_k_wos_desc);
Chao Liu's avatar
Chao Liu committed
165

Chao Liu's avatar
Chao Liu committed
166
167
    std::cout << "in: " << in.mDesc << std::endl;
    std::cout << "wei: " << wei.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
168
    std::cout << "bias: " << bias.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
169
    std::cout << "out: " << out_host.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
170
171
172
173
174

    switch(init_method)
    {
    case 0: break;
    case 1:
Chao Liu's avatar
Chao Liu committed
175
176
        in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
        wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
Chao Liu's avatar
Chao Liu committed
177
        bias.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
Chao Liu's avatar
Chao Liu committed
178
179
        break;
    default:
Chao Liu's avatar
Chao Liu committed
180
181
        in.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
        wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
Chao Liu's avatar
Chao Liu committed
182
        bias.GenerateTensorValue(GeneratorTensor_3<OutDataType>{-0.5, 0.5});
Chao Liu's avatar
Chao Liu committed
183
184
    }

Chao Liu's avatar
Chao Liu committed
185
186
    DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpace());
    DeviceMem wei_device_buf(sizeof(WeiDataType) * wei.mDesc.GetElementSpace());
Chao Liu's avatar
Chao Liu committed
187
    DeviceMem bias_device_buf(sizeof(OutDataType) * bias.mDesc.GetElementSpace());
Chao Liu's avatar
Chao Liu committed
188
    DeviceMem out_device_buf(sizeof(OutDataType) * out_device.mDesc.GetElementSpace());
Chao Liu's avatar
Chao Liu committed
189

Chao Liu's avatar
Chao Liu committed
190
191
    in_device_buf.ToDevice(in.mData.data());
    wei_device_buf.ToDevice(wei.mData.data());
Chao Liu's avatar
Chao Liu committed
192
    bias_device_buf.ToDevice(bias.mData.data());
Chao Liu's avatar
Chao Liu committed
193

Chao Liu's avatar
add G  
Chao Liu committed
194
195
196
197
198
199
200
201
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_strides{};
    std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_strides{};
    std::array<ck::index_t, NDimSpatial + 3> d_g_n_k_wos_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> d_g_n_k_wos_strides{};
    std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_strides{};
Chao Liu's avatar
Chao Liu committed
202
203
204
205
206
207
208
    std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
    std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
    std::array<ck::index_t, NDimSpatial> input_left_pads{};
    std::array<ck::index_t, NDimSpatial> input_right_pads{};

    auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };

Chao Liu's avatar
add G  
Chao Liu committed
209
210
211
212
213
214
215
216
    copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
    copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
    copy(wei_g_k_c_xs_desc.GetLengths(), b_g_k_c_xs_lengths);
    copy(wei_g_k_c_xs_desc.GetStrides(), b_g_k_c_xs_strides);
    copy(bias_g_n_k_wos_desc.GetLengths(), d_g_n_k_wos_lengths);
    copy(bias_g_n_k_wos_desc.GetStrides(), d_g_n_k_wos_strides);
    copy(out_g_n_k_wos_desc.GetLengths(), e_g_n_k_wos_lengths);
    copy(out_g_n_k_wos_desc.GetStrides(), e_g_n_k_wos_strides);
Chao Liu's avatar
Chao Liu committed
217
218
219
220
221
    copy(conv_param.conv_filter_strides_, conv_filter_strides);
    copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
    copy(conv_param.input_left_pads_, input_left_pads);
    copy(conv_param.input_right_pads_, input_right_pads);

Chao Liu's avatar
Chao Liu committed
222
223
224
    // do GEMM
    auto conv     = DeviceConvNDFwdInstance{};
    auto invoker  = conv.MakeInvoker();
Chao Liu's avatar
Chao Liu committed
225
226
227
228
229
    auto argument = conv.MakeArgument(
        in_device_buf.GetDeviceBuffer(),
        wei_device_buf.GetDeviceBuffer(),
        std::array<const void*, 1>{bias_device_buf.GetDeviceBuffer()},
        out_device_buf.GetDeviceBuffer(),
Chao Liu's avatar
add G  
Chao Liu committed
230
231
232
233
234
235
236
237
        a_g_n_c_wis_lengths,
        a_g_n_c_wis_strides,
        b_g_k_c_xs_lengths,
        b_g_k_c_xs_strides,
        std::array<std::array<ck::index_t, NDimSpatial + 3>, 1>{{d_g_n_k_wos_lengths}},
        std::array<std::array<ck::index_t, NDimSpatial + 3>, 1>{{d_g_n_k_wos_strides}},
        e_g_n_k_wos_lengths,
        e_g_n_k_wos_strides,
Chao Liu's avatar
Chao Liu committed
238
239
240
241
242
243
244
        conv_filter_strides,
        conv_filter_dilations,
        input_left_pads,
        input_right_pads,
        in_element_op,
        wei_element_op,
        out_element_op);
Chao Liu's avatar
Chao Liu committed
245
246
247
248
249
250
251
252

    if(!conv.IsSupportedArgument(argument))
    {
        throw std::runtime_error(
            "wrong! device_conv with the specified compilation parameters does "
            "not support this Conv problem");
    }

Chao Liu's avatar
Chao Liu committed
253
    float avg_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});
Chao Liu's avatar
Chao Liu committed
254

Chao Liu's avatar
clean  
Chao Liu committed
255
256
    std::size_t flop      = conv_param.GetFlops();
    std::size_t num_btype = conv_param.GetByte<InDataType, WeiDataType, OutDataType>();
Chao Liu's avatar
Chao Liu committed
257

Chao Liu's avatar
Chao Liu committed
258
259
260
    float tflops     = static_cast<float>(flop) / 1.E9 / avg_time;
    float gb_per_sec = num_btype / 1.E6 / avg_time;
    std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
Chao Liu's avatar
Chao Liu committed
261
262
263
264
              << conv.GetTypeString() << std::endl;

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
265
266
        using PassThrough = ck::tensor_operation::element_wise::PassThrough;

Chao Liu's avatar
add G  
Chao Liu committed
267
        Tensor<OutDataType> c_host(out_g_n_k_wos_desc);
Chao Liu's avatar
Chao Liu committed
268

Chao Liu's avatar
Chao Liu committed
269
270
271
272
273
274
275
276
277
        auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                     InLayout,
                                                                     WeiLayout,
                                                                     OutLayout,
                                                                     InDataType,
                                                                     WeiDataType,
                                                                     OutDataType,
                                                                     InElementOp,
                                                                     WeiElementOp,
Chao Liu's avatar
Chao Liu committed
278
                                                                     PassThrough>();
Chao Liu's avatar
Chao Liu committed
279
280

        auto ref_invoker  = ref_conv.MakeInvoker();
Chao Liu's avatar
Chao Liu committed
281
282
        auto ref_argument = ref_conv.MakeArgument(in,
                                                  wei,
Chao Liu's avatar
Chao Liu committed
283
                                                  c_host,
Chao Liu's avatar
clean  
Chao Liu committed
284
285
286
287
                                                  conv_param.conv_filter_strides_,
                                                  conv_param.conv_filter_dilations_,
                                                  conv_param.input_left_pads_,
                                                  conv_param.input_right_pads_,
Chao Liu's avatar
Chao Liu committed
288
289
                                                  in_element_op,
                                                  wei_element_op,
Chao Liu's avatar
Chao Liu committed
290
                                                  PassThrough{});
Chao Liu's avatar
Chao Liu committed
291
292
293

        ref_invoker.Run(ref_argument);

Chao Liu's avatar
add G  
Chao Liu committed
294
        for(int g = 0; g < out_host.mDesc.GetLengths()[0]; g++)
Chao Liu's avatar
Chao Liu committed
295
        {
Chao Liu's avatar
add G  
Chao Liu committed
296
            for(int n = 0; n < out_host.mDesc.GetLengths()[1]; n++)
Chao Liu's avatar
Chao Liu committed
297
            {
Chao Liu's avatar
add G  
Chao Liu committed
298
                for(int k = 0; k < out_host.mDesc.GetLengths()[2]; k++)
Chao Liu's avatar
Chao Liu committed
299
                {
Chao Liu's avatar
add G  
Chao Liu committed
300
                    for(int ho = 0; ho < out_host.mDesc.GetLengths()[3]; ho++)
Chao Liu's avatar
Chao Liu committed
301
                    {
Chao Liu's avatar
add G  
Chao Liu committed
302
303
304
305
306
307
                        for(int wo = 0; wo < out_host.mDesc.GetLengths()[4]; wo++)
                        {
                            out_element_op(out_host(g, n, k, ho, wo),
                                           c_host(g, n, k, ho, wo),
                                           bias(g, n, k, ho, wo));
                        }
Chao Liu's avatar
Chao Liu committed
308
309
310
311
312
                    }
                }
            }
        }

Chao Liu's avatar
Chao Liu committed
313
        out_device_buf.FromDevice(out_device.mData.data());
Chao Liu's avatar
Chao Liu committed
314

Chao Liu's avatar
Chao Liu committed
315
        return ck::utils::check_err(
Chao Liu's avatar
Chao Liu committed
316
                   out_device.mData, out_host.mData, "Error: incorrect results!", 1e-5f, 1e-4f)
Chao Liu's avatar
Chao Liu committed
317
318
319
320
321
322
                   ? 0
                   : 1;
    }

    return 0;
}