convnd_fwd_common.hpp 11.2 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <algorithm>
#include <array>
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
Chao Liu's avatar
Chao Liu committed
20

Chao Liu's avatar
Chao Liu committed
21
22
23
24
25
26
27
/// Print the command-line usage text for the convolution examples to std::cout.
///
/// Declared `inline` because this function is defined in a header: without it,
/// including the header from more than one translation unit produces multiple
/// definitions of the same symbol.
inline void print_helper_msg()
{
    std::cout << "arg1: verification (0=no, 1=yes)\n"
              << "arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n"
              << "arg3: time kernel (0=no, 1=yes)\n"
              << "arg4: N spatial dimensions (default 2)\n"
              << "Following arguments (depending on number of spatial dims):\n"
              << " G, N, K, C, \n"
              << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
              << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
              << " <strides>, (ie Sy, Sx for 2D)\n"
              << " <dilations>, (ie Dy, Dx for 2D)\n"
              << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
              << " <right padding>, (ie RightPy, RightPx for 2D)\n"
              << std::endl; // trailing blank line + flush, kept from the original output
}

Chao Liu's avatar
clean  
Chao Liu committed
38
/// Parse a convolution problem description from a list of C-string arguments.
///
/// Starting at `argv[arg_idx]`, the arguments are consumed in this order:
/// G, N, K, C, then `num_dim_spatial` values each for the filter spatial
/// lengths, input spatial lengths, filter strides, filter dilations, left pads
/// and right pads (4 + 6 * num_dim_spatial tokens in total).
///
/// @param num_dim_spatial number of spatial dimensions of the problem
/// @param arg_idx         index of the first token to consume in `argv`
/// @param argv            argument vector; assumed to be main()'s argv (or an
///                        equally null-terminated array) -- TODO confirm for
///                        callers that build their own array
/// @return the ConvParam built from the parsed values
/// @throws std::runtime_error     if a null entry is reached before all tokens
///                                have been read (too few arguments supplied)
/// @throws std::invalid_argument / std::out_of_range propagated from std::stoi
///
/// Declared `inline` because it is defined in a header.
inline ck::utils::conv::ConvParam
parse_conv_params(int num_dim_spatial, int arg_idx, char* const argv[])
{
    // Read one integer token and advance. A null entry means the argument list
    // ended early; constructing a std::string from it (as std::stoi would do)
    // is undefined behaviour, so fail loudly instead.
    const auto next_value = [&]() -> ck::index_t {
        const char* const token = argv[arg_idx];
        if(token == nullptr)
        {
            throw std::runtime_error("parse_conv_params: missing argument at position " +
                                     std::to_string(arg_idx));
        }
        ++arg_idx;
        return std::stoi(token);
    };

    // Read one value per spatial dimension.
    const auto next_spatial_values = [&]() {
        std::vector<ck::index_t> values(num_dim_spatial);
        for(auto& value : values)
        {
            value = next_value();
        }
        return values;
    };

    // The statement order below defines the expected command-line order.
    const ck::index_t G = next_value();
    const ck::index_t N = next_value();
    const ck::index_t K = next_value();
    const ck::index_t C = next_value();

    std::vector<ck::index_t> filter_spatial_lengths = next_spatial_values();
    std::vector<ck::index_t> input_spatial_lengths  = next_spatial_values();
    std::vector<ck::index_t> conv_filter_strides    = next_spatial_values();
    std::vector<ck::index_t> conv_filter_dilations  = next_spatial_values();
    std::vector<ck::index_t> input_left_pads        = next_spatial_values();
    std::vector<ck::index_t> input_right_pads       = next_spatial_values();

    return ck::utils::conv::ConvParam{num_dim_spatial,
                                      G,
                                      N,
                                      K,
                                      C,
                                      filter_spatial_lengths,
                                      input_spatial_lengths,
                                      conv_filter_strides,
                                      conv_filter_dilations,
                                      input_left_pads,
                                      input_right_pads};
}

Chao Liu's avatar
Chao Liu committed
95
// FIXME: current implementation only support NCHW/NHWC layout
Chao Liu's avatar
Chao Liu committed
96
template <ck::index_t NDimSpatial,
Chao Liu's avatar
Chao Liu committed
97
98
99
          typename InLayout,
          typename WeiLayout,
          typename OutLayout,
Chao Liu's avatar
Chao Liu committed
100
101
102
103
104
105
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InElementOp,
          typename WeiElementOp,
          typename OutElementOp,
Chao Liu's avatar
Chao Liu committed
106
          typename DeviceConvNDFwdInstance>
Chao Liu's avatar
Chao Liu committed
107
108
109
int run_conv_fwd(bool do_verification,
                 int init_method,
                 bool time_kernel,
Chao Liu's avatar
clean  
Chao Liu committed
110
                 const ck::utils::conv::ConvParam& conv_param,
Chao Liu's avatar
Chao Liu committed
111
112
113
                 const InElementOp& in_element_op,
                 const WeiElementOp& wei_element_op,
                 const OutElementOp& out_element_op)
Chao Liu's avatar
Chao Liu committed
114
{
Chao Liu's avatar
Chao Liu committed
115
116
117
118
119
120
121
122
    const auto in_g_n_c_wis_desc =
        ck::utils::conv::make_input_host_tensor_descriptor_packed<InLayout>(conv_param);
    const auto wei_g_k_c_xs_desc =
        ck::utils::conv::make_weight_host_tensor_descriptor_packed<WeiLayout>(conv_param);
    const auto bias_g_n_k_wos_desc =
        ck::utils::conv::make_output_host_tensor_descriptor_packed<OutLayout>(conv_param);
    const auto out_g_n_k_wos_desc =
        ck::utils::conv::make_output_host_tensor_descriptor_packed<OutLayout>(conv_param);
Chao Liu's avatar
add G  
Chao Liu committed
123
124
125
126
127
128

    Tensor<InDataType> in(in_g_n_c_wis_desc);
    Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
    Tensor<OutDataType> bias(bias_g_n_k_wos_desc);
    Tensor<OutDataType> out_host(out_g_n_k_wos_desc);
    Tensor<OutDataType> out_device(out_g_n_k_wos_desc);
Chao Liu's avatar
Chao Liu committed
129

Chao Liu's avatar
Chao Liu committed
130
131
    std::cout << "in: " << in.mDesc << std::endl;
    std::cout << "wei: " << wei.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
132
    std::cout << "bias: " << bias.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
133
    std::cout << "out: " << out_host.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
134
135
136
137
138

    switch(init_method)
    {
    case 0: break;
    case 1:
Chao Liu's avatar
Chao Liu committed
139
140
        in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
        wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
Chao Liu's avatar
Chao Liu committed
141
        bias.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
Chao Liu's avatar
Chao Liu committed
142
143
        break;
    default:
Chao Liu's avatar
Chao Liu committed
144
145
        in.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
        wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
Chao Liu's avatar
Chao Liu committed
146
        bias.GenerateTensorValue(GeneratorTensor_3<OutDataType>{-0.5, 0.5});
Chao Liu's avatar
Chao Liu committed
147
148
    }

Chao Liu's avatar
Chao Liu committed
149
150
    DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpace());
    DeviceMem wei_device_buf(sizeof(WeiDataType) * wei.mDesc.GetElementSpace());
Chao Liu's avatar
Chao Liu committed
151
    DeviceMem bias_device_buf(sizeof(OutDataType) * bias.mDesc.GetElementSpace());
Chao Liu's avatar
Chao Liu committed
152
    DeviceMem out_device_buf(sizeof(OutDataType) * out_device.mDesc.GetElementSpace());
Chao Liu's avatar
Chao Liu committed
153

Chao Liu's avatar
Chao Liu committed
154
155
    in_device_buf.ToDevice(in.mData.data());
    wei_device_buf.ToDevice(wei.mData.data());
Chao Liu's avatar
Chao Liu committed
156
    bias_device_buf.ToDevice(bias.mData.data());
Chao Liu's avatar
Chao Liu committed
157

Chao Liu's avatar
add G  
Chao Liu committed
158
159
160
161
162
163
164
165
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_strides{};
    std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_strides{};
    std::array<ck::index_t, NDimSpatial + 3> d_g_n_k_wos_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> d_g_n_k_wos_strides{};
    std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_strides{};
Chao Liu's avatar
Chao Liu committed
166
167
168
169
170
171
172
    std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
    std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
    std::array<ck::index_t, NDimSpatial> input_left_pads{};
    std::array<ck::index_t, NDimSpatial> input_right_pads{};

    auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };

Chao Liu's avatar
add G  
Chao Liu committed
173
174
175
176
177
178
179
180
    copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
    copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
    copy(wei_g_k_c_xs_desc.GetLengths(), b_g_k_c_xs_lengths);
    copy(wei_g_k_c_xs_desc.GetStrides(), b_g_k_c_xs_strides);
    copy(bias_g_n_k_wos_desc.GetLengths(), d_g_n_k_wos_lengths);
    copy(bias_g_n_k_wos_desc.GetStrides(), d_g_n_k_wos_strides);
    copy(out_g_n_k_wos_desc.GetLengths(), e_g_n_k_wos_lengths);
    copy(out_g_n_k_wos_desc.GetStrides(), e_g_n_k_wos_strides);
Chao Liu's avatar
Chao Liu committed
181
182
183
184
185
    copy(conv_param.conv_filter_strides_, conv_filter_strides);
    copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
    copy(conv_param.input_left_pads_, input_left_pads);
    copy(conv_param.input_right_pads_, input_right_pads);

Chao Liu's avatar
Chao Liu committed
186
187
188
    // do GEMM
    auto conv     = DeviceConvNDFwdInstance{};
    auto invoker  = conv.MakeInvoker();
Chao Liu's avatar
Chao Liu committed
189
190
191
192
193
    auto argument = conv.MakeArgument(
        in_device_buf.GetDeviceBuffer(),
        wei_device_buf.GetDeviceBuffer(),
        std::array<const void*, 1>{bias_device_buf.GetDeviceBuffer()},
        out_device_buf.GetDeviceBuffer(),
Chao Liu's avatar
add G  
Chao Liu committed
194
195
196
197
198
199
200
201
        a_g_n_c_wis_lengths,
        a_g_n_c_wis_strides,
        b_g_k_c_xs_lengths,
        b_g_k_c_xs_strides,
        std::array<std::array<ck::index_t, NDimSpatial + 3>, 1>{{d_g_n_k_wos_lengths}},
        std::array<std::array<ck::index_t, NDimSpatial + 3>, 1>{{d_g_n_k_wos_strides}},
        e_g_n_k_wos_lengths,
        e_g_n_k_wos_strides,
Chao Liu's avatar
Chao Liu committed
202
203
204
205
206
207
208
        conv_filter_strides,
        conv_filter_dilations,
        input_left_pads,
        input_right_pads,
        in_element_op,
        wei_element_op,
        out_element_op);
Chao Liu's avatar
Chao Liu committed
209
210
211
212
213
214
215
216

    if(!conv.IsSupportedArgument(argument))
    {
        throw std::runtime_error(
            "wrong! device_conv with the specified compilation parameters does "
            "not support this Conv problem");
    }

Chao Liu's avatar
Chao Liu committed
217
    float avg_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});
Chao Liu's avatar
Chao Liu committed
218

Chao Liu's avatar
clean  
Chao Liu committed
219
220
    std::size_t flop      = conv_param.GetFlops();
    std::size_t num_btype = conv_param.GetByte<InDataType, WeiDataType, OutDataType>();
Chao Liu's avatar
Chao Liu committed
221

Chao Liu's avatar
Chao Liu committed
222
223
224
    float tflops     = static_cast<float>(flop) / 1.E9 / avg_time;
    float gb_per_sec = num_btype / 1.E6 / avg_time;
    std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
Chao Liu's avatar
Chao Liu committed
225
226
227
228
              << conv.GetTypeString() << std::endl;

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
229
230
        using PassThrough = ck::tensor_operation::element_wise::PassThrough;

Chao Liu's avatar
add G  
Chao Liu committed
231
        Tensor<OutDataType> c_host(out_g_n_k_wos_desc);
Chao Liu's avatar
Chao Liu committed
232

Chao Liu's avatar
Chao Liu committed
233
234
235
236
237
238
        auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                     InDataType,
                                                                     WeiDataType,
                                                                     OutDataType,
                                                                     InElementOp,
                                                                     WeiElementOp,
Chao Liu's avatar
Chao Liu committed
239
                                                                     PassThrough>();
Chao Liu's avatar
Chao Liu committed
240
241

        auto ref_invoker  = ref_conv.MakeInvoker();
Chao Liu's avatar
Chao Liu committed
242
243
        auto ref_argument = ref_conv.MakeArgument(in,
                                                  wei,
Chao Liu's avatar
Chao Liu committed
244
                                                  c_host,
Chao Liu's avatar
clean  
Chao Liu committed
245
246
247
248
                                                  conv_param.conv_filter_strides_,
                                                  conv_param.conv_filter_dilations_,
                                                  conv_param.input_left_pads_,
                                                  conv_param.input_right_pads_,
Chao Liu's avatar
Chao Liu committed
249
250
                                                  in_element_op,
                                                  wei_element_op,
Chao Liu's avatar
Chao Liu committed
251
                                                  PassThrough{});
Chao Liu's avatar
Chao Liu committed
252
253
254

        ref_invoker.Run(ref_argument);

Chao Liu's avatar
Chao Liu committed
255
256
257
        // TODO: implement elementwise operation for host
        out_host.ForEach(
            [&](auto&, auto idx) { out_element_op(out_host(idx), c_host(idx), bias(idx)); });
Chao Liu's avatar
Chao Liu committed
258

Chao Liu's avatar
Chao Liu committed
259
        out_device_buf.FromDevice(out_device.mData.data());
Chao Liu's avatar
Chao Liu committed
260

Chao Liu's avatar
Chao Liu committed
261
        return ck::utils::check_err(
Chao Liu's avatar
Chao Liu committed
262
                   out_device.mData, out_host.mData, "Error: incorrect results!", 1e-5f, 1e-4f)
Chao Liu's avatar
Chao Liu committed
263
264
265
266
267
268
                   ? 0
                   : 1;
    }

    return 0;
}