convnd_fwd_common.hpp 10.4 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <algorithm>
#include <array>
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
Chao Liu's avatar
Chao Liu committed
17
#include "ck/library/utility/convolution_parameter.hpp"
Chao Liu's avatar
clean  
Chao Liu committed
18
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
Chao Liu's avatar
Chao Liu committed
19
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
Chao Liu's avatar
Chao Liu committed
20

Chao Liu's avatar
Chao Liu committed
21
22
23
24
25
26
/// Print the command-line usage text for the convolution forward examples to stdout.
///
/// The text lists the three leading control arguments (verification, initialization,
/// kernel timing) followed by the convolution problem description in the order in which
/// parse_conv_param() consumes it.
///
/// Declared `inline` because this definition lives in a header: without it, including
/// the header from more than one translation unit produces duplicate definitions.
inline void print_helper_msg()
{
    std::cout << "arg1: verification (0=no, 1=yes)\n"
              << "arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n"
              << "arg3: time kernel (0=no, 1=yes)\n"
              << "Following arguments (depending on number of spatial dims):\n"
              << " N spatial dimensions (1=Conv1d, 2=Conv2d, 3=Conv3d)\n"
              << " G, N, K, C, \n"
              << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
              << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
              << " <strides>, (ie Sy, Sx for 2D)\n"
              << " <dilations>, (ie Dy, Dx for 2D)\n"
              << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
              << " <right padding>, (ie RightPy, RightPx for 2D)\n"
              << std::endl;
}

Chao Liu's avatar
Chao Liu committed
38
/// Build a convolution problem description from command-line arguments.
///
/// Arguments are consumed from `argv` starting at index `arg_idx`, in this order:
///   G, N, K, C,
///   `num_dim_spatial` filter spatial lengths,
///   `num_dim_spatial` input spatial lengths,
///   `num_dim_spatial` convolution strides,
///   `num_dim_spatial` convolution dilations,
///   `num_dim_spatial` left paddings,
///   `num_dim_spatial` right paddings.
///
/// @param num_dim_spatial number of spatial dimensions of the convolution
/// @param arg_idx         index in `argv` of the first argument to consume (G)
/// @param argv            argument vector; the end-of-arguments check below relies on it
///                        being null-terminated, as the `argv` handed to `main` is
/// @return the ConvParam constructed from the parsed values
/// @throws std::invalid_argument if the null terminator of `argv` is reached before all
///         values were read, or (from std::stoi) if an argument is not a number
/// @throws std::out_of_range (from std::stoi) if an argument does not fit in an int
///
/// Declared `inline` because this definition lives in a header.
inline ck::utils::conv::ConvParam
parse_conv_param(int num_dim_spatial, int arg_idx, char* const argv[])
{
    // Consume one argument as an integer. Stopping at the null terminator avoids handing
    // a null pointer to std::stoi and walking past the end of argv when the user
    // supplied too few values.
    const auto next_value = [&]() -> ck::index_t {
        if(argv[arg_idx] == nullptr)
        {
            throw std::invalid_argument(
                "parse_conv_param: not enough arguments to describe the convolution problem");
        }

        return std::stoi(argv[arg_idx++]);
    };

    // Consume one value per spatial dimension.
    const auto next_spatial_values = [&]() {
        std::vector<ck::index_t> values(num_dim_spatial);

        for(auto& value : values)
        {
            value = next_value();
        }

        return values;
    };

    // The declaration order below is the parsing order; do not reorder.
    const ck::index_t G = next_value();
    const ck::index_t N = next_value();
    const ck::index_t K = next_value();
    const ck::index_t C = next_value();

    std::vector<ck::index_t> filter_spatial_lengths = next_spatial_values();
    std::vector<ck::index_t> input_spatial_lengths  = next_spatial_values();
    std::vector<ck::index_t> conv_filter_strides    = next_spatial_values();
    std::vector<ck::index_t> conv_filter_dilations  = next_spatial_values();
    std::vector<ck::index_t> input_left_pads        = next_spatial_values();
    std::vector<ck::index_t> input_right_pads       = next_spatial_values();

    return ck::utils::conv::ConvParam{num_dim_spatial,
                                      G,
                                      N,
                                      K,
                                      C,
                                      filter_spatial_lengths,
                                      input_spatial_lengths,
                                      conv_filter_strides,
                                      conv_filter_dilations,
                                      input_left_pads,
                                      input_right_pads};
}

Chao Liu's avatar
Chao Liu committed
95
96
97
98
99
100
101
template <ck::index_t NDimSpatial,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InElementOp,
          typename WeiElementOp,
          typename OutElementOp,
Chao Liu's avatar
Chao Liu committed
102
          typename DeviceConvNDFwdInstance>
Chao Liu's avatar
Chao Liu committed
103
104
105
106
107
108
109
110
111
112
int run_grouped_conv_fwd(bool do_verification,
                         int init_method,
                         bool time_kernel,
                         const ck::utils::conv::ConvParam& conv_param,
                         const HostTensorDescriptor& in_g_n_c_wis_desc,
                         const HostTensorDescriptor& wei_g_k_c_xs_desc,
                         const HostTensorDescriptor& out_g_n_k_wos_desc,
                         const InElementOp& in_element_op,
                         const WeiElementOp& wei_element_op,
                         const OutElementOp& out_element_op)
Chao Liu's avatar
Chao Liu committed
113
{
Chao Liu's avatar
add G  
Chao Liu committed
114
115
116
117
    Tensor<InDataType> in(in_g_n_c_wis_desc);
    Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
    Tensor<OutDataType> out_host(out_g_n_k_wos_desc);
    Tensor<OutDataType> out_device(out_g_n_k_wos_desc);
Chao Liu's avatar
Chao Liu committed
118

Chao Liu's avatar
Chao Liu committed
119
120
121
    std::cout << "in: " << in.mDesc << std::endl;
    std::cout << "wei: " << wei.mDesc << std::endl;
    std::cout << "out: " << out_host.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
122
123
124
125
126

    switch(init_method)
    {
    case 0: break;
    case 1:
Chao Liu's avatar
Chao Liu committed
127
128
        in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
        wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
Chao Liu's avatar
Chao Liu committed
129
130
        break;
    default:
Chao Liu's avatar
Chao Liu committed
131
132
        in.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
        wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
Chao Liu's avatar
Chao Liu committed
133
134
    }

Chao Liu's avatar
Chao Liu committed
135
136
137
    DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
    DeviceMem wei_device_buf(sizeof(WeiDataType) * wei.mDesc.GetElementSpaceSize());
    DeviceMem out_device_buf(sizeof(OutDataType) * out_device.mDesc.GetElementSpaceSize());
Chao Liu's avatar
Chao Liu committed
138

Chao Liu's avatar
Chao Liu committed
139
140
    in_device_buf.ToDevice(in.mData.data());
    wei_device_buf.ToDevice(wei.mData.data());
Chao Liu's avatar
Chao Liu committed
141

Chao Liu's avatar
add G  
Chao Liu committed
142
143
144
145
146
147
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_strides{};
    std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> b_g_k_c_xs_strides{};
    std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> e_g_n_k_wos_strides{};
Chao Liu's avatar
Chao Liu committed
148
149
150
151
152
153
154
    std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
    std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
    std::array<ck::index_t, NDimSpatial> input_left_pads{};
    std::array<ck::index_t, NDimSpatial> input_right_pads{};

    auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };

Chao Liu's avatar
add G  
Chao Liu committed
155
156
157
158
159
160
    copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
    copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
    copy(wei_g_k_c_xs_desc.GetLengths(), b_g_k_c_xs_lengths);
    copy(wei_g_k_c_xs_desc.GetStrides(), b_g_k_c_xs_strides);
    copy(out_g_n_k_wos_desc.GetLengths(), e_g_n_k_wos_lengths);
    copy(out_g_n_k_wos_desc.GetStrides(), e_g_n_k_wos_strides);
Chao Liu's avatar
Chao Liu committed
161
162
163
164
165
    copy(conv_param.conv_filter_strides_, conv_filter_strides);
    copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
    copy(conv_param.input_left_pads_, input_left_pads);
    copy(conv_param.input_right_pads_, input_right_pads);

Chao Liu's avatar
Chao Liu committed
166
    // do Conv
Chao Liu's avatar
Chao Liu committed
167
168
    auto conv     = DeviceConvNDFwdInstance{};
    auto invoker  = conv.MakeInvoker();
Chao Liu's avatar
Chao Liu committed
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
    auto argument = conv.MakeArgument(in_device_buf.GetDeviceBuffer(),
                                      wei_device_buf.GetDeviceBuffer(),
                                      std::array<const void*, 0>{},
                                      out_device_buf.GetDeviceBuffer(),
                                      a_g_n_c_wis_lengths,
                                      a_g_n_c_wis_strides,
                                      b_g_k_c_xs_lengths,
                                      b_g_k_c_xs_strides,
                                      std::array<std::array<ck::index_t, NDimSpatial + 3>, 0>{{}},
                                      std::array<std::array<ck::index_t, NDimSpatial + 3>, 0>{{}},
                                      e_g_n_k_wos_lengths,
                                      e_g_n_k_wos_strides,
                                      conv_filter_strides,
                                      conv_filter_dilations,
                                      input_left_pads,
                                      input_right_pads,
                                      in_element_op,
                                      wei_element_op,
                                      out_element_op);
Chao Liu's avatar
Chao Liu committed
188
189
190
191
192
193
194
195

    if(!conv.IsSupportedArgument(argument))
    {
        throw std::runtime_error(
            "wrong! device_conv with the specified compilation parameters does "
            "not support this Conv problem");
    }

Chao Liu's avatar
Chao Liu committed
196
    float avg_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});
Chao Liu's avatar
Chao Liu committed
197

Chao Liu's avatar
clean  
Chao Liu committed
198
199
    std::size_t flop      = conv_param.GetFlops();
    std::size_t num_btype = conv_param.GetByte<InDataType, WeiDataType, OutDataType>();
Chao Liu's avatar
Chao Liu committed
200

Chao Liu's avatar
Chao Liu committed
201
202
203
    float tflops     = static_cast<float>(flop) / 1.E9 / avg_time;
    float gb_per_sec = num_btype / 1.E6 / avg_time;
    std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
Chao Liu's avatar
Chao Liu committed
204
205
206
207
              << conv.GetTypeString() << std::endl;

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
208
209
210
211
212
213
        auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                     InDataType,
                                                                     WeiDataType,
                                                                     OutDataType,
                                                                     InElementOp,
                                                                     WeiElementOp,
Chao Liu's avatar
Chao Liu committed
214
                                                                     OutElementOp>();
Chao Liu's avatar
Chao Liu committed
215
216

        auto ref_invoker  = ref_conv.MakeInvoker();
Chao Liu's avatar
Chao Liu committed
217
218
        auto ref_argument = ref_conv.MakeArgument(in,
                                                  wei,
Chao Liu's avatar
Chao Liu committed
219
                                                  out_host,
Chao Liu's avatar
clean  
Chao Liu committed
220
221
222
223
                                                  conv_param.conv_filter_strides_,
                                                  conv_param.conv_filter_dilations_,
                                                  conv_param.input_left_pads_,
                                                  conv_param.input_right_pads_,
Chao Liu's avatar
Chao Liu committed
224
225
                                                  in_element_op,
                                                  wei_element_op,
Chao Liu's avatar
Chao Liu committed
226
                                                  out_element_op);
Chao Liu's avatar
Chao Liu committed
227
228
229

        ref_invoker.Run(ref_argument);

Chao Liu's avatar
Chao Liu committed
230
        out_device_buf.FromDevice(out_device.mData.data());
Chao Liu's avatar
Chao Liu committed
231

Chao Liu's avatar
Chao Liu committed
232
        return ck::utils::check_err(
Chao Liu's avatar
Chao Liu committed
233
                   out_device.mData, out_host.mData, "Error: incorrect results!", 1e-5f, 1e-4f)
Chao Liu's avatar
Chao Liu committed
234
235
236
237
238
239
                   ? 0
                   : 1;
    }

    return 0;
}