// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"

/// @brief Prints the command-line usage text for the convolution example to
///        standard output.
///
/// The text lists the four leading arguments (verification, initialization,
/// timing, number of spatial dimensions) followed by the convolution problem
/// arguments in the order they are expected on the command line.
///
/// Declared `inline` because this definition lives in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline void print_helper_msg()
{
    std::cout << "arg1: verification (0=no, 1=yes)\n"
              << "arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n"
              << "arg3: time kernel (0=no, 1=yes)\n"
              << "arg4: N spatial dimensions (default 2)\n"
              << "Following arguments (depending on number of spatial dims):\n"
              << " N, K, C, \n"
              << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
              << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
              << " <strides>, (ie Sy, Sx for 2D)\n"
              << " <dilations>, (ie Dy, Dx for 2D)\n"
              << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
              << " <right padding>, (ie RightPy, RightPx for 2D)\n"
              << std::endl; // trailing blank line + flush so the help shows up immediately
}

/// @brief Parses a convolution problem description from command-line arguments.
///
/// Starting at `argv[arg_idx]`, the arguments are consumed in this order:
/// `N`, `K`, `C`, then `num_dim_spatial` values each for the filter spatial
/// lengths, input spatial lengths, strides, dilations, left pads and right
/// pads (3 + 6 * num_dim_spatial integers in total).
///
/// @param num_dim_spatial Number of spatial dimensions; must not be negative.
/// @param arg_idx         Index of the first argument to consume.
/// @param argv            Argument vector. It is expected to be terminated by
///                        a null pointer (as `main`'s `argv` is), which is how
///                        a too-short argument list is detected.
/// @return A `ConvParams` built from the parsed values.
/// @throws std::invalid_argument if `num_dim_spatial` is negative, if the
///         argument list ends before all values were read, or (from
///         `std::stoi`) if an argument is not an integer.
/// @throws std::out_of_range (from `std::stoi`) if a value does not fit `int`.
///
/// Declared `inline` because this definition lives in a header.
inline ck::tensor_operation::device::ConvParams
parse_conv_params(int num_dim_spatial, int arg_idx, char* const argv[])
{
    if(num_dim_spatial < 0)
    {
        throw std::invalid_argument(
            "parse_conv_params: number of spatial dimensions must not be negative");
    }

    // Consumes the next argument as an integer. Hitting the null terminator
    // means the caller supplied too few values; report it instead of handing
    // a null pointer to std::stoi (undefined behaviour).
    const auto next_int = [&]() -> ck::index_t {
        if(argv[arg_idx] == nullptr)
        {
            throw std::invalid_argument(
                "parse_conv_params: too few convolution arguments were provided");
        }

        return std::stoi(argv[arg_idx++]);
    };

    // Consumes one value per spatial dimension.
    const auto next_spatial = [&]() {
        std::vector<ck::index_t> values(num_dim_spatial);

        for(auto& value : values)
        {
            value = next_int();
        }

        return values;
    };

    // Each read is a separate statement so the arguments are consumed
    // strictly in the documented order.
    const ck::index_t N = next_int();
    const ck::index_t K = next_int();
    const ck::index_t C = next_int();

    const std::vector<ck::index_t> filter_spatial_lengths = next_spatial();
    const std::vector<ck::index_t> input_spatial_lengths  = next_spatial();
    const std::vector<ck::index_t> conv_filter_strides    = next_spatial();
    const std::vector<ck::index_t> conv_filter_dilations  = next_spatial();
    const std::vector<ck::index_t> input_left_pads        = next_spatial();
    const std::vector<ck::index_t> input_right_pads       = next_spatial();

    return ck::tensor_operation::device::ConvParams{num_dim_spatial,
                                                    N,
                                                    K,
                                                    C,
                                                    filter_spatial_lengths,
                                                    input_spatial_lengths,
                                                    conv_filter_strides,
                                                    conv_filter_dilations,
                                                    input_left_pads,
                                                    input_right_pads};
}

// FIXME: the current implementation only supports NCHW/NHWC layouts
template <ck::index_t NDimSpatial,
Chao Liu's avatar
Chao Liu committed
96
97
98
          typename InLayout,
          typename WeiLayout,
          typename OutLayout,
Chao Liu's avatar
Chao Liu committed
99
100
101
102
103
104
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InElementOp,
          typename WeiElementOp,
          typename OutElementOp,
Chao Liu's avatar
Chao Liu committed
105
          typename DeviceConvNDFwdInstance>
Chao Liu's avatar
Chao Liu committed
106
107
108
109
110
111
112
int run_conv_fwd(bool do_verification,
                 int init_method,
                 bool time_kernel,
                 const ck::tensor_operation::device::ConvParams& params,
                 const InElementOp& in_element_op,
                 const WeiElementOp& wei_element_op,
                 const OutElementOp& out_element_op)
Chao Liu's avatar
Chao Liu committed
113
{
Chao Liu's avatar
clean  
Chao Liu committed
114
115
116
    const auto in_desc  = ck::utils::conv::get_input_host_tensor_descriptor<InLayout>(conv_param);
    const auto wei_desc = ck::utils::conv::get_weight_host_tensor_descriptor<WeiLayout>(conv_param);
    const auto out_desc = ck::utils::conv::get_output_host_tensor_descriptor<OutLayout>(conv_param);
Chao Liu's avatar
Chao Liu committed
117
118
119
120
121

    Tensor<InDataType> in(in_desc);
    Tensor<WeiDataType> wei(wei_desc);
    Tensor<OutDataType> out_host(out_desc);
    Tensor<OutDataType> out_device(out_desc);
Chao Liu's avatar
Chao Liu committed
122

Chao Liu's avatar
Chao Liu committed
123
124
125
    std::cout << "in: " << in.mDesc << std::endl;
    std::cout << "wei: " << wei.mDesc << std::endl;
    std::cout << "out: " << out_host.mDesc << std::endl;
Chao Liu's avatar
Chao Liu committed
126
127
128
129
130

    switch(init_method)
    {
    case 0: break;
    case 1:
Chao Liu's avatar
Chao Liu committed
131
132
        in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
        wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
Chao Liu's avatar
Chao Liu committed
133
134
        break;
    default:
Chao Liu's avatar
Chao Liu committed
135
136
        in.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
        wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
Chao Liu's avatar
Chao Liu committed
137
138
    }

Chao Liu's avatar
Chao Liu committed
139
140
141
    DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpace());
    DeviceMem wei_device_buf(sizeof(WeiDataType) * wei.mDesc.GetElementSpace());
    DeviceMem out_device_buf(sizeof(OutDataType) * out_device.mDesc.GetElementSpace());
Chao Liu's avatar
Chao Liu committed
142

Chao Liu's avatar
Chao Liu committed
143
144
    in_device_buf.ToDevice(in.mData.data());
    wei_device_buf.ToDevice(wei.mData.data());
Chao Liu's avatar
Chao Liu committed
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161

    // do GEMM
    auto conv     = DeviceConvNDFwdInstance{};
    auto invoker  = conv.MakeInvoker();
    auto argument = conv.MakeArgument(static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
                                      static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
                                      static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
                                      params.N_,
                                      params.K_,
                                      params.C_,
                                      params.input_spatial_lengths_,
                                      params.filter_spatial_lengths_,
                                      params.GetOutputSpatialLengths(),
                                      params.conv_filter_strides_,
                                      params.conv_filter_dilations_,
                                      params.input_left_pads_,
                                      params.input_right_pads_,
Chao Liu's avatar
Chao Liu committed
162
163
164
                                      in_element_op,
                                      wei_element_op,
                                      out_element_op);
Chao Liu's avatar
Chao Liu committed
165
166
167
168
169
170
171
172

    if(!conv.IsSupportedArgument(argument))
    {
        throw std::runtime_error(
            "wrong! device_conv with the specified compilation parameters does "
            "not support this Conv problem");
    }

Chao Liu's avatar
Chao Liu committed
173
    float avg_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});
Chao Liu's avatar
Chao Liu committed
174
175
176
177

    std::size_t flop      = params.GetFlops();
    std::size_t num_btype = params.GetByte<InDataType, WeiDataType, OutDataType>();

Chao Liu's avatar
Chao Liu committed
178
179
180
    float tflops     = static_cast<float>(flop) / 1.E9 / avg_time;
    float gb_per_sec = num_btype / 1.E6 / avg_time;
    std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
Chao Liu's avatar
Chao Liu committed
181
182
183
184
              << conv.GetTypeString() << std::endl;

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
185
186
187
188
189
190
191
192
193
194
        auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                     InLayout,
                                                                     WeiLayout,
                                                                     OutLayout,
                                                                     InDataType,
                                                                     WeiDataType,
                                                                     OutDataType,
                                                                     InElementOp,
                                                                     WeiElementOp,
                                                                     OutElementOp>();
Chao Liu's avatar
Chao Liu committed
195
196

        auto ref_invoker  = ref_conv.MakeInvoker();
Chao Liu's avatar
Chao Liu committed
197
198
199
        auto ref_argument = ref_conv.MakeArgument(in,
                                                  wei,
                                                  out_host,
Chao Liu's avatar
Chao Liu committed
200
201
202
203
                                                  params.conv_filter_strides_,
                                                  params.conv_filter_dilations_,
                                                  params.input_left_pads_,
                                                  params.input_right_pads_,
Chao Liu's avatar
Chao Liu committed
204
205
206
                                                  in_element_op,
                                                  wei_element_op,
                                                  out_element_op);
Chao Liu's avatar
Chao Liu committed
207
208
209

        ref_invoker.Run(ref_argument);

Chao Liu's avatar
Chao Liu committed
210
        out_device_buf.FromDevice(out_device.mData.data());
Chao Liu's avatar
Chao Liu committed
211

Chao Liu's avatar
Chao Liu committed
212
213
        return ck::utils::check_err(
                   out_host.mData, out_device.mData, "Error: incorrect results!", 1e-5f, 1e-4f)
Chao Liu's avatar
Chao Liu committed
214
215
216
217
218
219
                   ? 0
                   : 1;
    }

    return 0;
}