convnd_fwd_common.hpp 6.63 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
#include <numeric>
#include <type_traits>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

13
14
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
#include "ck/library/utility/array.hpp"
15
#include "ck/library/utility/check_err.hpp"
16
17
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"

void print_helper_msg()
{
    std::cout << "arg1: verification (0=no, 1=yes)\n"
              << "arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n"
              << "arg3: time kernel (0=no, 1=yes)\n"
              << ck::utils::conv::get_conv_param_parser_helper_msg() << std::endl;
}

template <ck::index_t NDimSpatial,
          typename InDataType,
          typename WeiDataType,
          typename OutDataType,
          typename InElementOp,
          typename WeiElementOp,
          typename OutElementOp,
          typename DeviceConvNDFwdInstance>
38
39
40
41
42
43
44
45
46
47
bool run_grouped_conv_fwd(bool do_verification,
                          int init_method,
                          bool time_kernel,
                          const ck::utils::conv::ConvParam& conv_param,
                          const HostTensorDescriptor& in_g_n_c_wis_desc,
                          const HostTensorDescriptor& wei_g_k_c_xs_desc,
                          const HostTensorDescriptor& out_g_n_k_wos_desc,
                          const InElementOp& in_element_op,
                          const WeiElementOp& wei_element_op,
                          const OutElementOp& out_element_op)
48
49
50
51
52
53
{
    Tensor<InDataType> in(in_g_n_c_wis_desc);
    Tensor<WeiDataType> wei(wei_g_k_c_xs_desc);
    Tensor<OutDataType> out_host(out_g_n_k_wos_desc);
    Tensor<OutDataType> out_device(out_g_n_k_wos_desc);

54
55
56
    std::cout << "in: " << in.GetDesc() << std::endl;
    std::cout << "wei: " << wei.GetDesc() << std::endl;
    std::cout << "out: " << out_host.GetDesc() << std::endl;
57
58
59
60
61
62
63
64
65
66
67
68
69

    switch(init_method)
    {
    case 0: break;
    case 1:
        in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
        wei.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
        break;
    default:
        in.GenerateTensorValue(GeneratorTensor_3<InDataType>{0.0, 1.0});
        wei.GenerateTensorValue(GeneratorTensor_3<WeiDataType>{-0.5, 0.5});
    }

70
71
72
73
74
75
76
77
    DeviceMem in_device_buf(in.GetMemorySize());
    DeviceMem wei_device_buf(wei.GetMemorySize());
    DeviceMem out_device_buf(out_device.GetMemorySize());

    in_device_buf.ToDevice(in.data());
    wei_device_buf.ToDevice(wei.data());

    using ck::utils::empty_array, ck::utils::to_array;
78
79
80
81
82
83

    // do Conv
    auto conv     = DeviceConvNDFwdInstance{};
    auto invoker  = conv.MakeInvoker();
    auto argument = conv.MakeArgument(in_device_buf.GetDeviceBuffer(),
                                      wei_device_buf.GetDeviceBuffer(),
84
                                      empty_array(),
85
                                      out_device_buf.GetDeviceBuffer(),
86
87
88
89
90
91
92
93
94
95
96
97
                                      to_array(in_g_n_c_wis_desc.GetLengths()),
                                      to_array(in_g_n_c_wis_desc.GetStrides()),
                                      to_array(wei_g_k_c_xs_desc.GetLengths()),
                                      to_array(wei_g_k_c_xs_desc.GetStrides()),
                                      empty_array(),
                                      empty_array(),
                                      to_array(out_g_n_k_wos_desc.GetLengths()),
                                      to_array(out_g_n_k_wos_desc.GetStrides()),
                                      to_array(conv_param.conv_filter_strides_),
                                      to_array(conv_param.conv_filter_dilations_),
                                      to_array(conv_param.input_left_pads_),
                                      to_array(conv_param.input_right_pads_),
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
                                      in_element_op,
                                      wei_element_op,
                                      out_element_op);

    if(!conv.IsSupportedArgument(argument))
    {
        throw std::runtime_error(
            "wrong! device_conv with the specified compilation parameters does "
            "not support this Conv problem");
    }

    float avg_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});

    std::size_t flop      = conv_param.GetFlops();
    std::size_t num_btype = conv_param.GetByte<InDataType, WeiDataType, OutDataType>();

    float tflops     = static_cast<float>(flop) / 1.E9 / avg_time;
    float gb_per_sec = num_btype / 1.E6 / avg_time;
    std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
              << conv.GetTypeString() << std::endl;

    if(do_verification)
    {
        auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
                                                                     InDataType,
                                                                     WeiDataType,
                                                                     OutDataType,
                                                                     InElementOp,
                                                                     WeiElementOp,
                                                                     OutElementOp>();

        auto ref_invoker  = ref_conv.MakeInvoker();
        auto ref_argument = ref_conv.MakeArgument(in,
                                                  wei,
                                                  out_host,
                                                  conv_param.conv_filter_strides_,
                                                  conv_param.conv_filter_dilations_,
                                                  conv_param.input_left_pads_,
                                                  conv_param.input_right_pads_,
                                                  in_element_op,
                                                  wei_element_op,
                                                  out_element_op);

        ref_invoker.Run(ref_argument);

143
        out_device_buf.FromDevice(out_device.data());
144
145

        return ck::utils::check_err(
146
            out_device, out_host, "Error: incorrect results!", 1e-5f, 1e-4f);
147
148
    }

149
    return true;
150
}