conv_driver.cpp 7.8 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include <stdlib.h>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <limits>
#include <numeric>
#include <thread>
#include "config.hpp"
#include "ConstantTensorDescriptor_deprecated.hpp"
#include "print_array.hpp"
#include "print_sequence.hpp"
#include "device.hpp"
#include "tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
//#include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
//#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp"
//#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn_padded.hpp"
//#include "device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp"
//#include "device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
//#include "device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
#include "device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_convolution_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp"
24

Chao Liu's avatar
Chao Liu committed
25
int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
26
{
Chao Liu's avatar
Chao Liu committed
27
28
    using namespace ck;

ChLiu Chao's avatar
ChLiu Chao committed
29
    // 1x1, 14x14
Jing Zhang's avatar
Jing Zhang committed
30
    constexpr index_t N  = 64;
ChLiu Chao's avatar
ChLiu Chao committed
31
32
33
    constexpr index_t C  =  1024;
    constexpr index_t HI =  14;
    constexpr index_t WI =  14;
Jing Zhang's avatar
Jing Zhang committed
34
    constexpr index_t K  =  1024;
Chao Liu's avatar
Chao Liu committed
35
36
37
38
39
40
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
41
42
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
43

Chao Liu's avatar
Chao Liu committed
44
45
    auto in_nchw_desc  = make_ConstantTensorDescriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_ConstantTensorDescriptor_packed(Sequence<K, C, Y, X>{});
Chao Liu's avatar
Chao Liu committed
46
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor_deprecated(
Chao Liu's avatar
Chao Liu committed
47
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
48

Chao Liu's avatar
Chao Liu committed
49
    ostream_ConstantTensorDescriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
Chao Liu's avatar
Chao Liu committed
50
    ostream_ConstantTensorDescriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
Chao Liu's avatar
Chao Liu committed
51
    ostream_ConstantTensorDescriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
52
53
54
55
    print_sequence("LeftPads", LeftPads{});
    print_sequence("RightPads", RightPads{});
    print_sequence("ConvStrides", ConvStrides{});
    print_sequence("ConvDilations", ConvDilations{});
Chao Liu's avatar
Chao Liu committed
56

Chao Liu's avatar
Chao Liu committed
57
58
    using in_data_t  = float;
    using out_data_t = float;
59
60
61
62
    Tensor<in_data_t> in_nchw(make_TensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_TensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_TensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_TensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
63

Chao Liu's avatar
Chao Liu committed
64
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
65

Chao Liu's avatar
Chao Liu committed
66
67
68
69
70
71
72
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
73
    index_t nrepeat      = atoi(argv[2]);
74
75
76

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
77
#if 0
78
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
79
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
80
81
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
82
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
83
84
85
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
86
#elif 1
87
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
88
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
89
#elif 0
90
91
92
93
94
95
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
96
#endif
97
    }
Chao Liu's avatar
Chao Liu committed
98

Chao Liu's avatar
Chao Liu committed
99
#if 0
Chao Liu's avatar
Chao Liu committed
100
    device_convolution_direct_v2_nchw_kcyx_nkhw
Chao Liu's avatar
Chao Liu committed
101
        (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
Chao Liu's avatar
Chao Liu committed
102
#elif 0
Chao Liu's avatar
Chao Liu committed
103
    device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(
Chao Liu's avatar
Chao Liu committed
104
        in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
Chao Liu's avatar
Chao Liu committed
105
#elif 0
106
107
108
109
110
111
    device_convolution_implicit_gemm_v1_chwn_cyxk_khwn_padded(in_nchw_desc,
                                                              in_nchw,
                                                              wei_kcyx_desc,
                                                              wei_kcyx,
                                                              out_nkhw_desc,
                                                              out_nkhw_device,
Chao Liu's avatar
Chao Liu committed
112
113
                                                              LeftPads{},
                                                              RightPads{},
114
                                                              nrepeat);
Chao Liu's avatar
Chao Liu committed
115
#elif 0
Chao Liu's avatar
Chao Liu committed
116
    device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw(
Chao Liu's avatar
Chao Liu committed
117
        in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
118
#elif 0
Chao Liu's avatar
Chao Liu committed
119
    device_convolution_implicit_gemm_v2_chwn_cyxk_khwn(
Chao Liu's avatar
Chao Liu committed
120
        in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
Chao Liu's avatar
Chao Liu committed
121
#elif 0
Chao Liu's avatar
Chao Liu committed
122
    device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(
Chao Liu's avatar
Chao Liu committed
123
        (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
124
#elif 0
Chao Liu's avatar
Chao Liu committed
125
126
127
128
129
130
131
132
    device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
                                                         ConvStrides{},
                                                         ConvDilations{},
133
134
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
135
                                                         nrepeat);
Chao Liu's avatar
Chao Liu committed
136
#elif 1
Jing Zhang's avatar
Jing Zhang committed
137
    device_convolution_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw(in_nchw_desc,
Chao Liu's avatar
Chao Liu committed
138
139
140
141
142
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
Chao Liu's avatar
Chao Liu committed
143
144
                                                         ConvStrides{},
                                                         ConvDilations{},
145
146
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
147
                                                         nrepeat);
148
#endif
Chao Liu's avatar
Chao Liu committed
149

150
    if(do_verification)
151
    {
ChLiu Chao's avatar
ChLiu Chao committed
152
#if 0
153
154
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
155
        {
Chao Liu's avatar
Chao Liu committed
156
157
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
158
159
        }
        else
Chao Liu's avatar
Chao Liu committed
160
#endif
161
        {
162
163
164
165
166
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
167
168
                                    LeftPads{},
                                    RightPads{});
169
170
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
171

Chao Liu's avatar
Chao Liu committed
172
#if 0
173
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
174
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
175
176
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
177
#endif
178
    }
179
}