conv_bwd_data_driver.cpp 9.04 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
3
4
5
6
#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <thread>
#include <stdlib.h>
#include "config.hpp"
Chao Liu's avatar
Chao Liu committed
7
#include "print.hpp"
Chao Liu's avatar
Chao Liu committed
8
#include "device.hpp"
Chao Liu's avatar
Chao Liu committed
9
#include "host_tensor_generator.hpp"
Chao Liu's avatar
Chao Liu committed
10
11
12
13
14
#include "device_tensor.hpp"
#include "conv_common.hpp"
#include "host_conv_bwd_data.hpp"
#include "device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw.hpp"
15
#include "device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
Chao Liu's avatar
Chao Liu committed
16
#include "device_convolution_backward_data_implicit_gemm_v5r1_nhwc_kyxc_nhwk.hpp"
Chao Liu's avatar
Chao Liu committed
17
18
19

int main(int argc, char* argv[])
{
20
    using namespace launcher;
Chao Liu's avatar
Chao Liu committed
21

zjing14's avatar
zjing14 committed
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#if 1
    // 1x1 filter, 14x14 image
    constexpr index_t N  = 1;
    constexpr index_t C  = 256;
    constexpr index_t HI = 1;
    constexpr index_t WI = 128;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
38
39
40
41
42
43
44
    constexpr index_t N  = 64;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
45

Chao Liu's avatar
Chao Liu committed
46
    using ConvStrides   = Sequence<1, 1>;
47
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
48
49
50
51
52
53
54
55
56

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 34x34
    constexpr index_t N  = 64;
    constexpr index_t C  = 256;
    constexpr index_t HI = 34;
    constexpr index_t WI = 34;
57
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
58
59
60
61
62
63
64
65
66
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
67
68
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
69
    constexpr index_t C  = 128;
70
71
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
Chao Liu's avatar
Chao Liu committed
72
    constexpr index_t K  = 128;
73
74
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
75
76
77
78

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

79
80
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
81
82
#elif 0
    // 1x1 filter, 8x8 image
83
84
    constexpr index_t N  = 256;
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
85
86
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
87
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
88
89
90
91
92
93
94
95
96
97
98
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1 filter, 7x7 image
    constexpr index_t N  = 128;
99
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
100
101
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
102
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
103
104
105
106
107
108
109
110
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
zjing14's avatar
zjing14 committed
111
#elif 1
Chao Liu's avatar
Chao Liu committed
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
    // 1x1 filter, 14x14 image
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1 filter, 28x28 image
    constexpr index_t N  = 128;
129
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1 filter, 17x17 input
    constexpr index_t N  = 128;
144
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
145
146
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
147
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
148
149
150
151
152
153
154
155
156
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
157
    // 5x5 filter, 2x2 pad, 7x7 input
Chao Liu's avatar
Chao Liu committed
158
    constexpr index_t N  = 128;
159
160
161
162
163
164
    constexpr index_t C  = 1024;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 1024;
    constexpr index_t Y  = 5;
    constexpr index_t X  = 5;
Chao Liu's avatar
Chao Liu committed
165
166
167
168

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

169
170
    using LeftPads  = Sequence<2, 2>;
    using RightPads = Sequence<2, 2>;
zjing14's avatar
zjing14 committed
171
#elif 0
172
    // 1x7 filter, 0x3 pad, 17x17 input
Chao Liu's avatar
Chao Liu committed
173
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
174
    constexpr index_t C  = 128;
175
176
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
Chao Liu's avatar
Chao Liu committed
177
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
178
    constexpr index_t Y  = 1;
179
    constexpr index_t X  = 7;
Chao Liu's avatar
Chao Liu committed
180
181
182
183

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

184
185
    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
Chao Liu's avatar
Chao Liu committed
186
#elif 0
187
    // 7x1 filter, 3x0 pad, 17x17 input
Chao Liu's avatar
Chao Liu committed
188
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
189
    constexpr index_t C  = 256;
190
191
192
193
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 1024;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
194
195
196
197
198
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

199
200
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
Chao Liu's avatar
Chao Liu committed
201
#elif 1
Chao Liu's avatar
Chao Liu committed
202
203
    // 3x3 filter, 2x2 stride, 35x35 input, 17x17 output
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
204
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
205
206
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
207
    constexpr index_t K  = 1280;
Chao Liu's avatar
Chao Liu committed
208
209
210
211
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
212
    using ConvDilations = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
213
214
215
216
217
218
219
220
221
222

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#endif

    constexpr auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    constexpr auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    constexpr auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});

Chao Liu's avatar
Chao Liu committed
223
224
225
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
226
227
228
229
230
    print_array("LeftPads", LeftPads{});
    print_array("LeftPads", LeftPads{});
    print_array("RightPads", RightPads{});
    print_array("ConvStrides", ConvStrides{});
    print_array("ConvDilations", ConvDilations{});
Chao Liu's avatar
Chao Liu committed
231

Chao Liu's avatar
Chao Liu committed
232
233
234
235
    Tensor<float> in_nchw_device(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<float> in_nchw_host(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<float> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<float> out_nkhw(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258

    std::size_t num_thread = std::thread::hardware_concurrency();

    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
    std::size_t nrepeat  = atoi(argv[2]);

    if(do_verification)
    {
#if 0
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{1}, num_thread);
        out_nkhw.GenerateTensorValue(GeneratorTensor_1{1}, num_thread);
#else
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
        out_nkhw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
#endif
    }

Chao Liu's avatar
Chao Liu committed
259
#if 0
Chao Liu's avatar
Chao Liu committed
260
    device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw
261
#elif 0
Chao Liu's avatar
Chao Liu committed
262
    device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw
zjing14's avatar
zjing14 committed
263
#elif 0
264
    device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw
Chao Liu's avatar
Chao Liu committed
265
266
#elif 1
    device_convolution_backward_data_implicit_gemm_v5r1_nhwc_kyxc_nhwk
Chao Liu's avatar
Chao Liu committed
267
#endif
Chao Liu's avatar
Chao Liu committed
268
269
270
271
272
273
274
275
276
277
278
    (in_nchw_desc,
     in_nchw_device,
     wei_kcyx_desc,
     wei_kcyx,
     out_nkhw_desc,
     out_nkhw,
     ConvStrides{},
     ConvDilations{},
     LeftPads{},
     RightPads{},
     nrepeat);
Chao Liu's avatar
Chao Liu committed
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299

    if(do_verification)
    {
        host_direct_convolution_backward_data(in_nchw_host,
                                              wei_kcyx,
                                              out_nkhw,
                                              ConvStrides{},
                                              ConvDilations{},
                                              LeftPads{},
                                              RightPads{});

        check_error(in_nchw_host, in_nchw_device);

#if 0
        LogRange(std::cout << "out_nkhw : ", out_nkhw.mData, ",") << std::endl;
        LogRange(std::cout << "wei_kcyx : ", wei_kcyx.mData, ",") << std::endl;
        LogRange(std::cout << "in_nchw_host : ", in_nchw_host.mData, ",") << std::endl;
        LogRange(std::cout << "in_nchw_device : ", in_nchw_device.mData, ",") << std::endl;
#endif
    }
}