".github/vscode:/vscode.git/clone" did not exist on "6c60e430eeb050535aee8854c6ad649b162b62a9"
conv_driver.cpp 20.3 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <thread>
#include <stdlib.h>
#include "config.hpp"
#include "ConstantTensorDescriptor_deprecated.hpp"
#include "print_array.hpp"
#include "print_sequence.hpp"
#include "device.hpp"
#include "tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
//#include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
//#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp"
//#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn_padded.hpp"
//#include "device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp"
//#include "device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
//#include "device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
#include "device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
23

Chao Liu's avatar
Chao Liu committed
24
int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
25
{
Chao Liu's avatar
Chao Liu committed
26
27
    using namespace ck;

Chao Liu's avatar
Chao Liu committed
28
#if 0
ChLiu Chao's avatar
ChLiu Chao committed
29
    // 1x1, 17x17
Chao Liu's avatar
Chao Liu committed
30
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
31
32
33
34
    constexpr index_t C  = 1024;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 256;
35
36
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
37
38

    using ConvStrides   = Sequence<1, 1>;
39
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
40

Chao Liu's avatar
Chao Liu committed
41
42
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
43
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
44
    // 1x1, 8x8
45
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  =  96;
    constexpr index_t HI =  35;
    constexpr index_t WI =  35;
    constexpr index_t K  =  96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
107
108
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
ChLiu Chao's avatar
ChLiu Chao committed
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
    constexpr index_t K  = 320;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 224;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 224;
125
126
127
128
129
130
131
132
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
Chao Liu's avatar
Chao Liu committed
133
#elif 1
ChLiu Chao's avatar
ChLiu Chao committed
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  =   3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 147x147
    // v4r4@v100 xx.xx%, cudnn@v100 xx.xx%
    constexpr index_t N  = 128;
    constexpr index_t C  =  32;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    // v4r4@v100 xx.xx%, cudnn@v100 xx.xx%
    constexpr index_t N  = 128;
    constexpr index_t C  =  32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
172
173
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
174

Chao Liu's avatar
Chao Liu committed
175
    using ConvStrides   = Sequence<1, 1>;
176
177
    using ConvDilations = Sequence<1, 1>;

178
179
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
ChLiu Chao's avatar
ChLiu Chao committed
180
181
#elif 0
    // 3x3, 17x17, stride 2
182
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
183
184
185
186
    constexpr index_t C  = 192;
    constexpr index_t HI =  17;
    constexpr index_t WI =  17;
    constexpr index_t K  = 192;
187
188
189
190
191
192
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

193
194
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
195
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
196
197
198
199
200
201
    // 1x1, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 384;
    constexpr index_t HI =  35;
    constexpr index_t WI =  35;
    constexpr index_t K  =  96;
Chao Liu's avatar
Chao Liu committed
202
203
204
205
206
207
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
208
209
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
210
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
211
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
212
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
213
214
215
    constexpr index_t C  = 384;
    constexpr index_t HI =  35;
    constexpr index_t WI =  35;
Chao Liu's avatar
Chao Liu committed
216
    constexpr index_t K  = 384;
ChLiu Chao's avatar
ChLiu Chao committed
217
218
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
219

ChLiu Chao's avatar
ChLiu Chao committed
220
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
221
222
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
223
224
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
225
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
226
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
227
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
228
229
230
231
    constexpr index_t C  = 384;
    constexpr index_t HI =   8;
    constexpr index_t WI =   8;
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
232
    constexpr index_t Y  = 1;
ChLiu Chao's avatar
ChLiu Chao committed
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
#elif 0
    // 3x1, 8x8
    constexpr index_t N  = 128;
    constexpr index_t C  = 448;
    constexpr index_t HI =   8;
    constexpr index_t WI =   8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
248
249
250
251
252
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

ChLiu Chao's avatar
ChLiu Chao committed
253
254
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
Chao Liu's avatar
Chao Liu committed
255
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
256
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
257
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
258
259
260
261
262
    constexpr index_t C  = 448;
    constexpr index_t HI =   8;
    constexpr index_t WI =   8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
263
264
265
266
267
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

ChLiu Chao's avatar
ChLiu Chao committed
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 1
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  =  64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  =  96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
283
284
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
285
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
286
287
    // 7x1, 73x73
    // v44@v100 xx.xx%, cudnn@v100 xx.xx%
Chao Liu's avatar
Chao Liu committed
288
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
289
290
291
292
293
    constexpr index_t C  =  64;
    constexpr index_t HI =  73;
    constexpr index_t WI =  73;
    constexpr index_t K  =  64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
294
295
296
297
298
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

ChLiu Chao's avatar
ChLiu Chao committed
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 1
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  =  64;
    constexpr index_t HI =  73;
    constexpr index_t WI =  73;
    constexpr index_t K  =  96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
314
315
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
316
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
317
318
319
320
321
322
    // 1x1, 14x14, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  =  1024;
    constexpr index_t HI =  14;
    constexpr index_t WI =  14;
    constexpr index_t K  =  2048;
Chao Liu's avatar
Chao Liu committed
323
324
325
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

ChLiu Chao's avatar
ChLiu Chao committed
326
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
327
328
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
329
330
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
331
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
332
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
333
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
334
335
336
337
    constexpr index_t C  =  1024;
    constexpr index_t HI =  14;
    constexpr index_t WI =  14;
    constexpr index_t K  =  256;
Chao Liu's avatar
Chao Liu committed
338
339
340
341
342
343
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
344
345
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
346
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
347
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
348
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
349
350
351
352
    constexpr index_t C  =  1024;
    constexpr index_t HI =  14;
    constexpr index_t WI =  14;
    constexpr index_t K  =  512;
Chao Liu's avatar
Chao Liu committed
353
354
355
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

ChLiu Chao's avatar
ChLiu Chao committed
356
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
357
358
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
359
360
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
361
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
362
    // 3x3, 28x28
Chao Liu's avatar
Chao Liu committed
363
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
364
365
366
    constexpr index_t C  = 128;
    constexpr index_t HI =  28;
    constexpr index_t WI =  28;
Chao Liu's avatar
Chao Liu committed
367
    constexpr index_t K  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
368
369
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
370
371
372
373

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

ChLiu Chao's avatar
ChLiu Chao committed
374
375
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
376
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
377
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
378
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
    constexpr index_t C  = 256;
    constexpr index_t HI =  14;
    constexpr index_t WI =  14;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 1
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI =  56;
    constexpr index_t WI =  56;
Chao Liu's avatar
Chao Liu committed
397
398
399
400
    constexpr index_t K  = 128;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

ChLiu Chao's avatar
ChLiu Chao committed
401
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
402
403
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
404
405
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
406
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
407
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
408
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
409
410
411
412
413
414
    constexpr index_t C  =   3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;
Chao Liu's avatar
Chao Liu committed
415

ChLiu Chao's avatar
ChLiu Chao committed
416
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
417
418
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
419
420
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
421
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
422
    // 1x1, 28x28, stride = 2
Chao Liu's avatar
Chao Liu committed
423
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
424
425
426
427
    constexpr index_t C  = 512;
    constexpr index_t HI =  28;
    constexpr index_t WI =  28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
428
429
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
430

ChLiu Chao's avatar
ChLiu Chao committed
431
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
432
433
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
434
435
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
436
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
437
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
438
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
439
440
441
442
443
444
    constexpr index_t C  = 512;
    constexpr index_t HI =  28;
    constexpr index_t WI =  28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
445
446
447
448
449
450

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
451
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
452
    // 1x1, 7x7
453
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
454
455
456
457
458
459
    constexpr index_t C  = 512;
    constexpr index_t HI =   7;
    constexpr index_t WI =   7;
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
460
461
462
463

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

ChLiu Chao's avatar
ChLiu Chao committed
464
465
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
466
#elif 0
ChLiu Chao's avatar
ChLiu Chao committed
467
    // 3x3, 7x7
468
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
469
470
471
472
473
474
    constexpr index_t C  = 512;
    constexpr index_t HI =   7;
    constexpr index_t WI =   7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
475
476
477
478

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

ChLiu Chao's avatar
ChLiu Chao committed
479
480
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
481
#elif 1
ChLiu Chao's avatar
ChLiu Chao committed
482
    // 1x1, 56x56
483
    constexpr index_t N  = 128;
ChLiu Chao's avatar
ChLiu Chao committed
484
485
486
487
488
    constexpr index_t C  =  64;
    constexpr index_t HI =  56;
    constexpr index_t WI =  56;
    constexpr index_t K  =  64;
    constexpr index_t Y  = 1;
489
    constexpr index_t X  = 1;
490
491
492
493

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

ChLiu Chao's avatar
ChLiu Chao committed
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 1
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  =  64;
    constexpr index_t HI =  56;
    constexpr index_t WI =  56;
    constexpr index_t K  =  64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
511
#endif
Chao Liu's avatar
Chao Liu committed
512

Chao Liu's avatar
Chao Liu committed
513
514
    auto in_nchw_desc  = make_ConstantTensorDescriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_ConstantTensorDescriptor_packed(Sequence<K, C, Y, X>{});
Chao Liu's avatar
Chao Liu committed
515
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor_deprecated(
Chao Liu's avatar
Chao Liu committed
516
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
517

Chao Liu's avatar
Chao Liu committed
518
    ostream_ConstantTensorDescriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
Chao Liu's avatar
Chao Liu committed
519
    ostream_ConstantTensorDescriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
Chao Liu's avatar
Chao Liu committed
520
    ostream_ConstantTensorDescriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
521
522
523
524
    print_sequence("LeftPads", LeftPads{});
    print_sequence("RightPads", RightPads{});
    print_sequence("ConvStrides", ConvStrides{});
    print_sequence("ConvDilations", ConvDilations{});
Chao Liu's avatar
Chao Liu committed
525

Chao Liu's avatar
Chao Liu committed
526
527
    using in_data_t  = float;
    using out_data_t = float;
528
529
530
531
    Tensor<in_data_t> in_nchw(make_TensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_TensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_TensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_TensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
532

Chao Liu's avatar
Chao Liu committed
533
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
534

Chao Liu's avatar
Chao Liu committed
535
536
537
538
539
540
541
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
542
    index_t nrepeat      = atoi(argv[2]);
543
544
545

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
546
#if 0
547
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
548
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
549
550
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
551
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
552
553
554
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
555
#elif 1
556
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
557
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
558
#elif 0
559
560
561
562
563
564
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
565
#endif
566
    }
Chao Liu's avatar
Chao Liu committed
567

Chao Liu's avatar
Chao Liu committed
568
#if 0
Chao Liu's avatar
Chao Liu committed
569
    device_convolution_direct_v2_nchw_kcyx_nkhw
Chao Liu's avatar
Chao Liu committed
570
        (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
Chao Liu's avatar
Chao Liu committed
571
#elif 0
Chao Liu's avatar
Chao Liu committed
572
    device_convolution_implicit_gemm_v1_chwn_cyxk_khwn(
Chao Liu's avatar
Chao Liu committed
573
        in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
Chao Liu's avatar
Chao Liu committed
574
#elif 0
575
576
577
578
579
580
    device_convolution_implicit_gemm_v1_chwn_cyxk_khwn_padded(in_nchw_desc,
                                                              in_nchw,
                                                              wei_kcyx_desc,
                                                              wei_kcyx,
                                                              out_nkhw_desc,
                                                              out_nkhw_device,
Chao Liu's avatar
Chao Liu committed
581
582
                                                              LeftPads{},
                                                              RightPads{},
583
                                                              nrepeat);
Chao Liu's avatar
Chao Liu committed
584
#elif 0
Chao Liu's avatar
Chao Liu committed
585
    device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw(
Chao Liu's avatar
Chao Liu committed
586
        in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
587
#elif 0
Chao Liu's avatar
Chao Liu committed
588
    device_convolution_implicit_gemm_v2_chwn_cyxk_khwn(
Chao Liu's avatar
Chao Liu committed
589
        in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
Chao Liu's avatar
Chao Liu committed
590
#elif 0
Chao Liu's avatar
Chao Liu committed
591
    device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(
Chao Liu's avatar
Chao Liu committed
592
        (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
593
#elif 0
Chao Liu's avatar
Chao Liu committed
594
595
596
597
598
599
600
601
    device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
                                                         ConvStrides{},
                                                         ConvDilations{},
602
603
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
604
                                                         nrepeat);
Chao Liu's avatar
Chao Liu committed
605
#elif 1
Chao Liu's avatar
Chao Liu committed
606
607
608
609
610
611
    device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
Chao Liu's avatar
Chao Liu committed
612
613
                                                         ConvStrides{},
                                                         ConvDilations{},
614
615
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
616
                                                         nrepeat);
617
#endif
Chao Liu's avatar
Chao Liu committed
618

619
    if(do_verification)
620
    {
ChLiu Chao's avatar
ChLiu Chao committed
621
#if 0
622
623
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
624
        {
Chao Liu's avatar
Chao Liu committed
625
626
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
627
628
        }
        else
Chao Liu's avatar
Chao Liu committed
629
#endif
630
        {
631
632
633
634
635
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
636
637
                                    LeftPads{},
                                    RightPads{});
638
639
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
640

Chao Liu's avatar
Chao Liu committed
641
#if 0
642
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
643
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
644
645
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
646
#endif
647
    }
648
}