conv_driver.cpp 23.2 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <thread>
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp"
18

Chao Liu's avatar
Chao Liu committed
19
int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
20
{
Chao Liu's avatar
Chao Liu committed
21
22
    using namespace ck;

23
#if 0
Chao Liu's avatar
Chao Liu committed
24
    constexpr index_t N  = 1;
Chao Liu's avatar
Chao Liu committed
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
    constexpr index_t C  = 4;
    constexpr index_t HI = 1080;
    constexpr index_t WI = 1920;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 4;
Chao Liu's avatar
Chao Liu committed
40
41
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
Chao Liu's avatar
Chao Liu committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 4;
    constexpr index_t HI = 270;
    constexpr index_t WI = 480;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
test  
Chao Liu committed
65
#elif 0
Chao Liu's avatar
Chao Liu committed
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
    constexpr index_t N  = 1;
    constexpr index_t C  = 2;
    constexpr index_t HI = 1080;
    constexpr index_t WI = 1920;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 4;
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
test  
Chao Liu committed
93
#elif 0
Chao Liu's avatar
Chao Liu committed
94
95
96
97
98
    constexpr index_t N  = 1;
    constexpr index_t C  = 4;
    constexpr index_t HI = 270;
    constexpr index_t WI = 480;
    constexpr index_t K  = 16;
Chao Liu's avatar
Chao Liu committed
99
100
101
102
103
104
105
106
107
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
108
109
110
111
112
113
114
115
116
117
118
119
120
121
    // 3x3, 36x36, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 37;
    constexpr index_t WI = 37;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
122
#elif 0
123
124
125
126
127
128
129
130
    // 3x3, 35x35, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
131

132
    using ConvStrides   = Sequence<2, 2>;
133
134
135
136
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
137
#elif 1
Chao Liu's avatar
Chao Liu committed
138
    // 3x3, 71x71
Chao Liu's avatar
Chao Liu committed
139
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
140
141
142
143
144
145
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
146

Chao Liu's avatar
Chao Liu committed
147
    using ConvStrides   = Sequence<2, 2>;
148
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
149

Chao Liu's avatar
Chao Liu committed
150
151
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
152
#elif 1
Chao Liu's avatar
Chao Liu committed
153
    // 1x1, 8x8
154
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
188
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
212
#elif 1
Chao Liu's avatar
Chao Liu committed
213
214
215
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
216
217
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
218
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
219
220
221
222
223
224
225
226
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
227
#elif 0
Chao Liu's avatar
Chao Liu committed
228
229
230
231
232
233
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
234
235
236
237
238
239
240
241
242
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
243
244
245
246
247
248
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
249
250
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
251

Chao Liu's avatar
Chao Liu committed
252
    using ConvStrides   = Sequence<2, 2>;
253
254
    using ConvDilations = Sequence<1, 1>;

255
256
257
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
258
    // 3x3, 147x147
259
    constexpr index_t N  = 128;
260
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
261
262
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
263
    constexpr index_t K  = 128;
264
265
266
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
283
284
    using ConvDilations = Sequence<1, 1>;

285
286
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
287
#elif 0
Chao Liu's avatar
Chao Liu committed
288
289
290
291
292
293
294
295
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
296

Chao Liu's avatar
Chao Liu committed
297
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
298
299
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
300
301
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
302
#elif 0
Chao Liu's avatar
Chao Liu committed
303
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
304
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
305
306
307
308
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
309
310
311
312
313
314
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
315
316
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
317
#elif 0
Chao Liu's avatar
Chao Liu committed
318
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
319
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
320
321
322
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
323
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
324
325
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
326

Chao Liu's avatar
Chao Liu committed
327
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
328
329
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
330
331
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
332
#elif 0
Chao Liu's avatar
Chao Liu committed
333
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
334
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
335
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
336
337
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
338
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
339
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
340
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
341
342
343
344

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
345
346
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
347
#elif 0
Chao Liu's avatar
Chao Liu committed
348
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
349
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
350
351
352
353
354
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
355
356
357
358
359
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
375
376
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
377
#elif 0
Chao Liu's avatar
Chao Liu committed
378
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
379
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
380
381
382
383
384
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
385
386
387
388
389
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
405
406
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
407
#elif 0
Chao Liu's avatar
Chao Liu committed
408
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
409
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
410
411
412
413
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
414
415
416
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
417
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
418
419
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
420
421
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
422
#elif 0
Chao Liu's avatar
Chao Liu committed
423
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
424
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
425
426
427
428
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
429
430
431
432
433
434
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
435
436
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
437
#elif 0
Chao Liu's avatar
Chao Liu committed
438
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
439
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
440
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
441
442
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
443
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
444
445
446
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
447
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
448
449
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
450
451
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
452
#elif 1
Chao Liu's avatar
Chao Liu committed
453
454
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
455
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
456
457
458
459
460
461
462
463
464
465
466
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
467
#elif 1
Chao Liu's avatar
Chao Liu committed
468
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
469
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
470
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
471
472
473
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
474
475
476
477
478
479
480
481
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
482
#elif 0
Chao Liu's avatar
Chao Liu committed
483
484
485
486
487
488
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
489
490
491
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
492
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
493
494
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
495
496
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
497
#elif 0
Chao Liu's avatar
Chao Liu committed
498
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
499
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
519
520
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
521

Chao Liu's avatar
Chao Liu committed
522
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
523
524
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
525
526
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
527
#elif 0
Chao Liu's avatar
Chao Liu committed
528
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
529
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
530
531
532
533
534
535
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
536
537
538
539
540
541

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
542
#elif 1
Chao Liu's avatar
Chao Liu committed
543
    // 1x1, 7x7
544
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
545
    constexpr index_t C  = 512;
546
547
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
548
549
550
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
551
552
553
554

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
555
556
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
557
#elif 0
Chao Liu's avatar
Chao Liu committed
558
    // 3x3, 7x7
559
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
560
561
562
563
564
565
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
566
567
568
569

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
570
571
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
572
#elif 0
Chao Liu's avatar
Chao Liu committed
573
    // 1x1, 56x56
574
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
575
576
577
578
579
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
580
    constexpr index_t X  = 1;
581
582
583
584

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
585
586
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
587
#elif 0
Chao Liu's avatar
Chao Liu committed
588
589
590
591
592
593
594
595
596
597
598
599
600
601
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
602
#endif
Chao Liu's avatar
Chao Liu committed
603

Chao Liu's avatar
Chao Liu committed
604
605
606
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
607
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
608

Chao Liu's avatar
Chao Liu committed
609
610
611
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
612
613
614
615
    print_array("LeftPads", to_multi_index(LeftPads{}));
    print_array("RightPads", to_multi_index(RightPads{}));
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));
Chao Liu's avatar
Chao Liu committed
616

Chao Liu's avatar
Chao Liu committed
617
#if 1
Chao Liu's avatar
Chao Liu committed
618
619
    using in_data_t  = float;
    using out_data_t = float;
Chao Liu's avatar
Chao Liu committed
620
621
622
623
624
625
626
627
628
#else
    using in_data_t  = half_float::half;
    using out_data_t = half_float::half;
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
629

Chao Liu's avatar
Chao Liu committed
630
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
631

Chao Liu's avatar
Chao Liu committed
632
633
634
635
636
637
638
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
639
    index_t nrepeat      = atoi(argv[2]);
640
641
642

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
643
#if 0
644
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
645
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
646
647
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
648
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
649
#elif 0
650
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
651
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
652
#elif 1
653
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
654
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
655
#elif 0
656
657
658
659
660
661
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
662
#endif
663
    }
Chao Liu's avatar
Chao Liu committed
664

Chao Liu's avatar
Chao Liu committed
665
#if 0
666
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
667
668
669
670
671
672
673
674
675
676
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
Chao Liu's avatar
test  
Chao Liu committed
677
#elif 1
678
679
680
681
682
683
684
685
686
687
688
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
Chao Liu's avatar
Chao Liu committed
689
#elif 0
690
691
692
693
694
695
696
697
698
699
700
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
701
702
703
704
705
706
707
708
709
710
711
712
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
713
#endif
Chao Liu's avatar
Chao Liu committed
714

715
    if(do_verification)
716
    {
Chao Liu's avatar
Chao Liu committed
717
#if 0
718
719
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
720
        {
Chao Liu's avatar
Chao Liu committed
721
722
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
723
724
        }
        else
Chao Liu's avatar
Chao Liu committed
725
#endif
726
        {
727
728
729
730
731
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
732
733
                                    LeftPads{},
                                    RightPads{});
734
735
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
736

Chao Liu's avatar
Chao Liu committed
737
#if 0
738
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
739
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
740
741
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
742
#endif
743
    }
744
}