"benchmark/vscode:/vscode.git/clone" did not exist on "405765b18c83a252e3a4f55d208d74cd995b4f4b"
conv_driver.cpp 23.6 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include <cstdio>
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <thread>
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp"
17

Chao Liu's avatar
Chao Liu committed
18
int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
19
{
Chao Liu's avatar
Chao Liu committed
20
21
    using namespace ck;

22
23
24
25
26
27
28
29
30
31
32
    if(argc != 5)
    {
        printf("arg1: do_verification, arg2: do_log, arg3: init_method, arg4: nrepeat\n");
        exit(1);
    }

    const bool do_verification = atoi(argv[1]);
    const int init_method      = atoi(argv[2]);
    const bool do_log          = atoi(argv[3]);
    const int nrepeat          = atoi(argv[4]);

Chao Liu's avatar
Chao Liu committed
33
#if 0
34
35
36
37
38
39
40
    constexpr index_t N  = 8;
    constexpr index_t C  = 8;
    constexpr index_t Hi = 4;
    constexpr index_t Wi = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
41
42
43
44

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

45
46
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
47
48
49
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
50
51
    constexpr index_t Hi = 540;
    constexpr index_t Wi = 960;
Chao Liu's avatar
Chao Liu committed
52
53
54
55
56
57
58
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

59
60
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
61
62
63
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
64
65
    constexpr index_t Hi = 270;
    constexpr index_t Wi = 480;
Chao Liu's avatar
Chao Liu committed
66
    constexpr index_t K  = 16;
67
68
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
69
70

    using ConvStrides   = Sequence<1, 1>;
71
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
72

73
74
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
75
#elif 0
Chao Liu's avatar
Chao Liu committed
76
77
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
78
79
    constexpr index_t Hi = 1080;
    constexpr index_t Wi = 1920;
Chao Liu's avatar
Chao Liu committed
80
81
82
83
84
85
86
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

87
88
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
89
90
91
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 1;
92
93
    constexpr index_t Hi = 1024;
    constexpr index_t Wi = 2048;
Chao Liu's avatar
Chao Liu committed
94
95
96
97
98
99
100
    constexpr index_t K  = 4;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

101
102
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
103
#elif 0
Chao Liu's avatar
Chao Liu committed
104
105
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
106
107
    constexpr index_t Hi = 540;
    constexpr index_t Wi = 960;
Chao Liu's avatar
Chao Liu committed
108
109
110
111
112
113
114
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

115
116
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
117
118
119
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
120
121
    constexpr index_t Hi = 270;
    constexpr index_t Wi = 480;
Chao Liu's avatar
Chao Liu committed
122
123
124
125
126
127
128
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

129
130
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
131
132
133
134
#elif 0
    // 3x3, 36x36, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
135
136
    constexpr index_t Hi = 37;
    constexpr index_t Wi = 37;
Chao Liu's avatar
Chao Liu committed
137
138
139
140
141
142
143
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

144
145
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
146
147
148
149
#elif 0
    // 3x3, 35x35, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
150
151
    constexpr index_t Hi = 35;
    constexpr index_t Wi = 35;
Chao Liu's avatar
Chao Liu committed
152
153
154
155
156
157
158
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

159
160
161
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
162
163
164
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
165
166
    constexpr index_t Hi = 71;
    constexpr index_t Wi = 71;
Chao Liu's avatar
Chao Liu committed
167
168
169
170
171
172
173
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

174
175
176
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
#elif 0
Chao Liu's avatar
Chao Liu committed
177
    // 1x1, 8x8
178
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
179
    constexpr index_t C  = 1536;
180
181
    constexpr index_t Hi = 8;
    constexpr index_t Wi = 8;
Chao Liu's avatar
Chao Liu committed
182
183
184
185
186
187
188
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

189
190
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
191
192
193
194
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
195
196
    constexpr index_t Hi = 73;
    constexpr index_t Wi = 73;
Chao Liu's avatar
Chao Liu committed
197
198
199
200
201
202
203
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

204
205
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
206
207
208
209
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
210
211
    constexpr index_t Hi = 35;
    constexpr index_t Wi = 35;
Chao Liu's avatar
Chao Liu committed
212
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
213
214
215
216
217
218
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

219
220
221
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
#elif 0
Chao Liu's avatar
Chao Liu committed
222
223
224
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
225
226
    constexpr index_t Hi = 71;
    constexpr index_t Wi = 71;
Chao Liu's avatar
Chao Liu committed
227
228
229
230
231
232
233
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

234
235
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
236
#elif 0
Chao Liu's avatar
Chao Liu committed
237
238
239
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
240
241
    constexpr index_t Hi = 17;
    constexpr index_t Wi = 17;
Chao Liu's avatar
Chao Liu committed
242
243
244
245
246
247
248
    constexpr index_t K  = 128;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

249
250
251
    using InLeftPads  = Sequence<3, 0>;
    using InRightPads = Sequence<3, 0>;
#elif 1
Chao Liu's avatar
Chao Liu committed
252
253
254
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
255
256
    constexpr index_t Hi = 17;
    constexpr index_t Wi = 17;
Chao Liu's avatar
Chao Liu committed
257
    constexpr index_t K  = 128;
258
259
260
261
262
263
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

264
265
    using InLeftPads  = Sequence<0, 3>;
    using InRightPads = Sequence<0, 3>;
266
#elif 0
Chao Liu's avatar
Chao Liu committed
267
268
269
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
270
271
    constexpr index_t Hi = 299;
    constexpr index_t Wi = 299;
Chao Liu's avatar
Chao Liu committed
272
    constexpr index_t K  = 32;
273
274
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
275

Chao Liu's avatar
Chao Liu committed
276
    using ConvStrides   = Sequence<2, 2>;
277
278
    using ConvDilations = Sequence<1, 1>;

279
280
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
281
#elif 0
Chao Liu's avatar
Chao Liu committed
282
    // 3x3, 147x147
283
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
284
    constexpr index_t C  = 128;
285
286
    constexpr index_t Hi = 147;
    constexpr index_t Wi = 147;
Chao Liu's avatar
Chao Liu committed
287
    constexpr index_t K  = 128;
288
289
290
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
291
292
293
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

294
295
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
296
297
298
299
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
300
301
    constexpr index_t Hi = 149;
    constexpr index_t Wi = 149;
Chao Liu's avatar
Chao Liu committed
302
303
304
305
306
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
307
308
    using ConvDilations = Sequence<1, 1>;

309
310
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
311
#elif 0
Chao Liu's avatar
Chao Liu committed
312
313
314
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
315
316
    constexpr index_t Hi = 17;
    constexpr index_t Wi = 17;
Chao Liu's avatar
Chao Liu committed
317
318
319
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
320

Chao Liu's avatar
Chao Liu committed
321
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
322
323
    using ConvDilations = Sequence<1, 1>;

324
325
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
326
#elif 0
Chao Liu's avatar
Chao Liu committed
327
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
328
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
329
    constexpr index_t C  = 384;
330
331
    constexpr index_t Hi = 35;
    constexpr index_t Wi = 35;
Chao Liu's avatar
Chao Liu committed
332
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
333
334
335
336
337
338
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

339
340
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
341
#elif 0
Chao Liu's avatar
Chao Liu committed
342
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
343
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
344
    constexpr index_t C  = 288;
345
346
    constexpr index_t Hi = 35;
    constexpr index_t Wi = 35;
Chao Liu's avatar
Chao Liu committed
347
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
348
349
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
350

Chao Liu's avatar
Chao Liu committed
351
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
352
353
    using ConvDilations = Sequence<1, 1>;

354
355
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
356
#elif 0
Chao Liu's avatar
Chao Liu committed
357
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
358
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
359
    constexpr index_t C  = 384;
360
361
    constexpr index_t Hi = 8;
    constexpr index_t Wi = 8;
Chao Liu's avatar
Chao Liu committed
362
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
363
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
364
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
365
366
367
368

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

369
370
    using InLeftPads  = Sequence<0, 1>;
    using InRightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
371
#elif 0
Chao Liu's avatar
Chao Liu committed
372
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
373
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
374
    constexpr index_t C  = 448;
375
376
    constexpr index_t Hi = 8;
    constexpr index_t Wi = 8;
Chao Liu's avatar
Chao Liu committed
377
378
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
379
380
381
382
383
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

384
385
    using InLeftPads  = Sequence<1, 0>;
    using InRightPads = Sequence<1, 0>;
Chao Liu's avatar
Chao Liu committed
386
387
388
389
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
390
391
    constexpr index_t Hi = 147;
    constexpr index_t Wi = 147;
Chao Liu's avatar
Chao Liu committed
392
393
394
395
396
397
398
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

399
400
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
401
#elif 0
Chao Liu's avatar
Chao Liu committed
402
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
403
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
404
    constexpr index_t C  = 64;
405
406
    constexpr index_t Hi = 73;
    constexpr index_t Wi = 73;
Chao Liu's avatar
Chao Liu committed
407
408
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
409
410
411
412
413
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

414
415
    using InLeftPads  = Sequence<3, 0>;
    using InRightPads = Sequence<3, 0>;
Chao Liu's avatar
Chao Liu committed
416
417
418
419
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
420
421
    constexpr index_t Hi = 73;
    constexpr index_t Wi = 73;
Chao Liu's avatar
Chao Liu committed
422
423
424
425
426
427
428
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

429
430
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
431
#elif 0
Chao Liu's avatar
Chao Liu committed
432
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
433
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
434
    constexpr index_t C  = 1024;
435
436
    constexpr index_t Hi = 14;
    constexpr index_t Wi = 14;
Chao Liu's avatar
Chao Liu committed
437
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
438
439
440
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
441
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
442
443
    using ConvDilations = Sequence<1, 1>;

444
445
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
446
#elif 0
Chao Liu's avatar
Chao Liu committed
447
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
448
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
449
    constexpr index_t C  = 1024;
450
451
    constexpr index_t Hi = 14;
    constexpr index_t Wi = 14;
Chao Liu's avatar
Chao Liu committed
452
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
453
454
455
456
457
458
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

459
460
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
461
#elif 0
Chao Liu's avatar
Chao Liu committed
462
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
463
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
464
    constexpr index_t C  = 1024;
465
466
    constexpr index_t Hi = 14;
    constexpr index_t Wi = 14;
Chao Liu's avatar
Chao Liu committed
467
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
468
469
470
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
471
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
472
473
    using ConvDilations = Sequence<1, 1>;

474
475
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
476
#elif 1
Chao Liu's avatar
Chao Liu committed
477
478
479
    // 3x3, 28x28
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
480
481
    constexpr index_t Hi = 28;
    constexpr index_t Wi = 28;
Chao Liu's avatar
Chao Liu committed
482
483
484
485
486
487
488
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

489
490
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
491
492
#elif 1
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
493
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
494
    constexpr index_t C  = 256;
495
496
    constexpr index_t Hi = 14;
    constexpr index_t Wi = 14;
Chao Liu's avatar
Chao Liu committed
497
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
498
499
500
501
502
503
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

504
505
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
506
#elif 0
Chao Liu's avatar
Chao Liu committed
507
508
509
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
510
511
    constexpr index_t Hi = 56;
    constexpr index_t Wi = 56;
Chao Liu's avatar
Chao Liu committed
512
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
513
514
515
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
516
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
517
518
    using ConvDilations = Sequence<1, 1>;

519
520
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
521
#elif 0
Chao Liu's avatar
Chao Liu committed
522
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
523
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
524
    constexpr index_t C  = 3;
525
526
    constexpr index_t Hi = 230;
    constexpr index_t Wi = 230;
Chao Liu's avatar
Chao Liu committed
527
528
529
530
531
532
533
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

534
535
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
536
537
538
539
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
540
541
    constexpr index_t Hi = 28;
    constexpr index_t Wi = 28;
Chao Liu's avatar
Chao Liu committed
542
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
543
544
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
545

Chao Liu's avatar
Chao Liu committed
546
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
547
548
    using ConvDilations = Sequence<1, 1>;

549
550
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
551
#elif 0
Chao Liu's avatar
Chao Liu committed
552
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
553
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
554
    constexpr index_t C  = 512;
555
556
    constexpr index_t Hi = 28;
    constexpr index_t Wi = 28;
Chao Liu's avatar
Chao Liu committed
557
558
559
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
560
561
562
563

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

564
565
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
566
#elif 1
Chao Liu's avatar
Chao Liu committed
567
    // 1x1, 7x7
568
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
569
    constexpr index_t C  = 512;
570
571
    constexpr index_t Hi = 7;
    constexpr index_t Wi = 7;
Chao Liu's avatar
Chao Liu committed
572
573
574
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
575
576
577
578

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

579
580
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
581
#elif 0
Chao Liu's avatar
Chao Liu committed
582
    // 3x3, 7x7
583
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
584
    constexpr index_t C  = 512;
585
586
    constexpr index_t Hi = 7;
    constexpr index_t Wi = 7;
Chao Liu's avatar
Chao Liu committed
587
588
589
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
590
591
592
593

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

594
595
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
596
#elif 0
Chao Liu's avatar
Chao Liu committed
597
    // 1x1, 56x56
598
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
599
    constexpr index_t C  = 64;
600
601
    constexpr index_t Hi = 56;
    constexpr index_t Wi = 56;
Chao Liu's avatar
Chao Liu committed
602
603
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
604
    constexpr index_t X  = 1;
605
606
607
608

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

609
610
    using InLeftPads  = Sequence<0, 0>;
    using InRightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
611
#elif 0
Chao Liu's avatar
Chao Liu committed
612
613
614
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
615
616
    constexpr index_t Hi = 56;
    constexpr index_t Wi = 56;
Chao Liu's avatar
Chao Liu committed
617
618
619
620
621
622
623
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

624
625
    using InLeftPads  = Sequence<1, 1>;
    using InRightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
626
#endif
Chao Liu's avatar
Chao Liu committed
627

628
629
    constexpr index_t YEff = (Y - 1) * ConvDilations{}[0] + 1;
    constexpr index_t XEff = (X - 1) * ConvDilations{}[1] + 1;
Chao Liu's avatar
Chao Liu committed
630

631
632
    constexpr index_t Ho = (Hi + InLeftPads{}[0] + InRightPads{}[0] - YEff) / ConvStrides{}[0] + 1;
    constexpr index_t Wo = (Wi + InLeftPads{}[1] + InRightPads{}[1] - XEff) / ConvStrides{}[1] + 1;
Chao Liu's avatar
Chao Liu committed
633

634
#if 1
Chao Liu's avatar
Chao Liu committed
635
    constexpr index_t in_vector_size = 1;
636
    using in_data_t                  = typename vector_type<float, in_vector_size>::type;
Chao Liu's avatar
Chao Liu committed
637
638
639
640
    using acc_data_t                 = float;
    using out_data_t                 = float;
#elif 0
    constexpr index_t in_vector_size = 1;
641
    using in_data_t                  = typename vector_type<float, in_vector_size>::type;
Chao Liu's avatar
Chao Liu committed
642
643
644
    using acc_data_t                 = float;
    using out_data_t                 = int8_t;
#elif 1
645
    constexpr index_t in_vector_size = 16;
646
    using in_data_t                  = typename vector_type<int8_t, in_vector_size>::type;
Chao Liu's avatar
Chao Liu committed
647
648
    using acc_data_t                 = int32_t;
    using out_data_t                 = int8_t;
Chao Liu's avatar
Chao Liu committed
649
650
#endif

651
652
653
654
655
656
    Tensor<in_data_t> in_nchw(HostTensorDescriptor(std::initializer_list<index_t>{N, C, Hi, Wi}));
    Tensor<in_data_t> wei_kcyx(HostTensorDescriptor(std::initializer_list<index_t>{K, C, Y, X}));
    Tensor<out_data_t> out_nkhw_host(
        HostTensorDescriptor(std::initializer_list<index_t>{N, K, Ho, Wo}));
    Tensor<out_data_t> out_nkhw_device(
        HostTensorDescriptor(std::initializer_list<index_t>{N, K, Ho, Wo}));
Chao Liu's avatar
Chao Liu committed
657

658
659
660
    ostream_HostTensorDescriptor(in_nchw.mDesc, std::cout << "in_nchw_desc: ");
    ostream_HostTensorDescriptor(wei_kcyx.mDesc, std::cout << "wei_kcyx_desc: ");
    ostream_HostTensorDescriptor(out_nkhw_host.mDesc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
661

662
663
664
665
    print_array("InLeftPads", InLeftPads{});
    print_array("InRightPads", InRightPads{});
    print_array("ConvStrides", ConvStrides{});
    print_array("ConvDilations", ConvDilations{});
Chao Liu's avatar
Chao Liu committed
666

667
    std::size_t num_thread = std::thread::hardware_concurrency();
668
669
670

    if(do_verification)
    {
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
        switch(init_method)
        {
        case 0:
            in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
            wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
            break;
        case 1:
            in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
            wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
            break;
        case 2:
            in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
            wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
            break;
        case 3:
            in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
            wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
            break;
        default:
            in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

            auto gen_wei = [](auto... is) {
                return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
            };
            wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
        }
697
    }
Chao Liu's avatar
Chao Liu committed
698

699
700
701
702
703
    constexpr auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, Hi, Wi>{});
    constexpr auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    constexpr auto out_nkhw_desc = make_native_tensor_descriptor_packed(Sequence<N, K, Ho, Wo>{});

#if 1
Chao Liu's avatar
Chao Liu committed
704
705
706
707
708
709
710
711
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
712
713
                                                                 InLeftPads{},
                                                                 InRightPads{},
Chao Liu's avatar
Chao Liu committed
714
715
716
717
718
719
720
721
722
723
                                                                 nrepeat);
#elif 0
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
724
725
                                                                 InLeftPads{},
                                                                 InRightPads{},
Chao Liu's avatar
Chao Liu committed
726
727
728
729
730
731
732
733
734
735
                                                                 nrepeat);
#elif 0
    device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
736
737
                                                                 InLeftPads{},
                                                                 InRightPads{},
Chao Liu's avatar
Chao Liu committed
738
                                                                 nrepeat);
739
#endif
Chao Liu's avatar
Chao Liu committed
740

741
    if(do_verification)
742
    {
Chao Liu's avatar
Chao Liu committed
743
744
745
746
747
        host_direct_convolution(in_nchw,
                                wei_kcyx,
                                out_nkhw_host,
                                ConvStrides{},
                                ConvDilations{},
748
749
                                InLeftPads{},
                                InRightPads{});
Chao Liu's avatar
Chao Liu committed
750

751
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
752

Chao Liu's avatar
Chao Liu committed
753
754
755
756
757
758
759
        if(do_log)
        {
            LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
            LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
            LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
            LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
        }
760
    }
761
}