conv_driver.cpp 25.8 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include <iostream>
Chao Liu's avatar
Chao Liu committed
2
3
#include <numeric>
#include <initializer_list>
Chao Liu's avatar
Chao Liu committed
4
#include <cstdlib>
Chao Liu's avatar
Chao Liu committed
5
#include <stdlib.h>
Chao Liu's avatar
Chao Liu committed
6
#include <half.hpp>
Chao Liu's avatar
Chao Liu committed
7
#include "config.hpp"
Chao Liu's avatar
Chao Liu committed
8
#include "print.hpp"
Chao Liu's avatar
Chao Liu committed
9
#include "device.hpp"
Chao Liu's avatar
Chao Liu committed
10
#include "host_tensor_generator.hpp"
Chao Liu's avatar
Chao Liu committed
11
#include "conv_common.hpp"
12
#include "host_conv.hpp"
Chao Liu's avatar
Chao Liu committed
13
#include "device_tensor.hpp"
Chao Liu's avatar
Chao Liu committed
14
15
16
17
18
19
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp"
20

Jing Zhang's avatar
Jing Zhang committed
21
22
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.hpp"

Chao Liu's avatar
Chao Liu committed
23
int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
24
{
Chao Liu's avatar
Chao Liu committed
25
26
    using namespace ck;

Jing Zhang's avatar
Jing Zhang committed
27
#if 1
Jing Zhang's avatar
Jing Zhang committed
28
    constexpr index_t N  = 4;
Jing Zhang's avatar
Jing Zhang committed
29
30
31
    constexpr index_t C  = 16;
    constexpr index_t HI = 4;
    constexpr index_t WI = 4;
Jing Zhang's avatar
Jing Zhang committed
32
    constexpr index_t K  = 64;
Jing Zhang's avatar
Jing Zhang committed
33
34
35
36
37
38
39
40
41
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
42
43
44
45
46
47
48
49
50
51
52
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
    constexpr index_t HI = 1080;
    constexpr index_t WI = 1920;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Jing Zhang's avatar
Jing Zhang committed
53
54
    using LeftPads                   = Sequence<0, 0>;
    using RightPads                  = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
55
56
57
58
59
60
61
62
63
64
65
66
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Jing Zhang's avatar
Jing Zhang committed
67
68
    using LeftPads                   = Sequence<0, 0>;
    using RightPads                  = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
69
70
71
72
73
74
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
    constexpr index_t HI = 270;
    constexpr index_t WI = 480;
    constexpr index_t K  = 16;
75
76
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
77
78

    using ConvStrides   = Sequence<1, 1>;
79
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
80

Jing Zhang's avatar
Jing Zhang committed
81
82
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
83
#elif 0
Chao Liu's avatar
Chao Liu committed
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
    constexpr index_t HI = 1080;
    constexpr index_t WI = 1920;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 1;
    constexpr index_t HI = 1024;
    constexpr index_t WI = 2048;
    constexpr index_t K  = 4;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
111
#elif 0
Chao Liu's avatar
Chao Liu committed
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
    constexpr index_t HI = 270;
    constexpr index_t WI = 480;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 37x37, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 37;
    constexpr index_t WI = 37;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
167
168
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
169
#elif 1
Chao Liu's avatar
Chao Liu committed
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
    // 3x3, 71x71, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 1
Chao Liu's avatar
Chao Liu committed
185
    // 1x1, 8x8
186
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
220
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
221
222
223
224
225
226
227
228
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
229
#elif 1
Chao Liu's avatar
Chao Liu committed
230
231
232
233
234
235
236
237
238
239
240
241
242
243
    // 3x3, 71x71, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
244
#elif 0
Chao Liu's avatar
Chao Liu committed
245
246
247
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
248
249
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
Chao Liu's avatar
Chao Liu committed
250
251
252
253
254
255
256
257
258
    constexpr index_t K  = 128;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
Chao Liu's avatar
Chao Liu committed
259
#elif 0
Chao Liu's avatar
Chao Liu committed
260
261
262
263
264
265
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
266
267
268
269
270
271
272
273
274
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
275
276
277
278
279
280
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
281
282
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
283

Chao Liu's avatar
Chao Liu committed
284
    using ConvStrides   = Sequence<2, 2>;
285
286
    using ConvDilations = Sequence<1, 1>;

287
288
289
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
290
    // 3x3, 147x147
291
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
292
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
293
294
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
Chao Liu's avatar
Chao Liu committed
295
    constexpr index_t K  = 128;
296
297
298
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
315
316
    using ConvDilations = Sequence<1, 1>;

317
318
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
319
#elif 0
Chao Liu's avatar
Chao Liu committed
320
321
322
323
324
325
326
327
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
328

Chao Liu's avatar
Chao Liu committed
329
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
330
331
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
332
333
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
334
#elif 0
Chao Liu's avatar
Chao Liu committed
335
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
336
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
337
338
339
340
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
341
342
343
344
345
346
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
347
348
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
349
#elif 0
Chao Liu's avatar
Chao Liu committed
350
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
351
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
352
353
354
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
355
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
356
357
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
358

Chao Liu's avatar
Chao Liu committed
359
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
360
361
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
362
363
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
364
#elif 0
Chao Liu's avatar
Chao Liu committed
365
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
366
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
367
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
368
369
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
370
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
371
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
372
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
373
374
375
376

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
377
378
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
379
#elif 0
Chao Liu's avatar
Chao Liu committed
380
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
381
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
382
383
384
385
386
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
387
388
389
390
391
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
407
408
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
409
#elif 0
Chao Liu's avatar
Chao Liu committed
410
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
411
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
412
413
414
415
416
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
417
418
419
420
421
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
437
438
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
439
#elif 0
Chao Liu's avatar
Chao Liu committed
440
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
441
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
442
443
444
445
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
446
447
448
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
449
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
450
451
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
452
453
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
454
#elif 0
Chao Liu's avatar
Chao Liu committed
455
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
456
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
457
458
459
460
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
461
462
463
464
465
466
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
467
468
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
469
#elif 0
Chao Liu's avatar
Chao Liu committed
470
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
471
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
472
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
473
474
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
475
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
476
477
478
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
479
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
480
481
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
482
483
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
484
#elif 1
Chao Liu's avatar
Chao Liu committed
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
    // 3x3, 28x28
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 1
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
501
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
502
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
503
504
505
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
506
507
508
509
510
511
512
513
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
514
#elif 0
Chao Liu's avatar
Chao Liu committed
515
516
517
518
519
520
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
521
522
523
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
524
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
525
526
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
527
528
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
529
#elif 0
Chao Liu's avatar
Chao Liu committed
530
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
531
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
551
552
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
553

Chao Liu's avatar
Chao Liu committed
554
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
555
556
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
557
558
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
559
#elif 0
Chao Liu's avatar
Chao Liu committed
560
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
561
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
562
563
564
565
566
567
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
568
569
570
571
572
573

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
574
#elif 1
Chao Liu's avatar
Chao Liu committed
575
    // 1x1, 7x7
576
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
577
    constexpr index_t C  = 512;
578
579
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
580
581
582
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
583
584
585
586

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
587
588
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
589
#elif 0
Chao Liu's avatar
Chao Liu committed
590
    // 3x3, 7x7
591
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
592
593
594
595
596
597
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
598
599
600
601

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
602
603
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
604
#elif 0
Chao Liu's avatar
Chao Liu committed
605
    // 1x1, 56x56
606
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
607
608
609
610
611
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
612
    constexpr index_t X  = 1;
613
614
615
616

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
617
618
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
619
#elif 0
Chao Liu's avatar
Chao Liu committed
620
621
622
623
624
625
626
627
628
629
630
631
632
633
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
634
#endif
Chao Liu's avatar
Chao Liu committed
635

Chao Liu's avatar
Chao Liu committed
636
637
638
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
639
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
640

Chao Liu's avatar
Chao Liu committed
641
642
643
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
644
645
646
647
648
    print_array("LeftPads", to_multi_index(LeftPads{}));
    print_array("RightPads", to_multi_index(RightPads{}));
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));

Jing Zhang's avatar
Jing Zhang committed
649
#if 1
Chao Liu's avatar
Chao Liu committed
650
651
652
653
654
655
656
657
658
659
660
    using in_data_t                  = float;
    constexpr index_t in_vector_size = 1;
    using acc_data_t                 = float;
    using out_data_t                 = float;
#elif 0
    using in_data_t                  = float;
    constexpr index_t in_vector_size = 1;
    using acc_data_t                 = float;
    using out_data_t                 = int8_t;
#elif 1
    using in_data_t                  = int8_t;
661
    constexpr index_t in_vector_size = 16;
Chao Liu's avatar
Chao Liu committed
662
663
    using acc_data_t                 = int32_t;
    using out_data_t                 = int8_t;
Chao Liu's avatar
Chao Liu committed
664
665
666
667
668
669
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
670

Chao Liu's avatar
Chao Liu committed
671
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
672

Chao Liu's avatar
Chao Liu committed
673
    if(argc != 4)
Chao Liu's avatar
Chao Liu committed
674
    {
Chao Liu's avatar
Chao Liu committed
675
        printf("arg1: do_verification, arg2: do_log, arg3: nrepeat\n");
Chao Liu's avatar
Chao Liu committed
676
677
678
679
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
680
681
    bool do_log          = atoi(argv[2]);
    index_t nrepeat      = atoi(argv[3]);
682
683
684

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
685
#if 0
686
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
687
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
688
689
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
690
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Jing Zhang's avatar
Jing Zhang committed
691
#elif 1
Chao Liu's avatar
Chao Liu committed
692
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
693
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
694
#elif 1
695
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
696
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
697
#elif 0
698
699
700
701
702
703
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
704
#endif
705
    }
Chao Liu's avatar
Chao Liu committed
706

Chao Liu's avatar
Chao Liu committed
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
#if 0
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
#elif 0
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
#elif 0
    device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
Jing Zhang's avatar
Jing Zhang committed
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw<in_data_t,
                                                                                in_vector_size,
                                                                                acc_data_t,
                                                                                out_data_t>(
        in_nchw_desc,
        in_nchw,
        wei_kcyx_desc,
        wei_kcyx,
        out_nkhw_desc,
        out_nkhw_device,
        ConvStrides{},
        ConvDilations{},
        LeftPads{},
        RightPads{},
        nrepeat);
759
#elif 0
Chao Liu's avatar
Chao Liu committed
760
761
762
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t,
                                                                         in_vector_size,
                                                                         acc_data_t,
763
764
765
766
767
768
769
770
771
772
773
774
                                                                         out_data_t>(
        in_nchw_desc,
        in_nchw,
        wei_kcyx_desc,
        wei_kcyx,
        out_nkhw_desc,
        out_nkhw_device,
        ConvStrides{},
        ConvDilations{},
        LeftPads{},
        RightPads{},
        nrepeat);
775
#elif 1
Chao Liu's avatar
Chao Liu committed
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk<in_data_t,
                                                                         in_vector_size,
                                                                         acc_data_t,
                                                                         out_data_t>

        (in_nchw_desc,
         in_nchw,
         wei_kcyx_desc,
         wei_kcyx,
         out_nkhw_desc,
         out_nkhw_device,
         ConvStrides{},
         ConvDilations{},
         LeftPads{},
         RightPads{},
         nrepeat);
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw<in_data_t,
                                                                         in_vector_size,
                                                                         acc_data_t,
                                                                         out_data_t>(
        in_nchw_desc,
        in_nchw,
        wei_kcyx_desc,
        wei_kcyx,
        out_nkhw_desc,
        out_nkhw_device,
        ConvStrides{},
        ConvDilations{},
        LeftPads{},
        RightPads{},
        nrepeat);
808
#endif
Chao Liu's avatar
Chao Liu committed
809

810
    if(do_verification)
811
    {
Chao Liu's avatar
Chao Liu committed
812
813
814
815
816
817
818
819
        host_direct_convolution(in_nchw,
                                wei_kcyx,
                                out_nkhw_host,
                                ConvStrides{},
                                ConvDilations{},
                                LeftPads{},
                                RightPads{});

820
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
821

Chao Liu's avatar
Chao Liu committed
822
823
824
825
826
827
828
        if(do_log)
        {
            LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
            LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
            LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
            LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
        }
829
    }
830
}