conv_driver.cpp 23.9 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
#include <iostream>
Chao Liu's avatar
Chao Liu committed
2
3
#include <numeric>
#include <initializer_list>
Chao Liu's avatar
Chao Liu committed
4
#include <cstdlib>
Chao Liu's avatar
Chao Liu committed
5
#include <stdlib.h>
Chao Liu's avatar
Chao Liu committed
6
#include <half.hpp>
Chao Liu's avatar
Chao Liu committed
7
#include "config.hpp"
Chao Liu's avatar
Chao Liu committed
8
#include "print.hpp"
Chao Liu's avatar
Chao Liu committed
9
#include "device.hpp"
Chao Liu's avatar
Chao Liu committed
10
#include "host_tensor_generator.hpp"
Chao Liu's avatar
Chao Liu committed
11
#include "conv_common.hpp"
12
#include "host_conv.hpp"
Chao Liu's avatar
Chao Liu committed
13
#include "device_tensor.hpp"
14
15
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
Chao Liu's avatar
Chao Liu committed
16
#include "device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp"
17
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
18
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp"
19

root's avatar
root committed
20
21
#include "device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp"

Chao Liu's avatar
Chao Liu committed
22
int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
23
{
Chao Liu's avatar
Chao Liu committed
24
25
    using namespace ck;

26
#if 0
Chao Liu's avatar
Chao Liu committed
27
28
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
Chao Liu's avatar
Chao Liu committed
29
30
31
32
33
34
35
36
37
    constexpr index_t HI = 1080;
    constexpr index_t WI = 1920;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

root's avatar
merge  
root committed
38
39
    using LeftPads                   = Sequence<0, 0>;
    using RightPads                  = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
40
41
#elif 0
    constexpr index_t N  = 1;
Chao Liu's avatar
Chao Liu committed
42
    constexpr index_t C  = 16;
Chao Liu's avatar
Chao Liu committed
43
44
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
Chao Liu's avatar
Chao Liu committed
45
46
47
48
49
50
51
52
53
54
55
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    constexpr index_t N  = 1;
Chao Liu's avatar
Chao Liu committed
56
    constexpr index_t C  = 16;
Chao Liu's avatar
Chao Liu committed
57
58
59
60
61
62
63
64
65
66
67
    constexpr index_t HI = 270;
    constexpr index_t WI = 480;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
fix bug  
Chao Liu committed
68
#elif 1
Chao Liu's avatar
Chao Liu committed
69
70
    constexpr index_t N  = 1;
    constexpr index_t C  = 4;
root's avatar
root committed
71
72
    constexpr index_t HI = 1024;
    constexpr index_t WI = 2048;
Chao Liu's avatar
Chao Liu committed
73
74
75
76
77
78
79
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
80
81
82
83
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    constexpr index_t N  = 1;
Chao Liu's avatar
Chao Liu committed
84
    constexpr index_t C  = 16;
Chao Liu's avatar
Chao Liu committed
85
86
87
88
89
90
91
92
93
94
95
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
    constexpr index_t K  = 16;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
test  
Chao Liu committed
96
#elif 0
Chao Liu's avatar
Chao Liu committed
97
    constexpr index_t N  = 1;
Chao Liu's avatar
Chao Liu committed
98
    constexpr index_t C  = 16;
Chao Liu's avatar
Chao Liu committed
99
100
101
    constexpr index_t HI = 270;
    constexpr index_t WI = 480;
    constexpr index_t K  = 16;
Chao Liu's avatar
Chao Liu committed
102
103
104
105
106
107
108
109
110
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
111
112
113
114
115
116
117
118
119
120
121
122
123
124
    // 3x3, 36x36, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 37;
    constexpr index_t WI = 37;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
125
#elif 0
126
127
128
129
130
131
132
133
    // 3x3, 35x35, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
134

135
    using ConvStrides   = Sequence<2, 2>;
136
137
138
139
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
140
#elif 0
Chao Liu's avatar
Chao Liu committed
141
    // 3x3, 71x71
Chao Liu's avatar
Chao Liu committed
142
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
143
144
145
146
147
148
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
149

Chao Liu's avatar
Chao Liu committed
150
    using ConvStrides   = Sequence<2, 2>;
151
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
152

Chao Liu's avatar
Chao Liu committed
153
154
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
155
#elif 1
Chao Liu's avatar
Chao Liu committed
156
    // 1x1, 8x8
157
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
191
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
215
#elif 1
Chao Liu's avatar
Chao Liu committed
216
217
218
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
219
220
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
221
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
222
223
224
225
226
227
228
229
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
230
#elif 0
Chao Liu's avatar
Chao Liu committed
231
232
233
234
235
236
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
237
238
239
240
241
242
243
244
245
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
246
247
248
249
250
251
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
252
253
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
254

Chao Liu's avatar
Chao Liu committed
255
    using ConvStrides   = Sequence<2, 2>;
256
257
    using ConvDilations = Sequence<1, 1>;

258
259
260
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
261
    // 3x3, 147x147
262
    constexpr index_t N  = 128;
263
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
264
265
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
266
    constexpr index_t K  = 128;
267
268
269
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
286
287
    using ConvDilations = Sequence<1, 1>;

288
289
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
290
#elif 0
Chao Liu's avatar
Chao Liu committed
291
292
293
294
295
296
297
298
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
299

Chao Liu's avatar
Chao Liu committed
300
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
301
302
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
303
304
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
305
#elif 0
Chao Liu's avatar
Chao Liu committed
306
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
307
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
308
309
310
311
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
312
313
314
315
316
317
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
318
319
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
320
#elif 0
Chao Liu's avatar
Chao Liu committed
321
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
322
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
323
324
325
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
326
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
327
328
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
329

Chao Liu's avatar
Chao Liu committed
330
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
331
332
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
333
334
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
335
#elif 0
Chao Liu's avatar
Chao Liu committed
336
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
337
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
338
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
339
340
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
341
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
342
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
343
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
344
345
346
347

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
348
349
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
350
#elif 0
Chao Liu's avatar
Chao Liu committed
351
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
352
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
353
354
355
356
357
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
358
359
360
361
362
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
378
379
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
380
#elif 0
Chao Liu's avatar
Chao Liu committed
381
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
382
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
383
384
385
386
387
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
388
389
390
391
392
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
408
409
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
410
#elif 0
Chao Liu's avatar
Chao Liu committed
411
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
412
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
413
414
415
416
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
417
418
419
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
420
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
421
422
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
423
424
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
425
#elif 0
Chao Liu's avatar
Chao Liu committed
426
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
427
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
428
429
430
431
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
432
433
434
435
436
437
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
438
439
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
440
#elif 0
Chao Liu's avatar
Chao Liu committed
441
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
442
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
443
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
444
445
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
446
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
447
448
449
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
450
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
451
452
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
453
454
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
455
#elif 1
Chao Liu's avatar
Chao Liu committed
456
457
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
458
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
459
460
461
462
463
464
465
466
467
468
469
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
470
#elif 1
Chao Liu's avatar
Chao Liu committed
471
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
472
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
473
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
474
475
476
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
477
478
479
480
481
482
483
484
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
485
#elif 0
Chao Liu's avatar
Chao Liu committed
486
487
488
489
490
491
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
492
493
494
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
495
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
496
497
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
498
499
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
500
#elif 0
Chao Liu's avatar
Chao Liu committed
501
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
502
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
522
523
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
524

Chao Liu's avatar
Chao Liu committed
525
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
526
527
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
528
529
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
530
#elif 0
Chao Liu's avatar
Chao Liu committed
531
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
532
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
533
534
535
536
537
538
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
539
540
541
542
543
544

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
545
#elif 1
Chao Liu's avatar
Chao Liu committed
546
    // 1x1, 7x7
547
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
548
    constexpr index_t C  = 512;
549
550
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
551
552
553
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
554
555
556
557

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
558
559
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
560
#elif 0
Chao Liu's avatar
Chao Liu committed
561
    // 3x3, 7x7
562
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
563
564
565
566
567
568
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
569
570
571
572

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
573
574
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
575
#elif 0
Chao Liu's avatar
Chao Liu committed
576
    // 1x1, 56x56
577
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
578
579
580
581
582
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
583
    constexpr index_t X  = 1;
584
585
586
587

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
588
589
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
590
#elif 0
Chao Liu's avatar
Chao Liu committed
591
592
593
594
595
596
597
598
599
600
601
602
603
604
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
605
#endif
Chao Liu's avatar
Chao Liu committed
606

Chao Liu's avatar
Chao Liu committed
607
608
609
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
610
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
611

Chao Liu's avatar
Chao Liu committed
612
613
614
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
615
616
617
618
    print_array("LeftPads", to_multi_index(LeftPads{}));
    print_array("RightPads", to_multi_index(RightPads{}));
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));
Chao Liu's avatar
Chao Liu committed
619

root's avatar
root committed
620
621
#if 1
    using in_data_t                  = float;
Chao Liu's avatar
Chao Liu committed
622
    constexpr index_t in_vector_size = 1;
root's avatar
root committed
623
    using acc_data_t                 = float;
Chao Liu's avatar
Chao Liu committed
624
    using out_data_t                 = float;
Chao Liu's avatar
Chao Liu committed
625
#elif 1
Chao Liu's avatar
Chao Liu committed
626
627
628
629
    using in_data_t                  = int8_t;
    constexpr index_t in_vector_size = 4;
    using acc_data_t                 = int32_t;
    using out_data_t                 = int8_t;
Chao Liu's avatar
Chao Liu committed
630
631
632
633
634
635
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
636

Chao Liu's avatar
Chao Liu committed
637
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
638

Chao Liu's avatar
Chao Liu committed
639
    if(argc != 4)
Chao Liu's avatar
Chao Liu committed
640
    {
Chao Liu's avatar
Chao Liu committed
641
        printf("arg1: do_verification, arg2: do_log, arg3: nrepeat\n");
Chao Liu's avatar
Chao Liu committed
642
643
644
645
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
646
647
    bool do_log          = atoi(argv[2]);
    index_t nrepeat      = atoi(argv[3]);
648
649
650

    if(do_verification)
    {
root's avatar
root committed
651
#if 0
652
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
653
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
654
655
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
656
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
657
#elif 0
658
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
659
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
660
#elif 1
661
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
662
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
663
#elif 0
664
665
666
667
668
669
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
670
#endif
671
    }
Chao Liu's avatar
Chao Liu committed
672

Chao Liu's avatar
Chao Liu committed
673
#if 0
674
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
675
676
677
678
679
680
681
682
683
684
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
Chao Liu's avatar
Chao Liu committed
685
#elif 0
686
687
688
689
690
691
692
693
694
695
696
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
Chao Liu's avatar
Chao Liu committed
697
#elif 0
Chao Liu's avatar
Chao Liu committed
698
699
700
701
702
703
704
705
706
707
708
    device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
Chao Liu's avatar
Chao Liu committed
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t,
                                                                         in_vector_size,
                                                                         acc_data_t,
                                                                         out_data_t>

        (in_nchw_desc,
         in_nchw,
         wei_kcyx_desc,
         wei_kcyx,
         out_nkhw_desc,
         out_nkhw_device,
         ConvStrides{},
         ConvDilations{},
         LeftPads{},
         RightPads{},
         nrepeat);
root's avatar
root committed
726
727
728
729
730
731
732
733
734
735
736
737
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
738
#endif
Chao Liu's avatar
Chao Liu committed
739

740
    if(do_verification)
741
    {
Chao Liu's avatar
Chao Liu committed
742
743
744
745
746
747
748
749
        host_direct_convolution(in_nchw,
                                wei_kcyx,
                                out_nkhw_host,
                                ConvStrides{},
                                ConvDilations{},
                                LeftPads{},
                                RightPads{});

750
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
751

Chao Liu's avatar
Chao Liu committed
752
753
754
755
756
757
758
        if(do_log)
        {
            LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
            LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
            LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
            LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
        }
759
    }
760
}