// conv_driver.cpp
#include <stdlib.h>

#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <thread>

#include <half.hpp>

#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"

int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
19
{
Chao Liu's avatar
Chao Liu committed
20
21
    using namespace ck;

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#if 0
    // 3x3, 36x36, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 37;
    constexpr index_t WI = 37;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
37
#elif 0
38
39
40
41
42
43
44
45
    // 3x3, 35x35, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
46

47
    using ConvStrides   = Sequence<2, 2>;
48
49
50
51
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
52
#elif 1
Chao Liu's avatar
Chao Liu committed
53
    // 3x3, 71x71
Chao Liu's avatar
Chao Liu committed
54
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
55
56
57
58
59
60
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
61

Chao Liu's avatar
Chao Liu committed
62
    using ConvStrides   = Sequence<2, 2>;
63
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
64

Chao Liu's avatar
Chao Liu committed
65
66
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
67
#elif 1
Chao Liu's avatar
Chao Liu committed
68
    // 1x1, 8x8
69
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
103
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
127
#elif 1
Chao Liu's avatar
Chao Liu committed
128
129
130
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
131
132
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
133
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
134
135
136
137
138
139
140
141
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
142
#elif 0
Chao Liu's avatar
Chao Liu committed
143
144
145
146
147
148
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
149
150
151
152
153
154
155
156
157
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
158
159
160
161
162
163
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
164
165
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
166

Chao Liu's avatar
Chao Liu committed
167
    using ConvStrides   = Sequence<2, 2>;
168
169
    using ConvDilations = Sequence<1, 1>;

170
171
172
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
173
    // 3x3, 147x147
174
    constexpr index_t N  = 128;
175
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
176
177
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
178
    constexpr index_t K  = 128;
179
180
181
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
198
199
    using ConvDilations = Sequence<1, 1>;

200
201
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
202
#elif 0
Chao Liu's avatar
Chao Liu committed
203
204
205
206
207
208
209
210
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
211

Chao Liu's avatar
Chao Liu committed
212
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
213
214
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
215
216
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
217
#elif 0
Chao Liu's avatar
Chao Liu committed
218
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
219
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
220
221
222
223
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
224
225
226
227
228
229
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
230
231
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
232
#elif 0
Chao Liu's avatar
Chao Liu committed
233
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
234
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
235
236
237
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
238
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
239
240
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
241

Chao Liu's avatar
Chao Liu committed
242
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
243
244
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
245
246
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
247
#elif 0
Chao Liu's avatar
Chao Liu committed
248
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
249
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
250
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
251
252
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
253
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
254
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
255
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
256
257
258
259

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
260
261
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
262
#elif 0
Chao Liu's avatar
Chao Liu committed
263
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
264
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
265
266
267
268
269
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
270
271
272
273
274
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
290
291
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
292
#elif 0
Chao Liu's avatar
Chao Liu committed
293
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
294
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
295
296
297
298
299
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
300
301
302
303
304
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
320
321
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
322
#elif 0
Chao Liu's avatar
Chao Liu committed
323
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
324
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
325
326
327
328
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
329
330
331
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
332
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
333
334
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
335
336
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
337
#elif 0
Chao Liu's avatar
Chao Liu committed
338
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
339
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
340
341
342
343
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
344
345
346
347
348
349
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
350
351
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
352
#elif 0
Chao Liu's avatar
Chao Liu committed
353
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
354
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
355
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
356
357
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
358
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
359
360
361
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
362
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
363
364
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
365
366
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
367
#elif 1
Chao Liu's avatar
Chao Liu committed
368
369
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
370
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
371
372
373
374
375
376
377
378
379
380
381
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
382
#elif 1
Chao Liu's avatar
Chao Liu committed
383
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
384
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
385
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
386
387
388
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
389
390
391
392
393
394
395
396
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
397
#elif 0
Chao Liu's avatar
Chao Liu committed
398
399
400
401
402
403
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
404
405
406
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
407
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
408
409
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
410
411
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
412
#elif 0
Chao Liu's avatar
Chao Liu committed
413
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
414
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
434
435
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
436

Chao Liu's avatar
Chao Liu committed
437
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
438
439
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
440
441
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
442
#elif 0
Chao Liu's avatar
Chao Liu committed
443
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
444
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
445
446
447
448
449
450
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
451
452
453
454
455
456

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
457
#elif 1
Chao Liu's avatar
Chao Liu committed
458
    // 1x1, 7x7
459
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
460
    constexpr index_t C  = 512;
461
462
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
463
464
465
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
466
467
468
469

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
470
471
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
472
#elif 0
Chao Liu's avatar
Chao Liu committed
473
    // 3x3, 7x7
474
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
475
476
477
478
479
480
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
481
482
483
484

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
485
486
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
487
#elif 0
Chao Liu's avatar
Chao Liu committed
488
    // 1x1, 56x56
489
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
490
491
492
493
494
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
495
    constexpr index_t X  = 1;
496
497
498
499

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
500
501
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
502
#elif 0
Chao Liu's avatar
Chao Liu committed
503
504
505
506
507
508
509
510
511
512
513
514
515
516
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
517
#endif
Chao Liu's avatar
Chao Liu committed
518

Chao Liu's avatar
Chao Liu committed
519
520
521
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
522
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
523

Chao Liu's avatar
Chao Liu committed
524
525
526
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
527
528
529
530
    print_array("LeftPads", to_multi_index(LeftPads{}));
    print_array("RightPads", to_multi_index(RightPads{}));
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));
Chao Liu's avatar
Chao Liu committed
531

Chao Liu's avatar
Chao Liu committed
532
#if 1
Chao Liu's avatar
Chao Liu committed
533
534
    using in_data_t  = float;
    using out_data_t = float;
Chao Liu's avatar
Chao Liu committed
535
536
537
538
539
540
541
542
543
#else
    using in_data_t  = half_float::half;
    using out_data_t = half_float::half;
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
544

Chao Liu's avatar
Chao Liu committed
545
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
546

Chao Liu's avatar
Chao Liu committed
547
548
549
550
551
552
553
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
554
    index_t nrepeat      = atoi(argv[2]);
555
556
557

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
558
#if 0
559
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
560
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
561
562
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
563
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
564
#elif 0
565
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
566
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
567
#elif 1
568
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
569
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
570
#elif 0
571
572
573
574
575
576
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
577
#endif
578
    }
Chao Liu's avatar
Chao Liu committed
579

Chao Liu's avatar
Chao Liu committed
580
#if 0
581
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
582
583
584
585
586
587
588
589
590
591
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
592
#elif 0
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
616
#endif
Chao Liu's avatar
Chao Liu committed
617

618
    if(do_verification)
619
    {
Chao Liu's avatar
Chao Liu committed
620
#if 0
621
622
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
623
        {
Chao Liu's avatar
Chao Liu committed
624
625
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
626
627
        }
        else
Chao Liu's avatar
Chao Liu committed
628
#endif
629
        {
630
631
632
633
634
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
635
636
                                    LeftPads{},
                                    RightPads{});
637
638
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
639

Chao Liu's avatar
Chao Liu committed
640
#if 0
641
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
642
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
643
644
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
645
#endif
646
    }
647
}