#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <limits>
#include <numeric>
#include <thread>
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk.hpp"

int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
20
{
Chao Liu's avatar
Chao Liu committed
21
22
    using namespace ck;

23
#if 0
Chao Liu's avatar
Chao Liu committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
    constexpr index_t N  = 1;
    constexpr index_t C  = 32;
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
38
39
40
41
42
43
44
45
46
47
48
49
50
51
    // 3x3, 36x36, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 37;
    constexpr index_t WI = 37;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
52
#elif 0
53
54
55
56
57
58
59
60
    // 3x3, 35x35, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
61

62
    using ConvStrides   = Sequence<2, 2>;
63
64
65
66
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
67
#elif 1
Chao Liu's avatar
Chao Liu committed
68
    // 3x3, 71x71
Chao Liu's avatar
Chao Liu committed
69
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
70
71
72
73
74
75
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
76

Chao Liu's avatar
Chao Liu committed
77
    using ConvStrides   = Sequence<2, 2>;
78
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
79

Chao Liu's avatar
Chao Liu committed
80
81
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
82
#elif 1
Chao Liu's avatar
Chao Liu committed
83
    // 1x1, 8x8
84
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
118
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
142
#elif 1
Chao Liu's avatar
Chao Liu committed
143
144
145
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
146
147
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
148
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
149
150
151
152
153
154
155
156
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
157
#elif 0
Chao Liu's avatar
Chao Liu committed
158
159
160
161
162
163
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
164
165
166
167
168
169
170
171
172
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
173
174
175
176
177
178
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
179
180
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
181

Chao Liu's avatar
Chao Liu committed
182
    using ConvStrides   = Sequence<2, 2>;
183
184
    using ConvDilations = Sequence<1, 1>;

185
186
187
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
188
    // 3x3, 147x147
189
    constexpr index_t N  = 128;
190
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
191
192
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
193
    constexpr index_t K  = 128;
194
195
196
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
213
214
    using ConvDilations = Sequence<1, 1>;

215
216
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
217
#elif 0
Chao Liu's avatar
Chao Liu committed
218
219
220
221
222
223
224
225
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
226

Chao Liu's avatar
Chao Liu committed
227
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
228
229
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
230
231
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
232
#elif 0
Chao Liu's avatar
Chao Liu committed
233
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
234
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
235
236
237
238
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
239
240
241
242
243
244
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
245
246
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
247
#elif 0
Chao Liu's avatar
Chao Liu committed
248
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
249
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
250
251
252
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
253
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
254
255
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
256

Chao Liu's avatar
Chao Liu committed
257
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
258
259
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
260
261
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
262
#elif 0
Chao Liu's avatar
Chao Liu committed
263
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
264
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
265
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
266
267
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
268
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
269
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
270
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
271
272
273
274

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
275
276
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
277
#elif 0
Chao Liu's avatar
Chao Liu committed
278
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
279
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
280
281
282
283
284
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
285
286
287
288
289
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
305
306
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
307
#elif 0
Chao Liu's avatar
Chao Liu committed
308
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
309
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
310
311
312
313
314
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
315
316
317
318
319
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
335
336
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
337
#elif 0
Chao Liu's avatar
Chao Liu committed
338
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
339
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
340
341
342
343
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
344
345
346
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
347
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
348
349
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
350
351
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
352
#elif 0
Chao Liu's avatar
Chao Liu committed
353
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
354
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
355
356
357
358
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
359
360
361
362
363
364
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
365
366
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
367
#elif 0
Chao Liu's avatar
Chao Liu committed
368
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
369
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
370
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
371
372
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
373
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
374
375
376
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
377
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
378
379
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
380
381
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
382
#elif 1
Chao Liu's avatar
Chao Liu committed
383
384
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
385
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
386
387
388
389
390
391
392
393
394
395
396
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
397
#elif 1
Chao Liu's avatar
Chao Liu committed
398
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
399
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
400
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
401
402
403
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
404
405
406
407
408
409
410
411
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
412
#elif 0
Chao Liu's avatar
Chao Liu committed
413
414
415
416
417
418
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
419
420
421
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
422
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
423
424
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
425
426
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
427
#elif 0
Chao Liu's avatar
Chao Liu committed
428
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
429
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
449
450
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
451

Chao Liu's avatar
Chao Liu committed
452
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
453
454
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
455
456
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
457
#elif 0
Chao Liu's avatar
Chao Liu committed
458
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
459
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
460
461
462
463
464
465
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
466
467
468
469
470
471

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
472
#elif 1
Chao Liu's avatar
Chao Liu committed
473
    // 1x1, 7x7
474
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
475
    constexpr index_t C  = 512;
476
477
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
478
479
480
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
481
482
483
484

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
485
486
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
487
#elif 0
Chao Liu's avatar
Chao Liu committed
488
    // 3x3, 7x7
489
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
490
491
492
493
494
495
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
496
497
498
499

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
500
501
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
502
#elif 0
Chao Liu's avatar
Chao Liu committed
503
    // 1x1, 56x56
504
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
505
506
507
508
509
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
510
    constexpr index_t X  = 1;
511
512
513
514

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
515
516
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
517
#elif 0
Chao Liu's avatar
Chao Liu committed
518
519
520
521
522
523
524
525
526
527
528
529
530
531
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
532
#endif
Chao Liu's avatar
Chao Liu committed
533

Chao Liu's avatar
Chao Liu committed
534
535
536
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
537
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
538

Chao Liu's avatar
Chao Liu committed
539
540
541
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
542
543
544
545
    print_array("LeftPads", to_multi_index(LeftPads{}));
    print_array("RightPads", to_multi_index(RightPads{}));
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));
Chao Liu's avatar
Chao Liu committed
546

Chao Liu's avatar
Chao Liu committed
547
#if 1
Chao Liu's avatar
Chao Liu committed
548
549
    using in_data_t  = float;
    using out_data_t = float;
Chao Liu's avatar
Chao Liu committed
550
551
552
553
554
555
556
557
558
#else
    using in_data_t  = half_float::half;
    using out_data_t = half_float::half;
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
559

Chao Liu's avatar
Chao Liu committed
560
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
561

Chao Liu's avatar
Chao Liu committed
562
563
564
565
566
567
568
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
569
    index_t nrepeat      = atoi(argv[2]);
570
571
572

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
573
#if 0
574
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
575
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
576
577
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
578
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
579
#elif 0
580
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
581
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
582
#elif 1
583
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
584
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
585
#elif 0
586
587
588
589
590
591
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
592
#endif
593
    }
Chao Liu's avatar
Chao Liu committed
594

Chao Liu's avatar
Chao Liu committed
595
#if 0
596
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
597
598
599
600
601
602
603
604
605
606
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
607
#elif 0
608
609
610
611
612
613
614
615
616
617
618
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
Chao Liu's avatar
Chao Liu committed
619
#elif 1
620
621
622
623
624
625
626
627
628
629
630
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
631
632
633
634
635
636
637
638
639
640
641
642
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
643
#endif
Chao Liu's avatar
Chao Liu committed
644

645
    if(do_verification)
646
    {
Chao Liu's avatar
Chao Liu committed
647
#if 0
648
649
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
650
        {
Chao Liu's avatar
Chao Liu committed
651
652
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
653
654
        }
        else
Chao Liu's avatar
Chao Liu committed
655
#endif
656
        {
657
658
659
660
661
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
662
663
                                    LeftPads{},
                                    RightPads{});
664
665
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
666

Chao Liu's avatar
Chao Liu committed
667
#if 0
668
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
669
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
670
671
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
672
#endif
673
    }
674
}