// conv_driver.cpp
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <thread>
#include <half.hpp>
#include "config.hpp"
#include "print_array.hpp"
#include "print_sequence.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"

int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
19
{
Chao Liu's avatar
Chao Liu committed
20
21
    using namespace ck;

Chao Liu's avatar
Chao Liu committed
22
23
24
25
26
27
#if 0
    // 1x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 1024;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
28
    constexpr index_t K  = 256;
29
30
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
31
32

    using ConvStrides   = Sequence<1, 1>;
33
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
34

Chao Liu's avatar
Chao Liu committed
35
36
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
37
#elif 0
Chao Liu's avatar
Chao Liu committed
38
    // 1x1, 8x8
39
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
101
102
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
Chao Liu's avatar
Chao Liu committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
    constexpr index_t K  = 128;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 1
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
119
120
121
122
123
124
125
126
127
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
128
129
130
131
132
133
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
134
135
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
136

Chao Liu's avatar
Chao Liu committed
137
    using ConvStrides   = Sequence<2, 2>;
138
139
    using ConvDilations = Sequence<1, 1>;

140
141
142
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
143
144
    // 3x3, 147x147
    // v4r4@v100 xx.xx%, cudnn@v100 xx.xx%
145
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
146
147
148
149
    constexpr index_t C  = 32;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 64;
150
151
152
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    // v4r4@v100 xx.xx%, cudnn@v100 xx.xx%
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
170
171
    using ConvDilations = Sequence<1, 1>;

172
173
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
174
#elif 0
Chao Liu's avatar
Chao Liu committed
175
176
177
178
179
180
181
182
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
183

Chao Liu's avatar
Chao Liu committed
184
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
185
186
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
187
188
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
189
#elif 0
Chao Liu's avatar
Chao Liu committed
190
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
191
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
192
193
194
195
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
196
197
198
199
200
201
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
202
203
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
204
205
#elif 1
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
206
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
207
208
209
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
210
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
211
212
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
213

Chao Liu's avatar
Chao Liu committed
214
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
215
216
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
217
218
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
219
#elif 0
Chao Liu's avatar
Chao Liu committed
220
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
221
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
222
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
223
224
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
225
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
226
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
227
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
228
229
230
231

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
232
233
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
234
#elif 0
Chao Liu's avatar
Chao Liu committed
235
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
236
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
237
238
239
240
241
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
242
243
244
245
246
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
247
248
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
Chao Liu's avatar
Chao Liu committed
249
#elif 0
Chao Liu's avatar
Chao Liu committed
250
251
252
    // 3x1, 8x8
    constexpr index_t N  = 128;
    constexpr index_t C  = 448;
Chao Liu's avatar
Chao Liu committed
253
254
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
255
256
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
257
258
259
260
261
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
277
278
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
279
#elif 0
Chao Liu's avatar
Chao Liu committed
280
281
    // 7x1, 73x73
    // v44@v100 xx.xx%, cudnn@v100 xx.xx%
Chao Liu's avatar
Chao Liu committed
282
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
283
284
285
286
287
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
288
289
290
291
292
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
308
309
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
310
#elif 0
Chao Liu's avatar
Chao Liu committed
311
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
312
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
313
314
315
316
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
317
318
319
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
320
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
321
322
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
323
324
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
325
#elif 0
Chao Liu's avatar
Chao Liu committed
326
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
327
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
328
329
330
331
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
332
333
334
335
336
337
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
338
339
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
340
#elif 0
Chao Liu's avatar
Chao Liu committed
341
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
342
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
343
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
344
345
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
346
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
347
348
349
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
350
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
351
352
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
353
354
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
355
#elif 0
Chao Liu's avatar
Chao Liu committed
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
    // 3x3, 28x28
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 1
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
372
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
373
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
374
375
376
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 1
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
392
393
394
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
395
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
396
397
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
398
399
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
400
#elif 0
Chao Liu's avatar
Chao Liu committed
401
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
402
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
422
423
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
424

Chao Liu's avatar
Chao Liu committed
425
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
426
427
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
428
429
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
430
#elif 0
Chao Liu's avatar
Chao Liu committed
431
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
432
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
433
434
435
436
437
438
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
439
440
441
442
443
444

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
445
#elif 0
Chao Liu's avatar
Chao Liu committed
446
    // 1x1, 7x7
447
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
448
    constexpr index_t C  = 512;
449
450
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
451
452
453
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
454
455
456
457

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
458
459
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
460
#elif 0
Chao Liu's avatar
Chao Liu committed
461
    // 3x3, 7x7
462
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
463
464
465
466
467
468
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
469
470
471
472

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
473
474
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
475
#elif 1
Chao Liu's avatar
Chao Liu committed
476
    // 1x1, 56x56
477
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
478
479
480
481
482
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
483
    constexpr index_t X  = 1;
484
485
486
487

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 1
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
505
#endif
Chao Liu's avatar
Chao Liu committed
506

Chao Liu's avatar
Chao Liu committed
507
508
509
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
510
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
511

Chao Liu's avatar
Chao Liu committed
512
513
514
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
515
516
517
518
    print_sequence("LeftPads", LeftPads{});
    print_sequence("RightPads", RightPads{});
    print_sequence("ConvStrides", ConvStrides{});
    print_sequence("ConvDilations", ConvDilations{});
Chao Liu's avatar
Chao Liu committed
519

Chao Liu's avatar
Chao Liu committed
520
#if 1
Chao Liu's avatar
Chao Liu committed
521
522
    using in_data_t  = float;
    using out_data_t = float;
Chao Liu's avatar
Chao Liu committed
523
524
525
526
527
528
529
530
531
#else
    using in_data_t  = half_float::half;
    using out_data_t = half_float::half;
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
532

Chao Liu's avatar
Chao Liu committed
533
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
534

Chao Liu's avatar
Chao Liu committed
535
536
537
538
539
540
541
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
542
    index_t nrepeat      = atoi(argv[2]);
543
544
545

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
546
#if 0
547
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
548
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
549
550
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
551
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
552
553
554
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
555
#elif 1
556
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
557
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
558
#elif 0
559
560
561
562
563
564
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
565
#endif
566
    }
Chao Liu's avatar
Chao Liu committed
567

Chao Liu's avatar
Chao Liu committed
568
#if 1
Chao Liu's avatar
Chao Liu committed
569
570
571
572
573
574
575
576
    device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
                                                         ConvStrides{},
                                                         ConvDilations{},
577
578
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
579
                                                         nrepeat);
Chao Liu's avatar
Chao Liu committed
580
#elif 1
Chao Liu's avatar
Chao Liu committed
581
582
583
584
585
586
    device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
Chao Liu's avatar
Chao Liu committed
587
588
                                                         ConvStrides{},
                                                         ConvDilations{},
589
590
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
591
                                                         nrepeat);
592
#endif
Chao Liu's avatar
Chao Liu committed
593

594
    if(do_verification)
595
    {
Chao Liu's avatar
Chao Liu committed
596
#if 0
597
598
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
599
        {
Chao Liu's avatar
Chao Liu committed
600
601
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
602
603
        }
        else
Chao Liu's avatar
Chao Liu committed
604
#endif
605
        {
606
607
608
609
610
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
611
612
                                    LeftPads{},
                                    RightPads{});
613
614
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
615

Chao Liu's avatar
Chao Liu committed
616
#if 0
617
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
618
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
619
620
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
621
#endif
622
    }
623
}