// conv_driver.cpp: host-side driver for the implicit-GEMM convolution device code.
#include <stdlib.h>

#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <limits>
#include <numeric>
#include <thread>

#include <half.hpp>

#include "config.hpp"
#include "print_array.hpp"
#include "print_sequence.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dummy_transform.hpp"

int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
20
{
Chao Liu's avatar
Chao Liu committed
21
22
    using namespace ck;

Chao Liu's avatar
Chao Liu committed
23
#if 0
Chao Liu's avatar
Chao Liu committed
24
    // 3x3, 71x71
Chao Liu's avatar
Chao Liu committed
25
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
26
27
28
29
30
31
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
32

Chao Liu's avatar
Chao Liu committed
33
    using ConvStrides   = Sequence<2, 2>;
34
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
35

Chao Liu's avatar
Chao Liu committed
36
37
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
38
#elif 0
Chao Liu's avatar
Chao Liu committed
39
    // 1x1, 8x8
40
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
102
103
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
Chao Liu's avatar
Chao Liu committed
104
105
106
107
108
109
110
111
112
    constexpr index_t K  = 128;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
Chao Liu's avatar
Chao Liu committed
113
#elif 0
Chao Liu's avatar
Chao Liu committed
114
115
116
117
118
119
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
120
121
122
123
124
125
126
127
128
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
129
130
131
132
133
134
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
135
136
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
137

Chao Liu's avatar
Chao Liu committed
138
    using ConvStrides   = Sequence<2, 2>;
139
140
    using ConvDilations = Sequence<1, 1>;

141
142
143
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
144
    // 3x3, 147x147
145
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
146
147
148
149
    constexpr index_t C  = 32;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 64;
150
151
152
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
169
170
    using ConvDilations = Sequence<1, 1>;

171
172
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
173
#elif 0
Chao Liu's avatar
Chao Liu committed
174
175
176
177
178
179
180
181
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
182

Chao Liu's avatar
Chao Liu committed
183
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
184
185
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
186
187
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
188
#elif 0
Chao Liu's avatar
Chao Liu committed
189
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
190
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
191
192
193
194
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
195
196
197
198
199
200
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
201
202
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
203
#elif 0
Chao Liu's avatar
Chao Liu committed
204
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
205
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
206
207
208
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
209
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
210
211
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
212

Chao Liu's avatar
Chao Liu committed
213
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
214
215
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
216
217
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
218
#elif 0
Chao Liu's avatar
Chao Liu committed
219
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
220
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
221
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
222
223
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
224
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
225
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
226
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
227
228
229
230

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
231
232
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
233
#elif 0
Chao Liu's avatar
Chao Liu committed
234
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
235
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
236
237
238
239
240
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
241
242
243
244
245
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
261
262
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
263
#elif 0
Chao Liu's avatar
Chao Liu committed
264
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
265
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
266
267
268
269
270
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
271
272
273
274
275
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
291
292
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
293
#elif 0
Chao Liu's avatar
Chao Liu committed
294
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
295
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
296
297
298
299
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
300
301
302
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
303
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
304
305
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
306
307
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
308
#elif 0
Chao Liu's avatar
Chao Liu committed
309
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
310
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
311
312
313
314
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
315
316
317
318
319
320
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
321
322
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
323
#elif 0
Chao Liu's avatar
Chao Liu committed
324
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
325
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
326
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
327
328
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
329
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
330
331
332
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
333
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
334
335
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
336
337
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
338
#elif 1
Chao Liu's avatar
Chao Liu committed
339
340
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
341
    constexpr index_t C  = 192;
Chao Liu's avatar
Chao Liu committed
342
343
344
345
346
347
348
349
350
351
352
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
353
#elif 0
Chao Liu's avatar
Chao Liu committed
354
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
355
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
356
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
357
358
359
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
360
361
362
363
364
365
366
367
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
368
#elif 0
Chao Liu's avatar
Chao Liu committed
369
370
371
372
373
374
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
375
376
377
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
378
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
379
380
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
381
382
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
383
#elif 0
Chao Liu's avatar
Chao Liu committed
384
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
385
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
405
406
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
407

Chao Liu's avatar
Chao Liu committed
408
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
409
410
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
411
412
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
413
#elif 0
Chao Liu's avatar
Chao Liu committed
414
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
415
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
416
417
418
419
420
421
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
422
423
424
425
426
427

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
428
#elif 0
Chao Liu's avatar
Chao Liu committed
429
    // 1x1, 7x7
430
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
431
    constexpr index_t C  = 512;
432
433
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
434
435
436
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
437
438
439
440

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
441
442
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
443
#elif 0
Chao Liu's avatar
Chao Liu committed
444
    // 3x3, 7x7
445
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
446
447
448
449
450
451
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
452
453
454
455

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
456
457
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
458
#elif 0
Chao Liu's avatar
Chao Liu committed
459
    // 1x1, 56x56
460
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
461
462
463
464
465
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
466
    constexpr index_t X  = 1;
467
468
469
470

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
471
472
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
473
#elif 0
Chao Liu's avatar
Chao Liu committed
474
475
476
477
478
479
480
481
482
483
484
485
486
487
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
488
#endif
Chao Liu's avatar
Chao Liu committed
489

Chao Liu's avatar
Chao Liu committed
490
491
492
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
493
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
494

Chao Liu's avatar
Chao Liu committed
495
496
497
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
498
499
500
501
    print_sequence("LeftPads", LeftPads{});
    print_sequence("RightPads", RightPads{});
    print_sequence("ConvStrides", ConvStrides{});
    print_sequence("ConvDilations", ConvDilations{});
Chao Liu's avatar
Chao Liu committed
502

Chao Liu's avatar
Chao Liu committed
503
#if 1
Chao Liu's avatar
Chao Liu committed
504
505
    using in_data_t  = float;
    using out_data_t = float;
Chao Liu's avatar
Chao Liu committed
506
507
508
509
510
511
512
513
514
#else
    using in_data_t  = half_float::half;
    using out_data_t = half_float::half;
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
515

Chao Liu's avatar
Chao Liu committed
516
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
517

Chao Liu's avatar
Chao Liu committed
518
519
520
521
522
523
524
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
525
    index_t nrepeat      = atoi(argv[2]);
526
527
528

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
529
#if 0
530
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
531
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
532
533
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
534
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
535
536
537
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
538
#elif 1
539
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
540
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
541
#elif 0
542
543
544
545
546
547
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
548
#endif
549
    }
Chao Liu's avatar
Chao Liu committed
550

Chao Liu's avatar
Chao Liu committed
551
#if 0
Chao Liu's avatar
Chao Liu committed
552
553
554
555
556
557
558
559
    device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
                                                         ConvStrides{},
                                                         ConvDilations{},
560
561
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
562
                                                         nrepeat);
Chao Liu's avatar
Chao Liu committed
563
#elif 0
Chao Liu's avatar
Chao Liu committed
564
565
566
567
568
569
    device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
Chao Liu's avatar
Chao Liu committed
570
571
                                                         ConvStrides{},
                                                         ConvDilations{},
572
573
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
574
                                                         nrepeat);
Chao Liu's avatar
Chao Liu committed
575
576
577
578
579
580
581
582
583
584
585
586
#elif 1
    device_dummy_transform(in_nchw_desc,
                           in_nchw,
                           wei_kcyx_desc,
                           wei_kcyx,
                           out_nkhw_desc,
                           out_nkhw_device,
                           ConvStrides{},
                           ConvDilations{},
                           LeftPads{},
                           RightPads{},
                           nrepeat);
587
#endif
Chao Liu's avatar
Chao Liu committed
588

589
    if(do_verification)
590
    {
Chao Liu's avatar
Chao Liu committed
591
#if 0
592
593
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
594
        {
Chao Liu's avatar
Chao Liu committed
595
596
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
597
598
        }
        else
Chao Liu's avatar
Chao Liu committed
599
#endif
600
        {
601
602
603
604
605
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
606
607
                                    LeftPads{},
                                    RightPads{});
608
609
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
610

Chao Liu's avatar
Chao Liu committed
611
#if 0
612
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
613
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
614
615
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
616
#endif
617
    }
618
}