#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <thread>
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dummy_static_transform.hpp"
#include "device_dummy_dynamic_transform_v1.hpp"
#include "device_dummy_dynamic_transform.hpp"

int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
22
{
Chao Liu's avatar
Chao Liu committed
23
24
    using namespace ck;

Chao Liu's avatar
Chao Liu committed
25
#if 0
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
    // 1x1, 8x8
    constexpr index_t N  = 2;
    constexpr index_t C  = 24;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
41
    // 3x3, 71x71
Chao Liu's avatar
Chao Liu committed
42
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
43
44
45
46
47
48
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
49

Chao Liu's avatar
Chao Liu committed
50
    using ConvStrides   = Sequence<2, 2>;
51
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
52

Chao Liu's avatar
Chao Liu committed
53
54
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
55
#elif 0
Chao Liu's avatar
Chao Liu committed
56
    // 1x1, 8x8
57
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
119
120
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
Chao Liu's avatar
Chao Liu committed
121
122
123
124
125
126
127
128
129
    constexpr index_t K  = 128;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
Chao Liu's avatar
Chao Liu committed
130
#elif 1
Chao Liu's avatar
Chao Liu committed
131
132
133
134
135
136
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
137
138
139
140
141
142
143
144
145
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
146
147
148
149
150
151
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
152
153
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
154

Chao Liu's avatar
Chao Liu committed
155
    using ConvStrides   = Sequence<2, 2>;
156
157
    using ConvDilations = Sequence<1, 1>;

158
159
160
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
161
    // 3x3, 147x147
162
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
163
164
165
166
    constexpr index_t C  = 32;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 64;
167
168
169
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
186
187
    using ConvDilations = Sequence<1, 1>;

188
189
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
190
#elif 0
Chao Liu's avatar
Chao Liu committed
191
192
193
194
195
196
197
198
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
199

Chao Liu's avatar
Chao Liu committed
200
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
201
202
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
203
204
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
205
#elif 0
Chao Liu's avatar
Chao Liu committed
206
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
207
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
208
209
210
211
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
212
213
214
215
216
217
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
218
219
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
220
#elif 0
Chao Liu's avatar
Chao Liu committed
221
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
222
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
223
224
225
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
226
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
227
228
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
229

Chao Liu's avatar
Chao Liu committed
230
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
231
232
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
233
234
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
235
#elif 0
Chao Liu's avatar
Chao Liu committed
236
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
237
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
238
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
239
240
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
241
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
242
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
243
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
244
245
246
247

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
248
249
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
250
#elif 0
Chao Liu's avatar
Chao Liu committed
251
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
252
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
253
254
255
256
257
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
258
259
260
261
262
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
278
279
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
280
#elif 0
Chao Liu's avatar
Chao Liu committed
281
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
282
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
283
284
285
286
287
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
288
289
290
291
292
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
308
309
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
310
#elif 0
Chao Liu's avatar
Chao Liu committed
311
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
312
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
313
314
315
316
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
317
318
319
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
320
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
321
322
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
323
324
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
325
#elif 0
Chao Liu's avatar
Chao Liu committed
326
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
327
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
328
329
330
331
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
332
333
334
335
336
337
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
338
339
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
340
#elif 0
Chao Liu's avatar
Chao Liu committed
341
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
342
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
343
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
344
345
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
346
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
347
348
349
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
350
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
351
352
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
353
354
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
355
#elif 1
Chao Liu's avatar
Chao Liu committed
356
357
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
358
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
359
360
361
362
363
364
365
366
367
368
369
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
370
#elif 0
Chao Liu's avatar
Chao Liu committed
371
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
372
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
373
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
374
375
376
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
377
378
379
380
381
382
383
384
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
385
#elif 0
Chao Liu's avatar
Chao Liu committed
386
387
388
389
390
391
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
392
393
394
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
395
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
396
397
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
398
399
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
400
#elif 0
Chao Liu's avatar
Chao Liu committed
401
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
402
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
422
423
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
424

Chao Liu's avatar
Chao Liu committed
425
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
426
427
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
428
429
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
430
#elif 0
Chao Liu's avatar
Chao Liu committed
431
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
432
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
433
434
435
436
437
438
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
439
440
441
442
443
444

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
445
#elif 0
Chao Liu's avatar
Chao Liu committed
446
    // 1x1, 7x7
447
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
448
    constexpr index_t C  = 512;
449
450
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
451
452
453
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
454
455
456
457

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
458
459
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
460
#elif 0
Chao Liu's avatar
Chao Liu committed
461
    // 3x3, 7x7
462
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
463
464
465
466
467
468
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
469
470
471
472

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
473
474
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
475
#elif 0
Chao Liu's avatar
Chao Liu committed
476
    // 1x1, 56x56
477
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
478
479
480
481
482
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
483
    constexpr index_t X  = 1;
484
485
486
487

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
488
489
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
490
#elif 0
Chao Liu's avatar
Chao Liu committed
491
492
493
494
495
496
497
498
499
500
501
502
503
504
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
505
#endif
Chao Liu's avatar
Chao Liu committed
506

Chao Liu's avatar
Chao Liu committed
507
508
509
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
510
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
511

Chao Liu's avatar
Chao Liu committed
512
513
514
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
515
516
517
518
    print_array("LeftPads", to_multi_index(LeftPads{}));
    print_array("RightPads", to_multi_index(RightPads{}));
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));
Chao Liu's avatar
Chao Liu committed
519

Chao Liu's avatar
Chao Liu committed
520
#if 1
Chao Liu's avatar
Chao Liu committed
521
522
    using in_data_t  = float;
    using out_data_t = float;
Chao Liu's avatar
Chao Liu committed
523
524
525
526
527
528
529
530
531
#else
    using in_data_t  = half_float::half;
    using out_data_t = half_float::half;
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
532

Chao Liu's avatar
Chao Liu committed
533
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
534

Chao Liu's avatar
Chao Liu committed
535
536
537
538
539
540
541
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
542
    index_t nrepeat      = atoi(argv[2]);
543
544
545

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
546
#if 0
547
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
548
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
549
550
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
551
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
552
553
554
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
555
#elif 1
556
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
557
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
558
#elif 0
559
560
561
562
563
564
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
565
#endif
566
    }
Chao Liu's avatar
Chao Liu committed
567

Chao Liu's avatar
Chao Liu committed
568
#if 0
569
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
Chao Liu's avatar
Chao Liu committed
570
571
572
573
574
575
576
                                                         in_nchw,
                                                         wei_kcyx_desc,
                                                         wei_kcyx,
                                                         out_nkhw_desc,
                                                         out_nkhw_device,
                                                         ConvStrides{},
                                                         ConvDilations{},
577
578
                                                         LeftPads{},
                                                         RightPads{},
Chao Liu's avatar
Chao Liu committed
579
                                                         nrepeat);
580
#elif 1
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
604
#elif 0
Chao Liu's avatar
Chao Liu committed
605
606
607
608
609
610
611
612
613
614
615
    device_dummy_static_transform(in_nchw_desc,
                                  in_nchw,
                                  wei_kcyx_desc,
                                  wei_kcyx,
                                  out_nkhw_desc,
                                  out_nkhw_device,
                                  ConvStrides{},
                                  ConvDilations{},
                                  LeftPads{},
                                  RightPads{},
                                  nrepeat);
Chao Liu's avatar
Chao Liu committed
616
#elif 0
Chao Liu's avatar
Chao Liu committed
617
618
619
620
621
622
623
624
625
626
627
    device_dummy_dynamic_transform_v1(in_nchw_desc,
                                      in_nchw,
                                      wei_kcyx_desc,
                                      wei_kcyx,
                                      out_nkhw_desc,
                                      out_nkhw_device,
                                      ConvStrides{},
                                      ConvDilations{},
                                      LeftPads{},
                                      RightPads{},
                                      nrepeat);
Chao Liu's avatar
Chao Liu committed
628
#elif 1
Chao Liu's avatar
Chao Liu committed
629
630
631
632
633
634
635
636
637
638
639
    device_dummy_dynamic_transform(in_nchw_desc,
                                   in_nchw,
                                   wei_kcyx_desc,
                                   wei_kcyx,
                                   out_nkhw_desc,
                                   out_nkhw_device,
                                   ConvStrides{},
                                   ConvDilations{},
                                   LeftPads{},
                                   RightPads{},
                                   nrepeat);
640
#endif
Chao Liu's avatar
Chao Liu committed
641

642
    if(do_verification)
643
    {
Chao Liu's avatar
Chao Liu committed
644
#if 0
645
646
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
647
        {
Chao Liu's avatar
Chao Liu committed
648
649
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
650
651
        }
        else
Chao Liu's avatar
Chao Liu committed
652
#endif
653
        {
654
655
656
657
658
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
659
660
                                    LeftPads{},
                                    RightPads{});
661
662
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
663

Chao Liu's avatar
Chao Liu committed
664
#if 0
665
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
666
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
667
668
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
669
#endif
670
    }
671
}