#include <stdlib.h>

#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <numeric>
#include <thread>

#include <half.hpp>

#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor_generator.hpp"
#include "conv_common.hpp"
#include "host_conv.hpp"
#include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"

int main(int argc, char* argv[])
Chao Liu's avatar
Chao Liu committed
19
{
Chao Liu's avatar
Chao Liu committed
20
21
    using namespace ck;

22
#if 0
Chao Liu's avatar
Chao Liu committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
    constexpr index_t N  = 1;
    constexpr index_t C  = 32;
    constexpr index_t HI = 540;
    constexpr index_t WI = 960;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
37
38
39
40
41
42
43
44
45
46
47
48
49
50
    // 3x3, 36x36, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 37;
    constexpr index_t WI = 37;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
51
#elif 0
52
53
54
55
56
57
58
59
    // 3x3, 35x35, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 384;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
60

61
    using ConvStrides   = Sequence<2, 2>;
62
63
64
65
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
66
#elif 1
Chao Liu's avatar
Chao Liu committed
67
    // 3x3, 71x71
Chao Liu's avatar
Chao Liu committed
68
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
69
70
71
72
73
74
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
75

Chao Liu's avatar
Chao Liu committed
76
    using ConvStrides   = Sequence<2, 2>;
77
    using ConvDilations = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
78

Chao Liu's avatar
Chao Liu committed
79
80
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
81
#elif 1
Chao Liu's avatar
Chao Liu committed
82
    // 1x1, 8x8
83
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
    constexpr index_t C  = 1536;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 160;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 3x3, 35x35
    constexpr index_t N  = 128;
    constexpr index_t C  = 96;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
117
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 71;
    constexpr index_t WI = 71;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
141
#elif 1
Chao Liu's avatar
Chao Liu committed
142
143
144
    // 7x1, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
145
146
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
147
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
148
149
150
151
152
153
154
155
    constexpr index_t Y  = 7;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
156
#elif 0
Chao Liu's avatar
Chao Liu committed
157
158
159
160
161
162
    // 1x7, 17x17
    constexpr index_t N  = 128;
    constexpr index_t C  = 128;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 128;
163
164
165
166
167
168
169
170
171
    constexpr index_t Y  = 1;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 3>;
    using RightPads = Sequence<0, 3>;
#elif 0
Chao Liu's avatar
Chao Liu committed
172
173
174
175
176
177
    // 3x3, 299x299 stride=2
    constexpr index_t N  = 128;
    constexpr index_t C  = 3;
    constexpr index_t HI = 299;
    constexpr index_t WI = 299;
    constexpr index_t K  = 32;
178
179
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
180

Chao Liu's avatar
Chao Liu committed
181
    using ConvStrides   = Sequence<2, 2>;
182
183
    using ConvDilations = Sequence<1, 1>;

184
185
186
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
Chao Liu's avatar
Chao Liu committed
187
    // 3x3, 147x147
188
    constexpr index_t N  = 128;
189
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
190
191
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
192
    constexpr index_t K  = 128;
193
194
195
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

Chao Liu's avatar
Chao Liu committed
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
#elif 0
    // 3x3, 149x149
    constexpr index_t N  = 128;
    constexpr index_t C  = 32;
    constexpr index_t HI = 149;
    constexpr index_t WI = 149;
    constexpr index_t K  = 32;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
212
213
    using ConvDilations = Sequence<1, 1>;

214
215
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
216
#elif 0
Chao Liu's avatar
Chao Liu committed
217
218
219
220
221
222
223
224
    // 3x3, 17x17, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
    constexpr index_t HI = 17;
    constexpr index_t WI = 17;
    constexpr index_t K  = 192;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
225

Chao Liu's avatar
Chao Liu committed
226
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
227
228
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
229
230
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
231
#elif 0
Chao Liu's avatar
Chao Liu committed
232
    // 1x1, 35x35
Chao Liu's avatar
Chao Liu committed
233
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
234
235
236
237
    constexpr index_t C  = 384;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
    constexpr index_t K  = 96;
Chao Liu's avatar
Chao Liu committed
238
239
240
241
242
243
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
244
245
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
246
#elif 0
Chao Liu's avatar
Chao Liu committed
247
    // 3x3, 35x35, stride 2
Chao Liu's avatar
Chao Liu committed
248
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
249
250
251
    constexpr index_t C  = 288;
    constexpr index_t HI = 35;
    constexpr index_t WI = 35;
Chao Liu's avatar
Chao Liu committed
252
    constexpr index_t K  = 384;
Chao Liu's avatar
Chao Liu committed
253
254
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
255

Chao Liu's avatar
Chao Liu committed
256
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
257
258
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
259
260
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
261
#elif 0
Chao Liu's avatar
Chao Liu committed
262
    // 1x3, 8x8
Chao Liu's avatar
Chao Liu committed
263
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
264
    constexpr index_t C  = 384;
Chao Liu's avatar
Chao Liu committed
265
266
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
Chao Liu's avatar
Chao Liu committed
267
    constexpr index_t K  = 448;
Chao Liu's avatar
Chao Liu committed
268
    constexpr index_t Y  = 1;
Chao Liu's avatar
Chao Liu committed
269
    constexpr index_t X  = 3;
Chao Liu's avatar
Chao Liu committed
270
271
272
273

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
274
275
    using LeftPads  = Sequence<0, 1>;
    using RightPads = Sequence<0, 1>;
Chao Liu's avatar
Chao Liu committed
276
#elif 0
Chao Liu's avatar
Chao Liu committed
277
    // 3x1, 8x8
Chao Liu's avatar
Chao Liu committed
278
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
279
280
281
282
283
    constexpr index_t C  = 448;
    constexpr index_t HI = 8;
    constexpr index_t WI = 8;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
Chao Liu's avatar
Chao Liu committed
284
285
286
287
288
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
    using LeftPads  = Sequence<1, 0>;
    using RightPads = Sequence<1, 0>;
#elif 0
    // 3x3, 147x147
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 147;
    constexpr index_t WI = 147;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
304
305
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
306
#elif 0
Chao Liu's avatar
Chao Liu committed
307
    // 7x1, 73x73
Chao Liu's avatar
Chao Liu committed
308
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
309
310
311
312
313
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
Chao Liu's avatar
Chao Liu committed
314
315
316
317
318
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
    using LeftPads  = Sequence<3, 0>;
    using RightPads = Sequence<3, 0>;
#elif 0
    // 3x3, 73x73
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 73;
    constexpr index_t WI = 73;
    constexpr index_t K  = 96;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
334
335
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
336
#elif 0
Chao Liu's avatar
Chao Liu committed
337
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
338
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
339
340
341
342
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 2048;
Chao Liu's avatar
Chao Liu committed
343
344
345
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
346
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
347
348
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
349
350
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
351
#elif 0
Chao Liu's avatar
Chao Liu committed
352
    // 1x1, 14x14
Chao Liu's avatar
Chao Liu committed
353
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
354
355
356
357
    constexpr index_t C  = 1024;
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
358
359
360
361
362
363
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
364
365
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
366
#elif 0
Chao Liu's avatar
Chao Liu committed
367
    // 1x1, 14x14, stride 2
Chao Liu's avatar
Chao Liu committed
368
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
369
    constexpr index_t C  = 1024;
Chao Liu's avatar
Chao Liu committed
370
371
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
Chao Liu's avatar
Chao Liu committed
372
    constexpr index_t K  = 512;
Chao Liu's avatar
Chao Liu committed
373
374
375
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
376
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
377
378
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
379
380
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
381
#elif 1
Chao Liu's avatar
Chao Liu committed
382
383
    // 3x3, 28x28
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
384
    constexpr index_t C  = 128;
Chao Liu's avatar
Chao Liu committed
385
386
387
388
389
390
391
392
393
394
395
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 128;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
396
#elif 1
Chao Liu's avatar
Chao Liu committed
397
    // 3x3, 14x14
Chao Liu's avatar
Chao Liu committed
398
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
399
    constexpr index_t C  = 256;
Chao Liu's avatar
Chao Liu committed
400
401
402
    constexpr index_t HI = 14;
    constexpr index_t WI = 14;
    constexpr index_t K  = 256;
Chao Liu's avatar
Chao Liu committed
403
404
405
406
407
408
409
410
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
411
#elif 0
Chao Liu's avatar
Chao Liu committed
412
413
414
415
416
417
    // 1x1, 56x56, stride 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 256;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 128;
Chao Liu's avatar
Chao Liu committed
418
419
420
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;

Chao Liu's avatar
Chao Liu committed
421
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
422
423
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
424
425
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
426
#elif 0
Chao Liu's avatar
Chao Liu committed
427
    // 7x7, 230x230 stride=2
Chao Liu's avatar
Chao Liu committed
428
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
    constexpr index_t C  = 3;
    constexpr index_t HI = 230;
    constexpr index_t WI = 230;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 7;
    constexpr index_t X  = 7;

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
#elif 0
    // 1x1, 28x28, stride = 2
    constexpr index_t N  = 128;
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 1024;
Chao Liu's avatar
Chao Liu committed
448
449
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
450

Chao Liu's avatar
Chao Liu committed
451
    using ConvStrides   = Sequence<2, 2>;
Chao Liu's avatar
Chao Liu committed
452
453
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
454
455
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
456
#elif 0
Chao Liu's avatar
Chao Liu committed
457
    // 1x1, 28x28, stride 2
Chao Liu's avatar
Chao Liu committed
458
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
459
460
461
462
463
464
    constexpr index_t C  = 512;
    constexpr index_t HI = 28;
    constexpr index_t WI = 28;
    constexpr index_t K  = 256;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
Chao Liu's avatar
Chao Liu committed
465
466
467
468
469
470

    using ConvStrides   = Sequence<2, 2>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
471
#elif 1
Chao Liu's avatar
Chao Liu committed
472
    // 1x1, 7x7
473
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
474
    constexpr index_t C  = 512;
475
476
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
Chao Liu's avatar
Chao Liu committed
477
478
479
    constexpr index_t K  = 2048;
    constexpr index_t Y  = 1;
    constexpr index_t X  = 1;
480
481
482
483

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
484
485
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
486
#elif 0
Chao Liu's avatar
Chao Liu committed
487
    // 3x3, 7x7
488
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
489
490
491
492
493
494
    constexpr index_t C  = 512;
    constexpr index_t HI = 7;
    constexpr index_t WI = 7;
    constexpr index_t K  = 512;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;
495
496
497
498

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
499
500
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
501
#elif 0
Chao Liu's avatar
Chao Liu committed
502
    // 1x1, 56x56
503
    constexpr index_t N  = 128;
Chao Liu's avatar
Chao Liu committed
504
505
506
507
508
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 1;
509
    constexpr index_t X  = 1;
510
511
512
513

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

Chao Liu's avatar
Chao Liu committed
514
515
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
Chao Liu's avatar
Chao Liu committed
516
#elif 0
Chao Liu's avatar
Chao Liu committed
517
518
519
520
521
522
523
524
525
526
527
528
529
530
    // 3x3, 56x56
    constexpr index_t N  = 128;
    constexpr index_t C  = 64;
    constexpr index_t HI = 56;
    constexpr index_t WI = 56;
    constexpr index_t K  = 64;
    constexpr index_t Y  = 3;
    constexpr index_t X  = 3;

    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;

    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
Chao Liu's avatar
Chao Liu committed
531
#endif
Chao Liu's avatar
Chao Liu committed
532

Chao Liu's avatar
Chao Liu committed
533
534
535
    auto in_nchw_desc  = make_native_tensor_descriptor_packed(Sequence<N, C, HI, WI>{});
    auto wei_kcyx_desc = make_native_tensor_descriptor_packed(Sequence<K, C, Y, X>{});
    auto out_nkhw_desc = get_convolution_output_default_4d_tensor_descriptor(
Chao Liu's avatar
Chao Liu committed
536
        in_nchw_desc, wei_kcyx_desc, ConvStrides{}, ConvDilations{}, LeftPads{}, RightPads{});
Chao Liu's avatar
Chao Liu committed
537

Chao Liu's avatar
Chao Liu committed
538
539
540
    ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
    ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
    ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
Chao Liu's avatar
Chao Liu committed
541
542
543
544
    print_array("LeftPads", to_multi_index(LeftPads{}));
    print_array("RightPads", to_multi_index(RightPads{}));
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));
Chao Liu's avatar
Chao Liu committed
545

Chao Liu's avatar
Chao Liu committed
546
#if 1
Chao Liu's avatar
Chao Liu committed
547
548
    using in_data_t  = float;
    using out_data_t = float;
Chao Liu's avatar
Chao Liu committed
549
550
551
552
553
554
555
556
557
#else
    using in_data_t  = half_float::half;
    using out_data_t = half_float::half;
#endif

    Tensor<in_data_t> in_nchw(make_HostTensorDescriptor(in_nchw_desc));
    Tensor<in_data_t> wei_kcyx(make_HostTensorDescriptor(wei_kcyx_desc));
    Tensor<out_data_t> out_nkhw_host(make_HostTensorDescriptor(out_nkhw_desc));
    Tensor<out_data_t> out_nkhw_device(make_HostTensorDescriptor(out_nkhw_desc));
Chao Liu's avatar
Chao Liu committed
558

Chao Liu's avatar
Chao Liu committed
559
    std::size_t num_thread = std::thread::hardware_concurrency();
Chao Liu's avatar
Chao Liu committed
560

Chao Liu's avatar
Chao Liu committed
561
562
563
564
565
566
567
    if(argc != 3)
    {
        printf("arg1: do_verification, arg2: nrepeat\n");
        exit(1);
    }

    bool do_verification = atoi(argv[1]);
Chao Liu's avatar
Chao Liu committed
568
    index_t nrepeat      = atoi(argv[2]);
569
570
571

    if(do_verification)
    {
Chao Liu's avatar
Chao Liu committed
572
#if 0
573
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
574
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
575
576
#elif 0
        in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
bug fix  
Chao Liu committed
577
        wei_kcyx.GenerateTensorValue(GeneratorTensor_3{}, num_thread);
578
#elif 0
579
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
580
        wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
Chao Liu's avatar
Chao Liu committed
581
#elif 1
582
        in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
583
        wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread);
Chao Liu's avatar
Chao Liu committed
584
#elif 0
585
586
587
588
589
590
        in_nchw.GenerateTensorValue(GeneratorTensor_2{1, 5}, num_thread);

        auto gen_wei = [](auto... is) {
            return GeneratorTensor_2{1, 5}(is...) * GeneratorTensor_Checkboard{}(is...);
        };
        wei_kcyx.GenerateTensorValue(gen_wei, num_thread);
Chao Liu's avatar
Chao Liu committed
591
#endif
592
    }
Chao Liu's avatar
Chao Liu committed
593

Chao Liu's avatar
Chao Liu committed
594
#if 0
595
    device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
596
597
598
599
600
601
602
603
604
605
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
606
#elif 0
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
    device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                 in_nchw,
                                                                 wei_kcyx_desc,
                                                                 wei_kcyx,
                                                                 out_nkhw_desc,
                                                                 out_nkhw_device,
                                                                 ConvStrides{},
                                                                 ConvDilations{},
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                                         in_nchw,
                                                                         wei_kcyx_desc,
                                                                         wei_kcyx,
                                                                         out_nkhw_desc,
                                                                         out_nkhw_device,
                                                                         ConvStrides{},
                                                                         ConvDilations{},
                                                                         LeftPads{},
                                                                         RightPads{},
                                                                         nrepeat);
630
#endif
Chao Liu's avatar
Chao Liu committed
631

632
    if(do_verification)
633
    {
Chao Liu's avatar
Chao Liu committed
634
#if 0
635
636
        if(Y == 3 && X == 3 && ConvStrides{}[0] == 1 && ConvStrides{}[1] == 1 &&
           ConvDilations{}[0] == 1 && ConvDilations{}[1] == 1)
637
        {
Chao Liu's avatar
Chao Liu committed
638
639
            host_winograd_3x3_convolution(
                in_nchw, wei_kcyx, out_nkhw_host, LeftPads{}, RightPads{});
640
641
        }
        else
Chao Liu's avatar
Chao Liu committed
642
#endif
643
        {
644
645
646
647
648
            host_direct_convolution(in_nchw,
                                    wei_kcyx,
                                    out_nkhw_host,
                                    ConvStrides{},
                                    ConvDilations{},
Chao Liu's avatar
Chao Liu committed
649
650
                                    LeftPads{},
                                    RightPads{});
651
652
        }
        check_error(out_nkhw_host, out_nkhw_device);
Chao Liu's avatar
Chao Liu committed
653

Chao Liu's avatar
Chao Liu committed
654
#if 0
655
        LogRange(std::cout << "in_nchw : ", in_nchw.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
656
        LogRange(std::cout << "wei_kcyx: ", wei_kcyx.mData, ",") << std::endl;
657
658
        LogRange(std::cout << "out_nkhw_host  : ", out_nkhw_host.mData, ",") << std::endl;
        LogRange(std::cout << "out_nkhw_device: ", out_nkhw_device.mData, ",") << std::endl;
Chao Liu's avatar
Chao Liu committed
659
#endif
660
    }
661
}