amd_inline_asm.hpp 17 KB
Newer Older
1
2
3
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP

Chao Liu's avatar
Chao Liu committed
4
#include "vector_type.hpp"
Jing Zhang's avatar
Jing Zhang committed
5

6
7
namespace ck {

Chao Liu's avatar
Chao Liu committed
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
// cast a pointer of LDS to its address
extern "C" __attribute__((address_space(3))) void* __to_local(void* p)[[hc]];

__device__ void vmcnt(index_t cnt)
{
    if(cnt == 0)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(0) \n \
                " ::);
    }
    else if(cnt == 1)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(1) \n \
                " ::);
    }
    else if(cnt == 2)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(2) \n \
                " ::);
    }
    else if(cnt == 4)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(2) \n \
                " ::);
    }
    else
    {
        assert(false);
    }
}

__device__ void lgkmcnt(index_t cnt)
{
    if(cnt == 0)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(0) \n \
                " ::);
    }
    else if(cnt == 1)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(1) \n \
                " ::);
    }
    else if(cnt == 2)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(2) \n \
                " ::);
    }
    else if(cnt == 3)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(3) \n \
                " ::);
    }
    else if(cnt == 4)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(4) \n \
                " ::);
    }
    else
    {
        assert(false);
    }
}

Chao Liu's avatar
Chao Liu committed
81
__device__ void outerProduct1x4(const float* a, const float* b, float* c)
Chao Liu's avatar
Chao Liu committed
82
{
Jing Zhang's avatar
Jing Zhang committed
83
84
85
86
87
88
    asm volatile("\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
Chao Liu's avatar
Chao Liu committed
89
90
91
92
93
94
95
96
97
98
                 : "=v"(c[0]), "=v"(c[1]), "=v"(c[2]), "=v"(c[3])
                 : "v"(a[0]),
                   "v"(b[0]),
                   "v"(b[1]),
                   "v"(b[2]),
                   "v"(b[3]),
                   "0"(c[0]),
                   "1"(c[1]),
                   "2"(c[2]),
                   "3"(c[3]));
Jing Zhang's avatar
Jing Zhang committed
99
100
}

Chao Liu's avatar
Chao Liu committed
101
102
103
__device__ void outerProduct1x4(const float& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c)
Chao Liu's avatar
Chao Liu committed
104
105
{
    outerProduct1x4(&a, (float*)&b, (float*)&c);
Jing Zhang's avatar
Jing Zhang committed
106
107
}

Chao Liu's avatar
Chao Liu committed
108
109
110
111
112
113
__device__ void outerProduct4x4(const vector_type<float, 4>::MemoryType& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c0,
                                vector_type<float, 4>::MemoryType& c1,
                                vector_type<float, 4>::MemoryType& c2,
                                vector_type<float, 4>::MemoryType& c3)
Chao Liu's avatar
Chao Liu committed
114
{
Jing Zhang's avatar
Jing Zhang committed
115
116
117
118
119
120
    outerProduct1x4(a.x, b, c0);
    outerProduct1x4(a.y, b, c1);
    outerProduct1x4(a.z, b, c2);
    outerProduct1x4(a.w, b, c3);
}

Chao Liu's avatar
Chao Liu committed
121
122
123
__device__ void outerProduct8x8(const vector_type<float, 4>::MemoryType* a,
                                const vector_type<float, 4>::MemoryType* b,
                                vector_type<float, 4>::MemoryType* c)
Jing Zhang's avatar
Jing Zhang committed
124
125
126
127
128
129
130
{
    outerProduct4x4(a[0], b[0], c[0], c[2], c[4], c[6]);
    outerProduct4x4(a[0], b[1], c[1], c[3], c[5], c[7]);
    outerProduct4x4(a[1], b[0], c[8], c[10], c[12], c[14]);
    outerProduct4x4(a[1], b[1], c[9], c[11], c[13], c[15]);
}

Chao Liu's avatar
Chao Liu committed
131
__device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
132
133
134
135
{
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
136
                ds_read_b128 %0, %1 offset:0\n \
Jing Zhang's avatar
Jing Zhang committed
137
                "
Chao Liu's avatar
Chao Liu committed
138
139
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
140
    }
Jing Zhang's avatar
Jing Zhang committed
141
    if(offset == 64)
Jing Zhang's avatar
Jing Zhang committed
142
143
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
144
                ds_read_b128 %0, %1 offset:64\n \
Jing Zhang's avatar
Jing Zhang committed
145
                "
Chao Liu's avatar
Chao Liu committed
146
147
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
148
    }
Jing Zhang's avatar
Jing Zhang committed
149
    if(offset == 128)
Jing Zhang's avatar
Jing Zhang committed
150
151
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
152
                ds_read_b128 %0, %1 offset:128\n \
Jing Zhang's avatar
Jing Zhang committed
153
                "
Chao Liu's avatar
Chao Liu committed
154
155
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
156
    }
Jing Zhang's avatar
Jing Zhang committed
157
    if(offset == 192)
Jing Zhang's avatar
Jing Zhang committed
158
159
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
160
                ds_read_b128 %0, %1 offset:192\n \
Jing Zhang's avatar
Jing Zhang committed
161
                "
Chao Liu's avatar
Chao Liu committed
162
163
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
164
    }
Jing Zhang's avatar
Jing Zhang committed
165
    if(offset == 256)
Jing Zhang's avatar
Jing Zhang committed
166
167
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
168
                ds_read_b128 %0, %1 offset:256\n \
Jing Zhang's avatar
Jing Zhang committed
169
                "
Chao Liu's avatar
Chao Liu committed
170
171
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
172
    }
Jing Zhang's avatar
Jing Zhang committed
173
    if(offset == 320)
Jing Zhang's avatar
Jing Zhang committed
174
175
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
176
                ds_read_b128 %0, %1 offset:320\n \
Jing Zhang's avatar
Jing Zhang committed
177
                "
Chao Liu's avatar
Chao Liu committed
178
179
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
180
    }
Jing Zhang's avatar
Jing Zhang committed
181
    if(offset == 384)
Jing Zhang's avatar
Jing Zhang committed
182
183
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
184
                ds_read_b128 %0, %1 offset:384\n \
Jing Zhang's avatar
Jing Zhang committed
185
                "
Chao Liu's avatar
Chao Liu committed
186
187
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
188
    }
Jing Zhang's avatar
Jing Zhang committed
189
    if(offset == 448)
Jing Zhang's avatar
Jing Zhang committed
190
191
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
192
                ds_read_b128 %0, %1 offset:448\n \
Jing Zhang's avatar
Jing Zhang committed
193
                "
Chao Liu's avatar
Chao Liu committed
194
195
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
196
    }
Jing Zhang's avatar
Jing Zhang committed
197
    if(offset == 512)
Jing Zhang's avatar
Jing Zhang committed
198
199
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
200
                ds_read_b128 %0, %1 offset:512\n \
Jing Zhang's avatar
Jing Zhang committed
201
                "
Chao Liu's avatar
Chao Liu committed
202
203
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
204
    }
Jing Zhang's avatar
Jing Zhang committed
205
    if(offset == 576)
Jing Zhang's avatar
Jing Zhang committed
206
207
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
208
                ds_read_b128 %0, %1 offset:576\n \
Jing Zhang's avatar
Jing Zhang committed
209
                "
Chao Liu's avatar
Chao Liu committed
210
211
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
212
    }
Jing Zhang's avatar
Jing Zhang committed
213
    if(offset == 640)
Jing Zhang's avatar
Jing Zhang committed
214
215
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
216
                ds_read_b128 %0, %1 offset:640\n \
Jing Zhang's avatar
Jing Zhang committed
217
                "
Chao Liu's avatar
Chao Liu committed
218
219
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
220
    }
Jing Zhang's avatar
Jing Zhang committed
221
    if(offset == 704)
Jing Zhang's avatar
Jing Zhang committed
222
223
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
224
                ds_read_b128 %0, %1 offset:704\n \
Jing Zhang's avatar
Jing Zhang committed
225
                "
Chao Liu's avatar
Chao Liu committed
226
227
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
228
    }
Jing Zhang's avatar
Jing Zhang committed
229
    if(offset == 768)
Jing Zhang's avatar
Jing Zhang committed
230
231
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
232
                ds_read_b128 %0, %1 offset:768\n \
Jing Zhang's avatar
Jing Zhang committed
233
                "
Chao Liu's avatar
Chao Liu committed
234
235
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
236
    }
Jing Zhang's avatar
Jing Zhang committed
237
    if(offset == 832)
Jing Zhang's avatar
Jing Zhang committed
238
239
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
240
                ds_read_b128 %0, %1 offset:832\n \
Jing Zhang's avatar
Jing Zhang committed
241
                "
Chao Liu's avatar
Chao Liu committed
242
243
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
244
    }
Jing Zhang's avatar
Jing Zhang committed
245
    if(offset == 896)
Jing Zhang's avatar
Jing Zhang committed
246
247
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
248
                ds_read_b128 %0, %1 offset:896\n \
Jing Zhang's avatar
Jing Zhang committed
249
                "
Chao Liu's avatar
Chao Liu committed
250
251
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
252
    }
Jing Zhang's avatar
Jing Zhang committed
253
    if(offset == 960)
Jing Zhang's avatar
Jing Zhang committed
254
255
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
256
                ds_read_b128 %0, %1 offset:960\n \
Jing Zhang's avatar
Jing Zhang committed
257
                "
Chao Liu's avatar
Chao Liu committed
258
259
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
260
    }
Jing Zhang's avatar
Jing Zhang committed
261
    if(offset == 1024)
Jing Zhang's avatar
Jing Zhang committed
262
263
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
264
                ds_read_b128 %0, %1 offset:1024\n \
Jing Zhang's avatar
Jing Zhang committed
265
                "
Chao Liu's avatar
Chao Liu committed
266
267
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
268
    }
Jing Zhang's avatar
Jing Zhang committed
269
    if(offset == 1088)
Jing Zhang's avatar
Jing Zhang committed
270
271
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
272
                ds_read_b128 %0, %1 offset:1088\n \
Jing Zhang's avatar
Jing Zhang committed
273
                "
Chao Liu's avatar
Chao Liu committed
274
275
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
276
    }
Jing Zhang's avatar
Jing Zhang committed
277
    if(offset == 1152)
Jing Zhang's avatar
Jing Zhang committed
278
279
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
280
                ds_read_b128 %0, %1 offset:1152\n \
Jing Zhang's avatar
Jing Zhang committed
281
                "
Chao Liu's avatar
Chao Liu committed
282
283
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
284
    }
Jing Zhang's avatar
Jing Zhang committed
285
    if(offset == 1216)
Chao Liu's avatar
Chao Liu committed
286
287
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
288
                ds_read_b128 %0, %1 offset:1216\n \
Chao Liu's avatar
Chao Liu committed
289
                "
Chao Liu's avatar
Chao Liu committed
290
291
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
292
    }
Jing Zhang's avatar
Jing Zhang committed
293
    if(offset == 1280)
Jing Zhang's avatar
Jing Zhang committed
294
295
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
296
                ds_read_b128 %0, %1 offset:1280\n \
Jing Zhang's avatar
Jing Zhang committed
297
                "
Chao Liu's avatar
Chao Liu committed
298
299
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
300
    }
Jing Zhang's avatar
Jing Zhang committed
301
    if(offset == 1344)
Chao Liu's avatar
Chao Liu committed
302
303
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
304
                ds_read_b128 %0, %1 offset:1344\n \
Chao Liu's avatar
Chao Liu committed
305
                "
Chao Liu's avatar
Chao Liu committed
306
307
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
308
    }
Jing Zhang's avatar
Jing Zhang committed
309
    if(offset == 1408)
Jing Zhang's avatar
Jing Zhang committed
310
311
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
312
                ds_read_b128 %0, %1 offset:1408\n \
Jing Zhang's avatar
Jing Zhang committed
313
                "
Chao Liu's avatar
Chao Liu committed
314
315
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
316
    }
Jing Zhang's avatar
Jing Zhang committed
317
    if(offset == 1472)
Chao Liu's avatar
Chao Liu committed
318
319
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
320
                ds_read_b128 %0, %1 offset:1472\n \
Chao Liu's avatar
Chao Liu committed
321
                "
Chao Liu's avatar
Chao Liu committed
322
323
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
324
    }
Jing Zhang's avatar
Jing Zhang committed
325
    if(offset == 1536)
Jing Zhang's avatar
Jing Zhang committed
326
327
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
328
                ds_read_b128 %0, %1 offset:1536\n \
Jing Zhang's avatar
Jing Zhang committed
329
                "
Chao Liu's avatar
Chao Liu committed
330
331
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
332
    }
Jing Zhang's avatar
Jing Zhang committed
333
    if(offset == 1600)
Chao Liu's avatar
Chao Liu committed
334
335
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
336
                ds_read_b128 %0, %1 offset:1600\n \
Chao Liu's avatar
Chao Liu committed
337
                "
Chao Liu's avatar
Chao Liu committed
338
339
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
340
    }
Jing Zhang's avatar
Jing Zhang committed
341
    if(offset == 1664)
Jing Zhang's avatar
Jing Zhang committed
342
343
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
344
                ds_read_b128 %0, %1 offset:1664\n \
Jing Zhang's avatar
Jing Zhang committed
345
                "
Chao Liu's avatar
Chao Liu committed
346
347
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
348
    }
Jing Zhang's avatar
Jing Zhang committed
349
    if(offset == 1728)
Chao Liu's avatar
Chao Liu committed
350
351
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
352
                ds_read_b128 %0, %1 offset:1728\n \
Chao Liu's avatar
Chao Liu committed
353
                "
Chao Liu's avatar
Chao Liu committed
354
355
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
356
    }
Jing Zhang's avatar
Jing Zhang committed
357
    if(offset == 1792)
Jing Zhang's avatar
Jing Zhang committed
358
359
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
360
                ds_read_b128 %0, %1 offset:1792\n \
Jing Zhang's avatar
Jing Zhang committed
361
                "
Chao Liu's avatar
Chao Liu committed
362
363
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
364
    }
Jing Zhang's avatar
Jing Zhang committed
365
    if(offset == 1856)
Chao Liu's avatar
Chao Liu committed
366
367
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
368
                ds_read_b128 %0, %1 offset:1856\n \
Chao Liu's avatar
Chao Liu committed
369
                "
Chao Liu's avatar
Chao Liu committed
370
371
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
372
    }
Jing Zhang's avatar
Jing Zhang committed
373
    if(offset == 1920)
Jing Zhang's avatar
Jing Zhang committed
374
375
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
376
                ds_read_b128 %0, %1 offset:1920\n \
Jing Zhang's avatar
Jing Zhang committed
377
                "
Chao Liu's avatar
Chao Liu committed
378
379
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
380
    }
Jing Zhang's avatar
Jing Zhang committed
381
    if(offset == 1984)
Chao Liu's avatar
Chao Liu committed
382
383
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
384
                ds_read_b128 %0, %1 offset:1984\n \
Chao Liu's avatar
Chao Liu committed
385
                "
Chao Liu's avatar
Chao Liu committed
386
387
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
388
    }
Jing Zhang's avatar
Jing Zhang committed
389
    if(offset == 2048)
Jing Zhang's avatar
Jing Zhang committed
390
391
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
392
                ds_read_b128 %0, %1 offset:2048\n \
Jing Zhang's avatar
Jing Zhang committed
393
                "
Chao Liu's avatar
Chao Liu committed
394
395
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
396
    }
Jing Zhang's avatar
Jing Zhang committed
397
    if(offset == 2112)
Jing Zhang's avatar
Jing Zhang committed
398
399
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
400
                ds_read_b128 %0, %1 offset:2112\n \
Jing Zhang's avatar
Jing Zhang committed
401
                "
Chao Liu's avatar
Chao Liu committed
402
403
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
404
    }
Jing Zhang's avatar
Jing Zhang committed
405
    if(offset == 2176)
Jing Zhang's avatar
Jing Zhang committed
406
    {
Jing Zhang's avatar
Jing Zhang committed
407
408
409
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2176\n \
                "
Chao Liu's avatar
Chao Liu committed
410
411
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
412
413
414
415
416
417
    }
    if(offset == 2240)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2240\n \
                "
Chao Liu's avatar
Chao Liu committed
418
419
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
420
421
422
423
424
425
    }
    if(offset == 2304)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2304\n \
                "
Chao Liu's avatar
Chao Liu committed
426
427
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
428
429
430
431
432
433
    }
    if(offset == 2368)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2368\n \
                "
Chao Liu's avatar
Chao Liu committed
434
435
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
436
437
438
439
440
441
    }
    if(offset == 2432)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2432\n \
                "
Chao Liu's avatar
Chao Liu committed
442
443
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
444
445
446
447
448
449
    }
    if(offset == 2496)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2496\n \
                "
Chao Liu's avatar
Chao Liu committed
450
451
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
452
453
454
455
456
457
    }
    if(offset == 2560)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2560\n \
                "
Chao Liu's avatar
Chao Liu committed
458
459
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
460
461
462
463
464
465
    }
    if(offset == 2624)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2624\n \
                "
Chao Liu's avatar
Chao Liu committed
466
467
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
468
469
470
471
472
473
    }
    if(offset == 2688)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2688\n \
                "
Chao Liu's avatar
Chao Liu committed
474
475
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
476
477
478
479
480
481
    }
    if(offset == 2752)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2752\n \
                "
Chao Liu's avatar
Chao Liu committed
482
483
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
484
485
486
487
488
489
    }
    if(offset == 2816)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2816\n \
                "
Chao Liu's avatar
Chao Liu committed
490
491
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
492
493
494
495
496
497
    }
    if(offset == 2880)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2880\n \
                "
Chao Liu's avatar
Chao Liu committed
498
499
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
500
501
502
503
504
505
    }
    if(offset == 2944)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2944\n \
                "
Chao Liu's avatar
Chao Liu committed
506
507
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
508
509
510
511
512
513
    }
    if(offset == 3008)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3008\n \
                "
Chao Liu's avatar
Chao Liu committed
514
515
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
516
517
518
519
520
521
    }
    if(offset == 3072)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3072\n \
                "
Chao Liu's avatar
Chao Liu committed
522
523
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
524
525
526
527
528
529
    }
    if(offset == 3136)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3136\n \
                "
Chao Liu's avatar
Chao Liu committed
530
531
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
532
533
534
535
536
537
    }
    if(offset == 3200)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3200\n \
                "
Chao Liu's avatar
Chao Liu committed
538
539
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
540
541
542
543
544
545
    }
    if(offset == 3264)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3264\n \
                "
Chao Liu's avatar
Chao Liu committed
546
547
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
548
549
550
551
552
553
    }
    if(offset == 3328)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3328\n \
                "
Chao Liu's avatar
Chao Liu committed
554
555
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
556
557
558
559
560
561
    }
    if(offset == 3392)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3392\n \
                "
Chao Liu's avatar
Chao Liu committed
562
563
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
564
565
566
567
568
569
    }
    if(offset == 3456)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3456\n \
                "
Chao Liu's avatar
Chao Liu committed
570
571
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
572
573
574
575
576
577
    }
    if(offset == 3520)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3520\n \
                "
Chao Liu's avatar
Chao Liu committed
578
579
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
580
581
582
583
584
585
    }
    if(offset == 3584)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3584\n \
                "
Chao Liu's avatar
Chao Liu committed
586
587
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
588
589
590
591
592
593
    }
    if(offset == 3648)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3648\n \
                "
Chao Liu's avatar
Chao Liu committed
594
595
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
596
597
598
599
600
601
    }
    if(offset == 3712)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3712\n \
                "
Chao Liu's avatar
Chao Liu committed
602
603
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
604
605
606
607
608
609
    }
    if(offset == 3776)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3776\n \
                "
Chao Liu's avatar
Chao Liu committed
610
611
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
612
613
614
615
616
617
    }
    if(offset == 3840)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3840\n \
                "
Chao Liu's avatar
Chao Liu committed
618
619
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
620
621
622
623
624
625
    }
    if(offset == 3904)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3904\n \
                "
Chao Liu's avatar
Chao Liu committed
626
627
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
628
629
630
631
632
633
    }
    if(offset == 3968)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3968\n \
                "
Chao Liu's avatar
Chao Liu committed
634
635
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
636
637
638
639
640
641
    }
    if(offset == 4032)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4032\n \
                "
Chao Liu's avatar
Chao Liu committed
642
643
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
644
645
646
647
648
649
    }
    if(offset == 4096)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4096\n \
                "
Chao Liu's avatar
Chao Liu committed
650
651
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
652
    }
Jing Zhang's avatar
Jing Zhang committed
653
654
}

Chao Liu's avatar
Chao Liu committed
655
656
__device__ void
ds_write_b128(const vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
657
{
658
659
660
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
661
662
            ds_write_b128 %0, %1 \n \
            "
663
664
665
666
667
668
669
                     :
                     : "v"(__to_local(lds)), "v"(r));
    }
    else
    {
        assert(false);
    }
Jing Zhang's avatar
Jing Zhang committed
670
}
671
672
673

} // namespace ck
#endif