amd_inline_asm.hpp 17.1 KB
Newer Older
1
2
3
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP

Chao Liu's avatar
Chao Liu committed
4
#include "vector_type.hpp"
Jing Zhang's avatar
Jing Zhang committed
5

6
7
namespace ck {

Chao Liu's avatar
Chao Liu committed
8
// cast a pointer of LDS to its address
Chao Liu's avatar
Chao Liu committed
9
extern "C" __attribute__((address_space(3))) __device__ void* __to_local(void* p);
Chao Liu's avatar
Chao Liu committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

__device__ void vmcnt(index_t cnt)
{
    if(cnt == 0)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(0) \n \
                " ::);
    }
    else if(cnt == 1)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(1) \n \
                " ::);
    }
    else if(cnt == 2)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(2) \n \
                " ::);
    }
    else if(cnt == 4)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(2) \n \
                " ::);
    }
    else
    {
        assert(false);
    }
}

__device__ void lgkmcnt(index_t cnt)
{
    if(cnt == 0)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(0) \n \
                " ::);
    }
    else if(cnt == 1)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(1) \n \
                " ::);
    }
    else if(cnt == 2)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(2) \n \
                " ::);
    }
    else if(cnt == 3)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(3) \n \
                " ::);
    }
    else if(cnt == 4)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(4) \n \
                " ::);
    }
    else
    {
        assert(false);
    }
}

Chao Liu's avatar
Chao Liu committed
81
__device__ void outerProduct1x4(const float* a, const float* b, float* c)
Chao Liu's avatar
Chao Liu committed
82
{
Jing Zhang's avatar
Jing Zhang committed
83
84
85
86
87
88
    asm volatile("\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
Chao Liu's avatar
Chao Liu committed
89
90
91
92
93
94
95
96
97
98
                 : "=v"(c[0]), "=v"(c[1]), "=v"(c[2]), "=v"(c[3])
                 : "v"(a[0]),
                   "v"(b[0]),
                   "v"(b[1]),
                   "v"(b[2]),
                   "v"(b[3]),
                   "0"(c[0]),
                   "1"(c[1]),
                   "2"(c[2]),
                   "3"(c[3]));
Jing Zhang's avatar
Jing Zhang committed
99
100
}

Chao Liu's avatar
Chao Liu committed
101
102
103
__device__ void outerProduct1x4(const float& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c)
Chao Liu's avatar
Chao Liu committed
104
{
Chao Liu's avatar
Chao Liu committed
105
    outerProduct1x4(&a, reinterpret_cast<const float*>(&b), reinterpret_cast<float*>(&c));
Jing Zhang's avatar
Jing Zhang committed
106
107
}

Chao Liu's avatar
Chao Liu committed
108
109
110
111
112
113
__device__ void outerProduct4x4(const vector_type<float, 4>::MemoryType& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c0,
                                vector_type<float, 4>::MemoryType& c1,
                                vector_type<float, 4>::MemoryType& c2,
                                vector_type<float, 4>::MemoryType& c3)
Chao Liu's avatar
Chao Liu committed
114
{
Jing Zhang's avatar
Jing Zhang committed
115
116
117
118
119
120
    outerProduct1x4(a.x, b, c0);
    outerProduct1x4(a.y, b, c1);
    outerProduct1x4(a.z, b, c2);
    outerProduct1x4(a.w, b, c3);
}

Chao Liu's avatar
Chao Liu committed
121
122
123
__device__ void outerProduct8x8(const vector_type<float, 4>::MemoryType* a,
                                const vector_type<float, 4>::MemoryType* b,
                                vector_type<float, 4>::MemoryType* c)
Jing Zhang's avatar
Jing Zhang committed
124
125
126
127
128
129
130
{
    outerProduct4x4(a[0], b[0], c[0], c[2], c[4], c[6]);
    outerProduct4x4(a[0], b[1], c[1], c[3], c[5], c[7]);
    outerProduct4x4(a[1], b[0], c[8], c[10], c[12], c[14]);
    outerProduct4x4(a[1], b[1], c[9], c[11], c[13], c[15]);
}

Chao Liu's avatar
Chao Liu committed
131
__device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
132
133
134
135
{
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
136
                ds_read_b128 %0, %1 offset:0\n \
Jing Zhang's avatar
Jing Zhang committed
137
                "
Chao Liu's avatar
Chao Liu committed
138
139
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
140
    }
Jing Zhang's avatar
Jing Zhang committed
141
    if(offset == 64)
Jing Zhang's avatar
Jing Zhang committed
142
143
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
144
                ds_read_b128 %0, %1 offset:64\n \
Jing Zhang's avatar
Jing Zhang committed
145
                "
Chao Liu's avatar
Chao Liu committed
146
147
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
148
    }
Jing Zhang's avatar
Jing Zhang committed
149
    if(offset == 128)
Jing Zhang's avatar
Jing Zhang committed
150
151
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
152
                ds_read_b128 %0, %1 offset:128\n \
Jing Zhang's avatar
Jing Zhang committed
153
                "
Chao Liu's avatar
Chao Liu committed
154
155
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
156
    }
Jing Zhang's avatar
Jing Zhang committed
157
    if(offset == 192)
Jing Zhang's avatar
Jing Zhang committed
158
159
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
160
                ds_read_b128 %0, %1 offset:192\n \
Jing Zhang's avatar
Jing Zhang committed
161
                "
Chao Liu's avatar
Chao Liu committed
162
163
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
164
    }
Jing Zhang's avatar
Jing Zhang committed
165
    if(offset == 256)
Jing Zhang's avatar
Jing Zhang committed
166
167
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
168
                ds_read_b128 %0, %1 offset:256\n \
Jing Zhang's avatar
Jing Zhang committed
169
                "
Chao Liu's avatar
Chao Liu committed
170
171
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
172
    }
Jing Zhang's avatar
Jing Zhang committed
173
    if(offset == 320)
Jing Zhang's avatar
Jing Zhang committed
174
175
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
176
                ds_read_b128 %0, %1 offset:320\n \
Jing Zhang's avatar
Jing Zhang committed
177
                "
Chao Liu's avatar
Chao Liu committed
178
179
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
180
    }
Jing Zhang's avatar
Jing Zhang committed
181
    if(offset == 384)
Jing Zhang's avatar
Jing Zhang committed
182
183
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
184
                ds_read_b128 %0, %1 offset:384\n \
Jing Zhang's avatar
Jing Zhang committed
185
                "
Chao Liu's avatar
Chao Liu committed
186
187
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
188
    }
Jing Zhang's avatar
Jing Zhang committed
189
    if(offset == 448)
Jing Zhang's avatar
Jing Zhang committed
190
191
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
192
                ds_read_b128 %0, %1 offset:448\n \
Jing Zhang's avatar
Jing Zhang committed
193
                "
Chao Liu's avatar
Chao Liu committed
194
195
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
196
    }
Jing Zhang's avatar
Jing Zhang committed
197
    if(offset == 512)
Jing Zhang's avatar
Jing Zhang committed
198
199
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
200
                ds_read_b128 %0, %1 offset:512\n \
Jing Zhang's avatar
Jing Zhang committed
201
                "
Chao Liu's avatar
Chao Liu committed
202
203
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
204
    }
Jing Zhang's avatar
Jing Zhang committed
205
    if(offset == 576)
Jing Zhang's avatar
Jing Zhang committed
206
207
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
208
                ds_read_b128 %0, %1 offset:576\n \
Jing Zhang's avatar
Jing Zhang committed
209
                "
Chao Liu's avatar
Chao Liu committed
210
211
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
212
    }
Jing Zhang's avatar
Jing Zhang committed
213
    if(offset == 640)
Jing Zhang's avatar
Jing Zhang committed
214
215
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
216
                ds_read_b128 %0, %1 offset:640\n \
Jing Zhang's avatar
Jing Zhang committed
217
                "
Chao Liu's avatar
Chao Liu committed
218
219
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
220
    }
Jing Zhang's avatar
Jing Zhang committed
221
    if(offset == 704)
Jing Zhang's avatar
Jing Zhang committed
222
223
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
224
                ds_read_b128 %0, %1 offset:704\n \
Jing Zhang's avatar
Jing Zhang committed
225
                "
Chao Liu's avatar
Chao Liu committed
226
227
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
228
    }
Jing Zhang's avatar
Jing Zhang committed
229
    if(offset == 768)
Jing Zhang's avatar
Jing Zhang committed
230
231
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
232
                ds_read_b128 %0, %1 offset:768\n \
Jing Zhang's avatar
Jing Zhang committed
233
                "
Chao Liu's avatar
Chao Liu committed
234
235
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
236
    }
Jing Zhang's avatar
Jing Zhang committed
237
    if(offset == 832)
Jing Zhang's avatar
Jing Zhang committed
238
239
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
240
                ds_read_b128 %0, %1 offset:832\n \
Jing Zhang's avatar
Jing Zhang committed
241
                "
Chao Liu's avatar
Chao Liu committed
242
243
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
244
    }
Jing Zhang's avatar
Jing Zhang committed
245
    if(offset == 896)
Jing Zhang's avatar
Jing Zhang committed
246
247
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
248
                ds_read_b128 %0, %1 offset:896\n \
Jing Zhang's avatar
Jing Zhang committed
249
                "
Chao Liu's avatar
Chao Liu committed
250
251
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
252
    }
Jing Zhang's avatar
Jing Zhang committed
253
    if(offset == 960)
Jing Zhang's avatar
Jing Zhang committed
254
255
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
256
                ds_read_b128 %0, %1 offset:960\n \
Jing Zhang's avatar
Jing Zhang committed
257
                "
Chao Liu's avatar
Chao Liu committed
258
259
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
260
    }
Jing Zhang's avatar
Jing Zhang committed
261
    if(offset == 1024)
Jing Zhang's avatar
Jing Zhang committed
262
263
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
264
                ds_read_b128 %0, %1 offset:1024\n \
Jing Zhang's avatar
Jing Zhang committed
265
                "
Chao Liu's avatar
Chao Liu committed
266
267
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
268
    }
Jing Zhang's avatar
Jing Zhang committed
269
    if(offset == 1088)
Jing Zhang's avatar
Jing Zhang committed
270
271
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
272
                ds_read_b128 %0, %1 offset:1088\n \
Jing Zhang's avatar
Jing Zhang committed
273
                "
Chao Liu's avatar
Chao Liu committed
274
275
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
276
    }
Jing Zhang's avatar
Jing Zhang committed
277
    if(offset == 1152)
Jing Zhang's avatar
Jing Zhang committed
278
279
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
280
                ds_read_b128 %0, %1 offset:1152\n \
Jing Zhang's avatar
Jing Zhang committed
281
                "
Chao Liu's avatar
Chao Liu committed
282
283
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
284
    }
Jing Zhang's avatar
Jing Zhang committed
285
    if(offset == 1216)
Chao Liu's avatar
Chao Liu committed
286
287
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
288
                ds_read_b128 %0, %1 offset:1216\n \
Chao Liu's avatar
Chao Liu committed
289
                "
Chao Liu's avatar
Chao Liu committed
290
291
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
292
    }
Jing Zhang's avatar
Jing Zhang committed
293
    if(offset == 1280)
Jing Zhang's avatar
Jing Zhang committed
294
295
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
296
                ds_read_b128 %0, %1 offset:1280\n \
Jing Zhang's avatar
Jing Zhang committed
297
                "
Chao Liu's avatar
Chao Liu committed
298
299
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
300
    }
Jing Zhang's avatar
Jing Zhang committed
301
    if(offset == 1344)
Chao Liu's avatar
Chao Liu committed
302
303
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
304
                ds_read_b128 %0, %1 offset:1344\n \
Chao Liu's avatar
Chao Liu committed
305
                "
Chao Liu's avatar
Chao Liu committed
306
307
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
308
    }
Jing Zhang's avatar
Jing Zhang committed
309
    if(offset == 1408)
Jing Zhang's avatar
Jing Zhang committed
310
311
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
312
                ds_read_b128 %0, %1 offset:1408\n \
Jing Zhang's avatar
Jing Zhang committed
313
                "
Chao Liu's avatar
Chao Liu committed
314
315
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
316
    }
Jing Zhang's avatar
Jing Zhang committed
317
    if(offset == 1472)
Chao Liu's avatar
Chao Liu committed
318
319
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
320
                ds_read_b128 %0, %1 offset:1472\n \
Chao Liu's avatar
Chao Liu committed
321
                "
Chao Liu's avatar
Chao Liu committed
322
323
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
324
    }
Jing Zhang's avatar
Jing Zhang committed
325
    if(offset == 1536)
Jing Zhang's avatar
Jing Zhang committed
326
327
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
328
                ds_read_b128 %0, %1 offset:1536\n \
Jing Zhang's avatar
Jing Zhang committed
329
                "
Chao Liu's avatar
Chao Liu committed
330
331
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
332
    }
Jing Zhang's avatar
Jing Zhang committed
333
    if(offset == 1600)
Chao Liu's avatar
Chao Liu committed
334
335
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
336
                ds_read_b128 %0, %1 offset:1600\n \
Chao Liu's avatar
Chao Liu committed
337
                "
Chao Liu's avatar
Chao Liu committed
338
339
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
340
    }
Jing Zhang's avatar
Jing Zhang committed
341
    if(offset == 1664)
Jing Zhang's avatar
Jing Zhang committed
342
343
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
344
                ds_read_b128 %0, %1 offset:1664\n \
Jing Zhang's avatar
Jing Zhang committed
345
                "
Chao Liu's avatar
Chao Liu committed
346
347
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
348
    }
Jing Zhang's avatar
Jing Zhang committed
349
    if(offset == 1728)
Chao Liu's avatar
Chao Liu committed
350
351
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
352
                ds_read_b128 %0, %1 offset:1728\n \
Chao Liu's avatar
Chao Liu committed
353
                "
Chao Liu's avatar
Chao Liu committed
354
355
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
356
    }
Jing Zhang's avatar
Jing Zhang committed
357
    if(offset == 1792)
Jing Zhang's avatar
Jing Zhang committed
358
359
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
360
                ds_read_b128 %0, %1 offset:1792\n \
Jing Zhang's avatar
Jing Zhang committed
361
                "
Chao Liu's avatar
Chao Liu committed
362
363
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
364
    }
Jing Zhang's avatar
Jing Zhang committed
365
    if(offset == 1856)
Chao Liu's avatar
Chao Liu committed
366
367
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
368
                ds_read_b128 %0, %1 offset:1856\n \
Chao Liu's avatar
Chao Liu committed
369
                "
Chao Liu's avatar
Chao Liu committed
370
371
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
372
    }
Jing Zhang's avatar
Jing Zhang committed
373
    if(offset == 1920)
Jing Zhang's avatar
Jing Zhang committed
374
375
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
376
                ds_read_b128 %0, %1 offset:1920\n \
Jing Zhang's avatar
Jing Zhang committed
377
                "
Chao Liu's avatar
Chao Liu committed
378
379
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
380
    }
Jing Zhang's avatar
Jing Zhang committed
381
    if(offset == 1984)
Chao Liu's avatar
Chao Liu committed
382
383
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
384
                ds_read_b128 %0, %1 offset:1984\n \
Chao Liu's avatar
Chao Liu committed
385
                "
Chao Liu's avatar
Chao Liu committed
386
387
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
388
    }
Jing Zhang's avatar
Jing Zhang committed
389
    if(offset == 2048)
Jing Zhang's avatar
Jing Zhang committed
390
391
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
392
                ds_read_b128 %0, %1 offset:2048\n \
Jing Zhang's avatar
Jing Zhang committed
393
                "
Chao Liu's avatar
Chao Liu committed
394
395
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
396
    }
Jing Zhang's avatar
Jing Zhang committed
397
    if(offset == 2112)
Jing Zhang's avatar
Jing Zhang committed
398
399
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
400
                ds_read_b128 %0, %1 offset:2112\n \
Jing Zhang's avatar
Jing Zhang committed
401
                "
Chao Liu's avatar
Chao Liu committed
402
403
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
404
    }
Jing Zhang's avatar
Jing Zhang committed
405
    if(offset == 2176)
Jing Zhang's avatar
Jing Zhang committed
406
    {
Jing Zhang's avatar
Jing Zhang committed
407
408
409
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2176\n \
                "
Chao Liu's avatar
Chao Liu committed
410
411
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
412
413
414
415
416
417
    }
    if(offset == 2240)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2240\n \
                "
Chao Liu's avatar
Chao Liu committed
418
419
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
420
421
422
423
424
425
    }
    if(offset == 2304)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2304\n \
                "
Chao Liu's avatar
Chao Liu committed
426
427
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
428
429
430
431
432
433
    }
    if(offset == 2368)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2368\n \
                "
Chao Liu's avatar
Chao Liu committed
434
435
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
436
437
438
439
440
441
    }
    if(offset == 2432)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2432\n \
                "
Chao Liu's avatar
Chao Liu committed
442
443
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
444
445
446
447
448
449
    }
    if(offset == 2496)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2496\n \
                "
Chao Liu's avatar
Chao Liu committed
450
451
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
452
453
454
455
456
457
    }
    if(offset == 2560)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2560\n \
                "
Chao Liu's avatar
Chao Liu committed
458
459
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
460
461
462
463
464
465
    }
    if(offset == 2624)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2624\n \
                "
Chao Liu's avatar
Chao Liu committed
466
467
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
468
469
470
471
472
473
    }
    if(offset == 2688)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2688\n \
                "
Chao Liu's avatar
Chao Liu committed
474
475
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
476
477
478
479
480
481
    }
    if(offset == 2752)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2752\n \
                "
Chao Liu's avatar
Chao Liu committed
482
483
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
484
485
486
487
488
489
    }
    if(offset == 2816)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2816\n \
                "
Chao Liu's avatar
Chao Liu committed
490
491
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
492
493
494
495
496
497
    }
    if(offset == 2880)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2880\n \
                "
Chao Liu's avatar
Chao Liu committed
498
499
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
500
501
502
503
504
505
    }
    if(offset == 2944)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2944\n \
                "
Chao Liu's avatar
Chao Liu committed
506
507
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
508
509
510
511
512
513
    }
    if(offset == 3008)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3008\n \
                "
Chao Liu's avatar
Chao Liu committed
514
515
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
516
517
518
519
520
521
    }
    if(offset == 3072)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3072\n \
                "
Chao Liu's avatar
Chao Liu committed
522
523
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
524
525
526
527
528
529
    }
    if(offset == 3136)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3136\n \
                "
Chao Liu's avatar
Chao Liu committed
530
531
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
532
533
534
535
536
537
    }
    if(offset == 3200)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3200\n \
                "
Chao Liu's avatar
Chao Liu committed
538
539
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
540
541
542
543
544
545
    }
    if(offset == 3264)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3264\n \
                "
Chao Liu's avatar
Chao Liu committed
546
547
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
548
549
550
551
552
553
    }
    if(offset == 3328)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3328\n \
                "
Chao Liu's avatar
Chao Liu committed
554
555
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
556
557
558
559
560
561
    }
    if(offset == 3392)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3392\n \
                "
Chao Liu's avatar
Chao Liu committed
562
563
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
564
565
566
567
568
569
    }
    if(offset == 3456)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3456\n \
                "
Chao Liu's avatar
Chao Liu committed
570
571
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
572
573
574
575
576
577
    }
    if(offset == 3520)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3520\n \
                "
Chao Liu's avatar
Chao Liu committed
578
579
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
580
581
582
583
584
585
    }
    if(offset == 3584)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3584\n \
                "
Chao Liu's avatar
Chao Liu committed
586
587
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
588
589
590
591
592
593
    }
    if(offset == 3648)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3648\n \
                "
Chao Liu's avatar
Chao Liu committed
594
595
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
596
597
598
599
600
601
    }
    if(offset == 3712)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3712\n \
                "
Chao Liu's avatar
Chao Liu committed
602
603
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
604
605
606
607
608
609
    }
    if(offset == 3776)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3776\n \
                "
Chao Liu's avatar
Chao Liu committed
610
611
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
612
613
614
615
616
617
    }
    if(offset == 3840)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3840\n \
                "
Chao Liu's avatar
Chao Liu committed
618
619
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
620
621
622
623
624
625
    }
    if(offset == 3904)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3904\n \
                "
Chao Liu's avatar
Chao Liu committed
626
627
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
628
629
630
631
632
633
    }
    if(offset == 3968)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3968\n \
                "
Chao Liu's avatar
Chao Liu committed
634
635
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
636
637
638
639
640
641
    }
    if(offset == 4032)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4032\n \
                "
Chao Liu's avatar
Chao Liu committed
642
643
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
644
645
646
647
648
649
    }
    if(offset == 4096)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4096\n \
                "
Chao Liu's avatar
Chao Liu committed
650
651
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
652
    }
Jing Zhang's avatar
Jing Zhang committed
653
654
}

Chao Liu's avatar
Chao Liu committed
655
656
__device__ void
ds_write_b128(const vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
657
{
658
659
660
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
661
662
            ds_write_b128 %0, %1 \n \
            "
663
664
665
666
667
668
669
                     :
                     : "v"(__to_local(lds)), "v"(r));
    }
    else
    {
        assert(false);
    }
Jing Zhang's avatar
Jing Zhang committed
670
}
671
672
673

} // namespace ck
#endif