amd_inline_asm.hpp 17.4 KB
Newer Older
1
2
3
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP

Chao Liu's avatar
Chao Liu committed
4
#include "vector_type.hpp"
Jing Zhang's avatar
Jing Zhang committed
5

6
7
namespace ck {

Chao Liu's avatar
Chao Liu committed
8
// cast a pointer of LDS to its address
Chao Liu's avatar
Chao Liu committed
9
extern "C" __attribute__((address_space(3))) __device__ void* __to_local(void* p);
Chao Liu's avatar
Chao Liu committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

__device__ void vmcnt(index_t cnt)
{
    if(cnt == 0)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(0) \n \
                " ::);
    }
    else if(cnt == 1)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(1) \n \
                " ::);
    }
    else if(cnt == 2)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(2) \n \
                " ::);
    }
    else if(cnt == 4)
    {
        asm volatile("\n \
                s_waitcnt vmcnt(2) \n \
                " ::);
    }
    else
    {
        assert(false);
    }
}

__device__ void lgkmcnt(index_t cnt)
{
    if(cnt == 0)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(0) \n \
                " ::);
    }
    else if(cnt == 1)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(1) \n \
                " ::);
    }
    else if(cnt == 2)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(2) \n \
                " ::);
    }
    else if(cnt == 3)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(3) \n \
                " ::);
    }
    else if(cnt == 4)
    {
        asm volatile("\n \
                s_waitcnt lgkmcnt(4) \n \
                " ::);
    }
    else
    {
        assert(false);
    }
}

Chao Liu's avatar
Chao Liu committed
81
__device__ void outerProduct1x4(const float* a, const float* b, float* c)
Chao Liu's avatar
Chao Liu committed
82
{
Jing Zhang's avatar
Jing Zhang committed
83
84
85
86
87
88
    asm volatile("\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
Chao Liu's avatar
Chao Liu committed
89
90
91
92
93
94
95
96
97
98
                 : "=v"(c[0]), "=v"(c[1]), "=v"(c[2]), "=v"(c[3])
                 : "v"(a[0]),
                   "v"(b[0]),
                   "v"(b[1]),
                   "v"(b[2]),
                   "v"(b[3]),
                   "0"(c[0]),
                   "1"(c[1]),
                   "2"(c[2]),
                   "3"(c[3]));
Jing Zhang's avatar
Jing Zhang committed
99
100
}

Chao Liu's avatar
Chao Liu committed
101
102
103
__device__ void outerProduct1x4(const float& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c)
Chao Liu's avatar
Chao Liu committed
104
{
Chao Liu's avatar
Chao Liu committed
105
    outerProduct1x4(&a, reinterpret_cast<const float*>(&b), reinterpret_cast<float*>(&c));
Jing Zhang's avatar
Jing Zhang committed
106
107
}

Chao Liu's avatar
Chao Liu committed
108
109
110
111
112
113
114
115
116
__device__ void outerProduct2x4(const vector_type<float, 2>::MemoryType& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c0,
                                vector_type<float, 4>::MemoryType& c1)
{
    outerProduct1x4(a.x, b, c0);
    outerProduct1x4(a.y, b, c1);
}

Chao Liu's avatar
Chao Liu committed
117
118
119
120
121
122
__device__ void outerProduct4x4(const vector_type<float, 4>::MemoryType& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c0,
                                vector_type<float, 4>::MemoryType& c1,
                                vector_type<float, 4>::MemoryType& c2,
                                vector_type<float, 4>::MemoryType& c3)
Chao Liu's avatar
Chao Liu committed
123
{
Jing Zhang's avatar
Jing Zhang committed
124
125
126
127
128
129
    outerProduct1x4(a.x, b, c0);
    outerProduct1x4(a.y, b, c1);
    outerProduct1x4(a.z, b, c2);
    outerProduct1x4(a.w, b, c3);
}

Chao Liu's avatar
Chao Liu committed
130
131
132
__device__ void outerProduct8x8(const vector_type<float, 4>::MemoryType* a,
                                const vector_type<float, 4>::MemoryType* b,
                                vector_type<float, 4>::MemoryType* c)
Jing Zhang's avatar
Jing Zhang committed
133
134
135
136
137
138
139
{
    outerProduct4x4(a[0], b[0], c[0], c[2], c[4], c[6]);
    outerProduct4x4(a[0], b[1], c[1], c[3], c[5], c[7]);
    outerProduct4x4(a[1], b[0], c[8], c[10], c[12], c[14]);
    outerProduct4x4(a[1], b[1], c[9], c[11], c[13], c[15]);
}

Chao Liu's avatar
Chao Liu committed
140
__device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
141
142
143
144
{
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
145
                ds_read_b128 %0, %1 offset:0\n \
Jing Zhang's avatar
Jing Zhang committed
146
                "
Chao Liu's avatar
Chao Liu committed
147
148
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
149
    }
Jing Zhang's avatar
Jing Zhang committed
150
    if(offset == 64)
Jing Zhang's avatar
Jing Zhang committed
151
152
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
153
                ds_read_b128 %0, %1 offset:64\n \
Jing Zhang's avatar
Jing Zhang committed
154
                "
Chao Liu's avatar
Chao Liu committed
155
156
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
157
    }
Jing Zhang's avatar
Jing Zhang committed
158
    if(offset == 128)
Jing Zhang's avatar
Jing Zhang committed
159
160
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
161
                ds_read_b128 %0, %1 offset:128\n \
Jing Zhang's avatar
Jing Zhang committed
162
                "
Chao Liu's avatar
Chao Liu committed
163
164
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
165
    }
Jing Zhang's avatar
Jing Zhang committed
166
    if(offset == 192)
Jing Zhang's avatar
Jing Zhang committed
167
168
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
169
                ds_read_b128 %0, %1 offset:192\n \
Jing Zhang's avatar
Jing Zhang committed
170
                "
Chao Liu's avatar
Chao Liu committed
171
172
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
173
    }
Jing Zhang's avatar
Jing Zhang committed
174
    if(offset == 256)
Jing Zhang's avatar
Jing Zhang committed
175
176
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
177
                ds_read_b128 %0, %1 offset:256\n \
Jing Zhang's avatar
Jing Zhang committed
178
                "
Chao Liu's avatar
Chao Liu committed
179
180
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
181
    }
Jing Zhang's avatar
Jing Zhang committed
182
    if(offset == 320)
Jing Zhang's avatar
Jing Zhang committed
183
184
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
185
                ds_read_b128 %0, %1 offset:320\n \
Jing Zhang's avatar
Jing Zhang committed
186
                "
Chao Liu's avatar
Chao Liu committed
187
188
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
189
    }
Jing Zhang's avatar
Jing Zhang committed
190
    if(offset == 384)
Jing Zhang's avatar
Jing Zhang committed
191
192
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
193
                ds_read_b128 %0, %1 offset:384\n \
Jing Zhang's avatar
Jing Zhang committed
194
                "
Chao Liu's avatar
Chao Liu committed
195
196
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
197
    }
Jing Zhang's avatar
Jing Zhang committed
198
    if(offset == 448)
Jing Zhang's avatar
Jing Zhang committed
199
200
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
201
                ds_read_b128 %0, %1 offset:448\n \
Jing Zhang's avatar
Jing Zhang committed
202
                "
Chao Liu's avatar
Chao Liu committed
203
204
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
205
    }
Jing Zhang's avatar
Jing Zhang committed
206
    if(offset == 512)
Jing Zhang's avatar
Jing Zhang committed
207
208
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
209
                ds_read_b128 %0, %1 offset:512\n \
Jing Zhang's avatar
Jing Zhang committed
210
                "
Chao Liu's avatar
Chao Liu committed
211
212
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
213
    }
Jing Zhang's avatar
Jing Zhang committed
214
    if(offset == 576)
Jing Zhang's avatar
Jing Zhang committed
215
216
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
217
                ds_read_b128 %0, %1 offset:576\n \
Jing Zhang's avatar
Jing Zhang committed
218
                "
Chao Liu's avatar
Chao Liu committed
219
220
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
221
    }
Jing Zhang's avatar
Jing Zhang committed
222
    if(offset == 640)
Jing Zhang's avatar
Jing Zhang committed
223
224
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
225
                ds_read_b128 %0, %1 offset:640\n \
Jing Zhang's avatar
Jing Zhang committed
226
                "
Chao Liu's avatar
Chao Liu committed
227
228
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
229
    }
Jing Zhang's avatar
Jing Zhang committed
230
    if(offset == 704)
Jing Zhang's avatar
Jing Zhang committed
231
232
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
233
                ds_read_b128 %0, %1 offset:704\n \
Jing Zhang's avatar
Jing Zhang committed
234
                "
Chao Liu's avatar
Chao Liu committed
235
236
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
237
    }
Jing Zhang's avatar
Jing Zhang committed
238
    if(offset == 768)
Jing Zhang's avatar
Jing Zhang committed
239
240
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
241
                ds_read_b128 %0, %1 offset:768\n \
Jing Zhang's avatar
Jing Zhang committed
242
                "
Chao Liu's avatar
Chao Liu committed
243
244
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
245
    }
Jing Zhang's avatar
Jing Zhang committed
246
    if(offset == 832)
Jing Zhang's avatar
Jing Zhang committed
247
248
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
249
                ds_read_b128 %0, %1 offset:832\n \
Jing Zhang's avatar
Jing Zhang committed
250
                "
Chao Liu's avatar
Chao Liu committed
251
252
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
253
    }
Jing Zhang's avatar
Jing Zhang committed
254
    if(offset == 896)
Jing Zhang's avatar
Jing Zhang committed
255
256
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
257
                ds_read_b128 %0, %1 offset:896\n \
Jing Zhang's avatar
Jing Zhang committed
258
                "
Chao Liu's avatar
Chao Liu committed
259
260
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
261
    }
Jing Zhang's avatar
Jing Zhang committed
262
    if(offset == 960)
Jing Zhang's avatar
Jing Zhang committed
263
264
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
265
                ds_read_b128 %0, %1 offset:960\n \
Jing Zhang's avatar
Jing Zhang committed
266
                "
Chao Liu's avatar
Chao Liu committed
267
268
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
269
    }
Jing Zhang's avatar
Jing Zhang committed
270
    if(offset == 1024)
Jing Zhang's avatar
Jing Zhang committed
271
272
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
273
                ds_read_b128 %0, %1 offset:1024\n \
Jing Zhang's avatar
Jing Zhang committed
274
                "
Chao Liu's avatar
Chao Liu committed
275
276
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
277
    }
Jing Zhang's avatar
Jing Zhang committed
278
    if(offset == 1088)
Jing Zhang's avatar
Jing Zhang committed
279
280
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
281
                ds_read_b128 %0, %1 offset:1088\n \
Jing Zhang's avatar
Jing Zhang committed
282
                "
Chao Liu's avatar
Chao Liu committed
283
284
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
285
    }
Jing Zhang's avatar
Jing Zhang committed
286
    if(offset == 1152)
Jing Zhang's avatar
Jing Zhang committed
287
288
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
289
                ds_read_b128 %0, %1 offset:1152\n \
Jing Zhang's avatar
Jing Zhang committed
290
                "
Chao Liu's avatar
Chao Liu committed
291
292
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
293
    }
Jing Zhang's avatar
Jing Zhang committed
294
    if(offset == 1216)
Chao Liu's avatar
Chao Liu committed
295
296
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
297
                ds_read_b128 %0, %1 offset:1216\n \
Chao Liu's avatar
Chao Liu committed
298
                "
Chao Liu's avatar
Chao Liu committed
299
300
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
301
    }
Jing Zhang's avatar
Jing Zhang committed
302
    if(offset == 1280)
Jing Zhang's avatar
Jing Zhang committed
303
304
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
305
                ds_read_b128 %0, %1 offset:1280\n \
Jing Zhang's avatar
Jing Zhang committed
306
                "
Chao Liu's avatar
Chao Liu committed
307
308
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
309
    }
Jing Zhang's avatar
Jing Zhang committed
310
    if(offset == 1344)
Chao Liu's avatar
Chao Liu committed
311
312
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
313
                ds_read_b128 %0, %1 offset:1344\n \
Chao Liu's avatar
Chao Liu committed
314
                "
Chao Liu's avatar
Chao Liu committed
315
316
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
317
    }
Jing Zhang's avatar
Jing Zhang committed
318
    if(offset == 1408)
Jing Zhang's avatar
Jing Zhang committed
319
320
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
321
                ds_read_b128 %0, %1 offset:1408\n \
Jing Zhang's avatar
Jing Zhang committed
322
                "
Chao Liu's avatar
Chao Liu committed
323
324
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
325
    }
Jing Zhang's avatar
Jing Zhang committed
326
    if(offset == 1472)
Chao Liu's avatar
Chao Liu committed
327
328
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
329
                ds_read_b128 %0, %1 offset:1472\n \
Chao Liu's avatar
Chao Liu committed
330
                "
Chao Liu's avatar
Chao Liu committed
331
332
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
333
    }
Jing Zhang's avatar
Jing Zhang committed
334
    if(offset == 1536)
Jing Zhang's avatar
Jing Zhang committed
335
336
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
337
                ds_read_b128 %0, %1 offset:1536\n \
Jing Zhang's avatar
Jing Zhang committed
338
                "
Chao Liu's avatar
Chao Liu committed
339
340
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
341
    }
Jing Zhang's avatar
Jing Zhang committed
342
    if(offset == 1600)
Chao Liu's avatar
Chao Liu committed
343
344
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
345
                ds_read_b128 %0, %1 offset:1600\n \
Chao Liu's avatar
Chao Liu committed
346
                "
Chao Liu's avatar
Chao Liu committed
347
348
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
349
    }
Jing Zhang's avatar
Jing Zhang committed
350
    if(offset == 1664)
Jing Zhang's avatar
Jing Zhang committed
351
352
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
353
                ds_read_b128 %0, %1 offset:1664\n \
Jing Zhang's avatar
Jing Zhang committed
354
                "
Chao Liu's avatar
Chao Liu committed
355
356
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
357
    }
Jing Zhang's avatar
Jing Zhang committed
358
    if(offset == 1728)
Chao Liu's avatar
Chao Liu committed
359
360
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
361
                ds_read_b128 %0, %1 offset:1728\n \
Chao Liu's avatar
Chao Liu committed
362
                "
Chao Liu's avatar
Chao Liu committed
363
364
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
365
    }
Jing Zhang's avatar
Jing Zhang committed
366
    if(offset == 1792)
Jing Zhang's avatar
Jing Zhang committed
367
368
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
369
                ds_read_b128 %0, %1 offset:1792\n \
Jing Zhang's avatar
Jing Zhang committed
370
                "
Chao Liu's avatar
Chao Liu committed
371
372
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
373
    }
Jing Zhang's avatar
Jing Zhang committed
374
    if(offset == 1856)
Chao Liu's avatar
Chao Liu committed
375
376
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
377
                ds_read_b128 %0, %1 offset:1856\n \
Chao Liu's avatar
Chao Liu committed
378
                "
Chao Liu's avatar
Chao Liu committed
379
380
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
381
    }
Jing Zhang's avatar
Jing Zhang committed
382
    if(offset == 1920)
Jing Zhang's avatar
Jing Zhang committed
383
384
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
385
                ds_read_b128 %0, %1 offset:1920\n \
Jing Zhang's avatar
Jing Zhang committed
386
                "
Chao Liu's avatar
Chao Liu committed
387
388
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
389
    }
Jing Zhang's avatar
Jing Zhang committed
390
    if(offset == 1984)
Chao Liu's avatar
Chao Liu committed
391
392
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
393
                ds_read_b128 %0, %1 offset:1984\n \
Chao Liu's avatar
Chao Liu committed
394
                "
Chao Liu's avatar
Chao Liu committed
395
396
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
397
    }
Jing Zhang's avatar
Jing Zhang committed
398
    if(offset == 2048)
Jing Zhang's avatar
Jing Zhang committed
399
400
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
401
                ds_read_b128 %0, %1 offset:2048\n \
Jing Zhang's avatar
Jing Zhang committed
402
                "
Chao Liu's avatar
Chao Liu committed
403
404
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
405
    }
Jing Zhang's avatar
Jing Zhang committed
406
    if(offset == 2112)
Jing Zhang's avatar
Jing Zhang committed
407
408
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
409
                ds_read_b128 %0, %1 offset:2112\n \
Jing Zhang's avatar
Jing Zhang committed
410
                "
Chao Liu's avatar
Chao Liu committed
411
412
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
413
    }
Jing Zhang's avatar
Jing Zhang committed
414
    if(offset == 2176)
Jing Zhang's avatar
Jing Zhang committed
415
    {
Jing Zhang's avatar
Jing Zhang committed
416
417
418
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2176\n \
                "
Chao Liu's avatar
Chao Liu committed
419
420
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
421
422
423
424
425
426
    }
    if(offset == 2240)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2240\n \
                "
Chao Liu's avatar
Chao Liu committed
427
428
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
429
430
431
432
433
434
    }
    if(offset == 2304)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2304\n \
                "
Chao Liu's avatar
Chao Liu committed
435
436
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
437
438
439
440
441
442
    }
    if(offset == 2368)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2368\n \
                "
Chao Liu's avatar
Chao Liu committed
443
444
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
445
446
447
448
449
450
    }
    if(offset == 2432)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2432\n \
                "
Chao Liu's avatar
Chao Liu committed
451
452
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
453
454
455
456
457
458
    }
    if(offset == 2496)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2496\n \
                "
Chao Liu's avatar
Chao Liu committed
459
460
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
461
462
463
464
465
466
    }
    if(offset == 2560)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2560\n \
                "
Chao Liu's avatar
Chao Liu committed
467
468
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
469
470
471
472
473
474
    }
    if(offset == 2624)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2624\n \
                "
Chao Liu's avatar
Chao Liu committed
475
476
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
477
478
479
480
481
482
    }
    if(offset == 2688)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2688\n \
                "
Chao Liu's avatar
Chao Liu committed
483
484
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
485
486
487
488
489
490
    }
    if(offset == 2752)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2752\n \
                "
Chao Liu's avatar
Chao Liu committed
491
492
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
493
494
495
496
497
498
    }
    if(offset == 2816)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2816\n \
                "
Chao Liu's avatar
Chao Liu committed
499
500
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
501
502
503
504
505
506
    }
    if(offset == 2880)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2880\n \
                "
Chao Liu's avatar
Chao Liu committed
507
508
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
509
510
511
512
513
514
    }
    if(offset == 2944)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2944\n \
                "
Chao Liu's avatar
Chao Liu committed
515
516
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
517
518
519
520
521
522
    }
    if(offset == 3008)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3008\n \
                "
Chao Liu's avatar
Chao Liu committed
523
524
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
525
526
527
528
529
530
    }
    if(offset == 3072)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3072\n \
                "
Chao Liu's avatar
Chao Liu committed
531
532
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
533
534
535
536
537
538
    }
    if(offset == 3136)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3136\n \
                "
Chao Liu's avatar
Chao Liu committed
539
540
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
541
542
543
544
545
546
    }
    if(offset == 3200)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3200\n \
                "
Chao Liu's avatar
Chao Liu committed
547
548
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
549
550
551
552
553
554
    }
    if(offset == 3264)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3264\n \
                "
Chao Liu's avatar
Chao Liu committed
555
556
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
557
558
559
560
561
562
    }
    if(offset == 3328)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3328\n \
                "
Chao Liu's avatar
Chao Liu committed
563
564
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
565
566
567
568
569
570
    }
    if(offset == 3392)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3392\n \
                "
Chao Liu's avatar
Chao Liu committed
571
572
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
573
574
575
576
577
578
    }
    if(offset == 3456)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3456\n \
                "
Chao Liu's avatar
Chao Liu committed
579
580
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
581
582
583
584
585
586
    }
    if(offset == 3520)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3520\n \
                "
Chao Liu's avatar
Chao Liu committed
587
588
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
589
590
591
592
593
594
    }
    if(offset == 3584)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3584\n \
                "
Chao Liu's avatar
Chao Liu committed
595
596
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
597
598
599
600
601
602
    }
    if(offset == 3648)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3648\n \
                "
Chao Liu's avatar
Chao Liu committed
603
604
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
605
606
607
608
609
610
    }
    if(offset == 3712)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3712\n \
                "
Chao Liu's avatar
Chao Liu committed
611
612
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
613
614
615
616
617
618
    }
    if(offset == 3776)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3776\n \
                "
Chao Liu's avatar
Chao Liu committed
619
620
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
621
622
623
624
625
626
    }
    if(offset == 3840)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3840\n \
                "
Chao Liu's avatar
Chao Liu committed
627
628
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
629
630
631
632
633
634
    }
    if(offset == 3904)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3904\n \
                "
Chao Liu's avatar
Chao Liu committed
635
636
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
637
638
639
640
641
642
    }
    if(offset == 3968)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3968\n \
                "
Chao Liu's avatar
Chao Liu committed
643
644
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
645
646
647
648
649
650
    }
    if(offset == 4032)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4032\n \
                "
Chao Liu's avatar
Chao Liu committed
651
652
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
653
654
655
656
657
658
    }
    if(offset == 4096)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4096\n \
                "
Chao Liu's avatar
Chao Liu committed
659
660
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
661
    }
Jing Zhang's avatar
Jing Zhang committed
662
663
}

Chao Liu's avatar
Chao Liu committed
664
665
__device__ void
ds_write_b128(const vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
666
{
667
668
669
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
670
671
            ds_write_b128 %0, %1 \n \
            "
672
673
674
675
676
677
678
                     :
                     : "v"(__to_local(lds)), "v"(r));
    }
    else
    {
        assert(false);
    }
Jing Zhang's avatar
Jing Zhang committed
679
}
680
681
682

} // namespace ck
#endif