amd_inline_asm.hpp 19.4 KB
Newer Older
1
2
3
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP

Chao Liu's avatar
Chao Liu committed
4
#include "common.hpp"
Jing Zhang's avatar
Jing Zhang committed
5

Jing Zhang's avatar
Jing Zhang committed
6
7
8
9
10
11
#define NO_VM_WAIT 0
#define NO_LGKM_WAIT 0
#define NO_DS_READ 0
#define NO_DS_WRITE 0
#define NO_GLB_READ 0

12
13
namespace ck {

Chao Liu's avatar
Chao Liu committed
14
15
16
17
// cast a pointer of LDS to its address
extern "C" __attribute__((address_space(3))) void* __to_local(void* p)[[hc]];

__device__ void vmcnt(index_t cnt)
18
{
Jing Zhang's avatar
Jing Zhang committed
19
#if !NO_VM_WAIT
20
21
22
    if(cnt == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
23
                s_waitcnt vmcnt(0) \n \
24
                " ::);
Jing Zhang's avatar
Jing Zhang committed
25
    }
26
27
28
    else if(cnt == 1)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
29
                s_waitcnt vmcnt(1) \n \
30
                " ::);
Jing Zhang's avatar
Jing Zhang committed
31
    }
32
33
34
    else if(cnt == 2)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
35
                s_waitcnt vmcnt(2) \n \
36
                " ::);
Jing Zhang's avatar
Jing Zhang committed
37
    }
38
39
40
    else if(cnt == 4)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
41
                s_waitcnt vmcnt(2) \n \
42
                " ::);
Jing Zhang's avatar
Jing Zhang committed
43
    }
44
45
    else
    {
46
        assert(false);
Jing Zhang's avatar
Jing Zhang committed
47
    }
Jing Zhang's avatar
Jing Zhang committed
48
#endif
Jing Zhang's avatar
Jing Zhang committed
49
50
}

Chao Liu's avatar
Chao Liu committed
51
__device__ void lgkmcnt(index_t cnt)
Chao Liu's avatar
Chao Liu committed
52
{
Jing Zhang's avatar
Jing Zhang committed
53
#if !NO_LGKM_WAIT
Chao Liu's avatar
Chao Liu committed
54
55
    if(cnt == 0)
    {
Jing Zhang's avatar
Jing Zhang committed
56
57
        asm volatile("\n \
                s_waitcnt lgkmcnt(0) \n \
Chao Liu's avatar
Chao Liu committed
58
                " ::);
Jing Zhang's avatar
Jing Zhang committed
59
    }
Chao Liu's avatar
Chao Liu committed
60
61
    else if(cnt == 1)
    {
Jing Zhang's avatar
Jing Zhang committed
62
63
        asm volatile("\n \
                s_waitcnt lgkmcnt(1) \n \
Chao Liu's avatar
Chao Liu committed
64
                " ::);
Jing Zhang's avatar
Jing Zhang committed
65
    }
Chao Liu's avatar
Chao Liu committed
66
67
    else if(cnt == 2)
    {
Jing Zhang's avatar
Jing Zhang committed
68
69
        asm volatile("\n \
                s_waitcnt lgkmcnt(2) \n \
Chao Liu's avatar
Chao Liu committed
70
                " ::);
Jing Zhang's avatar
Jing Zhang committed
71
    }
Chao Liu's avatar
Chao Liu committed
72
73
    else if(cnt == 3)
    {
Jing Zhang's avatar
Jing Zhang committed
74
75
        asm volatile("\n \
                s_waitcnt lgkmcnt(3) \n \
Chao Liu's avatar
Chao Liu committed
76
                " ::);
Jing Zhang's avatar
Jing Zhang committed
77
    }
Chao Liu's avatar
Chao Liu committed
78
79
    else if(cnt == 4)
    {
Jing Zhang's avatar
Jing Zhang committed
80
81
        asm volatile("\n \
                s_waitcnt lgkmcnt(4) \n \
Chao Liu's avatar
Chao Liu committed
82
                " ::);
Jing Zhang's avatar
Jing Zhang committed
83
    }
Chao Liu's avatar
Chao Liu committed
84
85
    else
    {
86
        assert(false);
Jing Zhang's avatar
Jing Zhang committed
87
    }
Jing Zhang's avatar
Jing Zhang committed
88
#endif
Jing Zhang's avatar
Jing Zhang committed
89
90
}

Chao Liu's avatar
Chao Liu committed
91
__device__ void outerProduct1x4(const float* a, const float* b, float* c)
Chao Liu's avatar
Chao Liu committed
92
{
Jing Zhang's avatar
Jing Zhang committed
93
94
95
96
97
98
    asm volatile("\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
Chao Liu's avatar
Chao Liu committed
99
100
101
102
103
104
105
106
107
108
                 : "=v"(c[0]), "=v"(c[1]), "=v"(c[2]), "=v"(c[3])
                 : "v"(a[0]),
                   "v"(b[0]),
                   "v"(b[1]),
                   "v"(b[2]),
                   "v"(b[3]),
                   "0"(c[0]),
                   "1"(c[1]),
                   "2"(c[2]),
                   "3"(c[3]));
Jing Zhang's avatar
Jing Zhang committed
109
110
}

Chao Liu's avatar
Chao Liu committed
111
112
113
__device__ void outerProduct1x4(const float& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c)
Chao Liu's avatar
Chao Liu committed
114
{
Jing Zhang's avatar
Jing Zhang committed
115
116
117
118
119
120
121
122
123
124
125
126
127
#if 0
    asm volatile(
            "\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
            :
            :"v"(c.x),"v"(c.y),"v"(c.z),"v"(c.w), \
            "v"(a.x),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
            );
#else
Chao Liu's avatar
Chao Liu committed
128
    outerProduct1x4(&a, (float*)&b, (float*)&c);
Jing Zhang's avatar
Jing Zhang committed
129
130
131
#endif
}

Chao Liu's avatar
Chao Liu committed
132
133
134
135
136
137
__device__ void outerProduct4x4(const vector_type<float, 4>::MemoryType& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c0,
                                vector_type<float, 4>::MemoryType& c1,
                                vector_type<float, 4>::MemoryType& c2,
                                vector_type<float, 4>::MemoryType& c3)
Chao Liu's avatar
Chao Liu committed
138
{
Jing Zhang's avatar
Jing Zhang committed
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#if 0
    asm volatile(
            "\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
            :
            :"v"(c0.x),"v"(c0.y),"v"(c0.z),"v"(c0.w), \
            "v"(a.x),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
            );
    asm volatile(
            "\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
            :
            :"v"(c1.x),"v"(c1.y),"v"(c1.z),"v"(c1.w), \
            "v"(a.y),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
            );
    asm volatile(
            "\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
            :
            :"v"(c2.x),"v"(c2.y),"v"(c2.z),"v"(c2.w), \
            "v"(a.z),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
            );
    asm volatile(
            "\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
            :
            :"v"(c3.x),"v"(c3.y),"v"(c3.z),"v"(c3.w), \
            "v"(a.w),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
            );
#else
    outerProduct1x4(a.x, b, c0);
    outerProduct1x4(a.y, b, c1);
    outerProduct1x4(a.z, b, c2);
    outerProduct1x4(a.w, b, c3);
#endif
}

Chao Liu's avatar
Chao Liu committed
192
193
194
__device__ void outerProduct8x8(const vector_type<float, 4>::MemoryType* a,
                                const vector_type<float, 4>::MemoryType* b,
                                vector_type<float, 4>::MemoryType* c)
Jing Zhang's avatar
Jing Zhang committed
195
196
197
198
199
200
201
{
    outerProduct4x4(a[0], b[0], c[0], c[2], c[4], c[6]);
    outerProduct4x4(a[0], b[1], c[1], c[3], c[5], c[7]);
    outerProduct4x4(a[1], b[0], c[8], c[10], c[12], c[14]);
    outerProduct4x4(a[1], b[1], c[9], c[11], c[13], c[15]);
}

Chao Liu's avatar
Chao Liu committed
202
__device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
203
{
Jing Zhang's avatar
Jing Zhang committed
204
#if !NO_DS_READ
Jing Zhang's avatar
Jing Zhang committed
205
206
207
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
208
                ds_read_b128 %0, %1 offset:0\n \
Jing Zhang's avatar
Jing Zhang committed
209
                "
Chao Liu's avatar
Chao Liu committed
210
211
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
212
    }
Jing Zhang's avatar
Jing Zhang committed
213
    if(offset == 64)
Jing Zhang's avatar
Jing Zhang committed
214
215
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
216
                ds_read_b128 %0, %1 offset:64\n \
Jing Zhang's avatar
Jing Zhang committed
217
                "
Chao Liu's avatar
Chao Liu committed
218
219
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
220
    }
Jing Zhang's avatar
Jing Zhang committed
221
    if(offset == 128)
Jing Zhang's avatar
Jing Zhang committed
222
223
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
224
                ds_read_b128 %0, %1 offset:128\n \
Jing Zhang's avatar
Jing Zhang committed
225
                "
Chao Liu's avatar
Chao Liu committed
226
227
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
228
    }
Jing Zhang's avatar
Jing Zhang committed
229
    if(offset == 192)
Jing Zhang's avatar
Jing Zhang committed
230
231
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
232
                ds_read_b128 %0, %1 offset:192\n \
Jing Zhang's avatar
Jing Zhang committed
233
                "
Chao Liu's avatar
Chao Liu committed
234
235
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
236
    }
Jing Zhang's avatar
Jing Zhang committed
237
    if(offset == 256)
Jing Zhang's avatar
Jing Zhang committed
238
239
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
240
                ds_read_b128 %0, %1 offset:256\n \
Jing Zhang's avatar
Jing Zhang committed
241
                "
Chao Liu's avatar
Chao Liu committed
242
243
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
244
    }
Jing Zhang's avatar
Jing Zhang committed
245
    if(offset == 320)
Jing Zhang's avatar
Jing Zhang committed
246
247
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
248
                ds_read_b128 %0, %1 offset:320\n \
Jing Zhang's avatar
Jing Zhang committed
249
                "
Chao Liu's avatar
Chao Liu committed
250
251
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
252
    }
Jing Zhang's avatar
Jing Zhang committed
253
    if(offset == 384)
Jing Zhang's avatar
Jing Zhang committed
254
255
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
256
                ds_read_b128 %0, %1 offset:384\n \
Jing Zhang's avatar
Jing Zhang committed
257
                "
Chao Liu's avatar
Chao Liu committed
258
259
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
260
    }
Jing Zhang's avatar
Jing Zhang committed
261
    if(offset == 448)
Jing Zhang's avatar
Jing Zhang committed
262
263
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
264
                ds_read_b128 %0, %1 offset:448\n \
Jing Zhang's avatar
Jing Zhang committed
265
                "
Chao Liu's avatar
Chao Liu committed
266
267
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
268
    }
Jing Zhang's avatar
Jing Zhang committed
269
    if(offset == 512)
Jing Zhang's avatar
Jing Zhang committed
270
271
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
272
                ds_read_b128 %0, %1 offset:512\n \
Jing Zhang's avatar
Jing Zhang committed
273
                "
Chao Liu's avatar
Chao Liu committed
274
275
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
276
    }
Jing Zhang's avatar
Jing Zhang committed
277
    if(offset == 576)
Jing Zhang's avatar
Jing Zhang committed
278
279
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
280
                ds_read_b128 %0, %1 offset:576\n \
Jing Zhang's avatar
Jing Zhang committed
281
                "
Chao Liu's avatar
Chao Liu committed
282
283
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
284
    }
Jing Zhang's avatar
Jing Zhang committed
285
    if(offset == 640)
Jing Zhang's avatar
Jing Zhang committed
286
287
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
288
                ds_read_b128 %0, %1 offset:640\n \
Jing Zhang's avatar
Jing Zhang committed
289
                "
Chao Liu's avatar
Chao Liu committed
290
291
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
292
    }
Jing Zhang's avatar
Jing Zhang committed
293
    if(offset == 704)
Jing Zhang's avatar
Jing Zhang committed
294
295
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
296
                ds_read_b128 %0, %1 offset:704\n \
Jing Zhang's avatar
Jing Zhang committed
297
                "
Chao Liu's avatar
Chao Liu committed
298
299
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
300
    }
Jing Zhang's avatar
Jing Zhang committed
301
    if(offset == 768)
Jing Zhang's avatar
Jing Zhang committed
302
303
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
304
                ds_read_b128 %0, %1 offset:768\n \
Jing Zhang's avatar
Jing Zhang committed
305
                "
Chao Liu's avatar
Chao Liu committed
306
307
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
308
    }
Jing Zhang's avatar
Jing Zhang committed
309
    if(offset == 832)
Jing Zhang's avatar
Jing Zhang committed
310
311
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
312
                ds_read_b128 %0, %1 offset:832\n \
Jing Zhang's avatar
Jing Zhang committed
313
                "
Chao Liu's avatar
Chao Liu committed
314
315
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
316
    }
Jing Zhang's avatar
Jing Zhang committed
317
    if(offset == 896)
Jing Zhang's avatar
Jing Zhang committed
318
319
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
320
                ds_read_b128 %0, %1 offset:896\n \
Jing Zhang's avatar
Jing Zhang committed
321
                "
Chao Liu's avatar
Chao Liu committed
322
323
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
324
    }
Jing Zhang's avatar
Jing Zhang committed
325
    if(offset == 960)
Jing Zhang's avatar
Jing Zhang committed
326
327
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
328
                ds_read_b128 %0, %1 offset:960\n \
Jing Zhang's avatar
Jing Zhang committed
329
                "
Chao Liu's avatar
Chao Liu committed
330
331
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
332
    }
Jing Zhang's avatar
Jing Zhang committed
333
    if(offset == 1024)
Jing Zhang's avatar
Jing Zhang committed
334
335
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
336
                ds_read_b128 %0, %1 offset:1024\n \
Jing Zhang's avatar
Jing Zhang committed
337
                "
Chao Liu's avatar
Chao Liu committed
338
339
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
340
    }
Jing Zhang's avatar
Jing Zhang committed
341
    if(offset == 1088)
Jing Zhang's avatar
Jing Zhang committed
342
343
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
344
                ds_read_b128 %0, %1 offset:1088\n \
Jing Zhang's avatar
Jing Zhang committed
345
                "
Chao Liu's avatar
Chao Liu committed
346
347
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
348
    }
Jing Zhang's avatar
Jing Zhang committed
349
    if(offset == 1152)
Jing Zhang's avatar
Jing Zhang committed
350
351
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
352
                ds_read_b128 %0, %1 offset:1152\n \
Jing Zhang's avatar
Jing Zhang committed
353
                "
Chao Liu's avatar
Chao Liu committed
354
355
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
356
    }
Jing Zhang's avatar
Jing Zhang committed
357
    if(offset == 1216)
Chao Liu's avatar
Chao Liu committed
358
359
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
360
                ds_read_b128 %0, %1 offset:1216\n \
Chao Liu's avatar
Chao Liu committed
361
                "
Chao Liu's avatar
Chao Liu committed
362
363
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
364
    }
Jing Zhang's avatar
Jing Zhang committed
365
    if(offset == 1280)
Jing Zhang's avatar
Jing Zhang committed
366
367
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
368
                ds_read_b128 %0, %1 offset:1280\n \
Jing Zhang's avatar
Jing Zhang committed
369
                "
Chao Liu's avatar
Chao Liu committed
370
371
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
372
    }
Jing Zhang's avatar
Jing Zhang committed
373
    if(offset == 1344)
Chao Liu's avatar
Chao Liu committed
374
375
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
376
                ds_read_b128 %0, %1 offset:1344\n \
Chao Liu's avatar
Chao Liu committed
377
                "
Chao Liu's avatar
Chao Liu committed
378
379
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
380
    }
Jing Zhang's avatar
Jing Zhang committed
381
    if(offset == 1408)
Jing Zhang's avatar
Jing Zhang committed
382
383
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
384
                ds_read_b128 %0, %1 offset:1408\n \
Jing Zhang's avatar
Jing Zhang committed
385
                "
Chao Liu's avatar
Chao Liu committed
386
387
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
388
    }
Jing Zhang's avatar
Jing Zhang committed
389
    if(offset == 1472)
Chao Liu's avatar
Chao Liu committed
390
391
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
392
                ds_read_b128 %0, %1 offset:1472\n \
Chao Liu's avatar
Chao Liu committed
393
                "
Chao Liu's avatar
Chao Liu committed
394
395
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
396
    }
Jing Zhang's avatar
Jing Zhang committed
397
    if(offset == 1536)
Jing Zhang's avatar
Jing Zhang committed
398
399
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
400
                ds_read_b128 %0, %1 offset:1536\n \
Jing Zhang's avatar
Jing Zhang committed
401
                "
Chao Liu's avatar
Chao Liu committed
402
403
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
404
    }
Jing Zhang's avatar
Jing Zhang committed
405
    if(offset == 1600)
Chao Liu's avatar
Chao Liu committed
406
407
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
408
                ds_read_b128 %0, %1 offset:1600\n \
Chao Liu's avatar
Chao Liu committed
409
                "
Chao Liu's avatar
Chao Liu committed
410
411
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
412
    }
Jing Zhang's avatar
Jing Zhang committed
413
    if(offset == 1664)
Jing Zhang's avatar
Jing Zhang committed
414
415
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
416
                ds_read_b128 %0, %1 offset:1664\n \
Jing Zhang's avatar
Jing Zhang committed
417
                "
Chao Liu's avatar
Chao Liu committed
418
419
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
420
    }
Jing Zhang's avatar
Jing Zhang committed
421
    if(offset == 1728)
Chao Liu's avatar
Chao Liu committed
422
423
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
424
                ds_read_b128 %0, %1 offset:1728\n \
Chao Liu's avatar
Chao Liu committed
425
                "
Chao Liu's avatar
Chao Liu committed
426
427
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
428
    }
Jing Zhang's avatar
Jing Zhang committed
429
    if(offset == 1792)
Jing Zhang's avatar
Jing Zhang committed
430
431
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
432
                ds_read_b128 %0, %1 offset:1792\n \
Jing Zhang's avatar
Jing Zhang committed
433
                "
Chao Liu's avatar
Chao Liu committed
434
435
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
436
    }
Jing Zhang's avatar
Jing Zhang committed
437
    if(offset == 1856)
Chao Liu's avatar
Chao Liu committed
438
439
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
440
                ds_read_b128 %0, %1 offset:1856\n \
Chao Liu's avatar
Chao Liu committed
441
                "
Chao Liu's avatar
Chao Liu committed
442
443
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
444
    }
Jing Zhang's avatar
Jing Zhang committed
445
    if(offset == 1920)
Jing Zhang's avatar
Jing Zhang committed
446
447
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
448
                ds_read_b128 %0, %1 offset:1920\n \
Jing Zhang's avatar
Jing Zhang committed
449
                "
Chao Liu's avatar
Chao Liu committed
450
451
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
452
    }
Jing Zhang's avatar
Jing Zhang committed
453
    if(offset == 1984)
Chao Liu's avatar
Chao Liu committed
454
455
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
456
                ds_read_b128 %0, %1 offset:1984\n \
Chao Liu's avatar
Chao Liu committed
457
                "
Chao Liu's avatar
Chao Liu committed
458
459
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
460
    }
Jing Zhang's avatar
Jing Zhang committed
461
    if(offset == 2048)
Jing Zhang's avatar
Jing Zhang committed
462
463
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
464
                ds_read_b128 %0, %1 offset:2048\n \
Jing Zhang's avatar
Jing Zhang committed
465
                "
Chao Liu's avatar
Chao Liu committed
466
467
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
468
    }
Jing Zhang's avatar
Jing Zhang committed
469
    if(offset == 2112)
Jing Zhang's avatar
Jing Zhang committed
470
471
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
472
                ds_read_b128 %0, %1 offset:2112\n \
Jing Zhang's avatar
Jing Zhang committed
473
                "
Chao Liu's avatar
Chao Liu committed
474
475
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
476
    }
Jing Zhang's avatar
Jing Zhang committed
477
    if(offset == 2176)
Jing Zhang's avatar
Jing Zhang committed
478
    {
Jing Zhang's avatar
Jing Zhang committed
479
480
481
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2176\n \
                "
Chao Liu's avatar
Chao Liu committed
482
483
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
484
485
486
487
488
489
    }
    if(offset == 2240)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2240\n \
                "
Chao Liu's avatar
Chao Liu committed
490
491
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
492
493
494
495
496
497
    }
    if(offset == 2304)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2304\n \
                "
Chao Liu's avatar
Chao Liu committed
498
499
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
500
501
502
503
504
505
    }
    if(offset == 2368)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2368\n \
                "
Chao Liu's avatar
Chao Liu committed
506
507
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
508
509
510
511
512
513
    }
    if(offset == 2432)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2432\n \
                "
Chao Liu's avatar
Chao Liu committed
514
515
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
516
517
518
519
520
521
    }
    if(offset == 2496)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2496\n \
                "
Chao Liu's avatar
Chao Liu committed
522
523
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
524
525
526
527
528
529
    }
    if(offset == 2560)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2560\n \
                "
Chao Liu's avatar
Chao Liu committed
530
531
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
532
533
534
535
536
537
    }
    if(offset == 2624)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2624\n \
                "
Chao Liu's avatar
Chao Liu committed
538
539
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
540
541
542
543
544
545
    }
    if(offset == 2688)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2688\n \
                "
Chao Liu's avatar
Chao Liu committed
546
547
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
548
549
550
551
552
553
    }
    if(offset == 2752)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2752\n \
                "
Chao Liu's avatar
Chao Liu committed
554
555
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
556
557
558
559
560
561
    }
    if(offset == 2816)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2816\n \
                "
Chao Liu's avatar
Chao Liu committed
562
563
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
564
565
566
567
568
569
    }
    if(offset == 2880)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2880\n \
                "
Chao Liu's avatar
Chao Liu committed
570
571
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
572
573
574
575
576
577
    }
    if(offset == 2944)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2944\n \
                "
Chao Liu's avatar
Chao Liu committed
578
579
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
580
581
582
583
584
585
    }
    if(offset == 3008)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3008\n \
                "
Chao Liu's avatar
Chao Liu committed
586
587
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
588
589
590
591
592
593
    }
    if(offset == 3072)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3072\n \
                "
Chao Liu's avatar
Chao Liu committed
594
595
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
596
597
598
599
600
601
    }
    if(offset == 3136)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3136\n \
                "
Chao Liu's avatar
Chao Liu committed
602
603
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
604
605
606
607
608
609
    }
    if(offset == 3200)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3200\n \
                "
Chao Liu's avatar
Chao Liu committed
610
611
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
612
613
614
615
616
617
    }
    if(offset == 3264)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3264\n \
                "
Chao Liu's avatar
Chao Liu committed
618
619
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
620
621
622
623
624
625
    }
    if(offset == 3328)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3328\n \
                "
Chao Liu's avatar
Chao Liu committed
626
627
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
628
629
630
631
632
633
    }
    if(offset == 3392)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3392\n \
                "
Chao Liu's avatar
Chao Liu committed
634
635
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
636
637
638
639
640
641
    }
    if(offset == 3456)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3456\n \
                "
Chao Liu's avatar
Chao Liu committed
642
643
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
644
645
646
647
648
649
    }
    if(offset == 3520)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3520\n \
                "
Chao Liu's avatar
Chao Liu committed
650
651
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
652
653
654
655
656
657
    }
    if(offset == 3584)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3584\n \
                "
Chao Liu's avatar
Chao Liu committed
658
659
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
660
661
662
663
664
665
    }
    if(offset == 3648)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3648\n \
                "
Chao Liu's avatar
Chao Liu committed
666
667
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
668
669
670
671
672
673
    }
    if(offset == 3712)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3712\n \
                "
Chao Liu's avatar
Chao Liu committed
674
675
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
676
677
678
679
680
681
    }
    if(offset == 3776)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3776\n \
                "
Chao Liu's avatar
Chao Liu committed
682
683
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
684
685
686
687
688
689
    }
    if(offset == 3840)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3840\n \
                "
Chao Liu's avatar
Chao Liu committed
690
691
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
692
693
694
695
696
697
    }
    if(offset == 3904)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3904\n \
                "
Chao Liu's avatar
Chao Liu committed
698
699
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
700
701
702
703
704
705
    }
    if(offset == 3968)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3968\n \
                "
Chao Liu's avatar
Chao Liu committed
706
707
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
708
709
710
711
712
713
    }
    if(offset == 4032)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4032\n \
                "
Chao Liu's avatar
Chao Liu committed
714
715
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
716
717
718
719
720
721
    }
    if(offset == 4096)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4096\n \
                "
Chao Liu's avatar
Chao Liu committed
722
723
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
724
    }
Jing Zhang's avatar
Jing Zhang committed
725
#endif
Jing Zhang's avatar
Jing Zhang committed
726
}
Jing Zhang's avatar
Jing Zhang committed
727

Chao Liu's avatar
Chao Liu committed
728
729
730
__device__ void global_load(vector_type<float, 4>::MemoryType& r,
                            const vector_type<float, 4>::MemoryType* ptr,
                            index_t offset = 0)
731
{
Jing Zhang's avatar
Jing Zhang committed
732
#if !NO_GLB_READ
733
734
735
736
737
738
739
740
741
742
743
744
    if(offset == 0)
    {
        asm volatile("\n \
                global_load_dwordx4 %0, %1, off \n \
                "
                     : "=v"(r)
                     : "v"(ptr));
    }
    else
    {
        assert(false);
    }
Jing Zhang's avatar
Jing Zhang committed
745
#endif
Jing Zhang's avatar
Jing Zhang committed
746
747
}

Chao Liu's avatar
Chao Liu committed
748
749
__device__ void
ds_write_b128(const vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
750
{
Jing Zhang's avatar
Jing Zhang committed
751
#if !NO_DS_WRITE
752
753
754
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
755
756
            ds_write_b128 %0, %1 \n \
            "
757
758
759
760
761
762
763
                     :
                     : "v"(__to_local(lds)), "v"(r));
    }
    else
    {
        assert(false);
    }
Jing Zhang's avatar
Jing Zhang committed
764
#endif
Jing Zhang's avatar
Jing Zhang committed
765
}
766
767
768

} // namespace ck
#endif