amd_inline_asm.hpp 15.6 KB
Newer Older
1
2
3
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP

Chao Liu's avatar
Chao Liu committed
4
#include "vector_type.hpp"
Jing Zhang's avatar
Jing Zhang committed
5

6
7
namespace ck {

Chao Liu's avatar
Chao Liu committed
8
__device__ void outerProduct1x4(const float* a, const float* b, float* c)
Chao Liu's avatar
Chao Liu committed
9
{
Jing Zhang's avatar
Jing Zhang committed
10
11
12
13
14
15
    asm volatile("\n \
            v_mac_f32 %0, %4, %5 \n \
            v_mac_f32 %1, %4, %6 \n \
            v_mac_f32 %2, %4, %7 \n \
            v_mac_f32 %3, %4, %8 \n \
            "
Chao Liu's avatar
Chao Liu committed
16
17
18
19
20
21
22
23
24
25
                 : "=v"(c[0]), "=v"(c[1]), "=v"(c[2]), "=v"(c[3])
                 : "v"(a[0]),
                   "v"(b[0]),
                   "v"(b[1]),
                   "v"(b[2]),
                   "v"(b[3]),
                   "0"(c[0]),
                   "1"(c[1]),
                   "2"(c[2]),
                   "3"(c[3]));
Jing Zhang's avatar
Jing Zhang committed
26
27
}

Chao Liu's avatar
Chao Liu committed
28
29
30
__device__ void outerProduct1x4(const float& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c)
Chao Liu's avatar
Chao Liu committed
31
32
{
    outerProduct1x4(&a, (float*)&b, (float*)&c);
Jing Zhang's avatar
Jing Zhang committed
33
34
}

Chao Liu's avatar
Chao Liu committed
35
36
37
38
39
40
__device__ void outerProduct4x4(const vector_type<float, 4>::MemoryType& a,
                                const vector_type<float, 4>::MemoryType& b,
                                vector_type<float, 4>::MemoryType& c0,
                                vector_type<float, 4>::MemoryType& c1,
                                vector_type<float, 4>::MemoryType& c2,
                                vector_type<float, 4>::MemoryType& c3)
Chao Liu's avatar
Chao Liu committed
41
{
Jing Zhang's avatar
Jing Zhang committed
42
43
44
45
46
47
    outerProduct1x4(a.x, b, c0);
    outerProduct1x4(a.y, b, c1);
    outerProduct1x4(a.z, b, c2);
    outerProduct1x4(a.w, b, c3);
}

Chao Liu's avatar
Chao Liu committed
48
49
50
__device__ void outerProduct8x8(const vector_type<float, 4>::MemoryType* a,
                                const vector_type<float, 4>::MemoryType* b,
                                vector_type<float, 4>::MemoryType* c)
Jing Zhang's avatar
Jing Zhang committed
51
52
53
54
55
56
57
{
    outerProduct4x4(a[0], b[0], c[0], c[2], c[4], c[6]);
    outerProduct4x4(a[0], b[1], c[1], c[3], c[5], c[7]);
    outerProduct4x4(a[1], b[0], c[8], c[10], c[12], c[14]);
    outerProduct4x4(a[1], b[1], c[9], c[11], c[13], c[15]);
}

Chao Liu's avatar
Chao Liu committed
58
__device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
59
60
61
62
{
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
63
                ds_read_b128 %0, %1 offset:0\n \
Jing Zhang's avatar
Jing Zhang committed
64
                "
Chao Liu's avatar
Chao Liu committed
65
66
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
67
    }
Jing Zhang's avatar
Jing Zhang committed
68
    if(offset == 64)
Jing Zhang's avatar
Jing Zhang committed
69
70
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
71
                ds_read_b128 %0, %1 offset:64\n \
Jing Zhang's avatar
Jing Zhang committed
72
                "
Chao Liu's avatar
Chao Liu committed
73
74
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
75
    }
Jing Zhang's avatar
Jing Zhang committed
76
    if(offset == 128)
Jing Zhang's avatar
Jing Zhang committed
77
78
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
79
                ds_read_b128 %0, %1 offset:128\n \
Jing Zhang's avatar
Jing Zhang committed
80
                "
Chao Liu's avatar
Chao Liu committed
81
82
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
83
    }
Jing Zhang's avatar
Jing Zhang committed
84
    if(offset == 192)
Jing Zhang's avatar
Jing Zhang committed
85
86
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
87
                ds_read_b128 %0, %1 offset:192\n \
Jing Zhang's avatar
Jing Zhang committed
88
                "
Chao Liu's avatar
Chao Liu committed
89
90
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
91
    }
Jing Zhang's avatar
Jing Zhang committed
92
    if(offset == 256)
Jing Zhang's avatar
Jing Zhang committed
93
94
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
95
                ds_read_b128 %0, %1 offset:256\n \
Jing Zhang's avatar
Jing Zhang committed
96
                "
Chao Liu's avatar
Chao Liu committed
97
98
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
99
    }
Jing Zhang's avatar
Jing Zhang committed
100
    if(offset == 320)
Jing Zhang's avatar
Jing Zhang committed
101
102
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
103
                ds_read_b128 %0, %1 offset:320\n \
Jing Zhang's avatar
Jing Zhang committed
104
                "
Chao Liu's avatar
Chao Liu committed
105
106
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
107
    }
Jing Zhang's avatar
Jing Zhang committed
108
    if(offset == 384)
Jing Zhang's avatar
Jing Zhang committed
109
110
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
111
                ds_read_b128 %0, %1 offset:384\n \
Jing Zhang's avatar
Jing Zhang committed
112
                "
Chao Liu's avatar
Chao Liu committed
113
114
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
115
    }
Jing Zhang's avatar
Jing Zhang committed
116
    if(offset == 448)
Jing Zhang's avatar
Jing Zhang committed
117
118
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
119
                ds_read_b128 %0, %1 offset:448\n \
Jing Zhang's avatar
Jing Zhang committed
120
                "
Chao Liu's avatar
Chao Liu committed
121
122
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
123
    }
Jing Zhang's avatar
Jing Zhang committed
124
    if(offset == 512)
Jing Zhang's avatar
Jing Zhang committed
125
126
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
127
                ds_read_b128 %0, %1 offset:512\n \
Jing Zhang's avatar
Jing Zhang committed
128
                "
Chao Liu's avatar
Chao Liu committed
129
130
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
131
    }
Jing Zhang's avatar
Jing Zhang committed
132
    if(offset == 576)
Jing Zhang's avatar
Jing Zhang committed
133
134
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
135
                ds_read_b128 %0, %1 offset:576\n \
Jing Zhang's avatar
Jing Zhang committed
136
                "
Chao Liu's avatar
Chao Liu committed
137
138
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
139
    }
Jing Zhang's avatar
Jing Zhang committed
140
    if(offset == 640)
Jing Zhang's avatar
Jing Zhang committed
141
142
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
143
                ds_read_b128 %0, %1 offset:640\n \
Jing Zhang's avatar
Jing Zhang committed
144
                "
Chao Liu's avatar
Chao Liu committed
145
146
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
147
    }
Jing Zhang's avatar
Jing Zhang committed
148
    if(offset == 704)
Jing Zhang's avatar
Jing Zhang committed
149
150
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
151
                ds_read_b128 %0, %1 offset:704\n \
Jing Zhang's avatar
Jing Zhang committed
152
                "
Chao Liu's avatar
Chao Liu committed
153
154
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
155
    }
Jing Zhang's avatar
Jing Zhang committed
156
    if(offset == 768)
Jing Zhang's avatar
Jing Zhang committed
157
158
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
159
                ds_read_b128 %0, %1 offset:768\n \
Jing Zhang's avatar
Jing Zhang committed
160
                "
Chao Liu's avatar
Chao Liu committed
161
162
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
163
    }
Jing Zhang's avatar
Jing Zhang committed
164
    if(offset == 832)
Jing Zhang's avatar
Jing Zhang committed
165
166
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
167
                ds_read_b128 %0, %1 offset:832\n \
Jing Zhang's avatar
Jing Zhang committed
168
                "
Chao Liu's avatar
Chao Liu committed
169
170
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
171
    }
Jing Zhang's avatar
Jing Zhang committed
172
    if(offset == 896)
Jing Zhang's avatar
Jing Zhang committed
173
174
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
175
                ds_read_b128 %0, %1 offset:896\n \
Jing Zhang's avatar
Jing Zhang committed
176
                "
Chao Liu's avatar
Chao Liu committed
177
178
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
179
    }
Jing Zhang's avatar
Jing Zhang committed
180
    if(offset == 960)
Jing Zhang's avatar
Jing Zhang committed
181
182
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
183
                ds_read_b128 %0, %1 offset:960\n \
Jing Zhang's avatar
Jing Zhang committed
184
                "
Chao Liu's avatar
Chao Liu committed
185
186
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
187
    }
Jing Zhang's avatar
Jing Zhang committed
188
    if(offset == 1024)
Jing Zhang's avatar
Jing Zhang committed
189
190
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
191
                ds_read_b128 %0, %1 offset:1024\n \
Jing Zhang's avatar
Jing Zhang committed
192
                "
Chao Liu's avatar
Chao Liu committed
193
194
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
195
    }
Jing Zhang's avatar
Jing Zhang committed
196
    if(offset == 1088)
Jing Zhang's avatar
Jing Zhang committed
197
198
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
199
                ds_read_b128 %0, %1 offset:1088\n \
Jing Zhang's avatar
Jing Zhang committed
200
                "
Chao Liu's avatar
Chao Liu committed
201
202
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
203
    }
Jing Zhang's avatar
Jing Zhang committed
204
    if(offset == 1152)
Jing Zhang's avatar
Jing Zhang committed
205
206
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
207
                ds_read_b128 %0, %1 offset:1152\n \
Jing Zhang's avatar
Jing Zhang committed
208
                "
Chao Liu's avatar
Chao Liu committed
209
210
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
211
    }
Jing Zhang's avatar
Jing Zhang committed
212
    if(offset == 1216)
Chao Liu's avatar
Chao Liu committed
213
214
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
215
                ds_read_b128 %0, %1 offset:1216\n \
Chao Liu's avatar
Chao Liu committed
216
                "
Chao Liu's avatar
Chao Liu committed
217
218
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
219
    }
Jing Zhang's avatar
Jing Zhang committed
220
    if(offset == 1280)
Jing Zhang's avatar
Jing Zhang committed
221
222
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
223
                ds_read_b128 %0, %1 offset:1280\n \
Jing Zhang's avatar
Jing Zhang committed
224
                "
Chao Liu's avatar
Chao Liu committed
225
226
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
227
    }
Jing Zhang's avatar
Jing Zhang committed
228
    if(offset == 1344)
Chao Liu's avatar
Chao Liu committed
229
230
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
231
                ds_read_b128 %0, %1 offset:1344\n \
Chao Liu's avatar
Chao Liu committed
232
                "
Chao Liu's avatar
Chao Liu committed
233
234
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
235
    }
Jing Zhang's avatar
Jing Zhang committed
236
    if(offset == 1408)
Jing Zhang's avatar
Jing Zhang committed
237
238
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
239
                ds_read_b128 %0, %1 offset:1408\n \
Jing Zhang's avatar
Jing Zhang committed
240
                "
Chao Liu's avatar
Chao Liu committed
241
242
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
243
    }
Jing Zhang's avatar
Jing Zhang committed
244
    if(offset == 1472)
Chao Liu's avatar
Chao Liu committed
245
246
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
247
                ds_read_b128 %0, %1 offset:1472\n \
Chao Liu's avatar
Chao Liu committed
248
                "
Chao Liu's avatar
Chao Liu committed
249
250
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
251
    }
Jing Zhang's avatar
Jing Zhang committed
252
    if(offset == 1536)
Jing Zhang's avatar
Jing Zhang committed
253
254
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
255
                ds_read_b128 %0, %1 offset:1536\n \
Jing Zhang's avatar
Jing Zhang committed
256
                "
Chao Liu's avatar
Chao Liu committed
257
258
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
259
    }
Jing Zhang's avatar
Jing Zhang committed
260
    if(offset == 1600)
Chao Liu's avatar
Chao Liu committed
261
262
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
263
                ds_read_b128 %0, %1 offset:1600\n \
Chao Liu's avatar
Chao Liu committed
264
                "
Chao Liu's avatar
Chao Liu committed
265
266
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
267
    }
Jing Zhang's avatar
Jing Zhang committed
268
    if(offset == 1664)
Jing Zhang's avatar
Jing Zhang committed
269
270
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
271
                ds_read_b128 %0, %1 offset:1664\n \
Jing Zhang's avatar
Jing Zhang committed
272
                "
Chao Liu's avatar
Chao Liu committed
273
274
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
275
    }
Jing Zhang's avatar
Jing Zhang committed
276
    if(offset == 1728)
Chao Liu's avatar
Chao Liu committed
277
278
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
279
                ds_read_b128 %0, %1 offset:1728\n \
Chao Liu's avatar
Chao Liu committed
280
                "
Chao Liu's avatar
Chao Liu committed
281
282
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
283
    }
Jing Zhang's avatar
Jing Zhang committed
284
    if(offset == 1792)
Jing Zhang's avatar
Jing Zhang committed
285
286
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
287
                ds_read_b128 %0, %1 offset:1792\n \
Jing Zhang's avatar
Jing Zhang committed
288
                "
Chao Liu's avatar
Chao Liu committed
289
290
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
291
    }
Jing Zhang's avatar
Jing Zhang committed
292
    if(offset == 1856)
Chao Liu's avatar
Chao Liu committed
293
294
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
295
                ds_read_b128 %0, %1 offset:1856\n \
Chao Liu's avatar
Chao Liu committed
296
                "
Chao Liu's avatar
Chao Liu committed
297
298
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
299
    }
Jing Zhang's avatar
Jing Zhang committed
300
    if(offset == 1920)
Jing Zhang's avatar
Jing Zhang committed
301
302
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
303
                ds_read_b128 %0, %1 offset:1920\n \
Jing Zhang's avatar
Jing Zhang committed
304
                "
Chao Liu's avatar
Chao Liu committed
305
306
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
307
    }
Jing Zhang's avatar
Jing Zhang committed
308
    if(offset == 1984)
Chao Liu's avatar
Chao Liu committed
309
310
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
311
                ds_read_b128 %0, %1 offset:1984\n \
Chao Liu's avatar
Chao Liu committed
312
                "
Chao Liu's avatar
Chao Liu committed
313
314
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Chao Liu's avatar
Chao Liu committed
315
    }
Jing Zhang's avatar
Jing Zhang committed
316
    if(offset == 2048)
Jing Zhang's avatar
Jing Zhang committed
317
318
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
319
                ds_read_b128 %0, %1 offset:2048\n \
Jing Zhang's avatar
Jing Zhang committed
320
                "
Chao Liu's avatar
Chao Liu committed
321
322
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
323
    }
Jing Zhang's avatar
Jing Zhang committed
324
    if(offset == 2112)
Jing Zhang's avatar
Jing Zhang committed
325
326
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
327
                ds_read_b128 %0, %1 offset:2112\n \
Jing Zhang's avatar
Jing Zhang committed
328
                "
Chao Liu's avatar
Chao Liu committed
329
330
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
331
    }
Jing Zhang's avatar
Jing Zhang committed
332
    if(offset == 2176)
Jing Zhang's avatar
Jing Zhang committed
333
    {
Jing Zhang's avatar
Jing Zhang committed
334
335
336
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2176\n \
                "
Chao Liu's avatar
Chao Liu committed
337
338
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
339
340
341
342
343
344
    }
    if(offset == 2240)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2240\n \
                "
Chao Liu's avatar
Chao Liu committed
345
346
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
347
348
349
350
351
352
    }
    if(offset == 2304)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2304\n \
                "
Chao Liu's avatar
Chao Liu committed
353
354
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
355
356
357
358
359
360
    }
    if(offset == 2368)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2368\n \
                "
Chao Liu's avatar
Chao Liu committed
361
362
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
363
364
365
366
367
368
    }
    if(offset == 2432)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2432\n \
                "
Chao Liu's avatar
Chao Liu committed
369
370
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
371
372
373
374
375
376
    }
    if(offset == 2496)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2496\n \
                "
Chao Liu's avatar
Chao Liu committed
377
378
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
379
380
381
382
383
384
    }
    if(offset == 2560)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2560\n \
                "
Chao Liu's avatar
Chao Liu committed
385
386
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
387
388
389
390
391
392
    }
    if(offset == 2624)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2624\n \
                "
Chao Liu's avatar
Chao Liu committed
393
394
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
395
396
397
398
399
400
    }
    if(offset == 2688)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2688\n \
                "
Chao Liu's avatar
Chao Liu committed
401
402
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
403
404
405
406
407
408
    }
    if(offset == 2752)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2752\n \
                "
Chao Liu's avatar
Chao Liu committed
409
410
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
411
412
413
414
415
416
    }
    if(offset == 2816)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2816\n \
                "
Chao Liu's avatar
Chao Liu committed
417
418
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
419
420
421
422
423
424
    }
    if(offset == 2880)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2880\n \
                "
Chao Liu's avatar
Chao Liu committed
425
426
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
427
428
429
430
431
432
    }
    if(offset == 2944)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:2944\n \
                "
Chao Liu's avatar
Chao Liu committed
433
434
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
435
436
437
438
439
440
    }
    if(offset == 3008)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3008\n \
                "
Chao Liu's avatar
Chao Liu committed
441
442
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
443
444
445
446
447
448
    }
    if(offset == 3072)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3072\n \
                "
Chao Liu's avatar
Chao Liu committed
449
450
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
451
452
453
454
455
456
    }
    if(offset == 3136)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3136\n \
                "
Chao Liu's avatar
Chao Liu committed
457
458
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
459
460
461
462
463
464
    }
    if(offset == 3200)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3200\n \
                "
Chao Liu's avatar
Chao Liu committed
465
466
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
467
468
469
470
471
472
    }
    if(offset == 3264)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3264\n \
                "
Chao Liu's avatar
Chao Liu committed
473
474
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
475
476
477
478
479
480
    }
    if(offset == 3328)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3328\n \
                "
Chao Liu's avatar
Chao Liu committed
481
482
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
483
484
485
486
487
488
    }
    if(offset == 3392)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3392\n \
                "
Chao Liu's avatar
Chao Liu committed
489
490
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
491
492
493
494
495
496
    }
    if(offset == 3456)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3456\n \
                "
Chao Liu's avatar
Chao Liu committed
497
498
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
499
500
501
502
503
504
    }
    if(offset == 3520)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3520\n \
                "
Chao Liu's avatar
Chao Liu committed
505
506
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
507
508
509
510
511
512
    }
    if(offset == 3584)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3584\n \
                "
Chao Liu's avatar
Chao Liu committed
513
514
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
515
516
517
518
519
520
    }
    if(offset == 3648)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3648\n \
                "
Chao Liu's avatar
Chao Liu committed
521
522
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
523
524
525
526
527
528
    }
    if(offset == 3712)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3712\n \
                "
Chao Liu's avatar
Chao Liu committed
529
530
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
531
532
533
534
535
536
    }
    if(offset == 3776)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3776\n \
                "
Chao Liu's avatar
Chao Liu committed
537
538
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
539
540
541
542
543
544
    }
    if(offset == 3840)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3840\n \
                "
Chao Liu's avatar
Chao Liu committed
545
546
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
547
548
549
550
551
552
    }
    if(offset == 3904)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3904\n \
                "
Chao Liu's avatar
Chao Liu committed
553
554
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
555
556
557
558
559
560
    }
    if(offset == 3968)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:3968\n \
                "
Chao Liu's avatar
Chao Liu committed
561
562
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
563
564
565
566
567
568
    }
    if(offset == 4032)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4032\n \
                "
Chao Liu's avatar
Chao Liu committed
569
570
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
571
572
573
574
575
576
    }
    if(offset == 4096)
    {
        asm volatile("\n \
                ds_read_b128 %0, %1 offset:4096\n \
                "
Chao Liu's avatar
Chao Liu committed
577
578
                     : "=v"(r)
                     : "v"(__to_local(lds)));
Jing Zhang's avatar
Jing Zhang committed
579
    }
Jing Zhang's avatar
Jing Zhang committed
580
581
}

Chao Liu's avatar
Chao Liu committed
582
583
__device__ void
ds_write_b128(const vector_type<float, 4>::MemoryType& r, void* lds, index_t offset = 0)
Jing Zhang's avatar
Jing Zhang committed
584
{
585
586
587
    if(offset == 0)
    {
        asm volatile("\n \
Jing Zhang's avatar
Jing Zhang committed
588
589
            ds_write_b128 %0, %1 \n \
            "
590
591
592
593
594
595
596
                     :
                     : "v"(__to_local(lds)), "v"(r));
    }
    else
    {
        assert(false);
    }
Jing Zhang's avatar
Jing Zhang committed
597
}
598
599
600

} // namespace ck
#endif