Commit 49d5af10 authored by Jing Zhang's avatar Jing Zhang
Browse files

ds_read_offset

parent 3ce77700
for((i=0;i<=4096;i=i+64))
do
OFFSET=$i
echo "if(offset == $OFFSET)"
echo "{"
echo " asm volatile(\"\\n \\"
echo " ds_read_b128 %0, %1 offset:$OFFSET\n \\"
echo " \""
echo " : \"=v\"(r)"
echo " : \"v\"(__to_local(lds)));"
echo "}"
done
......@@ -201,278 +201,522 @@ __device__ void ds_read_b128(vector_type<float, 4>::MemoryType& r, void* lds, in
if(offset == 0)
{
asm volatile("\n \
ds_read_b128 %0, %1 \n \
ds_read_b128 %0, %1 offset:0\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 128)
if(offset == 64)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:128 \n \
ds_read_b128 %0, %1 offset:64\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 256)
if(offset == 128)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:256 \n \
ds_read_b128 %0, %1 offset:128\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 384)
if(offset == 192)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:384 \n \
ds_read_b128 %0, %1 offset:192\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 512)
if(offset == 256)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:512 \n \
ds_read_b128 %0, %1 offset:256\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 640)
if(offset == 320)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:640 \n \
ds_read_b128 %0, %1 offset:320\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 768)
if(offset == 384)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:768 \n \
ds_read_b128 %0, %1 offset:384\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 896)
if(offset == 448)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:896 \n \
ds_read_b128 %0, %1 offset:448\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1024)
if(offset == 512)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1024 \n \
ds_read_b128 %0, %1 offset:512\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1152)
if(offset == 576)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1152 \n \
ds_read_b128 %0, %1 offset:576\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1280)
if(offset == 640)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1280 \n \
ds_read_b128 %0, %1 offset:640\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1408)
if(offset == 704)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1408 \n \
ds_read_b128 %0, %1 offset:704\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1536)
if(offset == 768)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1536 \n \
ds_read_b128 %0, %1 offset:768\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1664)
if(offset == 832)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1664 \n \
ds_read_b128 %0, %1 offset:832\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1792)
if(offset == 896)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1792 \n \
ds_read_b128 %0, %1 offset:896\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1920)
if(offset == 960)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1920 \n \
ds_read_b128 %0, %1 offset:960\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2048)
if(offset == 1024)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2048 \n \
ds_read_b128 %0, %1 offset:1024\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2176)
if(offset == 1088)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2176 \n \
ds_read_b128 %0, %1 offset:1088\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2304)
if(offset == 1152)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2304 \n \
ds_read_b128 %0, %1 offset:1152\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2432)
if(offset == 1216)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2432 \n \
ds_read_b128 %0, %1 offset:1216\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2560)
if(offset == 1280)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2560 \n \
ds_read_b128 %0, %1 offset:1280\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2688)
if(offset == 1344)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2688 \n \
ds_read_b128 %0, %1 offset:1344\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2816)
if(offset == 1408)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2816 \n \
ds_read_b128 %0, %1 offset:1408\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2944)
if(offset == 1472)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2944 \n \
ds_read_b128 %0, %1 offset:1472\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3072)
if(offset == 1536)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3072 \n \
ds_read_b128 %0, %1 offset:1536\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3200)
if(offset == 1600)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3200 \n \
ds_read_b128 %0, %1 offset:1600\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3328)
if(offset == 1664)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3328 \n \
ds_read_b128 %0, %1 offset:1664\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3456)
if(offset == 1728)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3456 \n \
ds_read_b128 %0, %1 offset:1728\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3584)
if(offset == 1792)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3584 \n \
ds_read_b128 %0, %1 offset:1792\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3712)
if(offset == 1856)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3712 \n \
ds_read_b128 %0, %1 offset:1856\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3840)
if(offset == 1920)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3840 \n \
ds_read_b128 %0, %1 offset:1920\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3968)
if(offset == 1984)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3968 \n \
ds_read_b128 %0, %1 offset:1984\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 4096)
if(offset == 2048)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:4096 \n \
ds_read_b128 %0, %1 offset:2048\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 4352)
if(offset == 2112)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:4352 \n \
ds_read_b128 %0, %1 offset:2112\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
else
if(offset == 2176)
{
assert(false);
asm volatile("\n \
ds_read_b128 %0, %1 offset:2176\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2240)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2240\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2304)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2304\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2368)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2368\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2432)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2432\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2496)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2496\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2560)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2560\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2624)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2624\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2688)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2688\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2752)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2752\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2816)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2816\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2880)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2880\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 2944)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2944\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3008)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3008\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3072)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3072\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3136)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3136\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3200)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3200\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3264)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3264\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3328)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3328\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3392)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3392\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3456)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3456\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3520)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3520\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3584)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3584\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3648)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3648\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3712)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3712\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3776)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3776\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3840)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3840\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3904)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3904\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 3968)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3968\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 4032)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:4032\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
if(offset == 4096)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:4096\n \
"
: "=v"(r)
: "v"(__to_local(lds)));
}
#endif
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment