"src/include/utility.hpp" did not exist on "f6cb5b846d1eff1d1e35ab58273becfd40bd0831"
Commit 977fc1ed authored by Jing Zhang's avatar Jing Zhang
Browse files

clean

parent e6230689
......@@ -610,10 +610,10 @@ int main(int argc, char* argv[])
#elif 1
// 1x1 filter, 14x14 image, C = 512
constexpr index_t N = 128;
constexpr index_t C = 128;
constexpr index_t C = 512;
constexpr index_t HI = 14;
constexpr index_t WI = 14;
constexpr index_t K = 128;
constexpr index_t K = 512;
constexpr index_t Y = 1;
constexpr index_t X = 1;
......
......@@ -13,7 +13,7 @@ extern "C" __attribute__((address_space(3))) void* __to_local(void* p)[[hc]];
#define data4_t vector_type<float, 4>::MemoryType
#define data_t float
template<unsigned cnt>
template<unsigned cnt>
inline __device__ void vmcnt()
{
#if !NO_VM_WAIT
......@@ -95,8 +95,8 @@ inline __device__ void lgkmcnt(index_t cnt)
}
inline __device__ void outerProduct1x4(const data_t& a,
const data4_t& b,
data4_t& c)
const data4_t& b,
data4_t& c)
{
#if 0
asm volatile(
......@@ -111,8 +111,8 @@ inline __device__ void outerProduct1x4(const data_t& a,
"v"(a.x),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
);
#else
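//workaround for a compiler issue: it cannot perform proper register
//mapping for float4 c, so the four components of c are passed to the asm separately
//workaround for a compiler issue: it cannot perform proper register
//mapping for float4 c, so the four components of c are passed to the asm separately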
data_t *c_p = (data_t *)&c;
asm volatile("\n \
v_mac_f32 %0, %4, %5 \n \
......@@ -120,25 +120,25 @@ inline __device__ void outerProduct1x4(const data_t& a,
v_mac_f32 %2, %4, %7 \n \
v_mac_f32 %3, %4, %8 \n \
"
: "=v"(c[0]), "=v"(c[1]), "=v"(c[2]), "=v"(c[3])
: "v"(a),
"v"(b.x),
"v"(b.y),
"v"(b.z),
"v"(b.w),
"0"(c[0]),
"1"(c[1]),
"2"(c[2]),
"3"(c[3]));
: "=v"(c[0]), "=v"(c[1]), "=v"(c[2]), "=v"(c[3])
: "v"(a),
"v"(b.x),
"v"(b.y),
"v"(b.z),
"v"(b.w),
"0"(c[0]),
"1"(c[1]),
"2"(c[2]),
"3"(c[3]));
#endif
}
inline __device__ void outerProduct4x4(const data4_t& a,
const data4_t& b,
data4_t& c0,
data4_t& c1,
data4_t& c2,
data4_t& c3)
const data4_t& b,
data4_t& c0,
data4_t& c1,
data4_t& c2,
data4_t& c3)
{
outerProduct1x4(a.x, b, c0);
outerProduct1x4(a.y, b, c1);
......@@ -147,8 +147,8 @@ inline __device__ void outerProduct4x4(const data4_t& a,
}
inline __device__ void outerProduct8x8(const data4_t* a,
const data4_t* b,
data4_t* c)
const data4_t* b,
data4_t* c)
{
outerProduct4x4(a[0], b[0], c[0], c[2], c[4], c[6]);
outerProduct4x4(a[0], b[1], c[1], c[3], c[5], c[7]);
......@@ -164,216 +164,216 @@ inline __device__ void ds_read_b128(data4_t& r, void* lds, index_t offset = 0)
asm volatile("\n \
ds_read_b128 %0, %1 offset:0 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 128)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:128 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 256)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:256 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 384)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:384 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 512)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:512 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 640)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:640 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 768)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:768 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 896)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:896 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1024)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1024 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1152)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1152 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1280)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1280 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1408)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1408 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1536)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1536 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1664)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1664 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1792)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1792 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 1920)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:1920 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2048)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2048 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2176)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2176 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2304)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2304 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2560)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2560 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 2816)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:2816 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3072)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3072 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3328)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3328 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3584)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3584 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 3840)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:3840 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 4096)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:4096 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else if(offset == 4352)
{
asm volatile("\n \
ds_read_b128 %0, %1 offset:4352 \n \
"
: "=v"(r)
: "v"(__to_local(lds)));
: "=v"(r)
: "v"(__to_local(lds)));
}
else
{
......@@ -383,8 +383,8 @@ inline __device__ void ds_read_b128(data4_t& r, void* lds, index_t offset = 0)
}
inline __device__ void global_store(data4_t& r,
const void* vptr,
const void* sprt = 0)
const void* vptr,
const void* sprt = 0)
{
#if !NO_GLB_READ
if(sprt == 0)
......@@ -407,8 +407,8 @@ inline __device__ void global_store(data4_t& r,
inline __device__ void global_load(data4_t& r,
const void* vptr,
const void* sprt = 0)
const void* vptr,
const void* sprt = 0)
{
#if !NO_GLB_READ
if(sprt == 0)
......@@ -421,26 +421,26 @@ inline __device__ void global_load(data4_t& r,
}
else
{
asm volatile("\n \
global_load_dwordx4 %0, %1, %2 \n \
"
: "=v"(r)
: "v"(vptr), "s"(sprt));
asm volatile("\n \
global_load_dwordx4 %0, %1, %2 \n \
"
: "=v"(r)
: "v"(vptr), "s"(sprt));
}
#endif
}
inline __device__ void
inline __device__ void
ds_write_b128(const data4_t& r, void* lds, index_t offset = 0)
{
#if !NO_DS_WRITE
if(offset == 0)
{
asm volatile("\n \
ds_write_b128 %0, %1 \n \
"
:
: "v"(__to_local(lds)), "v"(r));
ds_write_b128 %0, %1 \n \
"
:
: "v"(__to_local(lds)), "v"(r));
}
else
{
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment