"vscode:/vscode.git/clone" did not exist on "da8c060873fb9120004627b2d3963154e1f8a68c"
Commit 22995e9a authored by Adam Osewski's avatar Adam Osewski
Browse files

Use bultin to only sync threads not LDS.

parent 2541812e
...@@ -9,26 +9,13 @@ struct workgroup_barrier ...@@ -9,26 +9,13 @@ struct workgroup_barrier
__device__ uint32_t ld(uint32_t offset) const __device__ uint32_t ld(uint32_t offset) const
{ {
#if 0
float d = llvm_amdgcn_raw_buffer_load_fp32(
amdgcn_make_buffer_resource(base_ptr),
0,
offset,
AMDGCN_BUFFER_GLC);
union cvt {
float f32;
uint32_t u32;
};
cvt x;
x.f32 = d;
return x.u32;
#endif
return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED); return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED);
} }
__device__ void st(uint32_t offset, uint32_t value) __device__ void st(uint32_t offset, uint32_t value)
{ {
__atomic_store_n(base_ptr + offset, value, __ATOMIC_RELEASE); __atomic_store_n(base_ptr + offset, value, __ATOMIC_RELEASE);
// __atomic_store_n(base_ptr + offset, value, __ATOMIC_SEQ_CST);
} }
__device__ void wait_eq(uint32_t offset, uint32_t value) __device__ void wait_eq(uint32_t offset, uint32_t value)
...@@ -37,7 +24,7 @@ struct workgroup_barrier ...@@ -37,7 +24,7 @@ struct workgroup_barrier
{ {
while(ld(offset) != value) {} while(ld(offset) != value) {}
} }
__syncthreads(); __builtin_amdgcn_s_barrier();
} }
__device__ void wait_lt(uint32_t offset, uint32_t value) __device__ void wait_lt(uint32_t offset, uint32_t value)
...@@ -46,7 +33,7 @@ struct workgroup_barrier ...@@ -46,7 +33,7 @@ struct workgroup_barrier
{ {
while(ld(offset) < value) {} while(ld(offset) < value) {}
} }
__syncthreads(); __builtin_amdgcn_s_barrier();
} }
__device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value) __device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
...@@ -55,7 +42,7 @@ struct workgroup_barrier ...@@ -55,7 +42,7 @@ struct workgroup_barrier
{ {
while(atomicCAS(base_ptr + offset, compare, value) != compare) {} while(atomicCAS(base_ptr + offset, compare, value) != compare) {}
} }
__syncthreads(); __builtin_amdgcn_s_barrier();
} }
// enter critical zoon, assume buffer is zero when launch kernel // enter critical zoon, assume buffer is zero when launch kernel
...@@ -66,20 +53,20 @@ struct workgroup_barrier ...@@ -66,20 +53,20 @@ struct workgroup_barrier
__device__ void inc(uint32_t offset) __device__ void inc(uint32_t offset)
{ {
__builtin_amdgcn_s_barrier();
if(threadIdx.x == 0) if(threadIdx.x == 0)
{ {
atomicAdd(base_ptr + offset, 1); atomicAdd(base_ptr + offset, 1);
} }
__syncthreads();
} }
__device__ void reset(uint32_t offset) __device__ void reset(uint32_t offset)
{ {
__builtin_amdgcn_s_barrier();
if(threadIdx.x == 0) if(threadIdx.x == 0)
{ {
st(offset, 0); st(offset, 0);
} }
__syncthreads();
} }
uint32_t* base_ptr; uint32_t* base_ptr;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment