Unverified commit 44daf269, authored by peastman and committed by GitHub
Browse files

Merge pull request #1986 from peastman/error

Fixed compilation error in kernel
parents 32400ee5 de666e30
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
*/ */
__device__ real reduceValue(real value, volatile real* temp) { __device__ real reduceValue(real value, volatile real* temp) {
const int thread = threadIdx.x; const int thread = threadIdx.x;
__syncthreads();
temp[thread] = value; temp[thread] = value;
__syncthreads(); __syncthreads();
for (uint step = 1; step < 32; step *= 2) { for (uint step = 1; step < 32; step *= 2) {
......
...@@ -6,12 +6,13 @@ ...@@ -6,12 +6,13 @@
*/ */
real reduceValue(real value, __local volatile real* temp) { real reduceValue(real value, __local volatile real* temp) {
const int thread = get_local_id(0); const int thread = get_local_id(0);
barrier(CLK_LOCAL_MEM_FENCE);
temp[thread] = value; temp[thread] = value;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for (uint step = 1; step < 32; step *= 2) { for (uint step = 1; step < 32; step *= 2) {
if (thread+step < get_local_size(0) && thread%(2*step) == 0) if (thread+step < get_local_size(0) && thread%(2*step) == 0)
temp[thread] = temp[thread] + temp[thread+step]; temp[thread] = temp[thread] + temp[thread+step];
SYNC_WARPS SYNC_WARPS;
} }
for (uint step = 32; step < get_local_size(0); step *= 2) { for (uint step = 32; step < get_local_size(0); step *= 2) {
if (thread+step < get_local_size(0) && thread%(2*step) == 0) if (thread+step < get_local_size(0) && thread%(2*step) == 0)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment