Commit 9abaa587 authored by Peter Eastman
Browse files

Fixed race condition in sorting

parent 73cac8e6
...@@ -144,6 +144,7 @@ __global__ void computeBucketPositions(unsigned int numBuckets, unsigned int* __ ...@@ -144,6 +144,7 @@ __global__ void computeBucketPositions(unsigned int numBuckets, unsigned int* __
// Load the bucket sizes into local memory. // Load the bucket sizes into local memory.
unsigned int globalIndex = startBucket+threadIdx.x; unsigned int globalIndex = startBucket+threadIdx.x;
__syncthreads();
posBuffer[threadIdx.x] = (globalIndex < numBuckets ? bucketOffset[globalIndex] : 0); posBuffer[threadIdx.x] = (globalIndex < numBuckets ? bucketOffset[globalIndex] : 0);
__syncthreads(); __syncthreads();
...@@ -218,6 +219,7 @@ __global__ void sortBuckets(DATA_TYPE* __restrict__ data, const DATA_TYPE* __res ...@@ -218,6 +219,7 @@ __global__ void sortBuckets(DATA_TYPE* __restrict__ data, const DATA_TYPE* __res
if (threadIdx.x < length) if (threadIdx.x < length)
data[startIndex+threadIdx.x] = dataBuffer[threadIdx.x]; data[startIndex+threadIdx.x] = dataBuffer[threadIdx.x];
__syncthreads();
} }
else { else {
// Copy the bucket data over to the output array. // Copy the bucket data over to the output array.
......
...@@ -147,6 +147,7 @@ __kernel void computeBucketPositions(uint numBuckets, __global uint* restrict bu ...@@ -147,6 +147,7 @@ __kernel void computeBucketPositions(uint numBuckets, __global uint* restrict bu
// Load the bucket sizes into local memory. // Load the bucket sizes into local memory.
uint globalIndex = startBucket+get_local_id(0); uint globalIndex = startBucket+get_local_id(0);
barrier(CLK_LOCAL_MEM_FENCE);
buffer[get_local_id(0)] = (globalIndex < numBuckets ? bucketOffset[globalIndex] : 0); buffer[get_local_id(0)] = (globalIndex < numBuckets ? bucketOffset[globalIndex] : 0);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -220,6 +221,7 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA ...@@ -220,6 +221,7 @@ __kernel void sortBuckets(__global DATA_TYPE* restrict data, __global const DATA
if (get_local_id(0) < length) if (get_local_id(0) < length)
data[startIndex+get_local_id(0)] = buffer[get_local_id(0)]; data[startIndex+get_local_id(0)] = buffer[get_local_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
} }
else { else {
// Copy the bucket data over to the output array. // Copy the bucket data over to the output array.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment