Commit 0d34441d authored by Robert McGibbon
Browse files

Fix thread synchronization in customCentroidBond.cl

parent 8eaf3c9c
...@@ -8,7 +8,7 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global ...@@ -8,7 +8,7 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
__local volatile real3 temp[64]; __local volatile real3 temp[64];
for (int group = get_group_id(0); group < NUM_GROUPS; group += get_num_groups(0)) { for (int group = get_group_id(0); group < NUM_GROUPS; group += get_num_groups(0)) {
// The threads in this block work together to compute the center of one group. // The threads in this block work together to compute the center of one group.
int firstIndex = groupOffsets[group]; int firstIndex = groupOffsets[group];
int lastIndex = groupOffsets[group+1]; int lastIndex = groupOffsets[group+1];
real3 center = (real3) 0; real3 center = (real3) 0;
...@@ -20,44 +20,48 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global ...@@ -20,44 +20,48 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
center.y += weight*pos.y; center.y += weight*pos.y;
center.z += weight*pos.z; center.z += weight*pos.z;
} }
// Sum the values. // Sum the values.
int thread = get_local_id(0); int thread = get_local_id(0);
temp[thread].x = center.x; temp[thread].x = center.x;
temp[thread].y = center.y; temp[thread].y = center.y;
temp[thread].z = center.z; temp[thread].z = center.z;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 32) { if (thread < 32) {
temp[thread].x += temp[thread+32].x; temp[thread].x += temp[thread+32].x;
temp[thread].y += temp[thread+32].y; temp[thread].y += temp[thread+32].y;
temp[thread].z += temp[thread+32].z; temp[thread].z += temp[thread+32].z;
SYNC_WARPS;
if (thread < 16) {
temp[thread].x += temp[thread+16].x;
temp[thread].y += temp[thread+16].y;
temp[thread].z += temp[thread+16].z;
SYNC_WARPS;
}
if (thread < 8) {
temp[thread].x += temp[thread+8].x;
temp[thread].y += temp[thread+8].y;
temp[thread].z += temp[thread+8].z;
SYNC_WARPS;
}
if (thread < 4) {
temp[thread].x += temp[thread+4].x;
temp[thread].y += temp[thread+4].y;
temp[thread].z += temp[thread+4].z;
SYNC_WARPS;
}
if (thread < 2) {
temp[thread].x += temp[thread+2].x;
temp[thread].y += temp[thread+2].y;
temp[thread].z += temp[thread+2].z;
SYNC_WARPS;
}
} }
SYNC_WARPS;
if (thread < 16) {
temp[thread].x += temp[thread+16].x;
temp[thread].y += temp[thread+16].y;
temp[thread].z += temp[thread+16].z;
}
SYNC_WARPS;
if (thread < 8) {
temp[thread].x += temp[thread+8].x;
temp[thread].y += temp[thread+8].y;
temp[thread].z += temp[thread+8].z;
}
SYNC_WARPS;
if (thread < 4) {
temp[thread].x += temp[thread+4].x;
temp[thread].y += temp[thread+4].y;
temp[thread].z += temp[thread+4].z;
}
SYNC_WARPS;
if (thread < 2) {
temp[thread].x += temp[thread+2].x;
temp[thread].y += temp[thread+2].y;
temp[thread].z += temp[thread+2].z;
}
SYNC_WARPS;
if (thread == 0) if (thread == 0)
centerPositions[group] = (real4) (temp[0].x+temp[1].x, temp[0].y+temp[1].y, temp[0].z+temp[1].z, 0); centerPositions[group] = (real4) (temp[0].x+temp[1].x, temp[0].y+temp[1].y, temp[0].z+temp[1].z, 0);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment