Commit dacebfd3 authored by Peter Eastman
Browse files

Really(?) fixed illegal memory access

parent 21804e95
...@@ -70,9 +70,9 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa ...@@ -70,9 +70,9 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa
float4 d1, d2, d3; float4 d1, d2, d3;
if (donorIndex < NUM_DONORS) { if (donorIndex < NUM_DONORS) {
atoms = donorAtoms[donorIndex]; atoms = donorAtoms[donorIndex];
d1 = posq[atoms.x]; d1 = (atoms.x > -1 ? posq[atoms.x] : (float4) 0);
d2 = posq[atoms.y]; d2 = (atoms.y > -1 ? posq[atoms.y] : (float4) 0);
d3 = posq[atoms.z]; d3 = (atoms.z > -1 ? posq[atoms.z] : (float4) 0);
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
exclusionIndices = exclusions[donorIndex]; exclusionIndices = exclusions[donorIndex];
#endif #endif
...@@ -83,11 +83,11 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa ...@@ -83,11 +83,11 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa
// Load the next block of acceptors into local memory. // Load the next block of acceptors into local memory.
int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart); int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart);
if (get_local_id(0) < blockSize && acceptorStart+get_local_id(0) < NUM_ACCEPTORS) { if (get_local_id(0) < blockSize) {
int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)]; int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)];
posBuffer[3*get_local_id(0)] = posq[atoms2.x]; posBuffer[3*get_local_id(0)] = (atoms2.x > -1 ? posq[atoms2.x] : (float4) 0);
posBuffer[3*get_local_id(0)+1] = posq[atoms2.y]; posBuffer[3*get_local_id(0)+1] = (atoms2.y > -1 ? posq[atoms2.y] : (float4) 0);
posBuffer[3*get_local_id(0)+2] = posq[atoms2.z]; posBuffer[3*get_local_id(0)+2] = (atoms2.z > -1 ? posq[atoms2.z] : (float4) 0);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (donorIndex < NUM_DONORS) { if (donorIndex < NUM_DONORS) {
...@@ -116,6 +116,7 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa ...@@ -116,6 +116,7 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa
// Write results // Write results
if (donorIndex < NUM_DONORS) {
int4 bufferIndices = donorBufferIndices[donorIndex]; int4 bufferIndices = donorBufferIndices[donorIndex];
if (atoms.x > -1) { if (atoms.x > -1) {
unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS; unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS;
...@@ -136,6 +137,7 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa ...@@ -136,6 +137,7 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa
forceBuffers[offset] = force; forceBuffers[offset] = force;
} }
} }
}
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
} }
/** /**
...@@ -155,9 +157,9 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl ...@@ -155,9 +157,9 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl
float4 a1, a2, a3; float4 a1, a2, a3;
if (acceptorIndex < NUM_ACCEPTORS) { if (acceptorIndex < NUM_ACCEPTORS) {
atoms = acceptorAtoms[acceptorIndex]; atoms = acceptorAtoms[acceptorIndex];
a1 = posq[atoms.x]; a1 = (atoms.x > -1 ? posq[atoms.x] : (float4) 0);
a2 = posq[atoms.y]; a2 = (atoms.y > -1 ? posq[atoms.y] : (float4) 0);
a3 = posq[atoms.z]; a3 = (atoms.z > -1 ? posq[atoms.z] : (float4) 0);
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
exclusionIndices = exclusions[acceptorIndex]; exclusionIndices = exclusions[acceptorIndex];
#endif #endif
...@@ -168,11 +170,11 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl ...@@ -168,11 +170,11 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl
// Load the next block of donors into local memory. // Load the next block of donors into local memory.
int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart); int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart);
if (get_local_id(0) < blockSize && donorStart+get_local_id(0) < NUM_DONORS) { if (get_local_id(0) < blockSize) {
int4 atoms2 = donorAtoms[donorStart+get_local_id(0)]; int4 atoms2 = donorAtoms[donorStart+get_local_id(0)];
posBuffer[3*get_local_id(0)] = posq[atoms2.x]; posBuffer[3*get_local_id(0)] = (atoms2.x > -1 ? posq[atoms2.x] : (float4) 0);
posBuffer[3*get_local_id(0)+1] = posq[atoms2.y]; posBuffer[3*get_local_id(0)+1] = (atoms2.y > -1 ? posq[atoms2.y] : (float4) 0);
posBuffer[3*get_local_id(0)+2] = posq[atoms2.z]; posBuffer[3*get_local_id(0)+2] = (atoms2.z > -1 ? posq[atoms2.z] : (float4) 0);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (acceptorIndex < NUM_ACCEPTORS) { if (acceptorIndex < NUM_ACCEPTORS) {
...@@ -201,6 +203,7 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl ...@@ -201,6 +203,7 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl
// Write results // Write results
if (acceptorIndex < NUM_ACCEPTORS) {
int4 bufferIndices = acceptorBufferIndices[acceptorIndex]; int4 bufferIndices = acceptorBufferIndices[acceptorIndex];
if (atoms.x > -1) { if (atoms.x > -1) {
unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS; unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS;
...@@ -221,4 +224,5 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl ...@@ -221,4 +224,5 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl
forceBuffers[offset] = force; forceBuffers[offset] = force;
} }
} }
}
} }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment