Commit dacebfd3 authored by Peter Eastman
Browse files

Really(?) fixed illegal memory access

parent 21804e95
...@@ -70,9 +70,9 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa ...@@ -70,9 +70,9 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa
float4 d1, d2, d3; float4 d1, d2, d3;
if (donorIndex < NUM_DONORS) { if (donorIndex < NUM_DONORS) {
atoms = donorAtoms[donorIndex]; atoms = donorAtoms[donorIndex];
d1 = posq[atoms.x]; d1 = (atoms.x > -1 ? posq[atoms.x] : (float4) 0);
d2 = posq[atoms.y]; d2 = (atoms.y > -1 ? posq[atoms.y] : (float4) 0);
d3 = posq[atoms.z]; d3 = (atoms.z > -1 ? posq[atoms.z] : (float4) 0);
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
exclusionIndices = exclusions[donorIndex]; exclusionIndices = exclusions[donorIndex];
#endif #endif
...@@ -83,11 +83,11 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa ...@@ -83,11 +83,11 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa
// Load the next block of acceptors into local memory. // Load the next block of acceptors into local memory.
int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart); int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart);
if (get_local_id(0) < blockSize && acceptorStart+get_local_id(0) < NUM_ACCEPTORS) { if (get_local_id(0) < blockSize) {
int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)]; int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)];
posBuffer[3*get_local_id(0)] = posq[atoms2.x]; posBuffer[3*get_local_id(0)] = (atoms2.x > -1 ? posq[atoms2.x] : (float4) 0);
posBuffer[3*get_local_id(0)+1] = posq[atoms2.y]; posBuffer[3*get_local_id(0)+1] = (atoms2.y > -1 ? posq[atoms2.y] : (float4) 0);
posBuffer[3*get_local_id(0)+2] = posq[atoms2.z]; posBuffer[3*get_local_id(0)+2] = (atoms2.z > -1 ? posq[atoms2.z] : (float4) 0);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (donorIndex < NUM_DONORS) { if (donorIndex < NUM_DONORS) {
...@@ -116,24 +116,26 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa ...@@ -116,24 +116,26 @@ __kernel void computeDonorForces(__global float4* restrict forceBuffers, __globa
// Write results // Write results
int4 bufferIndices = donorBufferIndices[donorIndex]; if (donorIndex < NUM_DONORS) {
if (atoms.x > -1) { int4 bufferIndices = donorBufferIndices[donorIndex];
unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS; if (atoms.x > -1) {
float4 force = forceBuffers[offset]; unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS;
force.xyz += f1.xyz; float4 force = forceBuffers[offset];
forceBuffers[offset] = force; force.xyz += f1.xyz;
} forceBuffers[offset] = force;
if (atoms.y > -1) { }
unsigned int offset = atoms.y+bufferIndices.y*PADDED_NUM_ATOMS; if (atoms.y > -1) {
float4 force = forceBuffers[offset]; unsigned int offset = atoms.y+bufferIndices.y*PADDED_NUM_ATOMS;
force.xyz += f2.xyz; float4 force = forceBuffers[offset];
forceBuffers[offset] = force; force.xyz += f2.xyz;
} forceBuffers[offset] = force;
if (atoms.z > -1) { }
unsigned int offset = atoms.z+bufferIndices.z*PADDED_NUM_ATOMS; if (atoms.z > -1) {
float4 force = forceBuffers[offset]; unsigned int offset = atoms.z+bufferIndices.z*PADDED_NUM_ATOMS;
force.xyz += f3.xyz; float4 force = forceBuffers[offset];
forceBuffers[offset] = force; force.xyz += f3.xyz;
forceBuffers[offset] = force;
}
} }
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
...@@ -155,9 +157,9 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl ...@@ -155,9 +157,9 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl
float4 a1, a2, a3; float4 a1, a2, a3;
if (acceptorIndex < NUM_ACCEPTORS) { if (acceptorIndex < NUM_ACCEPTORS) {
atoms = acceptorAtoms[acceptorIndex]; atoms = acceptorAtoms[acceptorIndex];
a1 = posq[atoms.x]; a1 = (atoms.x > -1 ? posq[atoms.x] : (float4) 0);
a2 = posq[atoms.y]; a2 = (atoms.y > -1 ? posq[atoms.y] : (float4) 0);
a3 = posq[atoms.z]; a3 = (atoms.z > -1 ? posq[atoms.z] : (float4) 0);
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
exclusionIndices = exclusions[acceptorIndex]; exclusionIndices = exclusions[acceptorIndex];
#endif #endif
...@@ -168,11 +170,11 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl ...@@ -168,11 +170,11 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl
// Load the next block of donors into local memory. // Load the next block of donors into local memory.
int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart); int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart);
if (get_local_id(0) < blockSize && donorStart+get_local_id(0) < NUM_DONORS) { if (get_local_id(0) < blockSize) {
int4 atoms2 = donorAtoms[donorStart+get_local_id(0)]; int4 atoms2 = donorAtoms[donorStart+get_local_id(0)];
posBuffer[3*get_local_id(0)] = posq[atoms2.x]; posBuffer[3*get_local_id(0)] = (atoms2.x > -1 ? posq[atoms2.x] : (float4) 0);
posBuffer[3*get_local_id(0)+1] = posq[atoms2.y]; posBuffer[3*get_local_id(0)+1] = (atoms2.y > -1 ? posq[atoms2.y] : (float4) 0);
posBuffer[3*get_local_id(0)+2] = posq[atoms2.z]; posBuffer[3*get_local_id(0)+2] = (atoms2.z > -1 ? posq[atoms2.z] : (float4) 0);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (acceptorIndex < NUM_ACCEPTORS) { if (acceptorIndex < NUM_ACCEPTORS) {
...@@ -201,24 +203,26 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl ...@@ -201,24 +203,26 @@ __kernel void computeAcceptorForces(__global float4* restrict forceBuffers, __gl
// Write results // Write results
int4 bufferIndices = acceptorBufferIndices[acceptorIndex]; if (acceptorIndex < NUM_ACCEPTORS) {
if (atoms.x > -1) { int4 bufferIndices = acceptorBufferIndices[acceptorIndex];
unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS; if (atoms.x > -1) {
float4 force = forceBuffers[offset]; unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS;
force.xyz += f1.xyz; float4 force = forceBuffers[offset];
forceBuffers[offset] = force; force.xyz += f1.xyz;
} forceBuffers[offset] = force;
if (atoms.y > -1) { }
unsigned int offset = atoms.y+bufferIndices.y*PADDED_NUM_ATOMS; if (atoms.y > -1) {
float4 force = forceBuffers[offset]; unsigned int offset = atoms.y+bufferIndices.y*PADDED_NUM_ATOMS;
force.xyz += f2.xyz; float4 force = forceBuffers[offset];
forceBuffers[offset] = force; force.xyz += f2.xyz;
} forceBuffers[offset] = force;
if (atoms.z > -1) { }
unsigned int offset = atoms.z+bufferIndices.z*PADDED_NUM_ATOMS; if (atoms.z > -1) {
float4 force = forceBuffers[offset]; unsigned int offset = atoms.z+bufferIndices.z*PADDED_NUM_ATOMS;
force.xyz += f3.xyz; float4 force = forceBuffers[offset];
forceBuffers[offset] = force; force.xyz += f3.xyz;
forceBuffers[offset] = force;
}
} }
} }
} }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment